npm - clawmem - Versions diffs - 0.8.4 → 0.9.0 - Mend

clawmem 0.8.4 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/AGENTS.md +31 -20
package/CLAUDE.md +21 -9
package/README.md +20 -22
package/SKILL.md +22 -9
package/package.json +1 -1
package/src/amem.ts +8 -1
package/src/clawmem.ts +97 -0
package/src/config.ts +14 -3
package/src/entity.ts +63 -0
package/src/hooks/context-surfacing.ts +87 -6
package/src/hooks/decision-extractor.ts +145 -115
package/src/mcp.ts +19 -6
package/src/observer.ts +132 -15
package/src/session-focus.ts +227 -0
package/src/store.ts +5 -0
package/src/vault-facts.ts +506 -0

package/src/hooks/context-surfacing.ts CHANGED Viewed

@@ -31,6 +31,12 @@ import { sanitizeSnippet } from "../promptguard.ts";
 import { shouldSkipRetrieval, isRetrievedNoise } from "../retrieval-gate.ts";
 import { MAX_QUERY_LENGTH } from "../limits.ts";
 import { writeRecallEvents, hashQuery } from "../recall-buffer.ts";
+import { resolveSessionTopic, applyTopicBoost } from "../session-focus.ts";
+import {
+  extractPromptEntities,
+  buildVaultFactsBlock,
+  type VaultFactsTriple,
+} from "../vault-facts.ts";
 // =============================================================================
 // Config
@@ -143,6 +149,20 @@ export async function contextSurfacing(
   const tokenBudget = profile.tokenBudget;
   const startTime = Date.now();
+  // §11.4: Resolve session-scoped focus topic. Primary signal is the
+  // per-session focus file at ~/.cache/clawmem/sessions/<id>.focus
+  // (file > env var precedence via resolveSessionTopic). Env var
+  // CLAWMEM_SESSION_FOCUS is a debug-only fallback and does NOT
+  // provide per-session scoping on multi-session hosts. Used as
+  // (a) optional `intent` on expandQuery/rerank/extractSnippet call
+  // sites below, and (b) the driver for the post-composite topic
+  // boost stage. Fail-open: missing / unreadable / corrupt / empty /
+  // oversized focus file → undefined → every consumer no-ops.
+  const sessionTopic = resolveSessionTopic(
+    input.sessionId,
+    process.env.CLAWMEM_SESSION_FOCUS
+  );
   const isRecency = hasRecencyIntent(prompt);
   const minScore = isRecency ? MIN_COMPOSITE_SCORE_RECENCY : profile.minScore;
@@ -239,7 +259,7 @@ export async function contextSurfacing(
     if (elapsed < profile.escalationBudgetMs) {
       try {
         // Phase 1: Query expansion — discover candidates BM25+vector missed
-        const expanded = await store.expandQuery(retrievalQuery, DEFAULT_QUERY_MODEL);
+        const expanded = await store.expandQuery(retrievalQuery, DEFAULT_QUERY_MODEL, sessionTopic);
         if (expanded.length > 0) {
           const seen = new Set(results.map(r => r.filepath));
           for (const eq of expanded.slice(0, 3)) {
@@ -263,7 +283,7 @@ export async function contextSurfacing(
             file: r.filepath,
             text: (r.body || "").slice(0, 2000),
           }));
-          const reranked = await store.rerank(prompt, toRerank, DEFAULT_RERANK_MODEL);
+          const reranked = await store.rerank(prompt, toRerank, DEFAULT_RERANK_MODEL, sessionTopic);
           if (reranked.length > 0) {
             const rerankedMap = new Map(reranked.map(r => [r.file, r.score]));
             // Blend: 60% original score + 40% reranker score for stability
@@ -335,6 +355,15 @@ export async function contextSurfacing(
   // Apply composite scoring
   const allScored = applyCompositeScoring(enriched, prompt);
+  // §11.4: Session-scoped topic boost — post-composite, pre-threshold.
+  // Boosts docs whose title/path/body match all tokens of the declared
+  // session focus topic (1.4×); demotes non-matching docs (0.75×, floor
+  // 50%). Mutates compositeScore in place and re-sorts. Fail-open: no
+  // topic set → no-op (byte-identical pre-§11.4 output).
+  if (sessionTopic) {
+    applyTopicBoost(allScored, sessionTopic, { boostFactor: 1.4, demoteFactor: 0.75 });
+  }
   // Threshold filtering — adaptive (ratio-based) or absolute (legacy)
   let scored: typeof allScored;
   if (profile.thresholdMode === "adaptive") {
@@ -400,7 +429,7 @@ export async function contextSurfacing(
   // in afterward using whatever budget remains and are the first thing
   // truncated when the payload would overflow.
   const factsBudget = Math.max(0, tokenBudget - INSTRUCTION_TOKEN_COST);
-  const { context, paths, tokens } = buildContext(scored, prompt, factsBudget);
+  const { context, paths, tokens } = buildContext(scored, prompt, factsBudget, sessionTopic);
   if (!context) {
     logEmptyTurn(store, input, prompt);
@@ -489,9 +518,60 @@ export async function contextSurfacing(
   );
   const vaultInner = buildVaultContextInner(context, relationSnippets, relationBudget);
+  // §11.1 (v0.9.0): `<vault-facts>` KG injection.
+  //
+  // Stage ordering (frozen in BACKLOG.md §11.1): retrieval + rerank +
+  // scoring + topic boost (§11.4) + threshold + diversification → build
+  // <facts>/<relationships> → compute remaining facts-block budget →
+  // inject <vault-facts> if entities resolve AND budget allows.
+  //
+  // Prompt-only seeding (HARD CONSTRAINT): entity seeds come from the
+  // raw user prompt ONLY, never from `surfacedDocs[i].body`, snippets,
+  // or any retrieval-phase field. Without this, a topic-boosted
+  // off-topic doc (§11.4) could pollute the facts block with facts
+  // about entities that have nothing to do with the user's actual
+  // prompt.
+  //
+  // Profile-gated via `profile.factsTokens`: `speed` profile sets this
+  // to 0, which naturally disables the stage. `balanced`/`deep` get a
+  // dedicated sub-budget that cannot steal from <facts>/<relationships>.
+  //
+  // Fail-open: any DB error, empty entity set, empty triple set, or
+  // budget-too-small case returns the baseline `vaultInner` unchanged
+  // (byte-identical pre-§11.1 output).
+  let vaultInnerWithFacts = vaultInner;
+  if (profile.factsTokens > 0) {
+    try {
+      const entities = extractPromptEntities(prompt, store.db, "default");
+      if (entities.length > 0) {
+        const queryTriples = (entityId: string): VaultFactsTriple[] =>
+          store
+            .queryEntityTriples(entityId)
+            .map(t => ({
+              subject: t.subject,
+              predicate: t.predicate,
+              object: t.object,
+              validTo: t.validTo,
+              confidence: t.confidence,
+            }));
+        const factsBlock = buildVaultFactsBlock(
+          entities,
+          queryTriples,
+          profile.factsTokens,
+          { estimateTokens }
+        );
+        if (factsBlock) {
+          vaultInnerWithFacts = `${vaultInner}\n${factsBlock}`;
+        }
+      }
+    } catch {
+      /* fail-open: degraded vault behaves identically to pre-§11.1 */
+    }
+  }
   const parts: string[] = [];
   if (routingHint) parts.push(`<vault-routing>${routingHint}</vault-routing>`);
-  parts.push(`<vault-context>\n${vaultInner}\n</vault-context>`);
+  parts.push(`<vault-context>\n${vaultInnerWithFacts}\n</vault-context>`);
   if (nudge) parts.push(`<vault-nudge>${NUDGE_TEXT}</vault-nudge>`);
   return makeContextOutput("context-surfacing", parts.join("\n"));
@@ -552,7 +632,8 @@ function detectRoutingHint(prompt: string): string | null {
 function buildContext(
   scored: ScoredResult[],
   query: string,
-  budget: number = DEFAULT_TOKEN_BUDGET
+  budget: number = DEFAULT_TOKEN_BUDGET,
+  intent?: string
 ): { context: string; paths: string[]; tokens: number } {
   const lines: string[] = [];
   const paths: string[] = [];
@@ -579,7 +660,7 @@ function buildContext(
       if (sanitized === "[content filtered for security]") continue;
       const snippet = smartTruncate(
-        extractSnippet(sanitized, query, tier.snippetLen, r.chunkPos).snippet,
+        extractSnippet(sanitized, query, tier.snippetLen, r.chunkPos, intent).snippet,
         tier.snippetLen
       );
       entry = `**${safeTitle}**${typeTag}\n${safePath}\n${snippet}`;

package/src/hooks/decision-extractor.ts CHANGED Viewed

@@ -17,13 +17,23 @@ import {
   validateTranscriptPath,
 } from "../hooks.ts";
 import { hashContent } from "../indexer.ts";
-import { extractObservations, type Observation } from "../observer.ts";
+import { extractObservations, type Observation, LITERAL_PREDICATES } from "../observer.ts";
 import { updateDirectoryContext } from "../directory-context.ts";
 import { loadConfig } from "../collections.ts";
 import { getDefaultLlamaCpp } from "../llm.ts";
 import type { ObservationWithDoc } from "../amem.ts";
 import { extractJsonFromLLM } from "../amem.ts";
 import { DEFAULT_EMBED_MODEL, extractSnippet, type SearchResult } from "../store.ts";
+import { ensureEntityCanonical, resolveEntityTypeExact } from "../entity.ts";
+// Observation types that are allowed to contribute SPO triples. Widened from the
+// original {decision, preference, milestone, problem} gate, which rejected 77% of
+// real observations in production vaults (the majority type is 'discovery').
+// See BACKLOG.md §1.6 for the full diagnosis.
+const SPO_ELIGIBLE_OBSERVATION_TYPES = new Set<Observation["type"]>([
+  "decision", "preference", "milestone", "problem",
+  "discovery", "feature",
+]);
 // =============================================================================
 // Facet-Based Merge Policy
@@ -325,42 +335,8 @@ export async function decisionExtractor(
   const observationsWithDocs: ObservationWithDoc[] = [];
   if (observations.length > 0) {
     for (const obs of observations) {
-      const obsPath = `observations/${dateStr}-${sessionId.slice(0, 8)}-${obs.type}.md`;
-      const obsBody = formatObservation(obs, dateStr, sessionId);
-      const obsHash = hashContent(obsBody);
-      store.insertContent(obsHash, obsBody, timestamp);
-      try {
-        store.insertDocument("_clawmem", obsPath, obs.title, obsHash, timestamp, timestamp);
-        const doc = store.findActiveDocument("_clawmem", obsPath);
-        if (doc) {
-          store.updateDocumentMeta(doc.id, {
-            content_type: obs.type === "decision" ? "decision"
-              : obs.type === "preference" ? "preference"
-              : obs.type === "milestone" ? "milestone"
-              : obs.type === "problem" ? "problem"
-              : "observation",
-            confidence: 0.80,
-          });
-          store.updateObservationFields(obsPath, "_clawmem", {
-            observation_type: obs.type,
-            facts: JSON.stringify(obs.facts),
-            narrative: obs.narrative,
-            concepts: JSON.stringify(obs.concepts),
-            files_read: JSON.stringify(obs.filesRead),
-            files_modified: JSON.stringify(obs.filesModified),
-          });
-          if (obs.facts.length > 0) {
-            observationsWithDocs.push({
-              docId: doc.id,
-              facts: obs.facts,
-            });
-          }
-        }
-      } catch {
-        // May already exist
-      }
+      const wit = persistObservationDoc(store, obs, sessionId, dateStr, timestamp);
+      if (wit) observationsWithDocs.push(wit);
     }
     // Infer causal links from observations with facts
@@ -375,31 +351,12 @@ export async function decisionExtractor(
       }
     }
-    // Extract SPO triples from observation facts (preference/decision types get priority)
-    for (const obs of observations) {
-      if (!obs.facts || obs.facts.length === 0) continue;
-      for (const fact of obs.facts) {
-        const triple = extractTripleFromFact(fact, obs.type);
-        if (triple) {
-          try {
-            store.db.prepare(
-              "INSERT OR IGNORE INTO entity_nodes (entity_id, name, entity_type, created_at) VALUES (?, ?, ?, ?)"
-            ).run(triple.subjectId, triple.subject, "auto", new Date().toISOString());
-            if (triple.objectId) {
-              store.db.prepare(
-                "INSERT OR IGNORE INTO entity_nodes (entity_id, name, entity_type, created_at) VALUES (?, ?, ?, ?)"
-              ).run(triple.objectId, triple.object, "auto", new Date().toISOString());
-            }
-            store.addTriple(triple.subjectId, triple.predicate, triple.objectId, triple.objectId ? null : triple.object, {
-              confidence: obs.type === "decision" || obs.type === "preference" ? 0.9 : 0.7,
-              sourceFact: fact,
-            });
-          } catch {
-            // Triple insertion errors are non-fatal
-          }
-        }
-      }
-    }
+    // Extract SPO triples from observation-emitted <triples> blocks (Fix A).
+    // The regex-based extractTripleFromFact is gone — the observer LLM now emits
+    // structured triples alongside facts, parsed and validated in parseObservationXml.
+    // We iterate observationsWithDocs (not raw observations) so every triple gets
+    // real source_doc_id provenance from the persisted observation document (Fix F).
+    insertObservationTriples(store, observations, observationsWithDocs);
   }
   // Extract decisions (observer-first, regex fallback)
@@ -691,67 +648,140 @@ function formatObservation(obs: Observation, dateStr: string, sessionId: string)
 }
 // =============================================================================
-// SPO Triple Extraction from Facts
+// Observation persistence
 // =============================================================================
-type ExtractedTriple = {
-  subject: string;
-  subjectId: string;
-  predicate: string;
-  object: string;
-  objectId: string | null;
-};
+/**
+ * Persist a single observation as a `_clawmem` document and return an
+ * `ObservationWithDoc` for downstream consumers (causal inference + SPO
+ * triples).
+ *
+ * Path format: `observations/${date}-${session8}-${type}-${hash8}.md`. The
+ * 8-char hash slice (SHA256 of the formatted body) disambiguates multiple
+ * observations of the same type within a single session — without it, the
+ * second insert hits the `UNIQUE(collection, path)` constraint, is silently
+ * dropped, and its triples never reach `entity_triples`. See Codex Turn 3
+ * for the regression this guards against.
+ *
+ * Returns null when the doc cannot be looked up after insert OR when the
+ * observation has no facts (triples without facts wouldn't survive the
+ * causal-links/facts filter downstream).
+ */
+export function persistObservationDoc(
+  store: Store,
+  obs: Observation,
+  sessionId: string,
+  dateStr: string,
+  timestamp: string
+): ObservationWithDoc | null {
+  const obsBody = formatObservation(obs, dateStr, sessionId);
+  const obsHash = hashContent(obsBody);
+  const obsPath = `observations/${dateStr}-${sessionId.slice(0, 8)}-${obs.type}-${obsHash.slice(0, 8)}.md`;
+  store.insertContent(obsHash, obsBody, timestamp);
+  try {
+    store.insertDocument("_clawmem", obsPath, obs.title, obsHash, timestamp, timestamp);
+    const doc = store.findActiveDocument("_clawmem", obsPath);
+    if (!doc) return null;
+    store.updateDocumentMeta(doc.id, {
+      content_type: obs.type === "decision" ? "decision"
+        : obs.type === "preference" ? "preference"
+        : obs.type === "milestone" ? "milestone"
+        : obs.type === "problem" ? "problem"
+        : "observation",
+      confidence: 0.80,
+    });
+    store.updateObservationFields(obsPath, "_clawmem", {
+      observation_type: obs.type,
+      facts: JSON.stringify(obs.facts),
+      narrative: obs.narrative,
+      concepts: JSON.stringify(obs.concepts),
+      files_read: JSON.stringify(obs.filesRead),
+      files_modified: JSON.stringify(obs.filesModified),
+    });
-function toEntityId(name: string): string {
-  return name.toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/^_|_$/g, "");
+    if (obs.facts.length === 0) return null;
+    return {
+      docId: doc.id,
+      facts: obs.facts,
+      obsType: obs.type,
+      triples: obs.triples,
+    };
+  } catch (err) {
+    console.log(`[decision-extractor] Failed to persist observation ${obs.type}/${obs.title}:`, err);
+    return null;
+  }
 }
-function extractTripleFromFact(fact: string, obsType: string): ExtractedTriple | null {
-  // Only extract from decision/preference/milestone/problem types — skip noisy bugfix/feature/change facts
-  if (!["decision", "preference", "milestone", "problem"].includes(obsType)) return null;
+// =============================================================================
+// SPO Triple Extraction from Facts
+// =============================================================================
-  // Conservative verb patterns — only clear relational predicates
-  const verbPatterns = [
-    /^(.+?)\s+(chose|selected|switched to|migrated to|adopted)\s+(.+?)\.?$/i,
-    /^(.+?)\s+(deployed to|runs on|hosted on|installed on)\s+(.+?)\.?$/i,
-    /^(.+?)\s+(replaced|superseded|deprecated)\s+(.+?)\.?$/i,
-    /^(.+?)\s+(depends on|integrates with|connects to)\s+(.+?)\.?$/i,
-  ];
+/**
+ * Insert SPO triples emitted by the observer into `entity_triples`.
+ *
+ * Uses canonical vault:type:slug entity IDs via `ensureEntityCanonical` so the
+ * knowledge graph stays in one namespace with A-MEM entities. Type inheritance
+ * is exact-match-only and ambiguity-safe: if a name resolves to exactly one type
+ * already in `entity_nodes`, inherit it; otherwise default to `concept`.
+ *
+ * Provenance: every triple carries `source_doc_id` from the persisted observation
+ * document. Iterates `observationsWithDocs` directly so triples from observations
+ * whose doc insert failed are naturally skipped — no order-matching gymnastics.
+ */
+function insertObservationTriples(
+  store: Store,
+  _observations: Observation[],
+  observationsWithDocs: ObservationWithDoc[]
+): void {
+  if (observationsWithDocs.length === 0) return;
+  // Per-invocation cache keyed on (vault, resolvedType, normalizedName) to avoid
+  // redundant SQL for repeated entity references within a single extraction.
+  const vault = "default";
+  const cache = new Map<string, string>();
+  const resolveEntity = (name: string, type: string): string => {
+    const key = `${vault}:${type}:${name.toLowerCase().trim()}`;
+    const cached = cache.get(key);
+    if (cached) return cached;
+    const id = ensureEntityCanonical(store.db, name, type, vault);
+    cache.set(key, id);
+    return id;
+  };
+  for (const wit of observationsWithDocs) {
+    if (!wit.triples || wit.triples.length === 0) continue;
+    const obsType = wit.obsType as Observation["type"] | undefined;
+    if (!obsType || !SPO_ELIGIBLE_OBSERVATION_TYPES.has(obsType)) continue;
+    const confidence = obsType === "decision" || obsType === "preference" ? 0.9 : 0.7;
+    for (const triple of wit.triples) {
+      try {
+        const subjectType = resolveEntityTypeExact(store.db, triple.subject, vault) ?? "concept";
+        const subjectId = resolveEntity(triple.subject, subjectType);
-  for (const pattern of verbPatterns) {
-    const match = fact.match(pattern);
-    if (match) {
-      const subject = match[1]!.trim();
-      const predicate = match[2]!.trim();
-      const object = match[3]!.trim();
-      // Reject subjects/objects that look like sentences rather than entity names
-      if (subject.length < 3 || object.length < 3 || subject.length > 60 || object.length > 60) continue;
-      if (subject.includes(",") || object.includes(",")) continue; // likely a clause, not an entity
-      return {
-        subject,
-        subjectId: toEntityId(subject),
-        predicate: predicate.toLowerCase().replace(/\s+/g, "_"),
-        object,
-        objectId: toEntityId(object),
-      };
-    }
-  }
+        let objectId: string | null = null;
+        let objectLiteral: string | null = null;
+        if (LITERAL_PREDICATES.has(triple.predicate)) {
+          objectLiteral = triple.object;
+        } else {
+          const objectType = resolveEntityTypeExact(store.db, triple.object, vault) ?? "concept";
+          objectId = resolveEntity(triple.object, objectType);
+        }
-  // Preference facts only: "User prefers X" / "Prefers X"
-  if (obsType === "preference") {
-    const prefMatch = fact.match(/^(?:user\s+)?(?:prefers?|avoids?)\s+(.+?)\.?$/i);
-    if (prefMatch && prefMatch[1]!.trim().length > 2) {
-      return {
-        subject: "user",
-        subjectId: "user",
-        predicate: "prefers",
-        object: prefMatch[1]!.trim(),
-        objectId: null, // literal, not entity
-      };
+        store.addTriple(subjectId, triple.predicate, objectId, objectLiteral, {
+          confidence,
+          sourceFact: `${triple.subject} ${triple.predicate} ${triple.object}`,
+          sourceDocId: wit.docId,
+        });
+      } catch (err) {
+        // Triple insertion errors are non-fatal — log the failure and continue with the next triple
+        console.log(`[decision-extractor] Failed to insert triple ${triple.subject}/${triple.predicate}/${triple.object}:`, err);
+      }
     }
   }
-  return null;
 }

package/src/mcp.ts CHANGED Viewed

@@ -1930,9 +1930,9 @@ This is the recommended entry point for ALL memory queries.`,
     "kg_query",
     {
       title: "Knowledge Graph Query",
-      description: "Query the knowledge graph for an entity's relationships. Returns structured facts with temporal validity (valid_from/valid_to). Use for 'what does X relate to?', 'what was true about X on date Y?', 'who/what is connected to X?'.",
+      description: "Query the knowledge graph for an entity's relationships. Returns structured facts with temporal validity (valid_from/valid_to). Use for 'what does X relate to?', 'what was true about X on date Y?', 'who/what is connected to X?'. Accepts an entity name (e.g. 'ClawMem') OR a canonical entity ID in the form 'vault:type:slug' (e.g. 'default:service:clawmem').",
       inputSchema: {
-        entity: z.string().describe("Entity name or ID to query"),
+        entity: z.string().describe("Entity name or canonical ID ('vault:type:slug') to query"),
         as_of: z.string().optional().describe("Date filter (YYYY-MM-DD) — only facts valid at this date"),
         direction: z.enum(["outgoing", "incoming", "both"]).optional().default("both").describe("Relationship direction"),
         vault: z.string().optional().describe("Named vault (omit for default vault)"),
@@ -1941,17 +1941,30 @@ This is the recommended entry point for ALL memory queries.`,
     async ({ entity, as_of, direction, vault }) => {
       const store = getStore(vault);
+      // Canonical IDs look like `vault:type:slug`. When the name search below finds
+      // nothing, accept such an ID as-is so callers that already resolved an entity
+      // can round-trip its ID instead of getting a "no entity found" response.
+      const CANONICAL_ID_RE = /^[a-z][a-z0-9-]*:[a-z_]+:[a-z0-9_]+$/;
       const entityResults = store.searchEntities(entity, 1);
-      const entityId = entityResults.length > 0
-        ? entityResults[0]!.entity_id
-        : entity.toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/^_|_$/g, "");
+      let entityId: string;
+      if (entityResults.length > 0) {
+        entityId = entityResults[0]!.entity_id;
+      } else if (CANONICAL_ID_RE.test(entity)) {
+        entityId = entity; // caller passed a canonical ID directly
+      } else {
+        const stats = store.getTripleStats();
+        return {
+          content: [{ type: "text", text: `No entity found matching "${entity}". The KG has ${stats.totalTriples} total triples (${stats.currentFacts} current). Try a shorter/broader name, or pass a canonical ID in the form 'vault:type:slug'.` }],
+        };
+      }
       const triples = store.queryEntityTriples(entityId, { asOf: as_of, direction });
       const stats = store.getTripleStats();
       if (triples.length === 0) {
         return {
-          content: [{ type: "text", text: `No knowledge graph facts found for "${entity}". The KG has ${stats.totalTriples} total triples (${stats.currentFacts} current).` }],
+          content: [{ type: "text", text: `No knowledge graph facts found for "${entity}" (resolved to ${entityId}). The KG has ${stats.totalTriples} total triples (${stats.currentFacts} current).` }],
         };
       }