npm - clawmem - Versions diffs - 0.8.4 → 0.8.5 - Mend

clawmem 0.8.4 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/AGENTS.md +18 -19
package/CLAUDE.md +8 -8
package/README.md +18 -22
package/SKILL.md +8 -8
package/package.json +1 -1
package/src/amem.ts +8 -1
package/src/entity.ts +63 -0
package/src/hooks/decision-extractor.ts +145 -115
package/src/mcp.ts +19 -6
package/src/observer.ts +132 -15

package/AGENTS.md CHANGED Viewed

@@ -128,15 +128,15 @@ ln -sf ~/clawmem/bin/clawmem ~/.bun/bin/clawmem
 clawmem bootstrap ~/notes --name notes
 # Or step by step:
-./bin/clawmem init
-./bin/clawmem collection add ~/notes --name notes
-./bin/clawmem update --embed
-./bin/clawmem setup hooks
-./bin/clawmem setup mcp
+clawmem init
+clawmem collection add ~/notes --name notes
+clawmem update --embed
+clawmem setup hooks
+clawmem setup mcp
 # Verify
-./bin/clawmem doctor    # Full health check
-./bin/clawmem status    # Quick index status
+clawmem doctor    # Full health check
+clawmem status    # Quick index status
 ```
 ### Background Services (systemd user units)
@@ -206,18 +206,17 @@ systemctl --user status clawmem-watcher.service clawmem-embed.timer
 When using ClawMem with OpenClaw, choose one of two deployment options:
-### Option 1: ClawMem Exclusive (Recommended)
+**Active Memory coexistence:** ClawMem is fully compatible with OpenClaw's Active Memory plugin (v2026.4.10+). They search different backends (ClawMem vault vs dreaming/wiki) and inject into different prompt regions (user prompt vs system prompt). Both can run simultaneously — no configuration needed.
+**OpenClaw v2026.4.10+ recommended:** Fixes a config normalization bug where `plugins.slots.contextEngine` was silently dropped (#64192).
-ClawMem handles 100% of memory operations via hooks + MCP tools. Zero redundancy.
+### Option 1: ClawMem Exclusive (Recommended)
-**Benefits:**
-- No context window waste (avoids 10-15% duplicate injection)
-- Prevents OpenClaw native memory auto-initialization on updates
-- All memory in ClawMem's hybrid search + graph traversal system
+ClawMem handles 100% of structured memory. Disable native memory search (not Active Memory — that's separate and compatible):
 **Configuration:**
 ```bash
-# Disable OpenClaw's native memory
+# Disable OpenClaw's native memory search
 openclaw config set agents.defaults.memorySearch.extraPaths "[]"
 # Verify
@@ -235,7 +234,7 @@ ls ~/.openclaw/agents/main/memory/
 ### Option 2: Hybrid (ClawMem + Native)
-Run both ClawMem and OpenClaw's native memory for redundancy.
+Run both ClawMem and OpenClaw's native memory search for redundancy.
 **Configuration:**
 ```bash
@@ -243,9 +242,9 @@ openclaw config set agents.defaults.memorySearch.extraPaths '["~/documents", "~/
 ```
 **Tradeoffs:**
-- ✅ Redundant recall from two independent systems
-- ❌ 10-15% context window waste from duplicate facts
-- ❌ Two memory indices to maintain
+- Redundant recall from two independent systems
+- 10-15% context window waste from duplicate facts
+- Two memory indices to maintain
 **Recommendation:** Use Option 1 unless you have a specific need for redundant memory systems.
@@ -263,7 +262,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
 | `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + recent decisions (400) + antipatterns (150) + vault context (200) → `<vault-postcompact>` |
 | `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
 | `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
-| `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, extracts SPO triples from decision/preference/milestone/problem facts. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
+| `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, persists observer-emitted SPO triples via `ensureEntityCanonical` (canonical `vault:type:slug` IDs shared with A-MEM) using the tight predicate vocabulary (adopted, migrated_to, deployed_to, runs_on, replaced, depends_on, integrates_with, uses, prefers, avoids, caused_by, resolved_by, owned_by). Eligible observation types: decision/preference/milestone/problem/discovery/feature. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
 | `handoff-generator` | Stop | — | LLM summarizes session → `_clawmem/agent/handoffs/` |
 | `feedback-loop` | Stop | — | tracks referenced notes → boosts confidence, records usage relations + co-activations between co-referenced docs, tracks utility signals (surfaced vs referenced ratio for lifecycle automation), per-turn recall attribution (marks which surfaced docs were cited in which turn) |

package/CLAUDE.md CHANGED Viewed

@@ -128,15 +128,15 @@ ln -sf ~/clawmem/bin/clawmem ~/.bun/bin/clawmem
 clawmem bootstrap ~/notes --name notes
 # Or step by step:
-./bin/clawmem init
-./bin/clawmem collection add ~/notes --name notes
-./bin/clawmem update --embed
-./bin/clawmem setup hooks
-./bin/clawmem setup mcp
+clawmem init
+clawmem collection add ~/notes --name notes
+clawmem update --embed
+clawmem setup hooks
+clawmem setup mcp
 # Verify
-./bin/clawmem doctor    # Full health check
-./bin/clawmem status    # Quick index status
+clawmem doctor    # Full health check
+clawmem status    # Quick index status
 ```
 ### Background Services (systemd user units)
@@ -262,7 +262,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
 | `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + recent decisions (400) + antipatterns (150) + vault context (200) → `<vault-postcompact>` |
 | `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
 | `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
-| `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, extracts SPO triples from decision/preference/milestone/problem facts. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
+| `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, persists observer-emitted SPO triples via `ensureEntityCanonical` (canonical `vault:type:slug` IDs shared with A-MEM) using the tight predicate vocabulary (adopted, migrated_to, deployed_to, runs_on, replaced, depends_on, integrates_with, uses, prefers, avoids, caused_by, resolved_by, owned_by). Eligible observation types: decision/preference/milestone/problem/discovery/feature. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
 | `handoff-generator` | Stop | — | LLM summarizes session → `_clawmem/agent/handoffs/` |
 | `feedback-loop` | Stop | — | tracks referenced notes → boosts confidence, records usage relations + co-activations between co-referenced docs, tracks utility signals (surfaced vs referenced ratio for lifecycle automation), per-turn recall attribution (marks which surfaced docs were cited in which turn) |

package/README.md CHANGED Viewed

@@ -717,7 +717,7 @@ Registered by `clawmem setup mcp`. Available to any MCP-compatible client.
 |---|---|
 | `build_graphs` | Build temporal and/or semantic graphs from document corpus |
 | `find_causal_links` | Trace decision chains: "what led to X", "how we got from A to B". Follow up `intent_search` with this tool on a top result to walk the full causal chain. Traverses causes / caused_by / both up to N hops with depth-annotated reasoning. |
-| `kg_query` | Query the SPO knowledge graph: "what does X relate to?", "what was true about X when?". Returns temporal entity-relationship triples with validity windows. Uses entity resolution for lookup. |
+| `kg_query` | Query the SPO knowledge graph: "what does X relate to?", "what was true about X when?". Returns temporal entity-relationship triples with validity windows. Accepts entity name (resolved via `searchEntities`) or canonical ID in `vault:type:slug` form. Triples are populated by the decision-extractor hook from observer-emitted `<triples>` blocks. |
 | `memory_evolution_status` | Show how a document's A-MEM metadata evolved over time |
 | `timeline` | Show the temporal neighborhood around a document — what was created/modified before and after it. Progressive disclosure: search → timeline (context) → get (full content). Supports same-collection scoping and session correlation. |
@@ -1073,40 +1073,36 @@ Manual layers benefit from periodic re-indexing — a cron job running `clawmem
 ### Setup
 ```bash
-# Bootstrap workspace collection (use your agent's workspace path)
-./bin/clawmem bootstrap ~/workspace --name workspace
-# Bootstrap each project
-./bin/clawmem bootstrap ~/Projects/my-project --name my-project
+# Bootstrap a content directory (creates vault + indexes + embeds + installs hooks + MCP)
+clawmem bootstrap ~/notes --name notes
-# Enable auto-embed for real-time indexing
-# Edit ~/.config/clawmem/config.yaml → autoEmbed: true
+# Bootstrap each project you want indexed
+clawmem bootstrap ~/Projects/my-project --name my-project
-# Install watcher as systemd service
-./bin/clawmem install-service --enable
+# Install watcher + embed timer as systemd services
+clawmem install-service --enable
 ```
-#### OpenClaw-Specific
+#### OpenClaw-specific
 ```bash
-# OpenClaw uses ~/.openclaw/workspace/ as its workspace root
-./bin/clawmem bootstrap ~/.openclaw/workspace --name workspace
+# Install the ContextEngine plugin (auto-symlinks into ~/.openclaw/extensions/)
+clawmem setup openclaw
+# Then follow the printed next steps: restart gateway, set slot, configure GPU endpoints
 ```
-#### Hermes-Specific
+Index your content directories with `clawmem bootstrap` as above. The OpenClaw plugin shares the same vault as Claude Code hooks.
-```bash
-# Hermes uses ~/.hermes/ as its home directory
-./bin/clawmem bootstrap ~/.hermes --name hermes-home
+#### Hermes-specific
-# Install the memory provider plugin
-cp -r src/hermes /path/to/hermes-agent/plugins/memory/clawmem
+```bash
+# Install the memory provider plugin (symlink or copy)
+ln -s $(npm root -g)/clawmem/src/hermes /path/to/hermes-agent/plugins/memory/clawmem
-# Start clawmem serve (external mode)
+# Start the REST API (required for Hermes tool calls)
 clawmem serve --port 7438 &
-# Configure Hermes to use ClawMem
-# In your Hermes config.yaml:
+# Configure Hermes to use ClawMem (in your Hermes config.yaml):
 #   memory:
 #     provider: clawmem
 ```

package/SKILL.md CHANGED Viewed

@@ -118,15 +118,15 @@ ln -sf ~/clawmem/bin/clawmem ~/.bun/bin/clawmem
 clawmem bootstrap ~/notes --name notes
 # Or step by step:
-./bin/clawmem init
-./bin/clawmem collection add ~/notes --name notes
-./bin/clawmem update --embed
-./bin/clawmem setup hooks
-./bin/clawmem setup mcp
+clawmem init
+clawmem collection add ~/notes --name notes
+clawmem update --embed
+clawmem setup hooks
+clawmem setup mcp
 # Verify
-./bin/clawmem doctor    # Full health check
-./bin/clawmem status    # Quick index status
+clawmem doctor    # Full health check
+clawmem status    # Quick index status
 ```
 ### Background Services (systemd user units)
@@ -294,7 +294,7 @@ Once escalated, route by query type:
 | `timeline` | Temporal neighborhood around a document — what was modified before/after. Progressive disclosure: search → timeline → get. Supports same-collection scoping and session correlation. |
 | `list_vaults` | Show configured vault names and paths. Empty in single-vault mode. |
 | `vault_sync` | Index markdown from a directory into a named vault. Restricted-path validation rejects sensitive directories. |
-| `kg_query` | Query SPO knowledge graph for entity relationships with temporal validity. Uses entity resolution. |
+| `kg_query` | Query SPO knowledge graph for entity relationships with temporal validity. Accepts entity name or canonical ID (`vault:type:slug`). Triples are populated by decision-extractor from observer-emitted `<triples>` blocks using a canonical predicate vocabulary. |
 | `diary_write` | Write diary entry. Use proactively in non-hooked environments. Do NOT use in Claude Code. |
 | `diary_read` | Read recent diary entries. Filter by agent name. |
 | `lifecycle_status` | Document lifecycle statistics: active, archived, forgotten, pinned, snoozed counts and policy summary. |

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clawmem",
-  "version": "0.8.4",
+  "version": "0.8.5",
   "description": "On-device context engine and memory for AI agents. Claude Code and OpenClaw. Hooks + MCP server + hybrid RAG search.",
   "type": "module",
   "bin": {

package/src/amem.ts CHANGED Viewed

@@ -649,11 +649,18 @@ export async function postIndexEnrich(
 }
 /**
- * Observation with document ID for causal inference
+ * Observation with document ID for causal inference and SPO triple extraction.
+ *
+ * Populated by the decision-extractor hook after an observation is successfully
+ * persisted. Consumed by:
+ *   - `inferCausalLinks` (A-MEM) — uses docId + facts
+ *   - `insertObservationTriples` (decision-extractor) — uses docId + obsType + triples
+ *
+ * `obsType` and `triples` are optional: `insertObservationTriples` skips any
+ * entry whose `triples` is missing/empty, or whose `obsType` is missing or not
+ * one of the SPO-eligible observation types.
  */
 export interface ObservationWithDoc {
   docId: number;
   facts: string[];
+  obsType?: string;
+  triples?: Array<{ subject: string; predicate: string; object: string }>;
 }
 /**

package/src/entity.ts CHANGED Viewed

@@ -354,6 +354,69 @@ export function resolveEntityCanonical(
 // Entity Storage + Mentions + Co-occurrences
 // =============================================================================
+/**
+ * Resolve the entity_type for a name via exact case-insensitive match.
+ *
+ * Returns the type only when the `entity_nodes` rows in the given vault whose
+ * name matches carry EXACTLY ONE distinct `entity_type` (the query is
+ * `SELECT DISTINCT entity_type`, so several rows of the same type still count
+ * as one). Zero matches → null (caller should default to a safe type). Multiple
+ * distinct types (ambiguous across buckets, e.g. "Alice" as person AND "Alice"
+ * as project) → null so the caller falls back to a safe default instead of
+ * arbitrarily picking.
+ *
+ * Exact match only — no fuzzy matching — to avoid false inheritance on near-names.
+ * Both sides of the comparison are passed through SQL `LOWER()`; the name is
+ * not trimmed or otherwise normalized here.
+ *
+ * NOTE(review): the query applies no status/"active" filter — every row in
+ * `entity_nodes` for the vault participates. Confirm that is intended.
+ *
+ * @param db - Database handle used to query `entity_nodes`.
+ * @param name - Entity name to look up.
+ * @param vault - Vault to scope the lookup to; defaults to `'default'`.
+ * @returns The single matching entity_type, or null when there is no match or
+ *   more than one distinct type.
+ */
+export function resolveEntityTypeExact(
+  db: Database,
+  name: string,
+  vault: string = 'default'
+): string | null {
+  const rows = db.prepare(`
+    SELECT DISTINCT entity_type FROM entity_nodes
+    WHERE LOWER(name) = LOWER(?) AND vault = ?
+  `).all(name, vault) as Array<{ entity_type: string }>;
+  if (rows.length !== 1) return null; // zero or ambiguous
+  return rows[0]!.entity_type;
+}
+/**
+ * Resolve-or-create a canonical entity without incrementing mention_count.
+ *
+ * Used by consumers that reference an entity but do NOT constitute a document
+ * mention (e.g. SPO triple extraction). Semantically distinct from upsertEntity,
+ * which treats every call as a doc mention and inflates the count.
+ *
+ * Flow: resolveEntityCanonical (FTS5 + fuzzy + bucket match) → reuse if found,
+ * otherwise mint a new canonical `vault:type:slug` entity with mention_count = 0.
+ *
+ * When minting, the row is written to `entity_nodes` with a NULL description and
+ * `created_at` / `last_seen` set to `datetime('now')`, then mirrored into
+ * `entities_fts` with the name lower-cased. Both statements are
+ * `INSERT OR IGNORE`, and the id from `makeEntityId` is returned regardless of
+ * whether a row was actually inserted. The FTS insert is best-effort: any error
+ * it throws is swallowed, while an error from the `entity_nodes` insert
+ * propagates to the caller.
+ *
+ * @param db - Database handle used for the lookup and both inserts.
+ * @param name - Entity name as referenced by the caller; stored as given in
+ *   `entity_nodes`.
+ * @param type - Entity type used for resolution and for the minted row.
+ * @param vault - Vault the entity belongs to; defaults to `'default'`.
+ * @returns The entity_id — the resolved canonical id, or the newly minted one.
+ */
+export function ensureEntityCanonical(
+  db: Database,
+  name: string,
+  type: string,
+  vault: string = 'default'
+): string {
+  const canonicalId = resolveEntityCanonical(db, name, type, vault);
+  if (canonicalId) return canonicalId;
+  const entityId = makeEntityId(name, type, vault);
+  db.prepare(`
+    INSERT OR IGNORE INTO entity_nodes (entity_id, entity_type, name, description, created_at, mention_count, last_seen, vault)
+    VALUES (?, ?, ?, NULL, datetime('now'), 0, datetime('now'), ?)
+  `).run(entityId, type, name, vault);
+  try {
+    db.prepare(`
+      INSERT OR IGNORE INTO entities_fts (entity_id, name, entity_type)
+      VALUES (?, ?, ?)
+    `).run(entityId, name.toLowerCase(), type);
+  } catch {
+    // FTS insert may fail if table doesn't exist yet — non-fatal
+  }
+  return entityId;
+}
 /**
  * Upsert an entity into entity_nodes and entities_fts.
  * Returns the entity_id (canonical or new).

package/src/hooks/decision-extractor.ts CHANGED Viewed

@@ -17,13 +17,23 @@ import {
   validateTranscriptPath,
 } from "../hooks.ts";
 import { hashContent } from "../indexer.ts";
-import { extractObservations, type Observation } from "../observer.ts";
+import { extractObservations, type Observation, LITERAL_PREDICATES } from "../observer.ts";
 import { updateDirectoryContext } from "../directory-context.ts";
 import { loadConfig } from "../collections.ts";
 import { getDefaultLlamaCpp } from "../llm.ts";
 import type { ObservationWithDoc } from "../amem.ts";
 import { extractJsonFromLLM } from "../amem.ts";
 import { DEFAULT_EMBED_MODEL, extractSnippet, type SearchResult } from "../store.ts";
+import { ensureEntityCanonical, resolveEntityTypeExact } from "../entity.ts";
+// Observation types that are allowed to contribute SPO triples. Widened from the
+// original {decision, preference, milestone, problem} gate, which rejected 77% of
+// real observations in production vaults (the majority type is 'discovery').
+// See BACKLOG.md §1.6 for the full diagnosis.
+const SPO_ELIGIBLE_OBSERVATION_TYPES = new Set<Observation["type"]>([
+  "decision", "preference", "milestone", "problem",
+  "discovery", "feature",
+]);
 // =============================================================================
 // Facet-Based Merge Policy
@@ -325,42 +335,8 @@ export async function decisionExtractor(
   const observationsWithDocs: ObservationWithDoc[] = [];
   if (observations.length > 0) {
     for (const obs of observations) {
-      const obsPath = `observations/${dateStr}-${sessionId.slice(0, 8)}-${obs.type}.md`;
-      const obsBody = formatObservation(obs, dateStr, sessionId);
-      const obsHash = hashContent(obsBody);
-      store.insertContent(obsHash, obsBody, timestamp);
-      try {
-        store.insertDocument("_clawmem", obsPath, obs.title, obsHash, timestamp, timestamp);
-        const doc = store.findActiveDocument("_clawmem", obsPath);
-        if (doc) {
-          store.updateDocumentMeta(doc.id, {
-            content_type: obs.type === "decision" ? "decision"
-              : obs.type === "preference" ? "preference"
-              : obs.type === "milestone" ? "milestone"
-              : obs.type === "problem" ? "problem"
-              : "observation",
-            confidence: 0.80,
-          });
-          store.updateObservationFields(obsPath, "_clawmem", {
-            observation_type: obs.type,
-            facts: JSON.stringify(obs.facts),
-            narrative: obs.narrative,
-            concepts: JSON.stringify(obs.concepts),
-            files_read: JSON.stringify(obs.filesRead),
-            files_modified: JSON.stringify(obs.filesModified),
-          });
-          if (obs.facts.length > 0) {
-            observationsWithDocs.push({
-              docId: doc.id,
-              facts: obs.facts,
-            });
-          }
-        }
-      } catch {
-        // May already exist
-      }
+      const wit = persistObservationDoc(store, obs, sessionId, dateStr, timestamp);
+      if (wit) observationsWithDocs.push(wit);
     }
     // Infer causal links from observations with facts
@@ -375,31 +351,12 @@ export async function decisionExtractor(
       }
     }
-    // Extract SPO triples from observation facts (preference/decision types get priority)
-    for (const obs of observations) {
-      if (!obs.facts || obs.facts.length === 0) continue;
-      for (const fact of obs.facts) {
-        const triple = extractTripleFromFact(fact, obs.type);
-        if (triple) {
-          try {
-            store.db.prepare(
-              "INSERT OR IGNORE INTO entity_nodes (entity_id, name, entity_type, created_at) VALUES (?, ?, ?, ?)"
-            ).run(triple.subjectId, triple.subject, "auto", new Date().toISOString());
-            if (triple.objectId) {
-              store.db.prepare(
-                "INSERT OR IGNORE INTO entity_nodes (entity_id, name, entity_type, created_at) VALUES (?, ?, ?, ?)"
-              ).run(triple.objectId, triple.object, "auto", new Date().toISOString());
-            }
-            store.addTriple(triple.subjectId, triple.predicate, triple.objectId, triple.objectId ? null : triple.object, {
-              confidence: obs.type === "decision" || obs.type === "preference" ? 0.9 : 0.7,
-              sourceFact: fact,
-            });
-          } catch {
-            // Triple insertion errors are non-fatal
-          }
-        }
-      }
-    }
+    // Extract SPO triples from observation-emitted <triples> blocks (Fix A).
+    // The regex-based extractTripleFromFact is gone — the observer LLM now emits
+    // structured triples alongside facts, parsed and validated in parseObservationXml.
+    // We iterate observationsWithDocs (not raw observations) so every triple gets
+    // real source_doc_id provenance from the persisted observation document (Fix F).
+    insertObservationTriples(store, observations, observationsWithDocs);
   }
   // Extract decisions (observer-first, regex fallback)
@@ -691,67 +648,140 @@ function formatObservation(obs: Observation, dateStr: string, sessionId: string)
 }
 // =============================================================================
-// SPO Triple Extraction from Facts
+// Observation persistence
 // =============================================================================
-type ExtractedTriple = {
-  subject: string;
-  subjectId: string;
-  predicate: string;
-  object: string;
-  objectId: string | null;
-};
+/**
+ * Persist a single observation as a `_clawmem` document and return an
+ * `ObservationWithDoc` for downstream consumers (causal inference + SPO
+ * triples).
+ *
+ * Path format: `observations/${date}-${session8}-${type}-${hash8}.md`. The
+ * 8-char hash slice (SHA256 of the formatted body) disambiguates multiple
+ * observations of the same type within a single session — without it, the
+ * second insert hits the `UNIQUE(collection, path)` constraint, is silently
+ * dropped, and its triples never reach `entity_triples`. See Codex Turn 3
+ * for the regression this guards against.
+ *
+ * Returns null when the doc cannot be looked up after insert OR when the
+ * observation has no facts (triples without facts wouldn't survive the
+ * causal-links/facts filter downstream).
+ *
+ * In the no-facts case the document, its metadata and its observation fields
+ * have already been written — only the return value is null. Errors thrown
+ * inside the try block (document insert, lookup, metadata/field updates) are
+ * logged with console.log and also yield null. `store.insertContent` runs
+ * before the try block, so an error from it propagates to the caller.
+ *
+ * The stored `content_type` mirrors `obs.type` for decision / preference /
+ * milestone / problem and is `"observation"` for every other type; confidence
+ * is fixed at 0.80.
+ *
+ * @param store - Store used for content, document and metadata writes.
+ * @param obs - Observation to persist.
+ * @param sessionId - Session identifier; only its first 8 characters go into
+ *   the path (the full value is passed to `formatObservation`).
+ * @param dateStr - Date string used as the path prefix and passed to
+ *   `formatObservation`.
+ * @param timestamp - Timestamp passed to `insertContent` and, twice, to
+ *   `insertDocument`.
+ * @returns The `ObservationWithDoc` for the persisted document, or null as
+ *   described above.
+ */
+export function persistObservationDoc(
+  store: Store,
+  obs: Observation,
+  sessionId: string,
+  dateStr: string,
+  timestamp: string
+): ObservationWithDoc | null {
+  const obsBody = formatObservation(obs, dateStr, sessionId);
+  const obsHash = hashContent(obsBody);
+  const obsPath = `observations/${dateStr}-${sessionId.slice(0, 8)}-${obs.type}-${obsHash.slice(0, 8)}.md`;
+  store.insertContent(obsHash, obsBody, timestamp);
+  try {
+    store.insertDocument("_clawmem", obsPath, obs.title, obsHash, timestamp, timestamp);
+    const doc = store.findActiveDocument("_clawmem", obsPath);
+    if (!doc) return null;
+    store.updateDocumentMeta(doc.id, {
+      content_type: obs.type === "decision" ? "decision"
+        : obs.type === "preference" ? "preference"
+        : obs.type === "milestone" ? "milestone"
+        : obs.type === "problem" ? "problem"
+        : "observation",
+      confidence: 0.80,
+    });
+    store.updateObservationFields(obsPath, "_clawmem", {
+      observation_type: obs.type,
+      facts: JSON.stringify(obs.facts),
+      narrative: obs.narrative,
+      concepts: JSON.stringify(obs.concepts),
+      files_read: JSON.stringify(obs.filesRead),
+      files_modified: JSON.stringify(obs.filesModified),
+    });
-function toEntityId(name: string): string {
-  return name.toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/^_|_$/g, "");
+    if (obs.facts.length === 0) return null;
+    return {
+      docId: doc.id,
+      facts: obs.facts,
+      obsType: obs.type,
+      triples: obs.triples,
+    };
+  } catch (err) {
+    console.log(`[decision-extractor] Failed to persist observation ${obs.type}/${obs.title}:`, err);
+    return null;
+  }
 }
-function extractTripleFromFact(fact: string, obsType: string): ExtractedTriple | null {
-  // Only extract from decision/preference/milestone/problem types — skip noisy bugfix/feature/change facts
-  if (!["decision", "preference", "milestone", "problem"].includes(obsType)) return null;
+// =============================================================================
+// SPO Triple Extraction from Facts
+// =============================================================================
-  // Conservative verb patterns — only clear relational predicates
-  const verbPatterns = [
-    /^(.+?)\s+(chose|selected|switched to|migrated to|adopted)\s+(.+?)\.?$/i,
-    /^(.+?)\s+(deployed to|runs on|hosted on|installed on)\s+(.+?)\.?$/i,
-    /^(.+?)\s+(replaced|superseded|deprecated)\s+(.+?)\.?$/i,
-    /^(.+?)\s+(depends on|integrates with|connects to)\s+(.+?)\.?$/i,
-  ];
+/**
+ * Insert SPO triples emitted by the observer into `entity_triples`.
+ *
+ * Uses canonical vault:type:slug entity IDs via `ensureEntityCanonical` so the
+ * knowledge graph stays in one namespace with A-MEM entities. Type inheritance
+ * is exact-match-only and ambiguity-safe: if a name resolves to exactly one type
+ * already in `entity_nodes`, inherit it; otherwise default to `concept`.
+ *
+ * Provenance: every triple carries `source_doc_id` from the persisted observation
+ * document. Iterates `observationsWithDocs` directly so triples from observations
+ * whose doc insert failed are naturally skipped — no order-matching gymnastics.
+ *
+ * Per entry: skipped when it has no triples, no `obsType`, or an `obsType`
+ * outside `SPO_ELIGIBLE_OBSERVATION_TYPES`. Confidence is 0.9 for decision and
+ * preference observations and 0.7 for the rest. When the predicate is in
+ * `LITERAL_PREDICATES` the object is stored as a literal and no object entity is
+ * resolved or created; otherwise the object is resolved to an entity ID like the
+ * subject. A failure on one triple is logged and does not stop the others.
+ *
+ * The vault is hard-coded to "default" in this function.
+ *
+ * @param store - Store providing `db` and `addTriple`.
+ * @param _observations - Unused in the body; only `observationsWithDocs` is read.
+ * @param observationsWithDocs - Persisted observations carrying docId, obsType
+ *   and triples.
+ */
+function insertObservationTriples(
+  store: Store,
+  _observations: Observation[],
+  observationsWithDocs: ObservationWithDoc[]
+): void {
+  if (observationsWithDocs.length === 0) return;
+  // Per-invocation cache keyed on `vault:resolvedType:lowercased-trimmed-name`; it only skips
+  // repeated ensureEntityCanonical calls — resolveEntityTypeExact still runs for every triple.
+  const vault = "default";
+  const cache = new Map<string, string>();
+  const resolveEntity = (name: string, type: string): string => {
+    const key = `${vault}:${type}:${name.toLowerCase().trim()}`;
+    const cached = cache.get(key);
+    if (cached) return cached;
+    const id = ensureEntityCanonical(store.db, name, type, vault);
+    cache.set(key, id);
+    return id;
+  };
+  for (const wit of observationsWithDocs) {
+    if (!wit.triples || wit.triples.length === 0) continue;
+    const obsType = wit.obsType as Observation["type"] | undefined;
+    if (!obsType || !SPO_ELIGIBLE_OBSERVATION_TYPES.has(obsType)) continue;
+    const confidence = obsType === "decision" || obsType === "preference" ? 0.9 : 0.7;
+    for (const triple of wit.triples) {
+      try {
+        const subjectType = resolveEntityTypeExact(store.db, triple.subject, vault) ?? "concept";
+        const subjectId = resolveEntity(triple.subject, subjectType);
-  for (const pattern of verbPatterns) {
-    const match = fact.match(pattern);
-    if (match) {
-      const subject = match[1]!.trim();
-      const predicate = match[2]!.trim();
-      const object = match[3]!.trim();
-      // Reject subjects/objects that look like sentences rather than entity names
-      if (subject.length < 3 || object.length < 3 || subject.length > 60 || object.length > 60) continue;
-      if (subject.includes(",") || object.includes(",")) continue; // likely a clause, not an entity
-      return {
-        subject,
-        subjectId: toEntityId(subject),
-        predicate: predicate.toLowerCase().replace(/\s+/g, "_"),
-        object,
-        objectId: toEntityId(object),
-      };
-    }
-  }
+        let objectId: string | null = null;
+        let objectLiteral: string | null = null;
+        if (LITERAL_PREDICATES.has(triple.predicate)) {
+          objectLiteral = triple.object;
+        } else {
+          const objectType = resolveEntityTypeExact(store.db, triple.object, vault) ?? "concept";
+          objectId = resolveEntity(triple.object, objectType);
+        }
-  // Preference facts only: "User prefers X" / "Prefers X"
-  if (obsType === "preference") {
-    const prefMatch = fact.match(/^(?:user\s+)?(?:prefers?|avoids?)\s+(.+?)\.?$/i);
-    if (prefMatch && prefMatch[1]!.trim().length > 2) {
-      return {
-        subject: "user",
-        subjectId: "user",
-        predicate: "prefers",
-        object: prefMatch[1]!.trim(),
-        objectId: null, // literal, not entity
-      };
+        store.addTriple(subjectId, triple.predicate, objectId, objectLiteral, {
+          confidence,
+          sourceFact: `${triple.subject} ${triple.predicate} ${triple.object}`,
+          sourceDocId: wit.docId,
+        });
+      } catch (err) {
+        // Triple insertion errors are non-fatal — logged via console.log, then the loop continues
+        console.log(`[decision-extractor] Failed to insert triple ${triple.subject}/${triple.predicate}/${triple.object}:`, err);
+      }
     }
   }
-  return null;
 }

package/src/mcp.ts CHANGED Viewed

@@ -1930,9 +1930,9 @@ This is the recommended entry point for ALL memory queries.`,
     "kg_query",
     {
       title: "Knowledge Graph Query",
-      description: "Query the knowledge graph for an entity's relationships. Returns structured facts with temporal validity (valid_from/valid_to). Use for 'what does X relate to?', 'what was true about X on date Y?', 'who/what is connected to X?'.",
+      description: "Query the knowledge graph for an entity's relationships. Returns structured facts with temporal validity (valid_from/valid_to). Use for 'what does X relate to?', 'what was true about X on date Y?', 'who/what is connected to X?'. Accepts an entity name (e.g. 'ClawMem') OR a canonical entity ID in the form 'vault:type:slug' (e.g. 'default:service:clawmem').",
       inputSchema: {
-        entity: z.string().describe("Entity name or ID to query"),
+        entity: z.string().describe("Entity name or canonical ID ('vault:type:slug') to query"),
         as_of: z.string().optional().describe("Date filter (YYYY-MM-DD) — only facts valid at this date"),
         direction: z.enum(["outgoing", "incoming", "both"]).optional().default("both").describe("Relationship direction"),
         vault: z.string().optional().describe("Named vault (omit for default vault)"),
@@ -1941,17 +1941,30 @@ This is the recommended entry point for ALL memory queries.`,
     async ({ entity, as_of, direction, vault }) => {
       const store = getStore(vault);
+      // Canonical IDs look like `vault:type:slug`. The name search below runs first;
+      // only when it finds nothing is an input matching this pattern used as the
+      // entity ID as-is, so callers that already resolved an entity can round-trip
+      // its ID instead of getting a "No entity found" reply.
+      const CANONICAL_ID_RE = /^[a-z][a-z0-9-]*:[a-z_]+:[a-z0-9_]+$/;
       const entityResults = store.searchEntities(entity, 1);
-      const entityId = entityResults.length > 0
-        ? entityResults[0]!.entity_id
-        : entity.toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/^_|_$/g, "");
+      let entityId: string;
+      if (entityResults.length > 0) {
+        entityId = entityResults[0]!.entity_id;
+      } else if (CANONICAL_ID_RE.test(entity)) {
+        entityId = entity; // caller passed a canonical ID directly
+      } else {
+        const stats = store.getTripleStats();
+        return {
+          content: [{ type: "text", text: `No entity found matching "${entity}". The KG has ${stats.totalTriples} total triples (${stats.currentFacts} current). Try a shorter/broader name, or pass a canonical ID in the form 'vault:type:slug'.` }],
+        };
+      }
       const triples = store.queryEntityTriples(entityId, { asOf: as_of, direction });
       const stats = store.getTripleStats();
       if (triples.length === 0) {
         return {
-          content: [{ type: "text", text: `No knowledge graph facts found for "${entity}". The KG has ${stats.totalTriples} total triples (${stats.currentFacts} current).` }],
+          content: [{ type: "text", text: `No knowledge graph facts found for "${entity}" (resolved to ${entityId}). The KG has ${stats.totalTriples} total triples (${stats.currentFacts} current).` }],
         };
       }

package/src/observer.ts CHANGED Viewed

@@ -22,6 +22,13 @@ export type Observation = {
   concepts: string[];
   filesRead: string[];
   filesModified: string[];
+  triples?: ParsedTriple[];
+};
+/**
+ * One subject–predicate–object relation parsed from a <triple> element.
+ * Attached to an Observation through its optional `triples` field.
+ */
+export type ParsedTriple = {
+  // Trimmed text of the <subject> element.
+  subject: string;
+  // Predicate name; extractTriples lowercases it, replaces whitespace runs with "_",
+  // and only emits values that are members of VALID_PREDICATES.
+  predicate: string;
+  // Trimmed text of the <object> element.
+  object: string;
 };
 export type SessionSummary = {
@@ -48,28 +55,54 @@ const GENERATION_TEMPERATURE = 0.3;
 // =============================================================================
 const OBSERVATION_SYSTEM_PROMPT = `You are an observer analyzing a coding session transcript. Extract structured observations.
-For each significant action, decision, or discovery, output an <observation> XML element.
+For each significant action, decision, or discovery, output an <observation> XML element with the structure below.
+Structure:
 <observation>
-  <type>one of: decision, bugfix, feature, refactor, discovery, change, preference, milestone, problem</type>
-  <title>Brief descriptive title (max 80 chars)</title>
+  <type>...</type>
+  <title>...</title>
   <facts>
-    <fact>Individual atomic fact</fact>
+    <fact>...</fact>
   </facts>
-  <narrative>2-3 sentences explaining context and reasoning</narrative>
+  <triples>
+    <triple>
+      <subject>...</subject>
+      <predicate>...</predicate>
+      <object>...</object>
+    </triple>
+  </triples>
+  <narrative>...</narrative>
   <concepts>
-    <concept>one of: how-it-works, why-it-exists, what-changed, problem-solution, gotcha, pattern, trade-off</concept>
+    <concept>...</concept>
   </concepts>
-  <files_read><file>path/to/file</file></files_read>
-  <files_modified><file>path/to/file</file></files_modified>
+  <files_read><file>...</file></files_read>
+  <files_modified><file>...</file></files_modified>
 </observation>
-Rules:
+Field rules:
+- <type>: one of decision, bugfix, feature, refactor, discovery, change, preference, milestone, problem
+- <title>: brief descriptive title, max 80 chars
+- <facts>: 1-5 <fact> elements, each a standalone atomic claim about what happened or what is true (concrete, specific, no schema placeholders or template text)
+- <triples>: 0-3 <triple> elements for structural relationships between named entities (see predicate vocabulary below). Omit entirely if no relational claims apply. Do NOT emit triples for descriptive facts — only for explicit S-P-O relations.
+- <narrative>: 2-3 sentences explaining WHY something was done, not just WHAT
+- <concepts>: 0-3 <concept> elements from: how-it-works, why-it-exists, what-changed, problem-solution, gotcha, pattern, trade-off
+- <files_read>, <files_modified>: only files explicitly mentioned in the transcript
+Predicate vocabulary (use EXACTLY these predicates in <predicate>, nothing else):
+- adopted, migrated_to — switching to a new tool/framework/approach
+- deployed_to, runs_on — where something runs
+- replaced — when one thing supersedes another
+- depends_on, integrates_with, uses — structural dependencies
+- prefers, avoids — user preferences (use for <subject>user</subject>)
+- caused_by, resolved_by — causal relationships between problems and fixes
+- owned_by — responsibility / ownership
+<subject> and <object> must be short canonical entity names (2-80 chars). No sentences. No placeholder text. If you cannot fit a claim into this vocabulary, keep it in <facts> instead and omit the triple.
+Observation rules:
 - Output 1-5 observations, focusing on the MOST significant events
-- Each fact should be a standalone, atomic piece of information
-- The narrative should explain WHY something was done, not just WHAT
-- Only include files that were explicitly mentioned in the transcript
 - If no significant observations, output nothing
+- Never use schema example text or template placeholders in <fact>, <subject>, or <object> — emit only real content extracted from the transcript
 Type guidance:
 - preference: user expresses a preference, habit, or way of working (e.g., "don't use subagents for this", "I prefer single PRs")
@@ -131,6 +164,47 @@ const VALID_CONCEPTS = new Set([
   "gotcha", "pattern", "trade-off",
 ]);
+// Canonical SPO predicate vocabulary — parser rejects anything outside this set.
+// Must stay in sync with the predicate list in OBSERVATION_SYSTEM_PROMPT.
+// extractTriples lowercases each <predicate> and replaces whitespace runs with "_"
+// before the lookup, so every entry here must be lowercase snake_case.
+export const VALID_PREDICATES = new Set([
+  "adopted", "migrated_to",
+  "deployed_to", "runs_on",
+  "replaced",
+  "depends_on", "integrates_with", "uses",
+  "prefers", "avoids",
+  "caused_by", "resolved_by",
+  "owned_by",
+]);
+// Predicates whose <object> should be stored as a literal (not resolved to an entity).
+export const LITERAL_PREDICATES = new Set(["prefers", "avoids"]);
+// Exact placeholder strings that must never be persisted as facts or triple components.
+// Defense-in-depth: even though the prompt no longer places example text inside
+// <fact>/<subject>/<object> tags, a weak model could still echo these phrases.
+// Entries must be lowercase: isSchemaPlaceholder trims and lowercases its input
+// before looking it up in this set.
+const SCHEMA_PLACEHOLDER_STRINGS = new Set([
+  "individual atomic fact",
+  "atomic fact",
+  "one atomic claim per fact element",
+  "brief descriptive title",
+  "canonical entity name",
+]);
+// Regex for template placeholder markers: {{...}}, <!--...-->, ${...}.
+// Intentionally narrow — earlier drafts rejected any line starting with
+// "example:" / "placeholder:", which false-positived legitimate facts like
+// "Example: QMD switched to Bun in v0.2". Shape-only matching avoids that
+// drift; the exact-string blocklist above handles known echoed placeholders.
+// Anchored at the start only: text that begins with a complete marker matches
+// even when other text follows it.
+const PLACEHOLDER_REGEX = /^(\{\{.*\}\}|<!--.*-->|\$\{.*\})/;
+/**
+ * Reports whether `text` is schema/template placeholder content rather than
+ * real extracted content.
+ *
+ * @param text - Candidate title, fact, subject, or object string.
+ * @returns `true` when `text` is empty, when its trimmed, lowercased form is in
+ *   SCHEMA_PLACEHOLDER_STRINGS, or when that form starts with a template marker
+ *   matched by PLACEHOLDER_REGEX; otherwise `false`.
+ */
+function isSchemaPlaceholder(text: string): boolean {
+  // Emptiness is tested before trimming, so a whitespace-only string falls
+  // through to the checks below and ends up reported as NOT a placeholder.
+  if (!text) return true;
+  const normalized = text.trim().toLowerCase();
+  if (SCHEMA_PLACEHOLDER_STRINGS.has(normalized)) return true;
+  if (PLACEHOLDER_REGEX.test(normalized)) return true;
+  return false;
+}
 export function parseObservationXml(xml: string): Observation | null {
   const typeMatch = xml.match(/<type>\s*(.*?)\s*<\/type>/s);
   const titleMatch = xml.match(/<title>\s*(.*?)\s*<\/title>/s);
@@ -141,24 +215,67 @@ export function parseObservationXml(xml: string): Observation | null {
   const type = typeMatch[1].trim().toLowerCase();
   if (!VALID_OBSERVATION_TYPES.has(type)) return null;
-  const facts = extractMultiple(xml, "fact");
+  const rawTitle = titleMatch[1].trim();
+  if (isSchemaPlaceholder(rawTitle)) return null;
+  const facts = extractMultiple(xml, "fact")
+    .filter(f => f.length >= 5)
+    .filter(f => !isSchemaPlaceholder(f));
   const concepts = extractMultiple(xml, "concept")
     .filter(c => VALID_CONCEPTS.has(c.toLowerCase()))
     .map(c => c.toLowerCase());
   const filesRead = extractMultiple(xml, "file", "files_read");
   const filesModified = extractMultiple(xml, "file", "files_modified");
+  // Parse triples (Fix A): strict validation against canonical predicate vocabulary.
+  // Missing/malformed triples are silently dropped — fail-closed on ambiguity.
+  const triples = extractTriples(xml);
   return {
     type: type as Observation["type"],
-    title: titleMatch[1].trim().slice(0, 80),
-    facts: facts.filter(f => f.length >= 5),
+    title: rawTitle.slice(0, 80),
+    facts,
     narrative: narrativeMatch?.[1]?.trim() || "",
     concepts,
     filesRead,
     filesModified,
+    triples: triples.length > 0 ? triples : undefined,
   };
 }
+/**
+ * Extracts validated subject–predicate–object triples from an observation's XML.
+ *
+ * Only the first <triples> element is read. A <triple> is skipped when any of
+ * <subject>, <predicate>, or <object> is missing or empty, when the normalized
+ * predicate is not in VALID_PREDICATES, when a length bound is violated, or
+ * when the subject or object is schema placeholder text.
+ *
+ * @param xml - XML text of one observation (the same string parseObservationXml receives).
+ * @returns Accepted triples in document order, at most 5; an empty array when
+ *   <triples> is absent or empty.
+ */
+function extractTriples(xml: string): ParsedTriple[] {
+  const parentMatch = xml.match(/<triples>([\s\S]*?)<\/triples>/s);
+  if (!parentMatch?.[1]) return [];
+  const blockRegex = /<triple>([\s\S]*?)<\/triple>/g;
+  const results: ParsedTriple[] = [];
+  let match;
+  while ((match = blockRegex.exec(parentMatch[1])) !== null) {
+    const block = match[1] ?? "";
+    const subject = block.match(/<subject>\s*(.*?)\s*<\/subject>/s)?.[1]?.trim();
+    const rawPredicate = block.match(/<predicate>\s*(.*?)\s*<\/predicate>/s)?.[1]?.trim();
+    const object = block.match(/<object>\s*(.*?)\s*<\/object>/s)?.[1]?.trim();
+    if (!subject || !rawPredicate || !object) continue;
+    // Normalize e.g. "Depends On" -> "depends_on" before the vocabulary check.
+    const predicate = rawPredicate.toLowerCase().replace(/\s+/g, "_");
+    if (!VALID_PREDICATES.has(predicate)) continue;
+    // Length bounds — reject sentence-shaped subjects/objects (the shape the
+    // regex-era tests expected). Subjects must be 2-80 chars, objects 2-120 chars.
+    // NOTE(review): OBSERVATION_SYSTEM_PROMPT asks for 2-80 chars for both and
+    // for 0-3 triples, while this parser allows objects up to 120 chars and up
+    // to 5 triples — confirm the extra leniency is intentional.
+    if (subject.length < 2 || subject.length > 80) continue;
+    if (object.length < 2 || object.length > 120) continue;
+    if (isSchemaPlaceholder(subject) || isSchemaPlaceholder(object)) continue;
+    results.push({ subject, predicate, object });
+    if (results.length >= 5) break; // cap per observation
+  }
+  return results;
+}
 export function parseSummaryXml(xml: string): SessionSummary | null {
   const request = extractSingle(xml, "request");
   const investigated = extractSingle(xml, "investigated");