clawmem 0.8.4 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -128,15 +128,15 @@ ln -sf ~/clawmem/bin/clawmem ~/.bun/bin/clawmem
128
128
  clawmem bootstrap ~/notes --name notes
129
129
 
130
130
  # Or step by step:
131
- ./bin/clawmem init
132
- ./bin/clawmem collection add ~/notes --name notes
133
- ./bin/clawmem update --embed
134
- ./bin/clawmem setup hooks
135
- ./bin/clawmem setup mcp
131
+ clawmem init
132
+ clawmem collection add ~/notes --name notes
133
+ clawmem update --embed
134
+ clawmem setup hooks
135
+ clawmem setup mcp
136
136
 
137
137
  # Verify
138
- ./bin/clawmem doctor # Full health check
139
- ./bin/clawmem status # Quick index status
138
+ clawmem doctor # Full health check
139
+ clawmem status # Quick index status
140
140
  ```
141
141
 
142
142
  ### Background Services (systemd user units)
@@ -206,18 +206,17 @@ systemctl --user status clawmem-watcher.service clawmem-embed.timer
206
206
 
207
207
  When using ClawMem with OpenClaw, choose one of two deployment options:
208
208
 
209
- ### Option 1: ClawMem Exclusive (Recommended)
209
+ **Active Memory coexistence:** ClawMem is fully compatible with OpenClaw's Active Memory plugin (v2026.4.10+). They search different backends (ClawMem vault vs dreaming/wiki) and inject into different prompt regions (user prompt vs system prompt). Both can run simultaneously — no configuration needed.
210
+
211
+ **OpenClaw v2026.4.10+ recommended:** Fixes a config normalization bug where `plugins.slots.contextEngine` was silently dropped (#64192).
210
212
 
211
- ClawMem handles 100% of memory operations via hooks + MCP tools. Zero redundancy.
213
+ ### Option 1: ClawMem Exclusive (Recommended)
212
214
 
213
- **Benefits:**
214
- - No context window waste (avoids 10-15% duplicate injection)
215
- - Prevents OpenClaw native memory auto-initialization on updates
216
- - All memory in ClawMem's hybrid search + graph traversal system
215
+ ClawMem handles 100% of structured memory. Disable native memory search (not Active Memory — that's separate and compatible):
217
216
 
218
217
  **Configuration:**
219
218
  ```bash
220
- # Disable OpenClaw's native memory
219
+ # Disable OpenClaw's native memory search
221
220
  openclaw config set agents.defaults.memorySearch.extraPaths "[]"
222
221
 
223
222
  # Verify
@@ -235,7 +234,7 @@ ls ~/.openclaw/agents/main/memory/
235
234
 
236
235
  ### Option 2: Hybrid (ClawMem + Native)
237
236
 
238
- Run both ClawMem and OpenClaw's native memory for redundancy.
237
+ Run both ClawMem and OpenClaw's native memory search for redundancy.
239
238
 
240
239
  **Configuration:**
241
240
  ```bash
@@ -243,9 +242,9 @@ openclaw config set agents.defaults.memorySearch.extraPaths '["~/documents", "~/
243
242
  ```
244
243
 
245
244
  **Tradeoffs:**
246
- - Redundant recall from two independent systems
247
- - 10-15% context window waste from duplicate facts
248
- - Two memory indices to maintain
245
+ - Redundant recall from two independent systems
246
+ - 10-15% context window waste from duplicate facts
247
+ - Two memory indices to maintain
249
248
 
250
249
  **Recommendation:** Use Option 1 unless you have a specific need for redundant memory systems.
251
250
 
@@ -263,7 +262,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
263
262
  | `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + recent decisions (400) + antipatterns (150) + vault context (200) → `<vault-postcompact>` |
264
263
  | `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
265
264
  | `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
266
- | `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, extracts SPO triples from decision/preference/milestone/problem facts. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
265
+ | `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, persists observer-emitted SPO triples via `ensureEntityCanonical` (canonical `vault:type:slug` IDs shared with A-MEM) using the tight predicate vocabulary (adopted, migrated_to, deployed_to, runs_on, replaced, depends_on, integrates_with, uses, prefers, avoids, caused_by, resolved_by, owned_by). Eligible observation types: decision/preference/milestone/problem/discovery/feature. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
267
266
  | `handoff-generator` | Stop | — | LLM summarizes session → `_clawmem/agent/handoffs/` |
268
267
  | `feedback-loop` | Stop | — | tracks referenced notes → boosts confidence, records usage relations + co-activations between co-referenced docs, tracks utility signals (surfaced vs referenced ratio for lifecycle automation), per-turn recall attribution (marks which surfaced docs were cited in which turn) |
269
268
 
package/CLAUDE.md CHANGED
@@ -128,15 +128,15 @@ ln -sf ~/clawmem/bin/clawmem ~/.bun/bin/clawmem
128
128
  clawmem bootstrap ~/notes --name notes
129
129
 
130
130
  # Or step by step:
131
- ./bin/clawmem init
132
- ./bin/clawmem collection add ~/notes --name notes
133
- ./bin/clawmem update --embed
134
- ./bin/clawmem setup hooks
135
- ./bin/clawmem setup mcp
131
+ clawmem init
132
+ clawmem collection add ~/notes --name notes
133
+ clawmem update --embed
134
+ clawmem setup hooks
135
+ clawmem setup mcp
136
136
 
137
137
  # Verify
138
- ./bin/clawmem doctor # Full health check
139
- ./bin/clawmem status # Quick index status
138
+ clawmem doctor # Full health check
139
+ clawmem status # Quick index status
140
140
  ```
141
141
 
142
142
  ### Background Services (systemd user units)
@@ -262,7 +262,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
262
262
  | `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + recent decisions (400) + antipatterns (150) + vault context (200) → `<vault-postcompact>` |
263
263
  | `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
264
264
  | `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
265
- | `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, extracts SPO triples from decision/preference/milestone/problem facts. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
265
+ | `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, persists observer-emitted SPO triples via `ensureEntityCanonical` (canonical `vault:type:slug` IDs shared with A-MEM) using the tight predicate vocabulary (adopted, migrated_to, deployed_to, runs_on, replaced, depends_on, integrates_with, uses, prefers, avoids, caused_by, resolved_by, owned_by). Eligible observation types: decision/preference/milestone/problem/discovery/feature. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
266
266
  | `handoff-generator` | Stop | — | LLM summarizes session → `_clawmem/agent/handoffs/` |
267
267
  | `feedback-loop` | Stop | — | tracks referenced notes → boosts confidence, records usage relations + co-activations between co-referenced docs, tracks utility signals (surfaced vs referenced ratio for lifecycle automation), per-turn recall attribution (marks which surfaced docs were cited in which turn) |
268
268
 
package/README.md CHANGED
@@ -717,7 +717,7 @@ Registered by `clawmem setup mcp`. Available to any MCP-compatible client.
717
717
  |---|---|
718
718
  | `build_graphs` | Build temporal and/or semantic graphs from document corpus |
719
719
  | `find_causal_links` | Trace decision chains: "what led to X", "how we got from A to B". Follow up `intent_search` with this tool on a top result to walk the full causal chain. Traverses causes / caused_by / both up to N hops with depth-annotated reasoning. |
720
- | `kg_query` | Query the SPO knowledge graph: "what does X relate to?", "what was true about X when?". Returns temporal entity-relationship triples with validity windows. Uses entity resolution for lookup. |
720
+ | `kg_query` | Query the SPO knowledge graph: "what does X relate to?", "what was true about X when?". Returns temporal entity-relationship triples with validity windows. Accepts entity name (resolved via `searchEntities`) or canonical ID in `vault:type:slug` form. Triples are populated by the decision-extractor hook from observer-emitted `<triples>` blocks. |
721
721
  | `memory_evolution_status` | Show how a document's A-MEM metadata evolved over time |
722
722
  | `timeline` | Show the temporal neighborhood around a document — what was created/modified before and after it. Progressive disclosure: search → timeline (context) → get (full content). Supports same-collection scoping and session correlation. |
723
723
 
@@ -1073,40 +1073,36 @@ Manual layers benefit from periodic re-indexing — a cron job running `clawmem
1073
1073
  ### Setup
1074
1074
 
1075
1075
  ```bash
1076
- # Bootstrap workspace collection (use your agent's workspace path)
1077
- ./bin/clawmem bootstrap ~/workspace --name workspace
1078
-
1079
- # Bootstrap each project
1080
- ./bin/clawmem bootstrap ~/Projects/my-project --name my-project
1076
+ # Bootstrap a content directory (creates vault + indexes + embeds + installs hooks + MCP)
1077
+ clawmem bootstrap ~/notes --name notes
1081
1078
 
1082
- # Enable auto-embed for real-time indexing
1083
- # Edit ~/.config/clawmem/config.yaml autoEmbed: true
1079
+ # Bootstrap each project you want indexed
1080
+ clawmem bootstrap ~/Projects/my-project --name my-project
1084
1081
 
1085
- # Install watcher as systemd service
1086
- ./bin/clawmem install-service --enable
1082
+ # Install watcher + embed timer as systemd services
1083
+ clawmem install-service --enable
1087
1084
  ```
1088
1085
 
1089
- #### OpenClaw-Specific
1086
+ #### OpenClaw-specific
1090
1087
 
1091
1088
  ```bash
1092
- # OpenClaw uses ~/.openclaw/workspace/ as its workspace root
1093
- ./bin/clawmem bootstrap ~/.openclaw/workspace --name workspace
1089
+ # Install the ContextEngine plugin (auto-symlinks into ~/.openclaw/extensions/)
1090
+ clawmem setup openclaw
1091
+ # Then follow the printed next steps: restart gateway, set slot, configure GPU endpoints
1094
1092
  ```
1095
1093
 
1096
- #### Hermes-Specific
1094
+ Index your content directories with `clawmem bootstrap` as above. The OpenClaw plugin shares the same vault as Claude Code hooks.
1097
1095
 
1098
- ```bash
1099
- # Hermes uses ~/.hermes/ as its home directory
1100
- ./bin/clawmem bootstrap ~/.hermes --name hermes-home
1096
+ #### Hermes-specific
1101
1097
 
1102
- # Install the memory provider plugin
1103
- cp -r src/hermes /path/to/hermes-agent/plugins/memory/clawmem
1098
+ ```bash
1099
+ # Install the memory provider plugin (symlink or copy)
1100
+ ln -s $(npm root -g)/clawmem/src/hermes /path/to/hermes-agent/plugins/memory/clawmem
1104
1101
 
1105
- # Start clawmem serve (external mode)
1102
+ # Start the REST API (required for Hermes tool calls)
1106
1103
  clawmem serve --port 7438 &
1107
1104
 
1108
- # Configure Hermes to use ClawMem
1109
- # In your Hermes config.yaml:
1105
+ # Configure Hermes to use ClawMem (in your Hermes config.yaml):
1110
1106
  # memory:
1111
1107
  # provider: clawmem
1112
1108
  ```
package/SKILL.md CHANGED
@@ -118,15 +118,15 @@ ln -sf ~/clawmem/bin/clawmem ~/.bun/bin/clawmem
118
118
  clawmem bootstrap ~/notes --name notes
119
119
 
120
120
  # Or step by step:
121
- ./bin/clawmem init
122
- ./bin/clawmem collection add ~/notes --name notes
123
- ./bin/clawmem update --embed
124
- ./bin/clawmem setup hooks
125
- ./bin/clawmem setup mcp
121
+ clawmem init
122
+ clawmem collection add ~/notes --name notes
123
+ clawmem update --embed
124
+ clawmem setup hooks
125
+ clawmem setup mcp
126
126
 
127
127
  # Verify
128
- ./bin/clawmem doctor # Full health check
129
- ./bin/clawmem status # Quick index status
128
+ clawmem doctor # Full health check
129
+ clawmem status # Quick index status
130
130
  ```
131
131
 
132
132
  ### Background Services (systemd user units)
@@ -294,7 +294,7 @@ Once escalated, route by query type:
294
294
  | `timeline` | Temporal neighborhood around a document — what was modified before/after. Progressive disclosure: search → timeline → get. Supports same-collection scoping and session correlation. |
295
295
  | `list_vaults` | Show configured vault names and paths. Empty in single-vault mode. |
296
296
  | `vault_sync` | Index markdown from a directory into a named vault. Restricted-path validation rejects sensitive directories. |
297
- | `kg_query` | Query SPO knowledge graph for entity relationships with temporal validity. Uses entity resolution. |
297
+ | `kg_query` | Query SPO knowledge graph for entity relationships with temporal validity. Accepts entity name or canonical ID (`vault:type:slug`). Triples are populated by decision-extractor from observer-emitted `<triples>` blocks using a canonical predicate vocabulary. |
298
298
  | `diary_write` | Write diary entry. Use proactively in non-hooked environments. Do NOT use in Claude Code. |
299
299
  | `diary_read` | Read recent diary entries. Filter by agent name. |
300
300
  | `lifecycle_status` | Document lifecycle statistics: active, archived, forgotten, pinned, snoozed counts and policy summary. |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmem",
3
- "version": "0.8.4",
3
+ "version": "0.8.5",
4
4
  "description": "On-device context engine and memory for AI agents. Claude Code and OpenClaw. Hooks + MCP server + hybrid RAG search.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/amem.ts CHANGED
@@ -649,11 +649,18 @@ export async function postIndexEnrich(
649
649
  }
650
650
 
651
651
  /**
652
- * Observation with document ID for causal inference
652
+ * Observation with document ID for causal inference and SPO triple extraction.
653
+ *
654
+ * Populated by the decision-extractor hook after an observation is successfully
655
+ * persisted. Consumed by:
656
+ * - `inferCausalLinks` (A-MEM) — uses docId + facts
657
+ * - `insertObservationTriples` (decision-extractor) — uses docId + obsType + triples
653
658
  */
654
659
  export interface ObservationWithDoc {
655
660
  docId: number;
656
661
  facts: string[];
662
+ obsType?: string;
663
+ triples?: Array<{ subject: string; predicate: string; object: string }>;
657
664
  }
658
665
 
659
666
  /**
package/src/entity.ts CHANGED
@@ -354,6 +354,69 @@ export function resolveEntityCanonical(
354
354
  // Entity Storage + Mentions + Co-occurrences
355
355
  // =============================================================================
356
356
 
357
+ /**
358
+ * Resolve the entity_type for a name via exact case-insensitive match.
359
+ *
360
+ * Returns the type only when EXACTLY ONE active entity in the given vault shares
361
+ * the name. Zero matches → null (caller should default to a safe type). Multiple
362
+ * matches (ambiguous across buckets, e.g. "Alice" as person AND "Alice" as project)
363
+ * → null so the caller falls back to a safe default instead of arbitrarily picking.
364
+ *
365
+ * Exact match only — no fuzzy matching — to avoid false inheritance on near-names.
366
+ */
367
+ export function resolveEntityTypeExact(
368
+ db: Database,
369
+ name: string,
370
+ vault: string = 'default'
371
+ ): string | null {
372
+ const rows = db.prepare(`
373
+ SELECT DISTINCT entity_type FROM entity_nodes
374
+ WHERE LOWER(name) = LOWER(?) AND vault = ?
375
+ `).all(name, vault) as Array<{ entity_type: string }>;
376
+
377
+ if (rows.length !== 1) return null; // zero or ambiguous
378
+ return rows[0]!.entity_type;
379
+ }
380
+
381
+ /**
382
+ * Resolve-or-create a canonical entity without incrementing mention_count.
383
+ *
384
+ * Used by consumers that reference an entity but do NOT constitute a document
385
+ * mention (e.g. SPO triple extraction). Semantically distinct from upsertEntity,
386
+ * which treats every call as a doc mention and inflates the count.
387
+ *
388
+ * Flow: resolveEntityCanonical (FTS5 + fuzzy + bucket match) → reuse if found,
389
+ * otherwise mint a new canonical `vault:type:slug` entity with mention_count = 0.
390
+ *
391
+ * Returns the entity_id.
392
+ */
393
+ export function ensureEntityCanonical(
394
+ db: Database,
395
+ name: string,
396
+ type: string,
397
+ vault: string = 'default'
398
+ ): string {
399
+ const canonicalId = resolveEntityCanonical(db, name, type, vault);
400
+ if (canonicalId) return canonicalId;
401
+
402
+ const entityId = makeEntityId(name, type, vault);
403
+ db.prepare(`
404
+ INSERT OR IGNORE INTO entity_nodes (entity_id, entity_type, name, description, created_at, mention_count, last_seen, vault)
405
+ VALUES (?, ?, ?, NULL, datetime('now'), 0, datetime('now'), ?)
406
+ `).run(entityId, type, name, vault);
407
+
408
+ try {
409
+ db.prepare(`
410
+ INSERT OR IGNORE INTO entities_fts (entity_id, name, entity_type)
411
+ VALUES (?, ?, ?)
412
+ `).run(entityId, name.toLowerCase(), type);
413
+ } catch {
414
+ // FTS insert may fail if table doesn't exist yet — non-fatal
415
+ }
416
+
417
+ return entityId;
418
+ }
419
+
357
420
  /**
358
421
  * Upsert an entity into entity_nodes and entities_fts.
359
422
  * Returns the entity_id (canonical or new).
@@ -17,13 +17,23 @@ import {
17
17
  validateTranscriptPath,
18
18
  } from "../hooks.ts";
19
19
  import { hashContent } from "../indexer.ts";
20
- import { extractObservations, type Observation } from "../observer.ts";
20
+ import { extractObservations, type Observation, LITERAL_PREDICATES } from "../observer.ts";
21
21
  import { updateDirectoryContext } from "../directory-context.ts";
22
22
  import { loadConfig } from "../collections.ts";
23
23
  import { getDefaultLlamaCpp } from "../llm.ts";
24
24
  import type { ObservationWithDoc } from "../amem.ts";
25
25
  import { extractJsonFromLLM } from "../amem.ts";
26
26
  import { DEFAULT_EMBED_MODEL, extractSnippet, type SearchResult } from "../store.ts";
27
+ import { ensureEntityCanonical, resolveEntityTypeExact } from "../entity.ts";
28
+
29
+ // Observation types that are allowed to contribute SPO triples. Widened from the
30
+ // original {decision, preference, milestone, problem} gate, which rejected 77% of
31
+ // real observations in production vaults (the majority type is 'discovery').
32
+ // See BACKLOG.md §1.6 for the full diagnosis.
33
+ const SPO_ELIGIBLE_OBSERVATION_TYPES = new Set<Observation["type"]>([
34
+ "decision", "preference", "milestone", "problem",
35
+ "discovery", "feature",
36
+ ]);
27
37
 
28
38
  // =============================================================================
29
39
  // Facet-Based Merge Policy
@@ -325,42 +335,8 @@ export async function decisionExtractor(
325
335
  const observationsWithDocs: ObservationWithDoc[] = [];
326
336
  if (observations.length > 0) {
327
337
  for (const obs of observations) {
328
- const obsPath = `observations/${dateStr}-${sessionId.slice(0, 8)}-${obs.type}.md`;
329
- const obsBody = formatObservation(obs, dateStr, sessionId);
330
- const obsHash = hashContent(obsBody);
331
-
332
- store.insertContent(obsHash, obsBody, timestamp);
333
- try {
334
- store.insertDocument("_clawmem", obsPath, obs.title, obsHash, timestamp, timestamp);
335
- const doc = store.findActiveDocument("_clawmem", obsPath);
336
- if (doc) {
337
- store.updateDocumentMeta(doc.id, {
338
- content_type: obs.type === "decision" ? "decision"
339
- : obs.type === "preference" ? "preference"
340
- : obs.type === "milestone" ? "milestone"
341
- : obs.type === "problem" ? "problem"
342
- : "observation",
343
- confidence: 0.80,
344
- });
345
- store.updateObservationFields(obsPath, "_clawmem", {
346
- observation_type: obs.type,
347
- facts: JSON.stringify(obs.facts),
348
- narrative: obs.narrative,
349
- concepts: JSON.stringify(obs.concepts),
350
- files_read: JSON.stringify(obs.filesRead),
351
- files_modified: JSON.stringify(obs.filesModified),
352
- });
353
-
354
- if (obs.facts.length > 0) {
355
- observationsWithDocs.push({
356
- docId: doc.id,
357
- facts: obs.facts,
358
- });
359
- }
360
- }
361
- } catch {
362
- // May already exist
363
- }
338
+ const wit = persistObservationDoc(store, obs, sessionId, dateStr, timestamp);
339
+ if (wit) observationsWithDocs.push(wit);
364
340
  }
365
341
 
366
342
  // Infer causal links from observations with facts
@@ -375,31 +351,12 @@ export async function decisionExtractor(
375
351
  }
376
352
  }
377
353
 
378
- // Extract SPO triples from observation facts (preference/decision types get priority)
379
- for (const obs of observations) {
380
- if (!obs.facts || obs.facts.length === 0) continue;
381
- for (const fact of obs.facts) {
382
- const triple = extractTripleFromFact(fact, obs.type);
383
- if (triple) {
384
- try {
385
- store.db.prepare(
386
- "INSERT OR IGNORE INTO entity_nodes (entity_id, name, entity_type, created_at) VALUES (?, ?, ?, ?)"
387
- ).run(triple.subjectId, triple.subject, "auto", new Date().toISOString());
388
- if (triple.objectId) {
389
- store.db.prepare(
390
- "INSERT OR IGNORE INTO entity_nodes (entity_id, name, entity_type, created_at) VALUES (?, ?, ?, ?)"
391
- ).run(triple.objectId, triple.object, "auto", new Date().toISOString());
392
- }
393
- store.addTriple(triple.subjectId, triple.predicate, triple.objectId, triple.objectId ? null : triple.object, {
394
- confidence: obs.type === "decision" || obs.type === "preference" ? 0.9 : 0.7,
395
- sourceFact: fact,
396
- });
397
- } catch {
398
- // Triple insertion errors are non-fatal
399
- }
400
- }
401
- }
402
- }
354
+ // Extract SPO triples from observation-emitted <triples> blocks.
355
+ // The regex-based extractTripleFromFact is gone — the observer LLM now emits
356
+ // structured triples alongside facts, parsed and validated in parseObservationXml.
357
+ // We iterate observationsWithDocs (not raw observations) so every triple gets
358
+ // real source_doc_id provenance from the persisted observation document.
359
+ insertObservationTriples(store, observations, observationsWithDocs);
403
360
  }
404
361
 
405
362
  // Extract decisions (observer-first, regex fallback)
@@ -691,67 +648,140 @@ function formatObservation(obs: Observation, dateStr: string, sessionId: string)
691
648
  }
692
649
 
693
650
  // =============================================================================
694
- // SPO Triple Extraction from Facts
651
+ // Observation persistence
695
652
  // =============================================================================
696
653
 
697
- type ExtractedTriple = {
698
- subject: string;
699
- subjectId: string;
700
- predicate: string;
701
- object: string;
702
- objectId: string | null;
703
- };
654
+ /**
655
+ * Persist a single observation as a `_clawmem` document and return an
656
+ * `ObservationWithDoc` for downstream consumers (causal inference + SPO
657
+ * triples).
658
+ *
659
+ * Path format: `observations/${date}-${session8}-${type}-${hash8}.md`. The
660
+ * 8-char hash slice (SHA256 of the formatted body) disambiguates multiple
661
+ * observations of the same type within a single session — without it, the
662
+ * second insert hits the `UNIQUE(collection, path)` constraint, is silently
663
+ * dropped, and its triples never reach `entity_triples`. See Codex Turn 3
664
+ * for the regression this guards against.
665
+ *
666
+ * Returns null when the doc cannot be looked up after insert OR when the
667
+ * observation has no facts (triples without facts wouldn't survive the
668
+ * causal-links/facts filter downstream).
669
+ */
670
+ export function persistObservationDoc(
671
+ store: Store,
672
+ obs: Observation,
673
+ sessionId: string,
674
+ dateStr: string,
675
+ timestamp: string
676
+ ): ObservationWithDoc | null {
677
+ const obsBody = formatObservation(obs, dateStr, sessionId);
678
+ const obsHash = hashContent(obsBody);
679
+ const obsPath = `observations/${dateStr}-${sessionId.slice(0, 8)}-${obs.type}-${obsHash.slice(0, 8)}.md`;
680
+
681
+ store.insertContent(obsHash, obsBody, timestamp);
682
+ try {
683
+ store.insertDocument("_clawmem", obsPath, obs.title, obsHash, timestamp, timestamp);
684
+ const doc = store.findActiveDocument("_clawmem", obsPath);
685
+ if (!doc) return null;
686
+
687
+ store.updateDocumentMeta(doc.id, {
688
+ content_type: obs.type === "decision" ? "decision"
689
+ : obs.type === "preference" ? "preference"
690
+ : obs.type === "milestone" ? "milestone"
691
+ : obs.type === "problem" ? "problem"
692
+ : "observation",
693
+ confidence: 0.80,
694
+ });
695
+ store.updateObservationFields(obsPath, "_clawmem", {
696
+ observation_type: obs.type,
697
+ facts: JSON.stringify(obs.facts),
698
+ narrative: obs.narrative,
699
+ concepts: JSON.stringify(obs.concepts),
700
+ files_read: JSON.stringify(obs.filesRead),
701
+ files_modified: JSON.stringify(obs.filesModified),
702
+ });
704
703
 
705
- function toEntityId(name: string): string {
706
- return name.toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/^_|_$/g, "");
704
+ if (obs.facts.length === 0) return null;
705
+ return {
706
+ docId: doc.id,
707
+ facts: obs.facts,
708
+ obsType: obs.type,
709
+ triples: obs.triples,
710
+ };
711
+ } catch (err) {
712
+ console.log(`[decision-extractor] Failed to persist observation ${obs.type}/${obs.title}:`, err);
713
+ return null;
714
+ }
707
715
  }
708
716
 
709
- function extractTripleFromFact(fact: string, obsType: string): ExtractedTriple | null {
710
- // Only extract from decision/preference/milestone/problem types — skip noisy bugfix/feature/change facts
711
- if (!["decision", "preference", "milestone", "problem"].includes(obsType)) return null;
717
+ // =============================================================================
718
+ // SPO Triple Extraction from Facts
719
+ // =============================================================================
712
720
 
713
- // Conservative verb patterns — only clear relational predicates
714
- const verbPatterns = [
715
- /^(.+?)\s+(chose|selected|switched to|migrated to|adopted)\s+(.+?)\.?$/i,
716
- /^(.+?)\s+(deployed to|runs on|hosted on|installed on)\s+(.+?)\.?$/i,
717
- /^(.+?)\s+(replaced|superseded|deprecated)\s+(.+?)\.?$/i,
718
- /^(.+?)\s+(depends on|integrates with|connects to)\s+(.+?)\.?$/i,
719
- ];
721
+ /**
722
+ * Insert SPO triples emitted by the observer into `entity_triples`.
723
+ *
724
+ * Uses canonical vault:type:slug entity IDs via `ensureEntityCanonical` so the
725
+ * knowledge graph stays in one namespace with A-MEM entities. Type inheritance
726
+ * is exact-match-only and ambiguity-safe: if a name resolves to exactly one type
727
+ * already in `entity_nodes`, inherit it; otherwise default to `concept`.
728
+ *
729
+ * Provenance: every triple carries `source_doc_id` from the persisted observation
730
+ * document. Iterates `observationsWithDocs` directly so triples from observations
731
+ * whose doc insert failed are naturally skipped — no order-matching gymnastics.
732
+ */
733
+ function insertObservationTriples(
734
+ store: Store,
735
+ _observations: Observation[],
736
+ observationsWithDocs: ObservationWithDoc[]
737
+ ): void {
738
+ if (observationsWithDocs.length === 0) return;
739
+
740
+ // Per-invocation cache keyed on (vault, normalizedName, resolvedType) to avoid
741
+ // redundant SQL for repeated entity references within a single extraction.
742
+ const vault = "default";
743
+ const cache = new Map<string, string>();
744
+
745
+ const resolveEntity = (name: string, type: string): string => {
746
+ const key = `${vault}:${type}:${name.toLowerCase().trim()}`;
747
+ const cached = cache.get(key);
748
+ if (cached) return cached;
749
+ const id = ensureEntityCanonical(store.db, name, type, vault);
750
+ cache.set(key, id);
751
+ return id;
752
+ };
753
+
754
+ for (const wit of observationsWithDocs) {
755
+ if (!wit.triples || wit.triples.length === 0) continue;
756
+ const obsType = wit.obsType as Observation["type"] | undefined;
757
+ if (!obsType || !SPO_ELIGIBLE_OBSERVATION_TYPES.has(obsType)) continue;
758
+
759
+ const confidence = obsType === "decision" || obsType === "preference" ? 0.9 : 0.7;
760
+
761
+ for (const triple of wit.triples) {
762
+ try {
763
+ const subjectType = resolveEntityTypeExact(store.db, triple.subject, vault) ?? "concept";
764
+ const subjectId = resolveEntity(triple.subject, subjectType);
720
765
 
721
- for (const pattern of verbPatterns) {
722
- const match = fact.match(pattern);
723
- if (match) {
724
- const subject = match[1]!.trim();
725
- const predicate = match[2]!.trim();
726
- const object = match[3]!.trim();
727
-
728
- // Reject subjects/objects that look like sentences rather than entity names
729
- if (subject.length < 3 || object.length < 3 || subject.length > 60 || object.length > 60) continue;
730
- if (subject.includes(",") || object.includes(",")) continue; // likely a clause, not an entity
731
-
732
- return {
733
- subject,
734
- subjectId: toEntityId(subject),
735
- predicate: predicate.toLowerCase().replace(/\s+/g, "_"),
736
- object,
737
- objectId: toEntityId(object),
738
- };
739
- }
740
- }
766
+ let objectId: string | null = null;
767
+ let objectLiteral: string | null = null;
768
+
769
+ if (LITERAL_PREDICATES.has(triple.predicate)) {
770
+ objectLiteral = triple.object;
771
+ } else {
772
+ const objectType = resolveEntityTypeExact(store.db, triple.object, vault) ?? "concept";
773
+ objectId = resolveEntity(triple.object, objectType);
774
+ }
741
775
 
742
- // Preference facts only: "User prefers X" / "Prefers X"
743
- if (obsType === "preference") {
744
- const prefMatch = fact.match(/^(?:user\s+)?(?:prefers?|avoids?)\s+(.+?)\.?$/i);
745
- if (prefMatch && prefMatch[1]!.trim().length > 2) {
746
- return {
747
- subject: "user",
748
- subjectId: "user",
749
- predicate: "prefers",
750
- object: prefMatch[1]!.trim(),
751
- objectId: null, // literal, not entity
752
- };
776
+ store.addTriple(subjectId, triple.predicate, objectId, objectLiteral, {
777
+ confidence,
778
+ sourceFact: `${triple.subject} ${triple.predicate} ${triple.object}`,
779
+ sourceDocId: wit.docId,
780
+ });
781
+ } catch (err) {
782
+ // Triple insertion errors are non-fatal — log at debug
783
+ console.log(`[decision-extractor] Failed to insert triple ${triple.subject}/${triple.predicate}/${triple.object}:`, err);
784
+ }
753
785
  }
754
786
  }
755
-
756
- return null;
757
787
  }
package/src/mcp.ts CHANGED
@@ -1930,9 +1930,9 @@ This is the recommended entry point for ALL memory queries.`,
1930
1930
  "kg_query",
1931
1931
  {
1932
1932
  title: "Knowledge Graph Query",
1933
- description: "Query the knowledge graph for an entity's relationships. Returns structured facts with temporal validity (valid_from/valid_to). Use for 'what does X relate to?', 'what was true about X on date Y?', 'who/what is connected to X?'.",
1933
+ description: "Query the knowledge graph for an entity's relationships. Returns structured facts with temporal validity (valid_from/valid_to). Use for 'what does X relate to?', 'what was true about X on date Y?', 'who/what is connected to X?'. Accepts an entity name (e.g. 'ClawMem') OR a canonical entity ID in the form 'vault:type:slug' (e.g. 'default:service:clawmem').",
1934
1934
  inputSchema: {
1935
- entity: z.string().describe("Entity name or ID to query"),
1935
+ entity: z.string().describe("Entity name or canonical ID ('vault:type:slug') to query"),
1936
1936
  as_of: z.string().optional().describe("Date filter (YYYY-MM-DD) — only facts valid at this date"),
1937
1937
  direction: z.enum(["outgoing", "incoming", "both"]).optional().default("both").describe("Relationship direction"),
1938
1938
  vault: z.string().optional().describe("Named vault (omit for default vault)"),
@@ -1941,17 +1941,30 @@ This is the recommended entry point for ALL memory queries.`,
1941
1941
  async ({ entity, as_of, direction, vault }) => {
1942
1942
  const store = getStore(vault);
1943
1943
 
1944
+ // Canonical IDs look like `vault:type:slug` — accept them directly so callers
1945
+ // that already resolved an entity can round-trip its ID without losing it to
1946
+ // a name-search fallback that would never match.
1947
+ const CANONICAL_ID_RE = /^[a-z][a-z0-9-]*:[a-z_]+:[a-z0-9_]+$/;
1948
+
1944
1949
  const entityResults = store.searchEntities(entity, 1);
1945
- const entityId = entityResults.length > 0
1946
- ? entityResults[0]!.entity_id
1947
- : entity.toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/^_|_$/g, "");
1950
+ let entityId: string;
1951
+ if (entityResults.length > 0) {
1952
+ entityId = entityResults[0]!.entity_id;
1953
+ } else if (CANONICAL_ID_RE.test(entity)) {
1954
+ entityId = entity; // caller passed a canonical ID directly
1955
+ } else {
1956
+ const stats = store.getTripleStats();
1957
+ return {
1958
+ content: [{ type: "text", text: `No entity found matching "${entity}". The KG has ${stats.totalTriples} total triples (${stats.currentFacts} current). Try a shorter/broader name, or pass a canonical ID in the form 'vault:type:slug'.` }],
1959
+ };
1960
+ }
1948
1961
 
1949
1962
  const triples = store.queryEntityTriples(entityId, { asOf: as_of, direction });
1950
1963
  const stats = store.getTripleStats();
1951
1964
 
1952
1965
  if (triples.length === 0) {
1953
1966
  return {
1954
- content: [{ type: "text", text: `No knowledge graph facts found for "${entity}". The KG has ${stats.totalTriples} total triples (${stats.currentFacts} current).` }],
1967
+ content: [{ type: "text", text: `No knowledge graph facts found for "${entity}" (resolved to ${entityId}). The KG has ${stats.totalTriples} total triples (${stats.currentFacts} current).` }],
1955
1968
  };
1956
1969
  }
1957
1970
 
package/src/observer.ts CHANGED
@@ -22,6 +22,13 @@ export type Observation = {
22
22
  concepts: string[];
23
23
  filesRead: string[];
24
24
  filesModified: string[];
25
+ triples?: ParsedTriple[];
26
+ };
27
+
28
+ export type ParsedTriple = {
29
+ subject: string;
30
+ predicate: string;
31
+ object: string;
25
32
  };
26
33
 
27
34
  export type SessionSummary = {
@@ -48,28 +55,54 @@ const GENERATION_TEMPERATURE = 0.3;
48
55
  // =============================================================================
49
56
 
50
57
  const OBSERVATION_SYSTEM_PROMPT = `You are an observer analyzing a coding session transcript. Extract structured observations.
51
- For each significant action, decision, or discovery, output an <observation> XML element.
58
+ For each significant action, decision, or discovery, output an <observation> XML element with the structure below.
52
59
 
60
+ Structure:
53
61
  <observation>
54
- <type>one of: decision, bugfix, feature, refactor, discovery, change, preference, milestone, problem</type>
55
- <title>Brief descriptive title (max 80 chars)</title>
62
+ <type>...</type>
63
+ <title>...</title>
56
64
  <facts>
57
- <fact>Individual atomic fact</fact>
65
+ <fact>...</fact>
58
66
  </facts>
59
- <narrative>2-3 sentences explaining context and reasoning</narrative>
67
+ <triples>
68
+ <triple>
69
+ <subject>...</subject>
70
+ <predicate>...</predicate>
71
+ <object>...</object>
72
+ </triple>
73
+ </triples>
74
+ <narrative>...</narrative>
60
75
  <concepts>
61
- <concept>one of: how-it-works, why-it-exists, what-changed, problem-solution, gotcha, pattern, trade-off</concept>
76
+ <concept>...</concept>
62
77
  </concepts>
63
- <files_read><file>path/to/file</file></files_read>
64
- <files_modified><file>path/to/file</file></files_modified>
78
+ <files_read><file>...</file></files_read>
79
+ <files_modified><file>...</file></files_modified>
65
80
  </observation>
66
81
 
67
- Rules:
82
+ Field rules:
83
+ - <type>: one of decision, bugfix, feature, refactor, discovery, change, preference, milestone, problem
84
+ - <title>: brief descriptive title, max 80 chars
85
+ - <facts>: 1-5 <fact> elements, each a standalone atomic claim about what happened or what is true (concrete, specific, no schema placeholders or template text)
86
+ - <triples>: 0-3 <triple> elements for structural relationships between named entities (see predicate vocabulary below). Omit entirely if no relational claims apply. Do NOT emit triples for descriptive facts — only for explicit S-P-O relations.
87
+ - <narrative>: 2-3 sentences explaining WHY something was done, not just WHAT
88
+ - <concepts>: 0-3 <concept> elements from: how-it-works, why-it-exists, what-changed, problem-solution, gotcha, pattern, trade-off
89
+ - <files_read>, <files_modified>: only files explicitly mentioned in the transcript
90
+
91
+ Predicate vocabulary (use EXACTLY these predicates in <predicate>, nothing else):
92
+ - adopted, migrated_to — switching to a new tool/framework/approach
93
+ - deployed_to, runs_on — where something runs
94
+ - replaced — when one thing supersedes another
95
+ - depends_on, integrates_with, uses — structural dependencies
96
+ - prefers, avoids — user preferences (use for <subject>user</subject>)
97
+ - caused_by, resolved_by — causal relationships between problems and fixes
98
+ - owned_by — responsibility / ownership
99
+
100
+ <subject> and <object> must be short canonical entity names (2-80 chars). No sentences. No placeholder text. If you cannot fit a claim into this vocabulary, keep it in <facts> instead and omit the triple.
101
+
102
+ Observation rules:
68
103
  - Output 1-5 observations, focusing on the MOST significant events
69
- - Each fact should be a standalone, atomic piece of information
70
- - The narrative should explain WHY something was done, not just WHAT
71
- - Only include files that were explicitly mentioned in the transcript
72
104
  - If no significant observations, output nothing
105
+ - Never use schema example text or template placeholders in <fact>, <subject>, or <object> — emit only real content extracted from the transcript
73
106
 
74
107
  Type guidance:
75
108
  - preference: user expresses a preference, habit, or way of working (e.g., "don't use subagents for this", "I prefer single PRs")
@@ -131,6 +164,47 @@ const VALID_CONCEPTS = new Set([
131
164
  "gotcha", "pattern", "trade-off",
132
165
  ]);
133
166
 
167
+ // Canonical SPO predicate vocabulary — parser rejects anything outside this set.
168
+ // Must stay in sync with the predicate list in OBSERVATION_SYSTEM_PROMPT.
169
+ export const VALID_PREDICATES = new Set([
170
+ "adopted", "migrated_to",
171
+ "deployed_to", "runs_on",
172
+ "replaced",
173
+ "depends_on", "integrates_with", "uses",
174
+ "prefers", "avoids",
175
+ "caused_by", "resolved_by",
176
+ "owned_by",
177
+ ]);
178
+
179
+ // Predicates whose <object> should be stored as a literal (not resolved to an entity).
180
+ export const LITERAL_PREDICATES = new Set(["prefers", "avoids"]);
181
+
182
+ // Exact placeholder strings that must never be persisted as facts or triple components.
183
+ // Defense-in-depth: even though the prompt no longer places example text inside
184
+ // <fact>/<subject>/<object> tags, a weak model could still echo these phrases.
185
+ const SCHEMA_PLACEHOLDER_STRINGS = new Set([
186
+ "individual atomic fact",
187
+ "atomic fact",
188
+ "one atomic claim per fact element",
189
+ "brief descriptive title",
190
+ "canonical entity name",
191
+ ]);
192
+
193
+ // Regex for template placeholder markers: {{...}}, <!--...-->, ${...}.
194
+ // Intentionally narrow — earlier drafts rejected any line starting with
195
+ // "example:" / "placeholder:", which false-positived legitimate facts like
196
+ // "Example: QMD switched to Bun in v0.2". Shape-only matching avoids that
197
+ // drift; the exact-string blocklist above handles known echoed placeholders.
198
+ const PLACEHOLDER_REGEX = /^(\{\{.*\}\}|<!--.*-->|\$\{.*\})/;
199
+
200
+ function isSchemaPlaceholder(text: string): boolean {
201
+ if (!text) return true;
202
+ const normalized = text.trim().toLowerCase();
203
+ if (SCHEMA_PLACEHOLDER_STRINGS.has(normalized)) return true;
204
+ if (PLACEHOLDER_REGEX.test(normalized)) return true;
205
+ return false;
206
+ }
207
+
134
208
  export function parseObservationXml(xml: string): Observation | null {
135
209
  const typeMatch = xml.match(/<type>\s*(.*?)\s*<\/type>/s);
136
210
  const titleMatch = xml.match(/<title>\s*(.*?)\s*<\/title>/s);
@@ -141,24 +215,67 @@ export function parseObservationXml(xml: string): Observation | null {
141
215
  const type = typeMatch[1].trim().toLowerCase();
142
216
  if (!VALID_OBSERVATION_TYPES.has(type)) return null;
143
217
 
144
- const facts = extractMultiple(xml, "fact");
218
+ const rawTitle = titleMatch[1].trim();
219
+ if (isSchemaPlaceholder(rawTitle)) return null;
220
+
221
+ const facts = extractMultiple(xml, "fact")
222
+ .filter(f => f.length >= 5)
223
+ .filter(f => !isSchemaPlaceholder(f));
224
+
145
225
  const concepts = extractMultiple(xml, "concept")
146
226
  .filter(c => VALID_CONCEPTS.has(c.toLowerCase()))
147
227
  .map(c => c.toLowerCase());
148
228
  const filesRead = extractMultiple(xml, "file", "files_read");
149
229
  const filesModified = extractMultiple(xml, "file", "files_modified");
150
230
 
231
+ // Parse triples (Fix A): strict validation against canonical predicate vocabulary.
232
+ // Missing/malformed triples are silently dropped — fail-closed on ambiguity.
233
+ const triples = extractTriples(xml);
234
+
151
235
  return {
152
236
  type: type as Observation["type"],
153
- title: titleMatch[1].trim().slice(0, 80),
154
- facts: facts.filter(f => f.length >= 5),
237
+ title: rawTitle.slice(0, 80),
238
+ facts,
155
239
  narrative: narrativeMatch?.[1]?.trim() || "",
156
240
  concepts,
157
241
  filesRead,
158
242
  filesModified,
243
+ triples: triples.length > 0 ? triples : undefined,
159
244
  };
160
245
  }
161
246
 
247
+ function extractTriples(xml: string): ParsedTriple[] {
248
+ const parentMatch = xml.match(/<triples>([\s\S]*?)<\/triples>/s);
249
+ if (!parentMatch?.[1]) return [];
250
+
251
+ const blockRegex = /<triple>([\s\S]*?)<\/triple>/g;
252
+ const results: ParsedTriple[] = [];
253
+ let match;
254
+ while ((match = blockRegex.exec(parentMatch[1])) !== null) {
255
+ const block = match[1] ?? "";
256
+ const subject = block.match(/<subject>\s*(.*?)\s*<\/subject>/s)?.[1]?.trim();
257
+ const rawPredicate = block.match(/<predicate>\s*(.*?)\s*<\/predicate>/s)?.[1]?.trim();
258
+ const object = block.match(/<object>\s*(.*?)\s*<\/object>/s)?.[1]?.trim();
259
+
260
+ if (!subject || !rawPredicate || !object) continue;
261
+
262
+ const predicate = rawPredicate.toLowerCase().replace(/\s+/g, "_");
263
+ if (!VALID_PREDICATES.has(predicate)) continue;
264
+
265
+ // Length bounds — guards against sentence-shaped subjects/objects that the
266
+ // regex-era tests expected. Subject and object should be short canonical names.
267
+ if (subject.length < 2 || subject.length > 80) continue;
268
+ if (object.length < 2 || object.length > 120) continue;
269
+
270
+ if (isSchemaPlaceholder(subject) || isSchemaPlaceholder(object)) continue;
271
+
272
+ results.push({ subject, predicate, object });
273
+
274
+ if (results.length >= 5) break; // cap per observation
275
+ }
276
+ return results;
277
+ }
278
+
162
279
  export function parseSummaryXml(xml: string): SessionSummary | null {
163
280
  const request = extractSingle(xml, "request");
164
281
  const investigated = extractSingle(xml, "investigated");