clawmem 0.8.3 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -128,15 +128,15 @@ ln -sf ~/clawmem/bin/clawmem ~/.bun/bin/clawmem
128
128
  clawmem bootstrap ~/notes --name notes
129
129
 
130
130
  # Or step by step:
131
- ./bin/clawmem init
132
- ./bin/clawmem collection add ~/notes --name notes
133
- ./bin/clawmem update --embed
134
- ./bin/clawmem setup hooks
135
- ./bin/clawmem setup mcp
131
+ clawmem init
132
+ clawmem collection add ~/notes --name notes
133
+ clawmem update --embed
134
+ clawmem setup hooks
135
+ clawmem setup mcp
136
136
 
137
137
  # Verify
138
- ./bin/clawmem doctor # Full health check
139
- ./bin/clawmem status # Quick index status
138
+ clawmem doctor # Full health check
139
+ clawmem status # Quick index status
140
140
  ```
141
141
 
142
142
  ### Background Services (systemd user units)
@@ -206,18 +206,17 @@ systemctl --user status clawmem-watcher.service clawmem-embed.timer
206
206
 
207
207
  When using ClawMem with OpenClaw, choose one of two deployment options:
208
208
 
209
- ### Option 1: ClawMem Exclusive (Recommended)
209
+ **Active Memory coexistence:** ClawMem is fully compatible with OpenClaw's Active Memory plugin (v2026.4.10+). They search different backends (ClawMem vault vs dreaming/wiki) and inject into different prompt regions (user prompt vs system prompt). Both can run simultaneously — no configuration needed.
210
+
211
+ **OpenClaw v2026.4.10+ recommended:** Fixes a config normalization bug where `plugins.slots.contextEngine` was silently dropped (#64192).
210
212
 
211
- ClawMem handles 100% of memory operations via hooks + MCP tools. Zero redundancy.
213
+ ### Option 1: ClawMem Exclusive (Recommended)
212
214
 
213
- **Benefits:**
214
- - No context window waste (avoids 10-15% duplicate injection)
215
- - Prevents OpenClaw native memory auto-initialization on updates
216
- - All memory in ClawMem's hybrid search + graph traversal system
215
+ ClawMem handles 100% of structured memory. Disable native memory search (not Active Memory — that's separate and compatible):
217
216
 
218
217
  **Configuration:**
219
218
  ```bash
220
- # Disable OpenClaw's native memory
219
+ # Disable OpenClaw's native memory search
221
220
  openclaw config set agents.defaults.memorySearch.extraPaths "[]"
222
221
 
223
222
  # Verify
@@ -235,7 +234,7 @@ ls ~/.openclaw/agents/main/memory/
235
234
 
236
235
  ### Option 2: Hybrid (ClawMem + Native)
237
236
 
238
- Run both ClawMem and OpenClaw's native memory for redundancy.
237
+ Run both ClawMem and OpenClaw's native memory search for redundancy.
239
238
 
240
239
  **Configuration:**
241
240
  ```bash
@@ -243,9 +242,9 @@ openclaw config set agents.defaults.memorySearch.extraPaths '["~/documents", "~/
243
242
  ```
244
243
 
245
244
  **Tradeoffs:**
246
- - Redundant recall from two independent systems
247
- - 10-15% context window waste from duplicate facts
248
- - Two memory indices to maintain
245
+ - Redundant recall from two independent systems
246
+ - 10-15% context window waste from duplicate facts
247
+ - Two memory indices to maintain
249
248
 
250
249
  **Recommendation:** Use Option 1 unless you have a specific need for redundant memory systems.
251
250
 
@@ -263,7 +262,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
263
262
  | `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + recent decisions (400) + antipatterns (150) + vault context (200) → `<vault-postcompact>` |
264
263
  | `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
265
264
  | `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
266
- | `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, extracts SPO triples from decision/preference/milestone/problem facts. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
265
+ | `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, persists observer-emitted SPO triples via `ensureEntityCanonical` (canonical `vault:type:slug` IDs shared with A-MEM) using the tight predicate vocabulary (adopted, migrated_to, deployed_to, runs_on, replaced, depends_on, integrates_with, uses, prefers, avoids, caused_by, resolved_by, owned_by). Eligible observation types: decision/preference/milestone/problem/discovery/feature. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
267
266
  | `handoff-generator` | Stop | — | LLM summarizes session → `_clawmem/agent/handoffs/` |
268
267
  | `feedback-loop` | Stop | — | tracks referenced notes → boosts confidence, records usage relations + co-activations between co-referenced docs, tracks utility signals (surfaced vs referenced ratio for lifecycle automation), per-turn recall attribution (marks which surfaced docs were cited in which turn) |
269
268
 
package/CLAUDE.md CHANGED
@@ -128,15 +128,15 @@ ln -sf ~/clawmem/bin/clawmem ~/.bun/bin/clawmem
128
128
  clawmem bootstrap ~/notes --name notes
129
129
 
130
130
  # Or step by step:
131
- ./bin/clawmem init
132
- ./bin/clawmem collection add ~/notes --name notes
133
- ./bin/clawmem update --embed
134
- ./bin/clawmem setup hooks
135
- ./bin/clawmem setup mcp
131
+ clawmem init
132
+ clawmem collection add ~/notes --name notes
133
+ clawmem update --embed
134
+ clawmem setup hooks
135
+ clawmem setup mcp
136
136
 
137
137
  # Verify
138
- ./bin/clawmem doctor # Full health check
139
- ./bin/clawmem status # Quick index status
138
+ clawmem doctor # Full health check
139
+ clawmem status # Quick index status
140
140
  ```
141
141
 
142
142
  ### Background Services (systemd user units)
@@ -206,18 +206,17 @@ systemctl --user status clawmem-watcher.service clawmem-embed.timer
206
206
 
207
207
  When using ClawMem with OpenClaw, choose one of two deployment options:
208
208
 
209
- ### Option 1: ClawMem Exclusive (Recommended)
209
+ **Active Memory coexistence:** ClawMem is fully compatible with OpenClaw's Active Memory plugin (v2026.4.10+). They search different backends (ClawMem vault vs dreaming/wiki) and inject into different prompt regions (user prompt vs system prompt). Both can run simultaneously — no configuration needed.
210
+
211
+ **OpenClaw v2026.4.10+ recommended:** Fixes a config normalization bug where `plugins.slots.contextEngine` was silently dropped (#64192).
210
212
 
211
- ClawMem handles 100% of memory operations via hooks + MCP tools. Zero redundancy.
213
+ ### Option 1: ClawMem Exclusive (Recommended)
212
214
 
213
- **Benefits:**
214
- - No context window waste (avoids 10-15% duplicate injection)
215
- - Prevents OpenClaw native memory auto-initialization on updates
216
- - All memory in ClawMem's hybrid search + graph traversal system
215
+ ClawMem handles 100% of structured memory. Disable native memory search (not Active Memory — that's separate and compatible):
217
216
 
218
217
  **Configuration:**
219
218
  ```bash
220
- # Disable OpenClaw's native memory
219
+ # Disable OpenClaw's native memory search
221
220
  openclaw config set agents.defaults.memorySearch.extraPaths "[]"
222
221
 
223
222
  # Verify
@@ -235,7 +234,7 @@ ls ~/.openclaw/agents/main/memory/
235
234
 
236
235
  ### Option 2: Hybrid (ClawMem + Native)
237
236
 
238
- Run both ClawMem and OpenClaw's native memory for redundancy.
237
+ Run both ClawMem and OpenClaw's native memory search for redundancy.
239
238
 
240
239
  **Configuration:**
241
240
  ```bash
@@ -243,9 +242,9 @@ openclaw config set agents.defaults.memorySearch.extraPaths '["~/documents", "~/
243
242
  ```
244
243
 
245
244
  **Tradeoffs:**
246
- - Redundant recall from two independent systems
247
- - 10-15% context window waste from duplicate facts
248
- - Two memory indices to maintain
245
+ - Redundant recall from two independent systems
246
+ - 10-15% context window waste from duplicate facts
247
+ - Two memory indices to maintain
249
248
 
250
249
  **Recommendation:** Use Option 1 unless you have a specific need for redundant memory systems.
251
250
 
@@ -263,7 +262,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
263
262
  | `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + recent decisions (400) + antipatterns (150) + vault context (200) → `<vault-postcompact>` |
264
263
  | `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
265
264
  | `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
266
- | `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, extracts SPO triples from decision/preference/milestone/problem facts. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
265
+ | `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, persists observer-emitted SPO triples via `ensureEntityCanonical` (canonical `vault:type:slug` IDs shared with A-MEM) using the tight predicate vocabulary (adopted, migrated_to, deployed_to, runs_on, replaced, depends_on, integrates_with, uses, prefers, avoids, caused_by, resolved_by, owned_by). Eligible observation types: decision/preference/milestone/problem/discovery/feature. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
267
266
  | `handoff-generator` | Stop | — | LLM summarizes session → `_clawmem/agent/handoffs/` |
268
267
  | `feedback-loop` | Stop | — | tracks referenced notes → boosts confidence, records usage relations + co-activations between co-referenced docs, tracks utility signals (surfaced vs referenced ratio for lifecycle automation), per-turn recall attribution (marks which surfaced docs were cited in which turn) |
269
268
 
package/README.md CHANGED
@@ -186,7 +186,7 @@ clawmem setup mcp # Register MCP server in ~/.claude.json (31 tools)
186
186
  ClawMem registers as a native ContextEngine plugin - OpenClaw's pluggable interface for context management. Same 90/10 automatic retrieval, delivered through OpenClaw's lifecycle system instead of Claude Code hooks.
187
187
 
188
188
  ```bash
189
- clawmem setup openclaw # Shows installation steps
189
+ clawmem setup openclaw # Auto-installs plugin, prints remaining steps
190
190
  ```
191
191
 
192
192
  **What the plugin provides:**
@@ -196,11 +196,15 @@ clawmem setup openclaw # Shows installation steps
196
196
  - **5 agent tools** - `clawmem_search`, `clawmem_get`, `clawmem_session_log`, `clawmem_timeline`, `clawmem_similar`
197
197
  - **Session lifecycle hooks** - `session_start`, `session_end`, `before_reset` safety net
198
198
 
199
- Disable OpenClaw's native memory and `memory-lancedb` auto-recall/capture to avoid duplicate injection:
199
+ Disable OpenClaw's native memory search to avoid duplicate injection:
200
200
  ```bash
201
201
  openclaw config set agents.defaults.memorySearch.extraPaths "[]"
202
202
  ```
203
203
 
204
+ ClawMem coexists cleanly with OpenClaw's [Active Memory](https://docs.openclaw.ai/concepts/active-memory) plugin (v2026.4.10+) — they search different backends and inject into different prompt regions, so both can run simultaneously without conflict. See the [OpenClaw plugin guide](docs/guides/openclaw-plugin.md#coexistence-with-openclaw-active-memory) for details.
205
+
206
+ > **OpenClaw v2026.4.10+** recommended — fixes a config normalization bug where `plugins.slots.contextEngine` was silently dropped (#64192).
207
+
204
208
  **Alternative:** OpenClaw agents can also use ClawMem's MCP server directly (`clawmem setup mcp`), with or without hooks. This gives full access to all 31 MCP tools but bypasses OpenClaw's ContextEngine lifecycle, so you lose token budget awareness, native compaction orchestration, and the `afterTurn()` message pipeline. The ContextEngine plugin is recommended for new OpenClaw setups; MCP is available as an additional or standalone integration.
205
209
 
206
210
  #### Hermes Agent
@@ -713,7 +717,7 @@ Registered by `clawmem setup mcp`. Available to any MCP-compatible client.
713
717
  |---|---|
714
718
  | `build_graphs` | Build temporal and/or semantic graphs from document corpus |
715
719
  | `find_causal_links` | Trace decision chains: "what led to X", "how we got from A to B". Follow up `intent_search` with this tool on a top result to walk the full causal chain. Traverses causes / caused_by / both up to N hops with depth-annotated reasoning. |
716
- | `kg_query` | Query the SPO knowledge graph: "what does X relate to?", "what was true about X when?". Returns temporal entity-relationship triples with validity windows. Uses entity resolution for lookup. |
720
+ | `kg_query` | Query the SPO knowledge graph: "what does X relate to?", "what was true about X when?". Returns temporal entity-relationship triples with validity windows. Accepts entity name (resolved via `searchEntities`) or canonical ID in `vault:type:slug` form. Triples are populated by the decision-extractor hook from observer-emitted `<triples>` blocks. |
717
721
  | `memory_evolution_status` | Show how a document's A-MEM metadata evolved over time |
718
722
  | `timeline` | Show the temporal neighborhood around a document — what was created/modified before and after it. Progressive disclosure: search → timeline (context) → get (full content). Supports same-collection scoping and session correlation. |
719
723
 
@@ -1069,40 +1073,36 @@ Manual layers benefit from periodic re-indexing — a cron job running `clawmem
1069
1073
  ### Setup
1070
1074
 
1071
1075
  ```bash
1072
- # Bootstrap workspace collection (use your agent's workspace path)
1073
- ./bin/clawmem bootstrap ~/workspace --name workspace
1074
-
1075
- # Bootstrap each project
1076
- ./bin/clawmem bootstrap ~/Projects/my-project --name my-project
1076
+ # Bootstrap a content directory (creates vault + indexes + embeds + installs hooks + MCP)
1077
+ clawmem bootstrap ~/notes --name notes
1077
1078
 
1078
- # Enable auto-embed for real-time indexing
1079
- # Edit ~/.config/clawmem/config.yaml autoEmbed: true
1079
+ # Bootstrap each project you want indexed
1080
+ clawmem bootstrap ~/Projects/my-project --name my-project
1080
1081
 
1081
- # Install watcher as systemd service
1082
- ./bin/clawmem install-service --enable
1082
+ # Install watcher + embed timer as systemd services
1083
+ clawmem install-service --enable
1083
1084
  ```
1084
1085
 
1085
- #### OpenClaw-Specific
1086
+ #### OpenClaw-specific
1086
1087
 
1087
1088
  ```bash
1088
- # OpenClaw uses ~/.openclaw/workspace/ as its workspace root
1089
- ./bin/clawmem bootstrap ~/.openclaw/workspace --name workspace
1089
+ # Install the ContextEngine plugin (auto-symlinks into ~/.openclaw/extensions/)
1090
+ clawmem setup openclaw
1091
+ # Then follow the printed next steps: restart gateway, set slot, configure GPU endpoints
1090
1092
  ```
1091
1093
 
1092
- #### Hermes-Specific
1094
+ Index your content directories with `clawmem bootstrap` as above. The OpenClaw plugin shares the same vault as Claude Code hooks.
1093
1095
 
1094
- ```bash
1095
- # Hermes uses ~/.hermes/ as its home directory
1096
- ./bin/clawmem bootstrap ~/.hermes --name hermes-home
1096
+ #### Hermes-specific
1097
1097
 
1098
- # Install the memory provider plugin
1099
- cp -r src/hermes /path/to/hermes-agent/plugins/memory/clawmem
1098
+ ```bash
1099
+ # Install the memory provider plugin (symlink or copy)
1100
+ ln -s $(npm root -g)/clawmem/src/hermes /path/to/hermes-agent/plugins/memory/clawmem
1100
1101
 
1101
- # Start clawmem serve (external mode)
1102
+ # Start the REST API (required for Hermes tool calls)
1102
1103
  clawmem serve --port 7438 &
1103
1104
 
1104
- # Configure Hermes to use ClawMem
1105
- # In your Hermes config.yaml:
1105
+ # Configure Hermes to use ClawMem (in your Hermes config.yaml):
1106
1106
  # memory:
1107
1107
  # provider: clawmem
1108
1108
  ```
package/SKILL.md CHANGED
@@ -118,15 +118,15 @@ ln -sf ~/clawmem/bin/clawmem ~/.bun/bin/clawmem
118
118
  clawmem bootstrap ~/notes --name notes
119
119
 
120
120
  # Or step by step:
121
- ./bin/clawmem init
122
- ./bin/clawmem collection add ~/notes --name notes
123
- ./bin/clawmem update --embed
124
- ./bin/clawmem setup hooks
125
- ./bin/clawmem setup mcp
121
+ clawmem init
122
+ clawmem collection add ~/notes --name notes
123
+ clawmem update --embed
124
+ clawmem setup hooks
125
+ clawmem setup mcp
126
126
 
127
127
  # Verify
128
- ./bin/clawmem doctor # Full health check
129
- ./bin/clawmem status # Quick index status
128
+ clawmem doctor # Full health check
129
+ clawmem status # Quick index status
130
130
  ```
131
131
 
132
132
  ### Background Services (systemd user units)
@@ -294,7 +294,7 @@ Once escalated, route by query type:
294
294
  | `timeline` | Temporal neighborhood around a document — what was modified before/after. Progressive disclosure: search → timeline → get. Supports same-collection scoping and session correlation. |
295
295
  | `list_vaults` | Show configured vault names and paths. Empty in single-vault mode. |
296
296
  | `vault_sync` | Index markdown from a directory into a named vault. Restricted-path validation rejects sensitive directories. |
297
- | `kg_query` | Query SPO knowledge graph for entity relationships with temporal validity. Uses entity resolution. |
297
+ | `kg_query` | Query SPO knowledge graph for entity relationships with temporal validity. Accepts entity name or canonical ID (`vault:type:slug`). Triples are populated by decision-extractor from observer-emitted `<triples>` blocks using a canonical predicate vocabulary. |
298
298
  | `diary_write` | Write diary entry. Use proactively in non-hooked environments. Do NOT use in Claude Code. |
299
299
  | `diary_read` | Read recent diary entries. Filter by agent name. |
300
300
  | `lifecycle_status` | Document lifecycle statistics: active, archived, forgotten, pinned, snoozed counts and policy summary. |
@@ -605,12 +605,15 @@ Phase 3 deductive synthesis applies the same `contradicts` link for any draft th
605
605
 
606
606
  ## OpenClaw Integration
607
607
 
608
+ **Active Memory coexistence:** ClawMem is fully compatible with OpenClaw's Active Memory plugin (v2026.4.10+). They search different backends and inject into different prompt regions — both can run simultaneously. The deployment options below control native memory search (`memorySearch.extraPaths`), not Active Memory.
609
+
610
+ **OpenClaw v2026.4.10+ recommended** — fixes the `plugins.slots.contextEngine` slot being silently dropped during config normalization (#64192).
611
+
608
612
  ### Option 1: ClawMem Exclusive (Recommended)
609
613
 
610
- ClawMem handles 100% of memory. No redundancy.
614
+ ClawMem handles 100% of structured memory. Disable native memory search:
611
615
 
612
616
  ```bash
613
- # Disable OpenClaw's native memory
614
617
  openclaw config set agents.defaults.memorySearch.extraPaths "[]"
615
618
  ```
616
619
 
@@ -618,7 +621,7 @@ openclaw config set agents.defaults.memorySearch.extraPaths "[]"
618
621
 
619
622
  ### Option 2: Hybrid
620
623
 
621
- Run both ClawMem and OpenClaw native memory.
624
+ Run both ClawMem and OpenClaw native memory search.
622
625
 
623
626
  ```bash
624
627
  openclaw config set agents.defaults.memorySearch.extraPaths '["~/documents", "~/notes"]'
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmem",
3
- "version": "0.8.3",
3
+ "version": "0.8.5",
4
4
  "description": "On-device context engine and memory for AI agents. Claude Code and OpenClaw. Hooks + MCP server + hybrid RAG search.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/amem.ts CHANGED
@@ -649,11 +649,18 @@ export async function postIndexEnrich(
649
649
  }
650
650
 
651
651
  /**
652
- * Observation with document ID for causal inference
652
+ * Observation with document ID for causal inference and SPO triple extraction.
653
+ *
654
+ * Populated by the decision-extractor hook after an observation is successfully
655
+ * persisted. Consumed by:
656
+ * - `inferCausalLinks` (A-MEM) — uses docId + facts
657
+ * - `insertObservationTriples` (decision-extractor) — uses docId + obsType + triples
653
658
  */
654
659
  export interface ObservationWithDoc {
655
660
  docId: number;
656
661
  facts: string[];
662
+ obsType?: string;
663
+ triples?: Array<{ subject: string; predicate: string; object: string }>;
657
664
  }
658
665
 
659
666
  /**
package/src/clawmem.ts CHANGED
@@ -1300,43 +1300,115 @@ function cmdPath() {
1300
1300
 
1301
1301
  async function cmdSetupOpenClaw(args: string[]) {
1302
1302
  const remove = args.includes("--remove");
1303
- const binPath = findClawmemBinary();
1304
1303
  const pluginDir = pathResolve(import.meta.dir, "openclaw");
1304
+ const extensionsDir = pathResolve(process.env.HOME || "~", ".openclaw", "extensions");
1305
+ const linkPath = pathResolve(extensionsDir, "clawmem");
1306
+
1307
+ // Check if openclaw CLI is available
1308
+ const hasOpenClawCli = (() => {
1309
+ try {
1310
+ const r = Bun.spawnSync(["openclaw", "--version"], { stdout: "pipe", stderr: "pipe" });
1311
+ return r.exitCode === 0;
1312
+ } catch { return false; }
1313
+ })();
1305
1314
 
1306
1315
  if (remove) {
1307
- console.log(`${c.green}To remove ClawMem from OpenClaw:${c.reset}`);
1308
- console.log(` 1. Remove the symlink: rm ~/.openclaw/extensions/clawmem`);
1309
- console.log(` 2. Remove from config: openclaw config set plugins.slots.contextEngine legacy`);
1316
+ // Perform the actual uninstall, mirroring what install does
1317
+ let removed = false;
1318
+ try {
1319
+ const stat = await import("fs").then(m => m.lstatSync(linkPath));
1320
+ if (stat.isSymbolicLink() || stat.isDirectory()) {
1321
+ const { unlinkSync, rmSync } = await import("fs");
1322
+ if (stat.isSymbolicLink()) {
1323
+ unlinkSync(linkPath);
1324
+ } else {
1325
+ rmSync(linkPath, { recursive: true });
1326
+ }
1327
+ console.log(`${c.green}Removed plugin from ${linkPath}${c.reset}`);
1328
+ removed = true;
1329
+ }
1330
+ } catch (e: any) {
1331
+ if (e.code !== "ENOENT") throw e;
1332
+ console.log(`${c.dim}Plugin not installed at ${linkPath}${c.reset}`);
1333
+ }
1334
+
1335
+ if (hasOpenClawCli) {
1336
+ Bun.spawnSync(["openclaw", "config", "set", "plugins.slots.contextEngine", "legacy"], { stdout: "inherit", stderr: "inherit" });
1337
+ console.log(`${c.green}Reset context engine slot to legacy${c.reset}`);
1338
+ } else if (removed) {
1339
+ console.log(`${c.dim}openclaw CLI not found — manually run: openclaw config set plugins.slots.contextEngine legacy${c.reset}`);
1340
+ }
1310
1341
  return;
1311
1342
  }
1312
1343
 
1313
- // Check that the OpenClaw plugin files exist
1344
+ // Verify plugin source files exist
1314
1345
  if (!existsSync(pathResolve(pluginDir, "index.ts"))) {
1315
1346
  die(`OpenClaw plugin files not found at ${pluginDir}`);
1316
1347
  }
1348
+ if (!existsSync(pathResolve(pluginDir, "openclaw.plugin.json"))) {
1349
+ die(`Plugin manifest not found at ${pluginDir}/openclaw.plugin.json`);
1350
+ }
1317
1351
 
1318
- console.log(`${c.green}ClawMem OpenClaw Plugin Setup${c.reset}`);
1319
- console.log();
1320
- console.log(`Plugin source: ${pluginDir}`);
1321
- console.log(`ClawMem binary: ${binPath}`);
1352
+ // Create extensions directory
1353
+ if (!existsSync(extensionsDir)) {
1354
+ mkdirSync(extensionsDir, { recursive: true });
1355
+ }
1356
+
1357
+ // Remove stale symlink/directory if present
1358
+ try {
1359
+ const { lstatSync, unlinkSync, rmSync } = await import("fs");
1360
+ const stat = lstatSync(linkPath);
1361
+ if (stat.isSymbolicLink()) {
1362
+ const { readlinkSync } = await import("fs");
1363
+ const target = readlinkSync(linkPath);
1364
+ if (target === pluginDir) {
1365
+ console.log(`${c.dim}Symlink already correct at ${linkPath}${c.reset}`);
1366
+ } else {
1367
+ unlinkSync(linkPath);
1368
+ console.log(`${c.dim}Replaced stale symlink (was → ${target})${c.reset}`);
1369
+ }
1370
+ } else if (stat.isDirectory()) {
1371
+ rmSync(linkPath, { recursive: true });
1372
+ console.log(`${c.dim}Replaced existing directory at ${linkPath}${c.reset}`);
1373
+ } else {
1374
+ // Regular file or other non-symlink, non-directory — conflict
1375
+ die(`${linkPath} exists but is not a symlink or directory. Remove it manually and re-run setup.`);
1376
+ }
1377
+ } catch (e: any) {
1378
+ if (e.code !== "ENOENT") throw e;
1379
+ }
1380
+
1381
+ // Create symlink
1382
+ if (!existsSync(linkPath)) {
1383
+ const { symlinkSync } = await import("fs");
1384
+ symlinkSync(pluginDir, linkPath);
1385
+ }
1386
+ console.log(`${c.green}Installed plugin: ${linkPath} → ${pluginDir}${c.reset}`);
1387
+
1388
+ // Version warning
1322
1389
  console.log();
1323
- console.log(`${c.bold}Installation steps:${c.reset}`);
1390
+ console.log(`${c.bold}Note:${c.reset} OpenClaw v2026.4.10+ recommended — earlier versions`);
1391
+ console.log(`have a bug where plugins.slots.contextEngine is silently dropped`);
1392
+ console.log(`during config normalization (openclaw/openclaw#64192).`);
1393
+
1394
+ // Remaining steps — gateway must restart BEFORE setting the context engine slot,
1395
+ // otherwise OpenClaw hasn't discovered the plugin yet and the slot assignment
1396
+ // fails or is ignored (the exact bug reported in issue #5).
1324
1397
  console.log();
1325
- console.log(` 1. Symlink the plugin into OpenClaw extensions:`);
1326
- console.log(` ${c.cyan}ln -s ${pluginDir} ~/.openclaw/extensions/clawmem${c.reset}`);
1398
+ console.log(`${c.bold}Next steps:${c.reset}`);
1327
1399
  console.log();
1328
- console.log(` 2. Copy the plugin manifest:`);
1329
- console.log(` ${c.cyan}cp ${pluginDir}/plugin.json ~/.openclaw/extensions/clawmem/openclaw.plugin.json${c.reset}`);
1400
+ console.log(` 1. Restart OpenClaw gateway to discover the plugin:`);
1401
+ console.log(` ${c.cyan}openclaw gateway restart${c.reset}`);
1330
1402
  console.log();
1331
- console.log(` 3. Set ClawMem as the active context engine:`);
1403
+ console.log(` 2. Set ClawMem as the active context engine (after restart):`);
1332
1404
  console.log(` ${c.cyan}openclaw config set plugins.slots.contextEngine clawmem${c.reset}`);
1333
1405
  console.log();
1334
- console.log(` 4. Configure GPU endpoints (if not using defaults):`);
1406
+ console.log(` 3. Configure GPU endpoints (if not using defaults):`);
1335
1407
  console.log(` ${c.cyan}openclaw config set plugins.entries.clawmem.config.gpuEmbed http://YOUR_GPU:8088${c.reset}`);
1336
1408
  console.log(` ${c.cyan}openclaw config set plugins.entries.clawmem.config.gpuLlm http://YOUR_GPU:8089${c.reset}`);
1337
1409
  console.log(` ${c.cyan}openclaw config set plugins.entries.clawmem.config.gpuRerank http://YOUR_GPU:8090${c.reset}`);
1338
1410
  console.log();
1339
- console.log(` 5. Start the REST API (for tool calls):`);
1411
+ console.log(` 4. Start the REST API (for agent tools):`);
1340
1412
  console.log(` ${c.cyan}clawmem serve &${c.reset}`);
1341
1413
  console.log();
1342
1414
  console.log(`${c.dim}ClawMem will work alongside Claude Code hooks — both modes share the same vault.${c.reset}`);
package/src/entity.ts CHANGED
@@ -354,6 +354,69 @@ export function resolveEntityCanonical(
354
354
  // Entity Storage + Mentions + Co-occurrences
355
355
  // =============================================================================
356
356
 
357
+ /**
358
+ * Resolve the entity_type for a name via exact case-insensitive match.
359
+ *
360
+ * Returns the type only when the entities in the given vault that share the name
361
+ * resolve to EXACTLY ONE distinct entity_type. Zero matches → null (caller should default to a safe type). Multiple
362
+ * matches (ambiguous across buckets, e.g. "Alice" as person AND "Alice" as project)
363
+ * → null so the caller falls back to a safe default instead of arbitrarily picking.
364
+ *
365
+ * Exact match only — no fuzzy matching — to avoid false inheritance on near-names.
366
+ */
367
+ export function resolveEntityTypeExact(
368
+ db: Database,
369
+ name: string,
370
+ vault: string = 'default'
371
+ ): string | null {
372
+ const rows = db.prepare(`
373
+ SELECT DISTINCT entity_type FROM entity_nodes
374
+ WHERE LOWER(name) = LOWER(?) AND vault = ?
375
+ `).all(name, vault) as Array<{ entity_type: string }>;
376
+
377
+ if (rows.length !== 1) return null; // zero or ambiguous
378
+ return rows[0]!.entity_type;
379
+ }
380
+
381
+ /**
382
+ * Resolve-or-create a canonical entity without incrementing mention_count.
383
+ *
384
+ * Used by consumers that reference an entity but do NOT constitute a document
385
+ * mention (e.g. SPO triple extraction). Semantically distinct from upsertEntity,
386
+ * which treats every call as a doc mention and inflates the count.
387
+ *
388
+ * Flow: resolveEntityCanonical (FTS5 + fuzzy + bucket match) → reuse if found,
389
+ * otherwise mint a new canonical `vault:type:slug` entity with mention_count = 0.
390
+ *
391
+ * Returns the entity_id.
392
+ */
393
+ export function ensureEntityCanonical(
394
+ db: Database,
395
+ name: string,
396
+ type: string,
397
+ vault: string = 'default'
398
+ ): string {
399
+ const canonicalId = resolveEntityCanonical(db, name, type, vault);
400
+ if (canonicalId) return canonicalId;
401
+
402
+ const entityId = makeEntityId(name, type, vault);
403
+ db.prepare(`
404
+ INSERT OR IGNORE INTO entity_nodes (entity_id, entity_type, name, description, created_at, mention_count, last_seen, vault)
405
+ VALUES (?, ?, ?, NULL, datetime('now'), 0, datetime('now'), ?)
406
+ `).run(entityId, type, name, vault);
407
+
408
+ try {
409
+ db.prepare(`
410
+ INSERT OR IGNORE INTO entities_fts (entity_id, name, entity_type)
411
+ VALUES (?, ?, ?)
412
+ `).run(entityId, name.toLowerCase(), type);
413
+ } catch {
414
+ // FTS insert may fail if table doesn't exist yet — non-fatal
415
+ }
416
+
417
+ return entityId;
418
+ }
419
+
357
420
  /**
358
421
  * Upsert an entity into entity_nodes and entities_fts.
359
422
  * Returns the entity_id (canonical or new).
@@ -17,13 +17,23 @@ import {
17
17
  validateTranscriptPath,
18
18
  } from "../hooks.ts";
19
19
  import { hashContent } from "../indexer.ts";
20
- import { extractObservations, type Observation } from "../observer.ts";
20
+ import { extractObservations, type Observation, LITERAL_PREDICATES } from "../observer.ts";
21
21
  import { updateDirectoryContext } from "../directory-context.ts";
22
22
  import { loadConfig } from "../collections.ts";
23
23
  import { getDefaultLlamaCpp } from "../llm.ts";
24
24
  import type { ObservationWithDoc } from "../amem.ts";
25
25
  import { extractJsonFromLLM } from "../amem.ts";
26
26
  import { DEFAULT_EMBED_MODEL, extractSnippet, type SearchResult } from "../store.ts";
27
+ import { ensureEntityCanonical, resolveEntityTypeExact } from "../entity.ts";
28
+
29
+ // Observation types that are allowed to contribute SPO triples. Widened from the
30
+ // original {decision, preference, milestone, problem} gate, which rejected 77% of
31
+ // real observations in production vaults (the majority type is 'discovery').
32
+ // See BACKLOG.md §1.6 for the full diagnosis.
33
+ const SPO_ELIGIBLE_OBSERVATION_TYPES = new Set<Observation["type"]>([
34
+ "decision", "preference", "milestone", "problem",
35
+ "discovery", "feature",
36
+ ]);
27
37
 
28
38
  // =============================================================================
29
39
  // Facet-Based Merge Policy
@@ -325,42 +335,8 @@ export async function decisionExtractor(
325
335
  const observationsWithDocs: ObservationWithDoc[] = [];
326
336
  if (observations.length > 0) {
327
337
  for (const obs of observations) {
328
- const obsPath = `observations/${dateStr}-${sessionId.slice(0, 8)}-${obs.type}.md`;
329
- const obsBody = formatObservation(obs, dateStr, sessionId);
330
- const obsHash = hashContent(obsBody);
331
-
332
- store.insertContent(obsHash, obsBody, timestamp);
333
- try {
334
- store.insertDocument("_clawmem", obsPath, obs.title, obsHash, timestamp, timestamp);
335
- const doc = store.findActiveDocument("_clawmem", obsPath);
336
- if (doc) {
337
- store.updateDocumentMeta(doc.id, {
338
- content_type: obs.type === "decision" ? "decision"
339
- : obs.type === "preference" ? "preference"
340
- : obs.type === "milestone" ? "milestone"
341
- : obs.type === "problem" ? "problem"
342
- : "observation",
343
- confidence: 0.80,
344
- });
345
- store.updateObservationFields(obsPath, "_clawmem", {
346
- observation_type: obs.type,
347
- facts: JSON.stringify(obs.facts),
348
- narrative: obs.narrative,
349
- concepts: JSON.stringify(obs.concepts),
350
- files_read: JSON.stringify(obs.filesRead),
351
- files_modified: JSON.stringify(obs.filesModified),
352
- });
353
-
354
- if (obs.facts.length > 0) {
355
- observationsWithDocs.push({
356
- docId: doc.id,
357
- facts: obs.facts,
358
- });
359
- }
360
- }
361
- } catch {
362
- // May already exist
363
- }
338
+ const wit = persistObservationDoc(store, obs, sessionId, dateStr, timestamp);
339
+ if (wit) observationsWithDocs.push(wit);
364
340
  }
365
341
 
366
342
  // Infer causal links from observations with facts
@@ -375,31 +351,12 @@ export async function decisionExtractor(
375
351
  }
376
352
  }
377
353
 
378
- // Extract SPO triples from observation facts (preference/decision types get priority)
379
- for (const obs of observations) {
380
- if (!obs.facts || obs.facts.length === 0) continue;
381
- for (const fact of obs.facts) {
382
- const triple = extractTripleFromFact(fact, obs.type);
383
- if (triple) {
384
- try {
385
- store.db.prepare(
386
- "INSERT OR IGNORE INTO entity_nodes (entity_id, name, entity_type, created_at) VALUES (?, ?, ?, ?)"
387
- ).run(triple.subjectId, triple.subject, "auto", new Date().toISOString());
388
- if (triple.objectId) {
389
- store.db.prepare(
390
- "INSERT OR IGNORE INTO entity_nodes (entity_id, name, entity_type, created_at) VALUES (?, ?, ?, ?)"
391
- ).run(triple.objectId, triple.object, "auto", new Date().toISOString());
392
- }
393
- store.addTriple(triple.subjectId, triple.predicate, triple.objectId, triple.objectId ? null : triple.object, {
394
- confidence: obs.type === "decision" || obs.type === "preference" ? 0.9 : 0.7,
395
- sourceFact: fact,
396
- });
397
- } catch {
398
- // Triple insertion errors are non-fatal
399
- }
400
- }
401
- }
402
- }
354
+ // Extract SPO triples from observation-emitted <triples> blocks (Fix A).
355
+ // The regex-based extractTripleFromFact is gone — the observer LLM now emits
356
+ // structured triples alongside facts, parsed and validated in parseObservationXml.
357
+ // We iterate observationsWithDocs (not raw observations) so every triple gets
358
+ // real source_doc_id provenance from the persisted observation document (Fix F).
359
+ insertObservationTriples(store, observations, observationsWithDocs);
403
360
  }
404
361
 
405
362
  // Extract decisions (observer-first, regex fallback)
@@ -691,67 +648,140 @@ function formatObservation(obs: Observation, dateStr: string, sessionId: string)
691
648
  }
692
649
 
693
650
  // =============================================================================
694
- // SPO Triple Extraction from Facts
651
+ // Observation persistence
695
652
  // =============================================================================
696
653
 
697
- type ExtractedTriple = {
698
- subject: string;
699
- subjectId: string;
700
- predicate: string;
701
- object: string;
702
- objectId: string | null;
703
- };
654
/**
 * Write one observation into the `_clawmem` collection and hand back the
 * `ObservationWithDoc` record consumed downstream (causal inference and SPO
 * triple insertion).
 *
 * The document path is
 * `observations/${dateStr}-${sessionId[0..8]}-${obs.type}-${hash[0..8]}.md`,
 * where the hash is taken over the formatted observation body. The hash slice
 * keeps several observations of the same type in one session from sharing a
 * path, so a later one is not dropped on the `(collection, path)` uniqueness
 * check and its triples are not lost.
 *
 * @param store - store used for the content, document and metadata writes
 * @param obs - the parsed observation to persist
 * @param sessionId - session identifier; only its first 8 characters go into the path
 * @param dateStr - date string used in the path and in the formatted body
 * @param timestamp - timestamp passed to the content and document inserts
 * @returns `null` when the document cannot be found after the insert, when
 *   the observation has no facts, or when a store call inside the guarded
 *   section throws (the error is logged); otherwise the document id together
 *   with the observation's facts, type and triples.
 */
export function persistObservationDoc(
  store: Store,
  obs: Observation,
  sessionId: string,
  dateStr: string,
  timestamp: string
): ObservationWithDoc | null {
  const body = formatObservation(obs, dateStr, sessionId);
  const bodyHash = hashContent(body);
  const sessionPrefix = sessionId.slice(0, 8);
  const hashPrefix = bodyHash.slice(0, 8);
  const docPath = `observations/${dateStr}-${sessionPrefix}-${obs.type}-${hashPrefix}.md`;

  // The content insert is deliberately outside the guard: a failure here propagates.
  store.insertContent(bodyHash, body, timestamp);

  try {
    store.insertDocument("_clawmem", docPath, obs.title, bodyHash, timestamp, timestamp);

    const persisted = store.findActiveDocument("_clawmem", docPath);
    if (!persisted) {
      return null;
    }

    // Four observation types keep their own content type; everything else is
    // filed under the generic "observation".
    let contentType: "decision" | "preference" | "milestone" | "problem" | "observation";
    switch (obs.type) {
      case "decision":
        contentType = "decision";
        break;
      case "preference":
        contentType = "preference";
        break;
      case "milestone":
        contentType = "milestone";
        break;
      case "problem":
        contentType = "problem";
        break;
      default:
        contentType = "observation";
        break;
    }

    store.updateDocumentMeta(persisted.id, {
      content_type: contentType,
      confidence: 0.80,
    });
    store.updateObservationFields(docPath, "_clawmem", {
      observation_type: obs.type,
      facts: JSON.stringify(obs.facts),
      narrative: obs.narrative,
      concepts: JSON.stringify(obs.concepts),
      files_read: JSON.stringify(obs.filesRead),
      files_modified: JSON.stringify(obs.filesModified),
    });

    // The document stays persisted, but a fact-less observation is not passed on.
    if (obs.facts.length === 0) {
      return null;
    }

    const result: ObservationWithDoc = {
      docId: persisted.id,
      facts: obs.facts,
      obsType: obs.type,
      triples: obs.triples,
    };
    return result;
  } catch (err) {
    console.log(`[decision-extractor] Failed to persist observation ${obs.type}/${obs.title}:`, err);
    return null;
  }
}
708
716
 
709
- function extractTripleFromFact(fact: string, obsType: string): ExtractedTriple | null {
710
- // Only extract from decision/preference/milestone/problem types — skip noisy bugfix/feature/change facts
711
- if (!["decision", "preference", "milestone", "problem"].includes(obsType)) return null;
717
+ // =============================================================================
718
+ // SPO Triple Extraction from Facts
719
+ // =============================================================================
712
720
 
713
- // Conservative verb patterns — only clear relational predicates
714
- const verbPatterns = [
715
- /^(.+?)\s+(chose|selected|switched to|migrated to|adopted)\s+(.+?)\.?$/i,
716
- /^(.+?)\s+(deployed to|runs on|hosted on|installed on)\s+(.+?)\.?$/i,
717
- /^(.+?)\s+(replaced|superseded|deprecated)\s+(.+?)\.?$/i,
718
- /^(.+?)\s+(depends on|integrates with|connects to)\s+(.+?)\.?$/i,
719
- ];
721
/**
 * Insert the SPO triples emitted by the observer into `entity_triples`.
 *
 * Entity IDs are obtained through `ensureEntityCanonical`, so observation
 * triples share one namespace with the other entities in `entity_nodes`.
 * The entity type comes from `resolveEntityTypeExact`; when that returns
 * nothing the type falls back to `concept`.
 *
 * Only `observationsWithDocs` is iterated. An observation whose document was
 * not persisted has no entry there, so its triples are skipped and every
 * inserted triple carries the `docId` of a real document as `sourceDocId`.
 *
 * Objects of predicates listed in `LITERAL_PREDICATES` are stored as literals;
 * all other objects are resolved to entities.
 *
 * @param store - store providing `db` and `addTriple`
 * @param _observations - unused; kept so the existing call site stays valid
 * @param observationsWithDocs - persisted observations carrying `triples`,
 *   `obsType` and `docId`
 * @param vault - vault namespace used for entity resolution; defaults to
 *   `"default"`, the value that was previously hard-coded
 */
function insertObservationTriples(
  store: Store,
  _observations: Observation[],
  observationsWithDocs: ObservationWithDoc[],
  vault = "default"
): void {
  if (observationsWithDocs.length === 0) return;

  // Per-invocation cache keyed on (vault, resolvedType, normalizedName) so a
  // repeated entity reference skips the second ensureEntityCanonical call.
  const cache = new Map<string, string>();

  const resolveEntity = (name: string, type: string): string => {
    const key = `${vault}:${type}:${name.toLowerCase().trim()}`;
    const cached = cache.get(key);
    if (cached) return cached;
    const id = ensureEntityCanonical(store.db, name, type, vault);
    cache.set(key, id);
    return id;
  };

  for (const wit of observationsWithDocs) {
    if (!wit.triples || wit.triples.length === 0) continue;
    const obsType = wit.obsType as Observation["type"] | undefined;
    if (!obsType || !SPO_ELIGIBLE_OBSERVATION_TYPES.has(obsType)) continue;

    // Decisions and preferences are recorded with higher confidence.
    const confidence = obsType === "decision" || obsType === "preference" ? 0.9 : 0.7;

    for (const triple of wit.triples) {
      try {
        const subjectType = resolveEntityTypeExact(store.db, triple.subject, vault) ?? "concept";
        const subjectId = resolveEntity(triple.subject, subjectType);

        // Exactly one of objectId / objectLiteral is set for each triple.
        let objectId: string | null = null;
        let objectLiteral: string | null = null;

        if (LITERAL_PREDICATES.has(triple.predicate)) {
          objectLiteral = triple.object;
        } else {
          const objectType = resolveEntityTypeExact(store.db, triple.object, vault) ?? "concept";
          objectId = resolveEntity(triple.object, objectType);
        }

        store.addTriple(subjectId, triple.predicate, objectId, objectLiteral, {
          confidence,
          sourceFact: `${triple.subject} ${triple.predicate} ${triple.object}`,
          sourceDocId: wit.docId,
        });
      } catch (err) {
        // A failed triple must not abort the rest of the batch — log and continue.
        console.log(`[decision-extractor] Failed to insert triple ${triple.subject}/${triple.predicate}/${triple.object}:`, err);
      }
    }
  }
}
package/src/mcp.ts CHANGED
@@ -1930,9 +1930,9 @@ This is the recommended entry point for ALL memory queries.`,
1930
1930
  "kg_query",
1931
1931
  {
1932
1932
  title: "Knowledge Graph Query",
1933
- description: "Query the knowledge graph for an entity's relationships. Returns structured facts with temporal validity (valid_from/valid_to). Use for 'what does X relate to?', 'what was true about X on date Y?', 'who/what is connected to X?'.",
1933
+ description: "Query the knowledge graph for an entity's relationships. Returns structured facts with temporal validity (valid_from/valid_to). Use for 'what does X relate to?', 'what was true about X on date Y?', 'who/what is connected to X?'. Accepts an entity name (e.g. 'ClawMem') OR a canonical entity ID in the form 'vault:type:slug' (e.g. 'default:service:clawmem').",
1934
1934
  inputSchema: {
1935
- entity: z.string().describe("Entity name or ID to query"),
1935
+ entity: z.string().describe("Entity name or canonical ID ('vault:type:slug') to query"),
1936
1936
  as_of: z.string().optional().describe("Date filter (YYYY-MM-DD) — only facts valid at this date"),
1937
1937
  direction: z.enum(["outgoing", "incoming", "both"]).optional().default("both").describe("Relationship direction"),
1938
1938
  vault: z.string().optional().describe("Named vault (omit for default vault)"),
@@ -1941,17 +1941,30 @@ This is the recommended entry point for ALL memory queries.`,
1941
1941
  async ({ entity, as_of, direction, vault }) => {
1942
1942
  const store = getStore(vault);
1943
1943
 
1944
+ // Canonical IDs look like `vault:type:slug` — accept them directly so callers
1945
+ // that already resolved an entity can round-trip its ID without losing it to
1946
+ // a name-search fallback that would never match.
1947
+ const CANONICAL_ID_RE = /^[a-z][a-z0-9-]*:[a-z_]+:[a-z0-9_]+$/;
1948
+
1944
1949
  const entityResults = store.searchEntities(entity, 1);
1945
- const entityId = entityResults.length > 0
1946
- ? entityResults[0]!.entity_id
1947
- : entity.toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/^_|_$/g, "");
1950
+ let entityId: string;
1951
+ if (entityResults.length > 0) {
1952
+ entityId = entityResults[0]!.entity_id;
1953
+ } else if (CANONICAL_ID_RE.test(entity)) {
1954
+ entityId = entity; // caller passed a canonical ID directly
1955
+ } else {
1956
+ const stats = store.getTripleStats();
1957
+ return {
1958
+ content: [{ type: "text", text: `No entity found matching "${entity}". The KG has ${stats.totalTriples} total triples (${stats.currentFacts} current). Try a shorter/broader name, or pass a canonical ID in the form 'vault:type:slug'.` }],
1959
+ };
1960
+ }
1948
1961
 
1949
1962
  const triples = store.queryEntityTriples(entityId, { asOf: as_of, direction });
1950
1963
  const stats = store.getTripleStats();
1951
1964
 
1952
1965
  if (triples.length === 0) {
1953
1966
  return {
1954
- content: [{ type: "text", text: `No knowledge graph facts found for "${entity}". The KG has ${stats.totalTriples} total triples (${stats.currentFacts} current).` }],
1967
+ content: [{ type: "text", text: `No knowledge graph facts found for "${entity}" (resolved to ${entityId}). The KG has ${stats.totalTriples} total triples (${stats.currentFacts} current).` }],
1955
1968
  };
1956
1969
  }
1957
1970
 
package/src/observer.ts CHANGED
@@ -22,6 +22,13 @@ export type Observation = {
22
22
  concepts: string[];
23
23
  filesRead: string[];
24
24
  filesModified: string[];
25
+ triples?: ParsedTriple[];
26
+ };
27
+
28
/**
 * One subject–predicate–object relation parsed from an observation's
 * `<triples>` block.
 */
export type ParsedTriple = {
  /** Text of the `<subject>` element. */
  subject: string;
  /** Text of the `<predicate>` element, lower-cased with whitespace turned into `_`. */
  predicate: string;
  /** Text of the `<object>` element. */
  object: string;
};
26
33
 
27
34
  export type SessionSummary = {
@@ -48,28 +55,54 @@ const GENERATION_TEMPERATURE = 0.3;
48
55
  // =============================================================================
49
56
 
50
57
  const OBSERVATION_SYSTEM_PROMPT = `You are an observer analyzing a coding session transcript. Extract structured observations.
51
- For each significant action, decision, or discovery, output an <observation> XML element.
58
+ For each significant action, decision, or discovery, output an <observation> XML element with the structure below.
52
59
 
60
+ Structure:
53
61
  <observation>
54
- <type>one of: decision, bugfix, feature, refactor, discovery, change, preference, milestone, problem</type>
55
- <title>Brief descriptive title (max 80 chars)</title>
62
+ <type>...</type>
63
+ <title>...</title>
56
64
  <facts>
57
- <fact>Individual atomic fact</fact>
65
+ <fact>...</fact>
58
66
  </facts>
59
- <narrative>2-3 sentences explaining context and reasoning</narrative>
67
+ <triples>
68
+ <triple>
69
+ <subject>...</subject>
70
+ <predicate>...</predicate>
71
+ <object>...</object>
72
+ </triple>
73
+ </triples>
74
+ <narrative>...</narrative>
60
75
  <concepts>
61
- <concept>one of: how-it-works, why-it-exists, what-changed, problem-solution, gotcha, pattern, trade-off</concept>
76
+ <concept>...</concept>
62
77
  </concepts>
63
- <files_read><file>path/to/file</file></files_read>
64
- <files_modified><file>path/to/file</file></files_modified>
78
+ <files_read><file>...</file></files_read>
79
+ <files_modified><file>...</file></files_modified>
65
80
  </observation>
66
81
 
67
- Rules:
82
+ Field rules:
83
+ - <type>: one of decision, bugfix, feature, refactor, discovery, change, preference, milestone, problem
84
+ - <title>: brief descriptive title, max 80 chars
85
+ - <facts>: 1-5 <fact> elements, each a standalone atomic claim about what happened or what is true (concrete, specific, no schema placeholders or template text)
86
+ - <triples>: 0-3 <triple> elements for structural relationships between named entities (see predicate vocabulary below). Omit entirely if no relational claims apply. Do NOT emit triples for descriptive facts — only for explicit S-P-O relations.
87
+ - <narrative>: 2-3 sentences explaining WHY something was done, not just WHAT
88
+ - <concepts>: 0-3 <concept> elements from: how-it-works, why-it-exists, what-changed, problem-solution, gotcha, pattern, trade-off
89
+ - <files_read>, <files_modified>: only files explicitly mentioned in the transcript
90
+
91
+ Predicate vocabulary (use EXACTLY these predicates in <predicate>, nothing else):
92
+ - adopted, migrated_to — switching to a new tool/framework/approach
93
+ - deployed_to, runs_on — where something runs
94
+ - replaced — when one thing supersedes another
95
+ - depends_on, integrates_with, uses — structural dependencies
96
+ - prefers, avoids — user preferences (use for <subject>user</subject>)
97
+ - caused_by, resolved_by — causal relationships between problems and fixes
98
+ - owned_by — responsibility / ownership
99
+
100
+ <subject> and <object> must be short canonical entity names (2-80 chars). No sentences. No placeholder text. If you cannot fit a claim into this vocabulary, keep it in <facts> instead and omit the triple.
101
+
102
+ Observation rules:
68
103
  - Output 1-5 observations, focusing on the MOST significant events
69
- - Each fact should be a standalone, atomic piece of information
70
- - The narrative should explain WHY something was done, not just WHAT
71
- - Only include files that were explicitly mentioned in the transcript
72
104
  - If no significant observations, output nothing
105
+ - Never use schema example text or template placeholders in <fact>, <subject>, or <object> — emit only real content extracted from the transcript
73
106
 
74
107
  Type guidance:
75
108
  - preference: user expresses a preference, habit, or way of working (e.g., "don't use subagents for this", "I prefer single PRs")
@@ -131,6 +164,47 @@ const VALID_CONCEPTS = new Set([
131
164
  "gotcha", "pattern", "trade-off",
132
165
  ]);
133
166
 
167
/**
 * The closed SPO predicate vocabulary. The triple parser drops any predicate
 * that is not a member. Keep this list in sync with the predicate list in
 * OBSERVATION_SYSTEM_PROMPT.
 */
export const VALID_PREDICATES = new Set<string>([
  // switching to a new tool/framework/approach
  "adopted",
  "migrated_to",
  // where something runs
  "deployed_to",
  "runs_on",
  // one thing supersedes another
  "replaced",
  // structural dependencies
  "depends_on",
  "integrates_with",
  "uses",
  // user preferences
  "prefers",
  "avoids",
  // causal relationships between problems and fixes
  "caused_by",
  "resolved_by",
  // responsibility / ownership
  "owned_by",
]);

/**
 * Predicates whose <object> is kept as a literal string rather than being
 * resolved to an entity.
 */
export const LITERAL_PREDICATES = new Set<string>(["prefers", "avoids"]);
181
+
182
// Exact placeholder strings that must never be persisted as facts or triple components.
// Defense-in-depth: even though the prompt no longer places example text inside
// <fact>/<subject>/<object> tags, a weak model could still echo these phrases.
// Entries are compared against the trimmed, lower-cased input.
const SCHEMA_PLACEHOLDER_STRINGS = new Set([
  "individual atomic fact",
  "atomic fact",
  "one atomic claim per fact element",
  "brief descriptive title",
  "canonical entity name",
]);

// Regex for template placeholder markers: {{...}}, <!--...-->, ${...}.
// Intentionally narrow — earlier drafts rejected any line starting with
// "example:" / "placeholder:", which false-positived legitimate facts like
// "Example: QMD switched to Bun in v0.2". Shape-only matching avoids that
// drift; the exact-string blocklist above handles known echoed placeholders.
const PLACEHOLDER_REGEX = /^(\{\{.*\}\}|<!--.*-->|\$\{.*\})/;

// Text consisting only of dots / ellipsis characters (optionally spaced out).
// The observation prompt shows "..." inside <title>, <fact>, <subject> and
// <object>, so this is the literal text a model would echo from the schema.
const ELLIPSIS_ONLY_REGEX = /^[.\u2026\s]+$/;

/**
 * Decide whether `text` is schema/template filler rather than real content.
 *
 * @param text - candidate title, fact, subject or object
 * @returns `true` when the text is empty or whitespace-only, is made up only
 *   of dots/ellipsis characters, equals (case-insensitively, after trimming)
 *   one of the known schema example phrases, or starts with a template marker
 *   (`{{...}}`, `<!--...-->`, `${...}`); `false` otherwise.
 */
function isSchemaPlaceholder(text: string): boolean {
  if (!text) return true;
  const normalized = text.trim().toLowerCase();
  // Whitespace-only input carries no content either; treat it like "".
  if (normalized.length === 0) return true;
  if (ELLIPSIS_ONLY_REGEX.test(normalized)) return true;
  if (SCHEMA_PLACEHOLDER_STRINGS.has(normalized)) return true;
  if (PLACEHOLDER_REGEX.test(normalized)) return true;
  return false;
}
207
+
134
208
  export function parseObservationXml(xml: string): Observation | null {
135
209
  const typeMatch = xml.match(/<type>\s*(.*?)\s*<\/type>/s);
136
210
  const titleMatch = xml.match(/<title>\s*(.*?)\s*<\/title>/s);
@@ -141,24 +215,67 @@ export function parseObservationXml(xml: string): Observation | null {
141
215
  const type = typeMatch[1].trim().toLowerCase();
142
216
  if (!VALID_OBSERVATION_TYPES.has(type)) return null;
143
217
 
144
- const facts = extractMultiple(xml, "fact");
218
+ const rawTitle = titleMatch[1].trim();
219
+ if (isSchemaPlaceholder(rawTitle)) return null;
220
+
221
+ const facts = extractMultiple(xml, "fact")
222
+ .filter(f => f.length >= 5)
223
+ .filter(f => !isSchemaPlaceholder(f));
224
+
145
225
  const concepts = extractMultiple(xml, "concept")
146
226
  .filter(c => VALID_CONCEPTS.has(c.toLowerCase()))
147
227
  .map(c => c.toLowerCase());
148
228
  const filesRead = extractMultiple(xml, "file", "files_read");
149
229
  const filesModified = extractMultiple(xml, "file", "files_modified");
150
230
 
231
+ // Parse triples (Fix A): strict validation against canonical predicate vocabulary.
232
+ // Missing/malformed triples are silently dropped — fail-closed on ambiguity.
233
+ const triples = extractTriples(xml);
234
+
151
235
  return {
152
236
  type: type as Observation["type"],
153
- title: titleMatch[1].trim().slice(0, 80),
154
- facts: facts.filter(f => f.length >= 5),
237
+ title: rawTitle.slice(0, 80),
238
+ facts,
155
239
  narrative: narrativeMatch?.[1]?.trim() || "",
156
240
  concepts,
157
241
  filesRead,
158
242
  filesModified,
243
+ triples: triples.length > 0 ? triples : undefined,
159
244
  };
160
245
  }
161
246
 
247
/**
 * Parse the `<triples>` block of one observation into validated triples.
 *
 * Only the first `<triples>` element is read. A `<triple>` is dropped when any
 * of subject/predicate/object is missing or empty, when the normalized
 * predicate is not in `VALID_PREDICATES`, when subject or object falls outside
 * the length bounds, or when subject or object is schema placeholder text.
 *
 * Normalization applied before validation:
 * - subject and object: trimmed, and every internal whitespace run (including
 *   line breaks from multi-line tags) collapsed to a single space, so the same
 *   name does not end up stored under several whitespace variants;
 * - predicate: lower-cased, with whitespace and hyphen runs turned into `_`
 *   (`Depends on`, `depends-on` → `depends_on`).
 *
 * @param xml - XML text of a single observation
 * @returns at most five triples, in document order; empty when there is no
 *   `<triples>` block or nothing in it validates
 */
function extractTriples(xml: string): ParsedTriple[] {
  // Parser-side cap per observation; more generous than the prompt's 0-3.
  const maxTriples = 5;

  const parentMatch = xml.match(/<triples>([\s\S]*?)<\/triples>/);
  if (!parentMatch?.[1]) return [];

  // Read one child element and normalize its whitespace. Returns undefined
  // when the tag is absent and "" when it is present but blank.
  const readField = (block: string, tag: string): string | undefined => {
    const raw = block.match(new RegExp(`<${tag}>([\\s\\S]*?)</${tag}>`))?.[1];
    return raw?.replace(/\s+/g, " ").trim();
  };

  const blockRegex = /<triple>([\s\S]*?)<\/triple>/g;
  const results: ParsedTriple[] = [];
  let match: RegExpExecArray | null;
  while ((match = blockRegex.exec(parentMatch[1])) !== null) {
    const block = match[1] ?? "";
    const subject = readField(block, "subject");
    const rawPredicate = readField(block, "predicate");
    const object = readField(block, "object");

    if (!subject || !rawPredicate || !object) continue;

    const predicate = rawPredicate.toLowerCase().replace(/[\s-]+/g, "_");
    if (!VALID_PREDICATES.has(predicate)) continue;

    // Length bounds — guard against sentence-shaped subjects/objects. The
    // object bound is looser than the subject bound.
    if (subject.length < 2 || subject.length > 80) continue;
    if (object.length < 2 || object.length > 120) continue;

    if (isSchemaPlaceholder(subject) || isSchemaPlaceholder(object)) continue;

    results.push({ subject, predicate, object });

    if (results.length >= maxTriples) break; // cap per observation
  }
  return results;
}
278
+
162
279
  export function parseSummaryXml(xml: string): SessionSummary | null {
163
280
  const request = extractSingle(xml, "request");
164
281
  const investigated = extractSingle(xml, "investigated");