clawmem 0.8.4 → 0.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +18 -19
- package/CLAUDE.md +8 -8
- package/README.md +18 -22
- package/SKILL.md +8 -8
- package/package.json +1 -1
- package/src/amem.ts +8 -1
- package/src/entity.ts +63 -0
- package/src/hooks/decision-extractor.ts +145 -115
- package/src/mcp.ts +19 -6
- package/src/observer.ts +132 -15
package/AGENTS.md
CHANGED
|
@@ -128,15 +128,15 @@ ln -sf ~/clawmem/bin/clawmem ~/.bun/bin/clawmem
|
|
|
128
128
|
clawmem bootstrap ~/notes --name notes
|
|
129
129
|
|
|
130
130
|
# Or step by step:
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
131
|
+
clawmem init
|
|
132
|
+
clawmem collection add ~/notes --name notes
|
|
133
|
+
clawmem update --embed
|
|
134
|
+
clawmem setup hooks
|
|
135
|
+
clawmem setup mcp
|
|
136
136
|
|
|
137
137
|
# Verify
|
|
138
|
-
|
|
139
|
-
|
|
138
|
+
clawmem doctor # Full health check
|
|
139
|
+
clawmem status # Quick index status
|
|
140
140
|
```
|
|
141
141
|
|
|
142
142
|
### Background Services (systemd user units)
|
|
@@ -206,18 +206,17 @@ systemctl --user status clawmem-watcher.service clawmem-embed.timer
|
|
|
206
206
|
|
|
207
207
|
When using ClawMem with OpenClaw, choose one of two deployment options:
|
|
208
208
|
|
|
209
|
-
|
|
209
|
+
**Active Memory coexistence:** ClawMem is fully compatible with OpenClaw's Active Memory plugin (v2026.4.10+). They search different backends (ClawMem vault vs dreaming/wiki) and inject into different prompt regions (user prompt vs system prompt). Both can run simultaneously — no configuration needed.
|
|
210
|
+
|
|
211
|
+
**OpenClaw v2026.4.10+ recommended:** Fixes a config normalization bug where `plugins.slots.contextEngine` was silently dropped (#64192).
|
|
210
212
|
|
|
211
|
-
|
|
213
|
+
### Option 1: ClawMem Exclusive (Recommended)
|
|
212
214
|
|
|
213
|
-
|
|
214
|
-
- No context window waste (avoids 10-15% duplicate injection)
|
|
215
|
-
- Prevents OpenClaw native memory auto-initialization on updates
|
|
216
|
-
- All memory in ClawMem's hybrid search + graph traversal system
|
|
215
|
+
ClawMem handles 100% of structured memory. Disable native memory search (not Active Memory — that's separate and compatible):
|
|
217
216
|
|
|
218
217
|
**Configuration:**
|
|
219
218
|
```bash
|
|
220
|
-
# Disable OpenClaw's native memory
|
|
219
|
+
# Disable OpenClaw's native memory search
|
|
221
220
|
openclaw config set agents.defaults.memorySearch.extraPaths "[]"
|
|
222
221
|
|
|
223
222
|
# Verify
|
|
@@ -235,7 +234,7 @@ ls ~/.openclaw/agents/main/memory/
|
|
|
235
234
|
|
|
236
235
|
### Option 2: Hybrid (ClawMem + Native)
|
|
237
236
|
|
|
238
|
-
Run both ClawMem and OpenClaw's native memory for redundancy.
|
|
237
|
+
Run both ClawMem and OpenClaw's native memory search for redundancy.
|
|
239
238
|
|
|
240
239
|
**Configuration:**
|
|
241
240
|
```bash
|
|
@@ -243,9 +242,9 @@ openclaw config set agents.defaults.memorySearch.extraPaths '["~/documents", "~/
|
|
|
243
242
|
```
|
|
244
243
|
|
|
245
244
|
**Tradeoffs:**
|
|
246
|
-
-
|
|
247
|
-
-
|
|
248
|
-
-
|
|
245
|
+
- Redundant recall from two independent systems
|
|
246
|
+
- 10-15% context window waste from duplicate facts
|
|
247
|
+
- Two memory indices to maintain
|
|
249
248
|
|
|
250
249
|
**Recommendation:** Use Option 1 unless you have a specific need for redundant memory systems.
|
|
251
250
|
|
|
@@ -263,7 +262,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
|
|
|
263
262
|
| `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + recent decisions (400) + antipatterns (150) + vault context (200) → `<vault-postcompact>` |
|
|
264
263
|
| `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
|
|
265
264
|
| `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
|
|
266
|
-
| `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions,
|
|
265
|
+
| `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, persists observer-emitted SPO triples via `ensureEntityCanonical` (canonical `vault:type:slug` IDs shared with A-MEM) using the tight predicate vocabulary (adopted, migrated_to, deployed_to, runs_on, replaced, depends_on, integrates_with, uses, prefers, avoids, caused_by, resolved_by, owned_by). Eligible observation types: decision/preference/milestone/problem/discovery/feature. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
|
|
267
266
|
| `handoff-generator` | Stop | — | LLM summarizes session → `_clawmem/agent/handoffs/` |
|
|
268
267
|
| `feedback-loop` | Stop | — | tracks referenced notes → boosts confidence, records usage relations + co-activations between co-referenced docs, tracks utility signals (surfaced vs referenced ratio for lifecycle automation), per-turn recall attribution (marks which surfaced docs were cited in which turn) |
|
|
269
268
|
|
package/CLAUDE.md
CHANGED
|
@@ -128,15 +128,15 @@ ln -sf ~/clawmem/bin/clawmem ~/.bun/bin/clawmem
|
|
|
128
128
|
clawmem bootstrap ~/notes --name notes
|
|
129
129
|
|
|
130
130
|
# Or step by step:
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
131
|
+
clawmem init
|
|
132
|
+
clawmem collection add ~/notes --name notes
|
|
133
|
+
clawmem update --embed
|
|
134
|
+
clawmem setup hooks
|
|
135
|
+
clawmem setup mcp
|
|
136
136
|
|
|
137
137
|
# Verify
|
|
138
|
-
|
|
139
|
-
|
|
138
|
+
clawmem doctor # Full health check
|
|
139
|
+
clawmem status # Quick index status
|
|
140
140
|
```
|
|
141
141
|
|
|
142
142
|
### Background Services (systemd user units)
|
|
@@ -262,7 +262,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
|
|
|
262
262
|
| `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + recent decisions (400) + antipatterns (150) + vault context (200) → `<vault-postcompact>` |
|
|
263
263
|
| `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
|
|
264
264
|
| `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
|
|
265
|
-
| `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions,
|
|
265
|
+
| `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, persists observer-emitted SPO triples via `ensureEntityCanonical` (canonical `vault:type:slug` IDs shared with A-MEM) using the tight predicate vocabulary (adopted, migrated_to, deployed_to, runs_on, replaced, depends_on, integrates_with, uses, prefers, avoids, caused_by, resolved_by, owned_by). Eligible observation types: decision/preference/milestone/problem/discovery/feature. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
|
|
266
266
|
| `handoff-generator` | Stop | — | LLM summarizes session → `_clawmem/agent/handoffs/` |
|
|
267
267
|
| `feedback-loop` | Stop | — | tracks referenced notes → boosts confidence, records usage relations + co-activations between co-referenced docs, tracks utility signals (surfaced vs referenced ratio for lifecycle automation), per-turn recall attribution (marks which surfaced docs were cited in which turn) |
|
|
268
268
|
|
package/README.md
CHANGED
|
@@ -717,7 +717,7 @@ Registered by `clawmem setup mcp`. Available to any MCP-compatible client.
|
|
|
717
717
|
|---|---|
|
|
718
718
|
| `build_graphs` | Build temporal and/or semantic graphs from document corpus |
|
|
719
719
|
| `find_causal_links` | Trace decision chains: "what led to X", "how we got from A to B". Follow up `intent_search` with this tool on a top result to walk the full causal chain. Traverses causes / caused_by / both up to N hops with depth-annotated reasoning. |
|
|
720
|
-
| `kg_query` | Query the SPO knowledge graph: "what does X relate to?", "what was true about X when?". Returns temporal entity-relationship triples with validity windows.
|
|
720
|
+
| `kg_query` | Query the SPO knowledge graph: "what does X relate to?", "what was true about X when?". Returns temporal entity-relationship triples with validity windows. Accepts entity name (resolved via `searchEntities`) or canonical ID in `vault:type:slug` form. Triples are populated by the decision-extractor hook from observer-emitted `<triples>` blocks. |
|
|
721
721
|
| `memory_evolution_status` | Show how a document's A-MEM metadata evolved over time |
|
|
722
722
|
| `timeline` | Show the temporal neighborhood around a document — what was created/modified before and after it. Progressive disclosure: search → timeline (context) → get (full content). Supports same-collection scoping and session correlation. |
|
|
723
723
|
|
|
@@ -1073,40 +1073,36 @@ Manual layers benefit from periodic re-indexing — a cron job running `clawmem
|
|
|
1073
1073
|
### Setup
|
|
1074
1074
|
|
|
1075
1075
|
```bash
|
|
1076
|
-
# Bootstrap
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
# Bootstrap each project
|
|
1080
|
-
./bin/clawmem bootstrap ~/Projects/my-project --name my-project
|
|
1076
|
+
# Bootstrap a content directory (creates vault + indexes + embeds + installs hooks + MCP)
|
|
1077
|
+
clawmem bootstrap ~/notes --name notes
|
|
1081
1078
|
|
|
1082
|
-
#
|
|
1083
|
-
|
|
1079
|
+
# Bootstrap each project you want indexed
|
|
1080
|
+
clawmem bootstrap ~/Projects/my-project --name my-project
|
|
1084
1081
|
|
|
1085
|
-
# Install watcher as systemd
|
|
1086
|
-
|
|
1082
|
+
# Install watcher + embed timer as systemd services
|
|
1083
|
+
clawmem install-service --enable
|
|
1087
1084
|
```
|
|
1088
1085
|
|
|
1089
|
-
#### OpenClaw-
|
|
1086
|
+
#### OpenClaw-specific
|
|
1090
1087
|
|
|
1091
1088
|
```bash
|
|
1092
|
-
#
|
|
1093
|
-
|
|
1089
|
+
# Install the ContextEngine plugin (auto-symlinks into ~/.openclaw/extensions/)
|
|
1090
|
+
clawmem setup openclaw
|
|
1091
|
+
# Then follow the printed next steps: restart gateway, set slot, configure GPU endpoints
|
|
1094
1092
|
```
|
|
1095
1093
|
|
|
1096
|
-
|
|
1094
|
+
Index your content directories with `clawmem bootstrap` as above. The OpenClaw plugin shares the same vault as Claude Code hooks.
|
|
1097
1095
|
|
|
1098
|
-
|
|
1099
|
-
# Hermes uses ~/.hermes/ as its home directory
|
|
1100
|
-
./bin/clawmem bootstrap ~/.hermes --name hermes-home
|
|
1096
|
+
#### Hermes-specific
|
|
1101
1097
|
|
|
1102
|
-
|
|
1103
|
-
|
|
1098
|
+
```bash
|
|
1099
|
+
# Install the memory provider plugin (symlink or copy)
|
|
1100
|
+
ln -s $(npm root -g)/clawmem/src/hermes /path/to/hermes-agent/plugins/memory/clawmem
|
|
1104
1101
|
|
|
1105
|
-
# Start
|
|
1102
|
+
# Start the REST API (required for Hermes tool calls)
|
|
1106
1103
|
clawmem serve --port 7438 &
|
|
1107
1104
|
|
|
1108
|
-
# Configure Hermes to use ClawMem
|
|
1109
|
-
# In your Hermes config.yaml:
|
|
1105
|
+
# Configure Hermes to use ClawMem (in your Hermes config.yaml):
|
|
1110
1106
|
# memory:
|
|
1111
1107
|
# provider: clawmem
|
|
1112
1108
|
```
|
package/SKILL.md
CHANGED
|
@@ -118,15 +118,15 @@ ln -sf ~/clawmem/bin/clawmem ~/.bun/bin/clawmem
|
|
|
118
118
|
clawmem bootstrap ~/notes --name notes
|
|
119
119
|
|
|
120
120
|
# Or step by step:
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
121
|
+
clawmem init
|
|
122
|
+
clawmem collection add ~/notes --name notes
|
|
123
|
+
clawmem update --embed
|
|
124
|
+
clawmem setup hooks
|
|
125
|
+
clawmem setup mcp
|
|
126
126
|
|
|
127
127
|
# Verify
|
|
128
|
-
|
|
129
|
-
|
|
128
|
+
clawmem doctor # Full health check
|
|
129
|
+
clawmem status # Quick index status
|
|
130
130
|
```
|
|
131
131
|
|
|
132
132
|
### Background Services (systemd user units)
|
|
@@ -294,7 +294,7 @@ Once escalated, route by query type:
|
|
|
294
294
|
| `timeline` | Temporal neighborhood around a document — what was modified before/after. Progressive disclosure: search → timeline → get. Supports same-collection scoping and session correlation. |
|
|
295
295
|
| `list_vaults` | Show configured vault names and paths. Empty in single-vault mode. |
|
|
296
296
|
| `vault_sync` | Index markdown from a directory into a named vault. Restricted-path validation rejects sensitive directories. |
|
|
297
|
-
| `kg_query` | Query SPO knowledge graph for entity relationships with temporal validity.
|
|
297
|
+
| `kg_query` | Query SPO knowledge graph for entity relationships with temporal validity. Accepts entity name or canonical ID (`vault:type:slug`). Triples are populated by decision-extractor from observer-emitted `<triples>` blocks using a canonical predicate vocabulary. |
|
|
298
298
|
| `diary_write` | Write diary entry. Use proactively in non-hooked environments. Do NOT use in Claude Code. |
|
|
299
299
|
| `diary_read` | Read recent diary entries. Filter by agent name. |
|
|
300
300
|
| `lifecycle_status` | Document lifecycle statistics: active, archived, forgotten, pinned, snoozed counts and policy summary. |
|
package/package.json
CHANGED
package/src/amem.ts
CHANGED
|
@@ -649,11 +649,18 @@ export async function postIndexEnrich(
|
|
|
649
649
|
}
|
|
650
650
|
|
|
651
651
|
/**
|
|
652
|
-
* Observation with document ID for causal inference
|
|
652
|
+
* Observation with document ID for causal inference and SPO triple extraction.
|
|
653
|
+
*
|
|
654
|
+
* Populated by the decision-extractor hook after an observation is successfully
|
|
655
|
+
* persisted. Consumed by:
|
|
656
|
+
* - `inferCausalLinks` (A-MEM) — uses docId + facts
|
|
657
|
+
* - `insertObservationTriples` (decision-extractor) — uses docId + obsType + triples
|
|
653
658
|
*/
|
|
654
659
|
export interface ObservationWithDoc {
|
|
655
660
|
docId: number;
|
|
656
661
|
facts: string[];
|
|
662
|
+
obsType?: string;
|
|
663
|
+
triples?: Array<{ subject: string; predicate: string; object: string }>;
|
|
657
664
|
}
|
|
658
665
|
|
|
659
666
|
/**
|
package/src/entity.ts
CHANGED
|
@@ -354,6 +354,69 @@ export function resolveEntityCanonical(
|
|
|
354
354
|
// Entity Storage + Mentions + Co-occurrences
|
|
355
355
|
// =============================================================================
|
|
356
356
|
|
|
357
|
+
/**
|
|
358
|
+
* Resolve the entity_type for a name via exact case-insensitive match.
|
|
359
|
+
*
|
|
360
|
+
* Returns the type only when EXACTLY ONE distinct entity_type in the given vault is recorded for
|
|
361
|
+
* the name. Zero matches → null (caller should default to a safe type). Multiple
|
|
362
|
+
* matches (ambiguous across buckets, e.g. "Alice" as person AND "Alice" as project)
|
|
363
|
+
* → null so the caller falls back to a safe default instead of arbitrarily picking.
|
|
364
|
+
*
|
|
365
|
+
* Exact match only — no fuzzy matching — to avoid false inheritance on near-names.
|
|
366
|
+
*/
|
|
367
|
+
export function resolveEntityTypeExact(
|
|
368
|
+
db: Database,
|
|
369
|
+
name: string,
|
|
370
|
+
vault: string = 'default'
|
|
371
|
+
): string | null {
|
|
372
|
+
const rows = db.prepare(`
|
|
373
|
+
SELECT DISTINCT entity_type FROM entity_nodes
|
|
374
|
+
WHERE LOWER(name) = LOWER(?) AND vault = ?
|
|
375
|
+
`).all(name, vault) as Array<{ entity_type: string }>;
|
|
376
|
+
|
|
377
|
+
if (rows.length !== 1) return null; // zero or ambiguous
|
|
378
|
+
return rows[0]!.entity_type;
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
/**
|
|
382
|
+
* Resolve-or-create a canonical entity without incrementing mention_count.
|
|
383
|
+
*
|
|
384
|
+
* Used by consumers that reference an entity but do NOT constitute a document
|
|
385
|
+
* mention (e.g. SPO triple extraction). Semantically distinct from upsertEntity,
|
|
386
|
+
* which treats every call as a doc mention and inflates the count.
|
|
387
|
+
*
|
|
388
|
+
* Flow: resolveEntityCanonical (FTS5 + fuzzy + bucket match) → reuse if found,
|
|
389
|
+
* otherwise mint a new canonical `vault:type:slug` entity with mention_count = 0.
|
|
390
|
+
*
|
|
391
|
+
* Returns the entity_id.
|
|
392
|
+
*/
|
|
393
|
+
export function ensureEntityCanonical(
|
|
394
|
+
db: Database,
|
|
395
|
+
name: string,
|
|
396
|
+
type: string,
|
|
397
|
+
vault: string = 'default'
|
|
398
|
+
): string {
|
|
399
|
+
const canonicalId = resolveEntityCanonical(db, name, type, vault);
|
|
400
|
+
if (canonicalId) return canonicalId;
|
|
401
|
+
|
|
402
|
+
const entityId = makeEntityId(name, type, vault);
|
|
403
|
+
db.prepare(`
|
|
404
|
+
INSERT OR IGNORE INTO entity_nodes (entity_id, entity_type, name, description, created_at, mention_count, last_seen, vault)
|
|
405
|
+
VALUES (?, ?, ?, NULL, datetime('now'), 0, datetime('now'), ?)
|
|
406
|
+
`).run(entityId, type, name, vault);
|
|
407
|
+
|
|
408
|
+
try {
|
|
409
|
+
db.prepare(`
|
|
410
|
+
INSERT OR IGNORE INTO entities_fts (entity_id, name, entity_type)
|
|
411
|
+
VALUES (?, ?, ?)
|
|
412
|
+
`).run(entityId, name.toLowerCase(), type);
|
|
413
|
+
} catch {
|
|
414
|
+
// FTS insert may fail if table doesn't exist yet — non-fatal
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
return entityId;
|
|
418
|
+
}
|
|
419
|
+
|
|
357
420
|
/**
|
|
358
421
|
* Upsert an entity into entity_nodes and entities_fts.
|
|
359
422
|
* Returns the entity_id (canonical or new).
|
package/src/hooks/decision-extractor.ts
CHANGED
|
@@ -17,13 +17,23 @@ import {
|
|
|
17
17
|
validateTranscriptPath,
|
|
18
18
|
} from "../hooks.ts";
|
|
19
19
|
import { hashContent } from "../indexer.ts";
|
|
20
|
-
import { extractObservations, type Observation } from "../observer.ts";
|
|
20
|
+
import { extractObservations, type Observation, LITERAL_PREDICATES } from "../observer.ts";
|
|
21
21
|
import { updateDirectoryContext } from "../directory-context.ts";
|
|
22
22
|
import { loadConfig } from "../collections.ts";
|
|
23
23
|
import { getDefaultLlamaCpp } from "../llm.ts";
|
|
24
24
|
import type { ObservationWithDoc } from "../amem.ts";
|
|
25
25
|
import { extractJsonFromLLM } from "../amem.ts";
|
|
26
26
|
import { DEFAULT_EMBED_MODEL, extractSnippet, type SearchResult } from "../store.ts";
|
|
27
|
+
import { ensureEntityCanonical, resolveEntityTypeExact } from "../entity.ts";
|
|
28
|
+
|
|
29
|
+
// Observation types that are allowed to contribute SPO triples. Widened from the
|
|
30
|
+
// original {decision, preference, milestone, problem} gate, which rejected 77% of
|
|
31
|
+
// real observations in production vaults (the majority type is 'discovery').
|
|
32
|
+
// See BACKLOG.md §1.6 for the full diagnosis.
|
|
33
|
+
const SPO_ELIGIBLE_OBSERVATION_TYPES = new Set<Observation["type"]>([
|
|
34
|
+
"decision", "preference", "milestone", "problem",
|
|
35
|
+
"discovery", "feature",
|
|
36
|
+
]);
|
|
27
37
|
|
|
28
38
|
// =============================================================================
|
|
29
39
|
// Facet-Based Merge Policy
|
|
@@ -325,42 +335,8 @@ export async function decisionExtractor(
|
|
|
325
335
|
const observationsWithDocs: ObservationWithDoc[] = [];
|
|
326
336
|
if (observations.length > 0) {
|
|
327
337
|
for (const obs of observations) {
|
|
328
|
-
const
|
|
329
|
-
|
|
330
|
-
const obsHash = hashContent(obsBody);
|
|
331
|
-
|
|
332
|
-
store.insertContent(obsHash, obsBody, timestamp);
|
|
333
|
-
try {
|
|
334
|
-
store.insertDocument("_clawmem", obsPath, obs.title, obsHash, timestamp, timestamp);
|
|
335
|
-
const doc = store.findActiveDocument("_clawmem", obsPath);
|
|
336
|
-
if (doc) {
|
|
337
|
-
store.updateDocumentMeta(doc.id, {
|
|
338
|
-
content_type: obs.type === "decision" ? "decision"
|
|
339
|
-
: obs.type === "preference" ? "preference"
|
|
340
|
-
: obs.type === "milestone" ? "milestone"
|
|
341
|
-
: obs.type === "problem" ? "problem"
|
|
342
|
-
: "observation",
|
|
343
|
-
confidence: 0.80,
|
|
344
|
-
});
|
|
345
|
-
store.updateObservationFields(obsPath, "_clawmem", {
|
|
346
|
-
observation_type: obs.type,
|
|
347
|
-
facts: JSON.stringify(obs.facts),
|
|
348
|
-
narrative: obs.narrative,
|
|
349
|
-
concepts: JSON.stringify(obs.concepts),
|
|
350
|
-
files_read: JSON.stringify(obs.filesRead),
|
|
351
|
-
files_modified: JSON.stringify(obs.filesModified),
|
|
352
|
-
});
|
|
353
|
-
|
|
354
|
-
if (obs.facts.length > 0) {
|
|
355
|
-
observationsWithDocs.push({
|
|
356
|
-
docId: doc.id,
|
|
357
|
-
facts: obs.facts,
|
|
358
|
-
});
|
|
359
|
-
}
|
|
360
|
-
}
|
|
361
|
-
} catch {
|
|
362
|
-
// May already exist
|
|
363
|
-
}
|
|
338
|
+
const wit = persistObservationDoc(store, obs, sessionId, dateStr, timestamp);
|
|
339
|
+
if (wit) observationsWithDocs.push(wit);
|
|
364
340
|
}
|
|
365
341
|
|
|
366
342
|
// Infer causal links from observations with facts
|
|
@@ -375,31 +351,12 @@ export async function decisionExtractor(
|
|
|
375
351
|
}
|
|
376
352
|
}
|
|
377
353
|
|
|
378
|
-
// Extract SPO triples from observation
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
try {
|
|
385
|
-
store.db.prepare(
|
|
386
|
-
"INSERT OR IGNORE INTO entity_nodes (entity_id, name, entity_type, created_at) VALUES (?, ?, ?, ?)"
|
|
387
|
-
).run(triple.subjectId, triple.subject, "auto", new Date().toISOString());
|
|
388
|
-
if (triple.objectId) {
|
|
389
|
-
store.db.prepare(
|
|
390
|
-
"INSERT OR IGNORE INTO entity_nodes (entity_id, name, entity_type, created_at) VALUES (?, ?, ?, ?)"
|
|
391
|
-
).run(triple.objectId, triple.object, "auto", new Date().toISOString());
|
|
392
|
-
}
|
|
393
|
-
store.addTriple(triple.subjectId, triple.predicate, triple.objectId, triple.objectId ? null : triple.object, {
|
|
394
|
-
confidence: obs.type === "decision" || obs.type === "preference" ? 0.9 : 0.7,
|
|
395
|
-
sourceFact: fact,
|
|
396
|
-
});
|
|
397
|
-
} catch {
|
|
398
|
-
// Triple insertion errors are non-fatal
|
|
399
|
-
}
|
|
400
|
-
}
|
|
401
|
-
}
|
|
402
|
-
}
|
|
354
|
+
// Extract SPO triples from observation-emitted <triples> blocks (Fix A).
|
|
355
|
+
// The regex-based extractTripleFromFact is gone — the observer LLM now emits
|
|
356
|
+
// structured triples alongside facts, parsed and validated in parseObservationXml.
|
|
357
|
+
// We iterate observationsWithDocs (not raw observations) so every triple gets
|
|
358
|
+
// real source_doc_id provenance from the persisted observation document (Fix F).
|
|
359
|
+
insertObservationTriples(store, observations, observationsWithDocs);
|
|
403
360
|
}
|
|
404
361
|
|
|
405
362
|
// Extract decisions (observer-first, regex fallback)
|
|
@@ -691,67 +648,140 @@ function formatObservation(obs: Observation, dateStr: string, sessionId: string)
|
|
|
691
648
|
}
|
|
692
649
|
|
|
693
650
|
// =============================================================================
|
|
694
|
-
//
|
|
651
|
+
// Observation persistence
|
|
695
652
|
// =============================================================================
|
|
696
653
|
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
654
|
+
/**
|
|
655
|
+
* Persist a single observation as a `_clawmem` document and return an
|
|
656
|
+
* `ObservationWithDoc` for downstream consumers (causal inference + SPO
|
|
657
|
+
* triples).
|
|
658
|
+
*
|
|
659
|
+
* Path format: `observations/${date}-${session8}-${type}-${hash8}.md`. The
|
|
660
|
+
* 8-char hash slice (SHA256 of the formatted body) disambiguates multiple
|
|
661
|
+
* observations of the same type within a single session — without it, the
|
|
662
|
+
* second insert hits the `UNIQUE(collection, path)` constraint, is silently
|
|
663
|
+
* dropped, and its triples never reach `entity_triples`. See Codex Turn 3
|
|
664
|
+
* for the regression this guards against.
|
|
665
|
+
*
|
|
666
|
+
* Returns null when the doc cannot be looked up after insert OR when the
|
|
667
|
+
* observation has no facts (triples without facts wouldn't survive the
|
|
668
|
+
* causal-links/facts filter downstream), or when the document insert/metadata update throws (the error is logged, not rethrown).
|
|
669
|
+
*/
|
|
670
|
+
export function persistObservationDoc(
|
|
671
|
+
store: Store,
|
|
672
|
+
obs: Observation,
|
|
673
|
+
sessionId: string,
|
|
674
|
+
dateStr: string,
|
|
675
|
+
timestamp: string
|
|
676
|
+
): ObservationWithDoc | null {
|
|
677
|
+
const obsBody = formatObservation(obs, dateStr, sessionId);
|
|
678
|
+
const obsHash = hashContent(obsBody);
|
|
679
|
+
const obsPath = `observations/${dateStr}-${sessionId.slice(0, 8)}-${obs.type}-${obsHash.slice(0, 8)}.md`;
|
|
680
|
+
|
|
681
|
+
store.insertContent(obsHash, obsBody, timestamp);
|
|
682
|
+
try {
|
|
683
|
+
store.insertDocument("_clawmem", obsPath, obs.title, obsHash, timestamp, timestamp);
|
|
684
|
+
const doc = store.findActiveDocument("_clawmem", obsPath);
|
|
685
|
+
if (!doc) return null;
|
|
686
|
+
|
|
687
|
+
store.updateDocumentMeta(doc.id, {
|
|
688
|
+
content_type: obs.type === "decision" ? "decision"
|
|
689
|
+
: obs.type === "preference" ? "preference"
|
|
690
|
+
: obs.type === "milestone" ? "milestone"
|
|
691
|
+
: obs.type === "problem" ? "problem"
|
|
692
|
+
: "observation",
|
|
693
|
+
confidence: 0.80,
|
|
694
|
+
});
|
|
695
|
+
store.updateObservationFields(obsPath, "_clawmem", {
|
|
696
|
+
observation_type: obs.type,
|
|
697
|
+
facts: JSON.stringify(obs.facts),
|
|
698
|
+
narrative: obs.narrative,
|
|
699
|
+
concepts: JSON.stringify(obs.concepts),
|
|
700
|
+
files_read: JSON.stringify(obs.filesRead),
|
|
701
|
+
files_modified: JSON.stringify(obs.filesModified),
|
|
702
|
+
});
|
|
704
703
|
|
|
705
|
-
|
|
706
|
-
|
|
704
|
+
if (obs.facts.length === 0) return null;
|
|
705
|
+
return {
|
|
706
|
+
docId: doc.id,
|
|
707
|
+
facts: obs.facts,
|
|
708
|
+
obsType: obs.type,
|
|
709
|
+
triples: obs.triples,
|
|
710
|
+
};
|
|
711
|
+
} catch (err) {
|
|
712
|
+
console.log(`[decision-extractor] Failed to persist observation ${obs.type}/${obs.title}:`, err);
|
|
713
|
+
return null;
|
|
714
|
+
}
|
|
707
715
|
}
|
|
708
716
|
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
717
|
+
// =============================================================================
|
|
718
|
+
// SPO Triple Extraction from Facts
|
|
719
|
+
// =============================================================================
|
|
712
720
|
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
721
|
+
/**
|
|
722
|
+
* Insert SPO triples emitted by the observer into `entity_triples`.
|
|
723
|
+
*
|
|
724
|
+
* Uses canonical vault:type:slug entity IDs via `ensureEntityCanonical` so the
|
|
725
|
+
* knowledge graph stays in one namespace with A-MEM entities. Type inheritance
|
|
726
|
+
* is exact-match-only and ambiguity-safe: if a name resolves to exactly one type
|
|
727
|
+
* already in `entity_nodes`, inherit it; otherwise default to `concept`.
|
|
728
|
+
*
|
|
729
|
+
* Provenance: every triple carries `source_doc_id` from the persisted observation
|
|
730
|
+
* document. Iterates `observationsWithDocs` directly so triples from observations
|
|
731
|
+
* whose doc insert failed are naturally skipped — no order-matching gymnastics.
|
|
732
|
+
*/
|
|
733
|
+
function insertObservationTriples(
|
|
734
|
+
store: Store,
|
|
735
|
+
_observations: Observation[],
|
|
736
|
+
observationsWithDocs: ObservationWithDoc[]
|
|
737
|
+
): void {
|
|
738
|
+
if (observationsWithDocs.length === 0) return;
|
|
739
|
+
|
|
740
|
+
// Per-invocation cache keyed on (vault, normalizedName, resolvedType) to avoid
|
|
741
|
+
// redundant SQL for repeated entity references within a single extraction.
|
|
742
|
+
const vault = "default";
|
|
743
|
+
const cache = new Map<string, string>();
|
|
744
|
+
|
|
745
|
+
const resolveEntity = (name: string, type: string): string => {
|
|
746
|
+
const key = `${vault}:${type}:${name.toLowerCase().trim()}`;
|
|
747
|
+
const cached = cache.get(key);
|
|
748
|
+
if (cached) return cached;
|
|
749
|
+
const id = ensureEntityCanonical(store.db, name, type, vault);
|
|
750
|
+
cache.set(key, id);
|
|
751
|
+
return id;
|
|
752
|
+
};
|
|
753
|
+
|
|
754
|
+
for (const wit of observationsWithDocs) {
|
|
755
|
+
if (!wit.triples || wit.triples.length === 0) continue;
|
|
756
|
+
const obsType = wit.obsType as Observation["type"] | undefined;
|
|
757
|
+
if (!obsType || !SPO_ELIGIBLE_OBSERVATION_TYPES.has(obsType)) continue;
|
|
758
|
+
|
|
759
|
+
const confidence = obsType === "decision" || obsType === "preference" ? 0.9 : 0.7;
|
|
760
|
+
|
|
761
|
+
for (const triple of wit.triples) {
|
|
762
|
+
try {
|
|
763
|
+
const subjectType = resolveEntityTypeExact(store.db, triple.subject, vault) ?? "concept";
|
|
764
|
+
const subjectId = resolveEntity(triple.subject, subjectType);
|
|
720
765
|
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
if (subject.includes(",") || object.includes(",")) continue; // likely a clause, not an entity
|
|
731
|
-
|
|
732
|
-
return {
|
|
733
|
-
subject,
|
|
734
|
-
subjectId: toEntityId(subject),
|
|
735
|
-
predicate: predicate.toLowerCase().replace(/\s+/g, "_"),
|
|
736
|
-
object,
|
|
737
|
-
objectId: toEntityId(object),
|
|
738
|
-
};
|
|
739
|
-
}
|
|
740
|
-
}
|
|
766
|
+
let objectId: string | null = null;
|
|
767
|
+
let objectLiteral: string | null = null;
|
|
768
|
+
|
|
769
|
+
if (LITERAL_PREDICATES.has(triple.predicate)) {
|
|
770
|
+
objectLiteral = triple.object;
|
|
771
|
+
} else {
|
|
772
|
+
const objectType = resolveEntityTypeExact(store.db, triple.object, vault) ?? "concept";
|
|
773
|
+
objectId = resolveEntity(triple.object, objectType);
|
|
774
|
+
}
|
|
741
775
|
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
predicate
|
|
750
|
-
|
|
751
|
-
objectId: null, // literal, not entity
|
|
752
|
-
};
|
|
776
|
+
store.addTriple(subjectId, triple.predicate, objectId, objectLiteral, {
|
|
777
|
+
confidence,
|
|
778
|
+
sourceFact: `${triple.subject} ${triple.predicate} ${triple.object}`,
|
|
779
|
+
sourceDocId: wit.docId,
|
|
780
|
+
});
|
|
781
|
+
} catch (err) {
|
|
782
|
+
// Triple insertion errors are non-fatal — log and continue with the next triple
|
|
783
|
+
console.log(`[decision-extractor] Failed to insert triple ${triple.subject}/${triple.predicate}/${triple.object}:`, err);
|
|
784
|
+
}
|
|
753
785
|
}
|
|
754
786
|
}
|
|
755
|
-
|
|
756
|
-
return null;
|
|
757
787
|
}
|
package/src/mcp.ts
CHANGED
|
@@ -1930,9 +1930,9 @@ This is the recommended entry point for ALL memory queries.`,
|
|
|
1930
1930
|
"kg_query",
|
|
1931
1931
|
{
|
|
1932
1932
|
title: "Knowledge Graph Query",
|
|
1933
|
-
description: "Query the knowledge graph for an entity's relationships. Returns structured facts with temporal validity (valid_from/valid_to). Use for 'what does X relate to?', 'what was true about X on date Y?', 'who/what is connected to X?'.",
|
|
1933
|
+
description: "Query the knowledge graph for an entity's relationships. Returns structured facts with temporal validity (valid_from/valid_to). Use for 'what does X relate to?', 'what was true about X on date Y?', 'who/what is connected to X?'. Accepts an entity name (e.g. 'ClawMem') OR a canonical entity ID in the form 'vault:type:slug' (e.g. 'default:service:clawmem').",
|
|
1934
1934
|
inputSchema: {
|
|
1935
|
-
entity: z.string().describe("Entity name or ID to query"),
|
|
1935
|
+
entity: z.string().describe("Entity name or canonical ID ('vault:type:slug') to query"),
|
|
1936
1936
|
as_of: z.string().optional().describe("Date filter (YYYY-MM-DD) — only facts valid at this date"),
|
|
1937
1937
|
direction: z.enum(["outgoing", "incoming", "both"]).optional().default("both").describe("Relationship direction"),
|
|
1938
1938
|
vault: z.string().optional().describe("Named vault (omit for default vault)"),
|
|
@@ -1941,17 +1941,30 @@ This is the recommended entry point for ALL memory queries.`,
|
|
|
1941
1941
|
async ({ entity, as_of, direction, vault }) => {
|
|
1942
1942
|
const store = getStore(vault);
|
|
1943
1943
|
|
|
1944
|
+
// Canonical IDs look like `vault:type:slug`. The name search below runs first;
|
|
1945
|
+
// when it finds nothing, an input of this shape is used as the entity ID as-is,
|
|
1946
|
+
// so callers that already resolved an entity can round-trip its ID even when a name search would never match it.
|
|
1947
|
+
const CANONICAL_ID_RE = /^[a-z][a-z0-9-]*:[a-z_]+:[a-z0-9_]+$/;
|
|
1948
|
+
|
|
1944
1949
|
const entityResults = store.searchEntities(entity, 1);
|
|
1945
|
-
|
|
1946
|
-
|
|
1947
|
-
|
|
1950
|
+
let entityId: string;
|
|
1951
|
+
if (entityResults.length > 0) {
|
|
1952
|
+
entityId = entityResults[0]!.entity_id;
|
|
1953
|
+
} else if (CANONICAL_ID_RE.test(entity)) {
|
|
1954
|
+
entityId = entity; // caller passed a canonical ID directly
|
|
1955
|
+
} else {
|
|
1956
|
+
const stats = store.getTripleStats();
|
|
1957
|
+
return {
|
|
1958
|
+
content: [{ type: "text", text: `No entity found matching "${entity}". The KG has ${stats.totalTriples} total triples (${stats.currentFacts} current). Try a shorter/broader name, or pass a canonical ID in the form 'vault:type:slug'.` }],
|
|
1959
|
+
};
|
|
1960
|
+
}
|
|
1948
1961
|
|
|
1949
1962
|
const triples = store.queryEntityTriples(entityId, { asOf: as_of, direction });
|
|
1950
1963
|
const stats = store.getTripleStats();
|
|
1951
1964
|
|
|
1952
1965
|
if (triples.length === 0) {
|
|
1953
1966
|
return {
|
|
1954
|
-
content: [{ type: "text", text: `No knowledge graph facts found for "${entity}". The KG has ${stats.totalTriples} total triples (${stats.currentFacts} current).` }],
|
|
1967
|
+
content: [{ type: "text", text: `No knowledge graph facts found for "${entity}" (resolved to ${entityId}). The KG has ${stats.totalTriples} total triples (${stats.currentFacts} current).` }],
|
|
1955
1968
|
};
|
|
1956
1969
|
}
|
|
1957
1970
|
|
package/src/observer.ts
CHANGED
|
@@ -22,6 +22,13 @@ export type Observation = {
|
|
|
22
22
|
concepts: string[];
|
|
23
23
|
filesRead: string[];
|
|
24
24
|
filesModified: string[];
|
|
25
|
+
triples?: ParsedTriple[];
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
export type ParsedTriple = {
|
|
29
|
+
subject: string;
|
|
30
|
+
predicate: string;
|
|
31
|
+
object: string;
|
|
25
32
|
};
|
|
26
33
|
|
|
27
34
|
export type SessionSummary = {
|
|
@@ -48,28 +55,54 @@ const GENERATION_TEMPERATURE = 0.3;
|
|
|
48
55
|
// =============================================================================
|
|
49
56
|
|
|
50
57
|
const OBSERVATION_SYSTEM_PROMPT = `You are an observer analyzing a coding session transcript. Extract structured observations.
|
|
51
|
-
For each significant action, decision, or discovery, output an <observation> XML element.
|
|
58
|
+
For each significant action, decision, or discovery, output an <observation> XML element with the structure below.
|
|
52
59
|
|
|
60
|
+
Structure:
|
|
53
61
|
<observation>
|
|
54
|
-
<type
|
|
55
|
-
<title
|
|
62
|
+
<type>...</type>
|
|
63
|
+
<title>...</title>
|
|
56
64
|
<facts>
|
|
57
|
-
<fact
|
|
65
|
+
<fact>...</fact>
|
|
58
66
|
</facts>
|
|
59
|
-
<
|
|
67
|
+
<triples>
|
|
68
|
+
<triple>
|
|
69
|
+
<subject>...</subject>
|
|
70
|
+
<predicate>...</predicate>
|
|
71
|
+
<object>...</object>
|
|
72
|
+
</triple>
|
|
73
|
+
</triples>
|
|
74
|
+
<narrative>...</narrative>
|
|
60
75
|
<concepts>
|
|
61
|
-
<concept
|
|
76
|
+
<concept>...</concept>
|
|
62
77
|
</concepts>
|
|
63
|
-
<files_read><file
|
|
64
|
-
<files_modified><file
|
|
78
|
+
<files_read><file>...</file></files_read>
|
|
79
|
+
<files_modified><file>...</file></files_modified>
|
|
65
80
|
</observation>
|
|
66
81
|
|
|
67
|
-
|
|
82
|
+
Field rules:
|
|
83
|
+
- <type>: one of decision, bugfix, feature, refactor, discovery, change, preference, milestone, problem
|
|
84
|
+
- <title>: brief descriptive title, max 80 chars
|
|
85
|
+
- <facts>: 1-5 <fact> elements, each a standalone atomic claim about what happened or what is true (concrete, specific, no schema placeholders or template text)
|
|
86
|
+
- <triples>: 0-3 <triple> elements for structural relationships between named entities (see predicate vocabulary below). Omit entirely if no relational claims apply. Do NOT emit triples for descriptive facts — only for explicit S-P-O relations.
|
|
87
|
+
- <narrative>: 2-3 sentences explaining WHY something was done, not just WHAT
|
|
88
|
+
- <concepts>: 0-3 <concept> elements from: how-it-works, why-it-exists, what-changed, problem-solution, gotcha, pattern, trade-off
|
|
89
|
+
- <files_read>, <files_modified>: only files explicitly mentioned in the transcript
|
|
90
|
+
|
|
91
|
+
Predicate vocabulary (use EXACTLY these predicates in <predicate>, nothing else):
|
|
92
|
+
- adopted, migrated_to — switching to a new tool/framework/approach
|
|
93
|
+
- deployed_to, runs_on — where something runs
|
|
94
|
+
- replaced — when one thing supersedes another
|
|
95
|
+
- depends_on, integrates_with, uses — structural dependencies
|
|
96
|
+
- prefers, avoids — user preferences (use for <subject>user</subject>)
|
|
97
|
+
- caused_by, resolved_by — causal relationships between problems and fixes
|
|
98
|
+
- owned_by — responsibility / ownership
|
|
99
|
+
|
|
100
|
+
<subject> and <object> must be short canonical entity names (2-80 chars). No sentences. No placeholder text. If you cannot fit a claim into this vocabulary, keep it in <facts> instead and omit the triple.
|
|
101
|
+
|
|
102
|
+
Observation rules:
|
|
68
103
|
- Output 1-5 observations, focusing on the MOST significant events
|
|
69
|
-
- Each fact should be a standalone, atomic piece of information
|
|
70
|
-
- The narrative should explain WHY something was done, not just WHAT
|
|
71
|
-
- Only include files that were explicitly mentioned in the transcript
|
|
72
104
|
- If no significant observations, output nothing
|
|
105
|
+
- Never use schema example text or template placeholders in <fact>, <subject>, or <object> — emit only real content extracted from the transcript
|
|
73
106
|
|
|
74
107
|
Type guidance:
|
|
75
108
|
- preference: user expresses a preference, habit, or way of working (e.g., "don't use subagents for this", "I prefer single PRs")
|
|
@@ -131,6 +164,47 @@ const VALID_CONCEPTS = new Set([
|
|
|
131
164
|
"gotcha", "pattern", "trade-off",
|
|
132
165
|
]);
|
|
133
166
|
|
|
167
|
+
// Canonical SPO predicate vocabulary — parser rejects anything outside this set.
|
|
168
|
+
// Must stay in sync with the predicate list in OBSERVATION_SYSTEM_PROMPT.
|
|
169
|
+
export const VALID_PREDICATES = new Set([
|
|
170
|
+
"adopted", "migrated_to",
|
|
171
|
+
"deployed_to", "runs_on",
|
|
172
|
+
"replaced",
|
|
173
|
+
"depends_on", "integrates_with", "uses",
|
|
174
|
+
"prefers", "avoids",
|
|
175
|
+
"caused_by", "resolved_by",
|
|
176
|
+
"owned_by",
|
|
177
|
+
]);
|
|
178
|
+
|
|
179
|
+
// Predicates whose <object> should be stored as a literal (not resolved to an entity).
|
|
180
|
+
export const LITERAL_PREDICATES = new Set(["prefers", "avoids"]);
|
|
181
|
+
|
|
182
|
+
// Exact placeholder strings that must never be persisted as facts or triple components.
|
|
183
|
+
// Defense-in-depth: even though the prompt no longer places example text inside
|
|
184
|
+
// <fact>/<subject>/<object> tags, a weak model could still echo these phrases.
|
|
185
|
+
const SCHEMA_PLACEHOLDER_STRINGS = new Set([
|
|
186
|
+
"individual atomic fact",
|
|
187
|
+
"atomic fact",
|
|
188
|
+
"one atomic claim per fact element",
|
|
189
|
+
"brief descriptive title",
|
|
190
|
+
"canonical entity name",
|
|
191
|
+
]);
|
|
192
|
+
|
|
193
|
+
// Regex for text that begins with a template placeholder marker: {{...}}, <!--...-->, ${...}.
|
|
194
|
+
// Intentionally narrow — earlier drafts rejected any line starting with
|
|
195
|
+
// "example:" / "placeholder:", which false-positived legitimate facts like
|
|
196
|
+
// "Example: QMD switched to Bun in v0.2". Shape-only matching avoids that
|
|
197
|
+
// drift; the exact-string blocklist above handles known echoed placeholders.
|
|
198
|
+
const PLACEHOLDER_REGEX = /^(\{\{.*\}\}|<!--.*-->|\$\{.*\})/;
|
|
199
|
+
|
|
200
|
+
function isSchemaPlaceholder(text: string): boolean {
|
|
201
|
+
if (!text) return true;
|
|
202
|
+
const normalized = text.trim().toLowerCase();
|
|
203
|
+
if (SCHEMA_PLACEHOLDER_STRINGS.has(normalized)) return true;
|
|
204
|
+
if (PLACEHOLDER_REGEX.test(normalized)) return true;
|
|
205
|
+
return false;
|
|
206
|
+
}
|
|
207
|
+
|
|
134
208
|
export function parseObservationXml(xml: string): Observation | null {
|
|
135
209
|
const typeMatch = xml.match(/<type>\s*(.*?)\s*<\/type>/s);
|
|
136
210
|
const titleMatch = xml.match(/<title>\s*(.*?)\s*<\/title>/s);
|
|
@@ -141,24 +215,67 @@ export function parseObservationXml(xml: string): Observation | null {
|
|
|
141
215
|
const type = typeMatch[1].trim().toLowerCase();
|
|
142
216
|
if (!VALID_OBSERVATION_TYPES.has(type)) return null;
|
|
143
217
|
|
|
144
|
-
const
|
|
218
|
+
const rawTitle = titleMatch[1].trim();
|
|
219
|
+
if (isSchemaPlaceholder(rawTitle)) return null;
|
|
220
|
+
|
|
221
|
+
const facts = extractMultiple(xml, "fact")
|
|
222
|
+
.filter(f => f.length >= 5)
|
|
223
|
+
.filter(f => !isSchemaPlaceholder(f));
|
|
224
|
+
|
|
145
225
|
const concepts = extractMultiple(xml, "concept")
|
|
146
226
|
.filter(c => VALID_CONCEPTS.has(c.toLowerCase()))
|
|
147
227
|
.map(c => c.toLowerCase());
|
|
148
228
|
const filesRead = extractMultiple(xml, "file", "files_read");
|
|
149
229
|
const filesModified = extractMultiple(xml, "file", "files_modified");
|
|
150
230
|
|
|
231
|
+
// Parse triples (Fix A): strict validation against canonical predicate vocabulary.
|
|
232
|
+
// Missing/malformed triples are silently dropped — fail-closed on ambiguity.
|
|
233
|
+
const triples = extractTriples(xml);
|
|
234
|
+
|
|
151
235
|
return {
|
|
152
236
|
type: type as Observation["type"],
|
|
153
|
-
title:
|
|
154
|
-
facts
|
|
237
|
+
title: rawTitle.slice(0, 80),
|
|
238
|
+
facts,
|
|
155
239
|
narrative: narrativeMatch?.[1]?.trim() || "",
|
|
156
240
|
concepts,
|
|
157
241
|
filesRead,
|
|
158
242
|
filesModified,
|
|
243
|
+
triples: triples.length > 0 ? triples : undefined,
|
|
159
244
|
};
|
|
160
245
|
}
|
|
161
246
|
|
|
247
|
+
function extractTriples(xml: string): ParsedTriple[] {
|
|
248
|
+
const parentMatch = xml.match(/<triples>([\s\S]*?)<\/triples>/s);
|
|
249
|
+
if (!parentMatch?.[1]) return [];
|
|
250
|
+
|
|
251
|
+
const blockRegex = /<triple>([\s\S]*?)<\/triple>/g;
|
|
252
|
+
const results: ParsedTriple[] = [];
|
|
253
|
+
let match;
|
|
254
|
+
while ((match = blockRegex.exec(parentMatch[1])) !== null) {
|
|
255
|
+
const block = match[1] ?? "";
|
|
256
|
+
const subject = block.match(/<subject>\s*(.*?)\s*<\/subject>/s)?.[1]?.trim();
|
|
257
|
+
const rawPredicate = block.match(/<predicate>\s*(.*?)\s*<\/predicate>/s)?.[1]?.trim();
|
|
258
|
+
const object = block.match(/<object>\s*(.*?)\s*<\/object>/s)?.[1]?.trim();
|
|
259
|
+
|
|
260
|
+
if (!subject || !rawPredicate || !object) continue;
|
|
261
|
+
|
|
262
|
+
const predicate = rawPredicate.toLowerCase().replace(/\s+/g, "_");
|
|
263
|
+
if (!VALID_PREDICATES.has(predicate)) continue;
|
|
264
|
+
|
|
265
|
+
// Length bounds — guards against sentence-shaped subjects/objects that the
|
|
266
|
+
// regex-era tests expected. Subject (2-80 chars) and object (2-120 chars) should be short canonical names.
|
|
267
|
+
if (subject.length < 2 || subject.length > 80) continue;
|
|
268
|
+
if (object.length < 2 || object.length > 120) continue;
|
|
269
|
+
|
|
270
|
+
if (isSchemaPlaceholder(subject) || isSchemaPlaceholder(object)) continue;
|
|
271
|
+
|
|
272
|
+
results.push({ subject, predicate, object });
|
|
273
|
+
|
|
274
|
+
if (results.length >= 5) break; // cap per observation
|
|
275
|
+
}
|
|
276
|
+
return results;
|
|
277
|
+
}
|
|
278
|
+
|
|
162
279
|
export function parseSummaryXml(xml: string): SessionSummary | null {
|
|
163
280
|
const request = extractSingle(xml, "request");
|
|
164
281
|
const investigated = extractSingle(xml, "investigated");
|