clawmem 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/store.ts CHANGED
@@ -544,6 +544,10 @@ function initializeDatabase(db: Database): void {
544
544
  ["skill_name", "ALTER TABLE documents ADD COLUMN skill_name TEXT"],
545
545
  ["obs_quality_score", "ALTER TABLE documents ADD COLUMN obs_quality_score REAL"],
546
546
  ["failure_reason", "ALTER TABLE documents ADD COLUMN failure_reason TEXT"],
547
+ ["source_doc_ids", "ALTER TABLE documents ADD COLUMN source_doc_ids TEXT"],
548
+ ["embed_state", "ALTER TABLE documents ADD COLUMN embed_state TEXT DEFAULT 'pending'"],
549
+ ["embed_error", "ALTER TABLE documents ADD COLUMN embed_error TEXT"],
550
+ ["embed_attempts", "ALTER TABLE documents ADD COLUMN embed_attempts INTEGER DEFAULT 0"],
547
551
  ];
548
552
  for (const [col, sql] of obsMigrations) {
549
553
  if (!colNames.has(col)) {
@@ -708,6 +712,31 @@ function initializeDatabase(db: Database): void {
708
712
  db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_cooccurrences_a ON entity_cooccurrences(entity_a)`);
709
713
  db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_cooccurrences_b ON entity_cooccurrences(entity_b)`);
710
714
 
715
+ // SPO knowledge graph: temporal entity-relationship triples
716
+ db.exec(`
717
+ CREATE TABLE IF NOT EXISTS entity_triples (
718
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
719
+ subject_entity_id TEXT NOT NULL,
720
+ predicate TEXT NOT NULL,
721
+ object_entity_id TEXT,
722
+ object_literal TEXT,
723
+ valid_from TEXT,
724
+ valid_to TEXT,
725
+ confidence REAL DEFAULT 1.0,
726
+ source_doc_id INTEGER,
727
+ source_fact TEXT,
728
+ created_at TEXT DEFAULT (datetime('now')),
729
+ FOREIGN KEY (subject_entity_id) REFERENCES entity_nodes(entity_id),
730
+ FOREIGN KEY (object_entity_id) REFERENCES entity_nodes(entity_id),
731
+ FOREIGN KEY (source_doc_id) REFERENCES documents(id)
732
+ )
733
+ `);
734
+
735
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_triples_subject ON entity_triples(subject_entity_id)`);
736
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_triples_object ON entity_triples(object_entity_id)`);
737
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_triples_predicate ON entity_triples(predicate)`);
738
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_triples_valid ON entity_triples(valid_from, valid_to)`);
739
+
711
740
  // Entity FTS5 for fuzzy name lookup
712
741
  db.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS entities_fts USING fts5(entity_id, name, entity_type)`);
713
742
 
@@ -881,6 +910,11 @@ export type Store = {
881
910
  pinDocument: (collection: string, path: string, pinned: boolean) => void;
882
911
  snoozeDocument: (collection: string, path: string, until: string | null) => void;
883
912
 
913
+ // Embed state tracking
914
+ markEmbedSynced: (hash: string) => void;
915
+ markEmbedFailed: (hash: string, error: string) => void;
916
+ getEmbedStats: () => { pending: number; synced: number; failed: number };
917
+
884
918
  // Beads integration
885
919
  syncBeadsIssues: (projectDir: string) => Promise<{ synced: number; created: number; newDocIds: number[] }>;
886
920
  detectBeadsProject: (cwd: string) => string | null;
@@ -904,6 +938,12 @@ export type Store = {
904
938
  searchEntities: (query: string, limit?: number) => { entity_id: string; name: string; type: string; mention_count: number; cooccurrence_count: number }[];
905
939
  getEntityGraphNeighbors: (seedDocIds: number[], limit?: number) => { docId: number; score: number; viaEntity: string }[];
906
940
 
941
+ // SPO knowledge graph
942
+ addTriple: (subjectEntityId: string, predicate: string, objectEntityId: string | null, objectLiteral: string | null, options?: { validFrom?: string; validTo?: string; confidence?: number; sourceDocId?: number; sourceFact?: string }) => number;
943
+ invalidateTriple: (subjectEntityId: string, predicate: string, objectEntityId: string | null, objectLiteral: string | null, endedDate?: string) => number;
944
+ queryEntityTriples: (entityId: string, options?: { asOf?: string; direction?: "outgoing" | "incoming" | "both" }) => { id: number; direction: string; subject: string; predicate: string; object: string; validFrom: string | null; validTo: string | null; confidence: number; current: boolean }[];
945
+ getTripleStats: () => { totalTriples: number; currentFacts: number; expiredFacts: number; predicateTypes: string[] };
946
+
907
947
  // Co-activation tracking
908
948
  recordCoActivation: (paths: string[]) => void;
909
949
  getCoActivated: (path: string, limit?: number) => { path: string; count: number }[];
@@ -1047,6 +1087,24 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
1047
1087
  pinDocument: (collection: string, path: string, pinned: boolean) => pinDocumentFn(db, collection, path, pinned),
1048
1088
  snoozeDocument: (collection: string, path: string, until: string | null) => snoozeDocumentFn(db, collection, path, until),
1049
1089
 
1090
+ // Embed state tracking
1091
+ markEmbedSynced: (hash: string) => {
1092
+ db.prepare(`UPDATE documents SET embed_state = 'synced' WHERE hash = ? AND active = 1`).run(hash);
1093
+ },
1094
+ markEmbedFailed: (hash: string, error: string) => {
1095
+ db.prepare(`UPDATE documents SET embed_state = 'failed', embed_error = ?, embed_attempts = COALESCE(embed_attempts, 0) + 1 WHERE hash = ? AND active = 1`).run(error, hash);
1096
+ },
1097
+ getEmbedStats: () => {
1098
+ const stats = db.prepare(`
1099
+ SELECT
1100
+ SUM(CASE WHEN embed_state = 'pending' OR embed_state IS NULL THEN 1 ELSE 0 END) as pending,
1101
+ SUM(CASE WHEN embed_state = 'synced' THEN 1 ELSE 0 END) as synced,
1102
+ SUM(CASE WHEN embed_state = 'failed' THEN 1 ELSE 0 END) as failed
1103
+ FROM documents WHERE active = 1
1104
+ `).get() as { pending: number; synced: number; failed: number };
1105
+ return { pending: stats.pending || 0, synced: stats.synced || 0, failed: stats.failed || 0 };
1106
+ },
1107
+
1050
1108
  // Beads integration
1051
1109
  syncBeadsIssues: (projectDir: string) => syncBeadsIssues(db, projectDir),
1052
1110
  detectBeadsProject,
@@ -1070,6 +1128,93 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
1070
1128
  searchEntities: (query: string, limit?: number) => searchEntities(db, query, limit),
1071
1129
  getEntityGraphNeighbors: (seedDocIds: number[], limit?: number) => getEntityGraphNeighbors(db, seedDocIds, limit),
1072
1130
 
1131
+ // SPO knowledge graph
1132
+ addTriple: (subjectEntityId: string, predicate: string, objectEntityId: string | null, objectLiteral: string | null, options?: { validFrom?: string; validTo?: string; confidence?: number; sourceDocId?: number; sourceFact?: string }) => {
1133
+ const pred = predicate.toLowerCase().replace(/\s+/g, "_");
1134
+ const now = new Date().toISOString();
1135
+ const objClause = objectEntityId
1136
+ ? "object_entity_id = ? AND object_literal IS NULL"
1137
+ : "object_entity_id IS NULL AND object_literal = ?";
1138
+ const objParam = objectEntityId ?? objectLiteral;
1139
+ const existing = db.prepare(
1140
+ `SELECT id FROM entity_triples WHERE subject_entity_id = ? AND predicate = ? AND ${objClause} AND valid_to IS NULL`
1141
+ ).get(subjectEntityId, pred, objParam) as { id: number } | null;
1142
+ if (existing) return existing.id;
1143
+
1144
+ const result = db.prepare(`
1145
+ INSERT INTO entity_triples (subject_entity_id, predicate, object_entity_id, object_literal, valid_from, valid_to, confidence, source_doc_id, source_fact, created_at)
1146
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1147
+ `).run(
1148
+ subjectEntityId, pred, objectEntityId, objectLiteral,
1149
+ options?.validFrom ?? null, options?.validTo ?? null,
1150
+ options?.confidence ?? 1.0, options?.sourceDocId ?? null,
1151
+ options?.sourceFact ?? null, now
1152
+ );
1153
+ return Number(result.lastInsertRowid);
1154
+ },
1155
+
1156
+ invalidateTriple: (subjectEntityId: string, predicate: string, objectEntityId: string | null, objectLiteral: string | null, endedDate?: string) => {
1157
+ const pred = predicate.toLowerCase().replace(/\s+/g, "_");
1158
+ const ended = endedDate || new Date().toISOString().slice(0, 10);
1159
+ const objClause = objectEntityId
1160
+ ? "object_entity_id = ? AND object_literal IS NULL"
1161
+ : "object_entity_id IS NULL AND object_literal = ?";
1162
+ const objParam = objectEntityId ?? objectLiteral;
1163
+ const result = db.prepare(
1164
+ `UPDATE entity_triples SET valid_to = ? WHERE subject_entity_id = ? AND predicate = ? AND ${objClause} AND valid_to IS NULL`
1165
+ ).run(ended, subjectEntityId, pred, objParam);
1166
+ return result.changes;
1167
+ },
1168
+
1169
+ queryEntityTriples: (entityId: string, options?: { asOf?: string; direction?: "outgoing" | "incoming" | "both" }) => {
1170
+ const direction = options?.direction ?? "both";
1171
+ const asOf = options?.asOf;
1172
+ const results: { id: number; direction: string; subject: string; predicate: string; object: string; validFrom: string | null; validTo: string | null; confidence: number; current: boolean }[] = [];
1173
+
1174
+ if (direction === "outgoing" || direction === "both") {
1175
+ let query = `SELECT t.id, t.predicate, t.object_entity_id, t.object_literal, t.valid_from, t.valid_to, t.confidence,
1176
+ COALESCE(s.name, t.subject_entity_id) as sub_name, COALESCE(o.name, t.object_literal, t.object_entity_id) as obj_name
1177
+ FROM entity_triples t
1178
+ LEFT JOIN entity_nodes s ON t.subject_entity_id = s.entity_id
1179
+ LEFT JOIN entity_nodes o ON t.object_entity_id = o.entity_id
1180
+ WHERE t.subject_entity_id = ?`;
1181
+ const params: any[] = [entityId];
1182
+ if (asOf) {
1183
+ query += " AND (t.valid_from IS NULL OR t.valid_from <= ?) AND (t.valid_to IS NULL OR t.valid_to >= ?)";
1184
+ params.push(asOf, asOf);
1185
+ }
1186
+ for (const row of db.prepare(query).all(...params) as any[]) {
1187
+ results.push({ id: row.id, direction: "outgoing", subject: row.sub_name, predicate: row.predicate, object: row.obj_name, validFrom: row.valid_from, validTo: row.valid_to, confidence: row.confidence, current: row.valid_to === null });
1188
+ }
1189
+ }
1190
+
1191
+ if (direction === "incoming" || direction === "both") {
1192
+ let query = `SELECT t.id, t.predicate, t.valid_from, t.valid_to, t.confidence,
1193
+ COALESCE(s.name, t.subject_entity_id) as sub_name, COALESCE(o.name, t.object_literal, t.object_entity_id) as obj_name
1194
+ FROM entity_triples t
1195
+ LEFT JOIN entity_nodes s ON t.subject_entity_id = s.entity_id
1196
+ LEFT JOIN entity_nodes o ON t.object_entity_id = o.entity_id
1197
+ WHERE t.object_entity_id = ?`;
1198
+ const params: any[] = [entityId];
1199
+ if (asOf) {
1200
+ query += " AND (t.valid_from IS NULL OR t.valid_from <= ?) AND (t.valid_to IS NULL OR t.valid_to >= ?)";
1201
+ params.push(asOf, asOf);
1202
+ }
1203
+ for (const row of db.prepare(query).all(...params) as any[]) {
1204
+ results.push({ id: row.id, direction: "incoming", subject: row.sub_name, predicate: row.predicate, object: row.obj_name, validFrom: row.valid_from, validTo: row.valid_to, confidence: row.confidence, current: row.valid_to === null });
1205
+ }
1206
+ }
1207
+
1208
+ return results;
1209
+ },
1210
+
1211
+ getTripleStats: () => {
1212
+ const total = (db.prepare("SELECT COUNT(*) as n FROM entity_triples").get() as any).n;
1213
+ const current = (db.prepare("SELECT COUNT(*) as n FROM entity_triples WHERE valid_to IS NULL").get() as any).n;
1214
+ const predicates = db.prepare("SELECT DISTINCT predicate FROM entity_triples ORDER BY predicate").all().map((r: any) => r.predicate);
1215
+ return { totalTriples: total, currentFacts: current, expiredFacts: total - current, predicateTypes: predicates };
1216
+ },
1217
+
1073
1218
  // Co-activation tracking
1074
1219
  recordCoActivation: (paths: string[]) => {
1075
1220
  if (paths.length < 2) return;
@@ -1333,6 +1478,7 @@ export type DocumentRow = {
1333
1478
  confidence: number;
1334
1479
  accessCount: number;
1335
1480
  bodyLength: number;
1481
+ pinned: number;
1336
1482
  };
1337
1483
 
1338
1484
  // =============================================================================
@@ -2805,12 +2951,17 @@ export function getHashesForEmbedding(db: Database): { hash: string; body: strin
2805
2951
  * Returns hashes that have no content_vectors row with fragment_type set.
2806
2952
  */
2807
2953
  export function getHashesNeedingFragments(db: Database): { hash: string; body: string; path: string; title: string; collection: string }[] {
2954
+ // Select docs that either have no fragments at all OR are missing the primary (seq=0) fragment.
2955
+ // The seq=0 embedding is critical — surprisal scoring, semantic graph, and health checks depend on it.
2808
2956
  return db.prepare(`
2809
2957
  SELECT d.hash, c.doc as body, MIN(d.path) as path, MIN(d.title) as title, MIN(d.collection) as collection
2810
2958
  FROM documents d
2811
2959
  JOIN content c ON d.hash = c.hash
2812
2960
  LEFT JOIN content_vectors v ON d.hash = v.hash AND v.fragment_type IS NOT NULL
2813
- WHERE d.active = 1 AND v.hash IS NULL
2961
+ LEFT JOIN content_vectors v0 ON d.hash = v0.hash AND v0.seq = 0
2962
+ WHERE d.active = 1
2963
+ AND (v.hash IS NULL OR v0.hash IS NULL)
2964
+ AND COALESCE(d.embed_attempts, 0) < 3
2814
2965
  GROUP BY d.hash
2815
2966
  `).all() as { hash: string; body: string; path: string; title: string; collection: string }[];
2816
2967
  }
@@ -2822,6 +2973,8 @@ export function getHashesNeedingFragments(db: Database): { hash: string; body: s
2822
2973
  export function clearAllEmbeddings(db: Database): void {
2823
2974
  db.exec(`DELETE FROM content_vectors`);
2824
2975
  db.exec(`DROP TABLE IF EXISTS vectors_vec`);
2976
+ // Reset embed state so failed docs get retried after force re-embed
2977
+ try { db.exec(`UPDATE documents SET embed_state = 'pending', embed_error = NULL, embed_attempts = 0 WHERE active = 1`); } catch { /* column may not exist yet */ }
2825
2978
  vecTableDimsCache.delete(db);
2826
2979
  }
2827
2980
 
@@ -3560,7 +3713,7 @@ function getDocumentsByTypeFn(db: Database, contentType: string, limit: number =
3560
3713
  SELECT d.id, d.collection, d.path, d.title, d.hash, d.modified_at as modifiedAt,
3561
3714
  d.domain, d.workstream, d.tags, d.content_type as contentType,
3562
3715
  d.review_by as reviewBy, d.confidence, d.access_count as accessCount,
3563
- LENGTH(c.doc) as bodyLength
3716
+ LENGTH(c.doc) as bodyLength, d.pinned
3564
3717
  FROM documents d
3565
3718
  JOIN content c ON c.hash = d.hash
3566
3719
  WHERE d.active = 1 AND d.content_type = ?