clawmem 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/store.ts CHANGED
@@ -301,6 +301,10 @@ function initializeDatabase(db: Database): void {
301
301
  sqliteVec.load(db);
302
302
  db.exec("PRAGMA journal_mode = WAL");
303
303
  db.exec("PRAGMA foreign_keys = ON");
304
+ // Set generous busy_timeout during DDL — concurrent Stop hooks (decision-extractor,
305
+ // handoff-generator, feedback-loop) all run initializeDatabase simultaneously.
306
+ // 15s is well within the 30s Stop hook timeout. createStore resets it to the operational value after DDL completes.
307
+ db.exec("PRAGMA busy_timeout = 15000");
304
308
 
305
309
  // Drop legacy tables that are now managed in YAML
306
310
  db.exec(`DROP TABLE IF EXISTS path_contexts`);
@@ -491,11 +495,18 @@ function initializeDatabase(db: Database): void {
491
495
  hook_name TEXT NOT NULL,
492
496
  injected_paths TEXT NOT NULL DEFAULT '[]',
493
497
  estimated_tokens INTEGER NOT NULL DEFAULT 0,
494
- was_referenced INTEGER NOT NULL DEFAULT 0
498
+ was_referenced INTEGER NOT NULL DEFAULT 0,
499
+ turn_index INTEGER NOT NULL DEFAULT 0
495
500
  )
496
501
  `);
497
502
  db.exec(`CREATE INDEX IF NOT EXISTS idx_context_usage_session ON context_usage(session_id)`);
498
503
 
504
+ // Migration: add turn_index to existing context_usage
505
+ const cuCols = db.prepare("PRAGMA table_info(context_usage)").all() as { name: string }[];
506
+ if (!cuCols.some(c => c.name === "turn_index")) {
507
+ try { db.exec(`ALTER TABLE context_usage ADD COLUMN turn_index INTEGER NOT NULL DEFAULT 0`); } catch { /* exists */ }
508
+ }
509
+
499
510
  // Hook prompt dedupe: suppress duplicate/heartbeat prompts to reduce GPU churn.
500
511
  db.exec(`
501
512
  CREATE TABLE IF NOT EXISTS hook_dedupe (
@@ -785,6 +796,64 @@ function initializeDatabase(db: Database): void {
785
796
  `);
786
797
 
787
798
  db.exec(`CREATE INDEX IF NOT EXISTS idx_intent_cache_time ON intent_classifications(cached_at)`);
799
+
800
+ // Recall tracking: append-only event log for every doc surfaced by retrieval
801
+ // usage_id is informational (no FK) — links to context_usage.id in the same vault
802
+ // but may reference a different vault's row in cross-vault scenarios.
803
+ // Cross-vault linkage uses session_id + turn_index instead.
804
+ db.exec(`
805
+ CREATE TABLE IF NOT EXISTS recall_events (
806
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
807
+ doc_id INTEGER NOT NULL,
808
+ query_hash TEXT NOT NULL,
809
+ search_score REAL NOT NULL,
810
+ session_id TEXT NOT NULL,
811
+ usage_id INTEGER,
812
+ turn_index INTEGER NOT NULL DEFAULT 0,
813
+ surfaced_at TEXT NOT NULL DEFAULT (datetime('now')),
814
+ was_referenced INTEGER NOT NULL DEFAULT 0,
815
+ FOREIGN KEY (doc_id) REFERENCES documents(id) ON DELETE CASCADE
816
+ )
817
+ `);
818
+ // Migration: add usage_id + turn_index columns to existing recall_events tables
819
+ const reCols = db.prepare("PRAGMA table_info(recall_events)").all() as { name: string }[];
820
+ const reColNames = new Set(reCols.map(c => c.name));
821
+ if (!reColNames.has("usage_id")) {
822
+ try { db.exec(`ALTER TABLE recall_events ADD COLUMN usage_id INTEGER`); } catch { /* exists */ }
823
+ }
824
+ if (!reColNames.has("turn_index")) {
825
+ try { db.exec(`ALTER TABLE recall_events ADD COLUMN turn_index INTEGER NOT NULL DEFAULT 0`); } catch { /* exists */ }
826
+ }
827
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_recall_events_usage ON recall_events(usage_id)`);
828
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_recall_events_doc ON recall_events(doc_id)`);
829
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_recall_events_session ON recall_events(session_id)`);
830
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_recall_events_surfaced ON recall_events(surfaced_at)`);
831
+
832
+ // Recall stats: derived summary recomputed by background worker
833
+ db.exec(`
834
+ CREATE TABLE IF NOT EXISTS recall_stats (
835
+ doc_id INTEGER PRIMARY KEY,
836
+ recall_count INTEGER NOT NULL DEFAULT 0,
837
+ unique_queries INTEGER NOT NULL DEFAULT 0,
838
+ recall_days INTEGER NOT NULL DEFAULT 0,
839
+ total_score REAL NOT NULL DEFAULT 0,
840
+ max_score REAL NOT NULL DEFAULT 0,
841
+ first_recalled_at TEXT,
842
+ last_recalled_at TEXT,
843
+ diversity_score REAL NOT NULL DEFAULT 0,
844
+ spacing_score REAL NOT NULL DEFAULT 0,
845
+ negative_count INTEGER NOT NULL DEFAULT 0,
846
+ updated_at TEXT NOT NULL DEFAULT (datetime('now')),
847
+ FOREIGN KEY (doc_id) REFERENCES documents(id) ON DELETE CASCADE
848
+ )
849
+ `);
850
+
851
+ // Migration: add contradict_confidence to memory_relations
852
+ const mrCols = db.prepare("PRAGMA table_info(memory_relations)").all() as { name: string }[];
853
+ const mrColNames = new Set(mrCols.map(c => c.name));
854
+ if (!mrColNames.has("contradict_confidence")) {
855
+ try { db.exec(`ALTER TABLE memory_relations ADD COLUMN contradict_confidence REAL`); } catch { /* column exists */ }
856
+ }
788
857
  }
789
858
 
790
859
 
@@ -898,7 +967,7 @@ export type Store = {
898
967
  getRecentSessions: (limit: number) => SessionRecord[];
899
968
 
900
969
  // SAME: Context usage tracking
901
- insertUsage: (usage: UsageRecord) => void;
970
+ insertUsage: (usage: UsageRecord) => number;
902
971
  getUsageForSession: (sessionId: string) => UsageRow[];
903
972
  markUsageReferenced: (id: number) => void;
904
973
 
@@ -944,6 +1013,13 @@ export type Store = {
944
1013
  queryEntityTriples: (entityId: string, options?: { asOf?: string; direction?: "outgoing" | "incoming" | "both" }) => { id: number; direction: string; subject: string; predicate: string; object: string; validFrom: string | null; validTo: string | null; confidence: number; current: boolean }[];
945
1014
  getTripleStats: () => { totalTriples: number; currentFacts: number; expiredFacts: number; predicateTypes: string[] };
946
1015
 
1016
+ // Recall tracking
1017
+ insertRecallEvents: (events: { docId: number; queryHash: string; searchScore: number; sessionId: string; usageId?: number; turnIndex?: number; wasReferenced?: boolean }[]) => number;
1018
+ recomputeRecallStats: () => number;
1019
+ getRecallStats: (docId: number) => RecallStatsRow | null;
1020
+ getRecallStatsAll: (minRecallCount?: number) => RecallStatsRow[];
1021
+ markRecallEventsReferenced: (sessionId: string, docIds: number[]) => void;
1022
+
947
1023
  // Co-activation tracking
948
1024
  recordCoActivation: (paths: string[]) => void;
949
1025
  getCoActivated: (path: string, limit?: number) => { path: string; count: number }[];
@@ -987,9 +1063,9 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
987
1063
  db.exec("PRAGMA journal_mode = WAL");
988
1064
  db.exec("PRAGMA query_only = ON");
989
1065
  }
990
- if (opts?.busyTimeout !== undefined) {
991
- db.exec(`PRAGMA busy_timeout = ${opts.busyTimeout}`);
992
- }
1066
+ // Reset busy_timeout to operational value after DDL init (which uses 15s).
1067
+ // Default 5000ms for normal operations — callers can override via opts.
1068
+ db.exec(`PRAGMA busy_timeout = ${opts?.busyTimeout ?? 5000}`);
993
1069
 
994
1070
  return {
995
1071
  db,
@@ -1075,7 +1151,7 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
1075
1151
  getRecentSessions: (limit: number) => getRecentSessionsFn(db, limit),
1076
1152
 
1077
1153
  // SAME: Context usage tracking
1078
- insertUsage: (usage: UsageRecord) => insertUsageFn(db, usage),
1154
+ insertUsage: (usage: UsageRecord) => insertUsageFn(db, usage) as number,
1079
1155
  getUsageForSession: (sessionId: string) => getUsageForSessionFn(db, sessionId),
1080
1156
  markUsageReferenced: (id: number) => markUsageReferencedFn(db, id),
1081
1157
 
@@ -1216,6 +1292,165 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
1216
1292
  },
1217
1293
 
1218
1294
  // Co-activation tracking
1295
+ // Recall tracking: batch insert surfacing events
1296
+ insertRecallEvents: (events: { docId: number; queryHash: string; searchScore: number; sessionId: string; usageId?: number; turnIndex?: number; wasReferenced?: boolean }[]) => {
1297
+ if (events.length === 0) return 0;
1298
+ const stmt = db.prepare(`
1299
+ INSERT INTO recall_events (doc_id, query_hash, search_score, session_id, usage_id, turn_index, surfaced_at, was_referenced)
1300
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
1301
+ `);
1302
+ const now = new Date().toISOString();
1303
+ const tx = db.transaction(() => {
1304
+ for (const e of events) {
1305
+ stmt.run(e.docId, e.queryHash, e.searchScore, e.sessionId, e.usageId ?? null, e.turnIndex ?? 0, now, e.wasReferenced ? 1 : 0);
1306
+ }
1307
+ });
1308
+ tx();
1309
+ return events.length;
1310
+ },
1311
+
1312
+ // Recall tracking: recompute derived stats from events
1313
+ // Uses a single SQL GROUP BY query for aggregation, then JS for the diversity/spacing formulas
1314
+ recomputeRecallStats: () => {
1315
+ const aggregated = db.prepare(`
1316
+ SELECT
1317
+ doc_id,
1318
+ COUNT(*) AS recall_count,
1319
+ COUNT(DISTINCT query_hash) AS unique_queries,
1320
+ COUNT(DISTINCT date(surfaced_at, 'utc')) AS recall_days,
1321
+ SUM(search_score) AS total_score,
1322
+ MAX(search_score) AS max_score,
1323
+ SUM(CASE WHEN was_referenced = 0 THEN 1 ELSE 0 END) AS negative_count,
1324
+ MIN(surfaced_at) AS first_recalled_at,
1325
+ MAX(surfaced_at) AS last_recalled_at,
1326
+ GROUP_CONCAT(DISTINCT date(surfaced_at, 'utc')) AS day_list
1327
+ FROM recall_events
1328
+ GROUP BY doc_id
1329
+ `).all() as {
1330
+ doc_id: number; recall_count: number; unique_queries: number; recall_days: number;
1331
+ total_score: number; max_score: number; negative_count: number;
1332
+ first_recalled_at: string; last_recalled_at: string; day_list: string;
1333
+ }[];
1334
+
1335
+ if (aggregated.length === 0) return 0;
1336
+
1337
+ const upsert = db.prepare(`
1338
+ INSERT INTO recall_stats (doc_id, recall_count, unique_queries, recall_days, total_score, max_score,
1339
+ first_recalled_at, last_recalled_at, diversity_score, spacing_score, negative_count, updated_at)
1340
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1341
+ ON CONFLICT(doc_id) DO UPDATE SET
1342
+ recall_count = excluded.recall_count,
1343
+ unique_queries = excluded.unique_queries,
1344
+ recall_days = excluded.recall_days,
1345
+ total_score = excluded.total_score,
1346
+ max_score = excluded.max_score,
1347
+ first_recalled_at = excluded.first_recalled_at,
1348
+ last_recalled_at = excluded.last_recalled_at,
1349
+ diversity_score = excluded.diversity_score,
1350
+ spacing_score = excluded.spacing_score,
1351
+ negative_count = excluded.negative_count,
1352
+ updated_at = excluded.updated_at
1353
+ `);
1354
+
1355
+ const now = new Date().toISOString();
1356
+ const tx = db.transaction(() => {
1357
+ for (const row of aggregated) {
1358
+ // Diversity: max(uniqueQueries, recallDays) / 5, clamped to at most 1
1359
+ const diversityScore = Math.min(1, Math.max(row.unique_queries, row.recall_days) / 5);
1360
+
1361
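+ // Spacing: multi-day spread — 0.55 × log-scaled distinct-day count (saturates at 5 days) + 0.45 × first-to-last span (saturates at 7 days); a single recall day scores a flat 0.2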
1362
+ let spacingScore = 0;
1363
+ if (row.recall_days > 1 && row.day_list) {
1364
+ const days = row.day_list.split(",").sort();
1365
+ const spacing = Math.min(1, Math.log1p(days.length - 1) / Math.log1p(4));
1366
+ const firstDay = new Date(days[0]! + "T00:00:00Z").getTime();
1367
+ const lastDay = new Date(days[days.length - 1]! + "T00:00:00Z").getTime();
1368
+ const spanDays = Math.max(0, (lastDay - firstDay) / (24 * 60 * 60 * 1000));
1369
+ const span = Math.min(1, spanDays / 7);
1370
+ spacingScore = Math.min(1, 0.55 * spacing + 0.45 * span);
1371
+ } else if (row.recall_days === 1) {
1372
+ spacingScore = 0.2;
1373
+ }
1374
+
1375
+ upsert.run(
1376
+ row.doc_id, row.recall_count, row.unique_queries, row.recall_days,
1377
+ row.total_score, row.max_score,
1378
+ row.first_recalled_at, row.last_recalled_at,
1379
+ diversityScore, spacingScore, row.negative_count, now
1380
+ );
1381
+ }
1382
+ });
1383
+ tx();
1384
+ return aggregated.length;
1385
+ },
1386
+
1387
+ getRecallStats: (docId: number) => {
1388
+ const row = db.prepare(`SELECT * FROM recall_stats WHERE doc_id = ?`).get(docId) as any;
1389
+ if (!row) return null;
1390
+ return {
1391
+ docId: row.doc_id,
1392
+ recallCount: row.recall_count,
1393
+ uniqueQueries: row.unique_queries,
1394
+ recallDays: row.recall_days,
1395
+ totalScore: row.total_score,
1396
+ maxScore: row.max_score,
1397
+ firstRecalledAt: row.first_recalled_at,
1398
+ lastRecalledAt: row.last_recalled_at,
1399
+ diversityScore: row.diversity_score,
1400
+ spacingScore: row.spacing_score,
1401
+ negativeCount: row.negative_count,
1402
+ updatedAt: row.updated_at,
1403
+ } as RecallStatsRow;
1404
+ },
1405
+
1406
+ getRecallStatsAll: (minRecallCount: number = 1) => {
1407
+ return (db.prepare(`
1408
+ SELECT rs.*, d.collection, d.path, d.title
1409
+ FROM recall_stats rs
1410
+ JOIN documents d ON rs.doc_id = d.id
1411
+ WHERE rs.recall_count >= ? AND d.active = 1
1412
+ ORDER BY rs.recall_count DESC
1413
+ `).all(minRecallCount) as any[]).map(row => ({
1414
+ docId: row.doc_id,
1415
+ recallCount: row.recall_count,
1416
+ uniqueQueries: row.unique_queries,
1417
+ recallDays: row.recall_days,
1418
+ totalScore: row.total_score,
1419
+ maxScore: row.max_score,
1420
+ firstRecalledAt: row.first_recalled_at,
1421
+ lastRecalledAt: row.last_recalled_at,
1422
+ diversityScore: row.diversity_score,
1423
+ spacingScore: row.spacing_score,
1424
+ negativeCount: row.negative_count,
1425
+ updatedAt: row.updated_at,
1426
+ collection: row.collection,
1427
+ path: row.path,
1428
+ title: row.title,
1429
+ } as RecallStatsRow));
1430
+ },
1431
+
1432
+ markRecallEventsReferenced: (sessionId: string, docIds: number[]) => {
1433
+ if (docIds.length === 0) return;
1434
+ // Mark only the LATEST event per doc in this session, not all events.
1435
+ // This preserves negative signals: if a doc was surfaced across 5 prompts
1436
+ // but only cited once, 4 events stay was_referenced=0 (genuine negatives).
1437
+ const stmt = db.prepare(`
1438
+ UPDATE recall_events SET was_referenced = 1
1439
+ WHERE id = (
1440
+ SELECT id FROM recall_events
1441
+ WHERE session_id = ? AND doc_id = ?
1442
+ ORDER BY surfaced_at DESC
1443
+ LIMIT 1
1444
+ )
1445
+ `);
1446
+ const tx = db.transaction(() => {
1447
+ for (const docId of docIds) {
1448
+ stmt.run(sessionId, docId);
1449
+ }
1450
+ });
1451
+ tx();
1452
+ },
1453
+
1219
1454
  recordCoActivation: (paths: string[]) => {
1220
1455
  if (paths.length < 2) return;
1221
1456
  const now = new Date().toISOString();
@@ -1451,6 +1686,7 @@ export type UsageRecord = {
1451
1686
  injectedPaths: string[];
1452
1687
  estimatedTokens: number;
1453
1688
  wasReferenced: number;
1689
+ turnIndex?: number;
1454
1690
  };
1455
1691
 
1456
1692
  export type UsageRow = {
@@ -1461,6 +1697,26 @@ export type UsageRow = {
1461
1697
  injectedPaths: string;
1462
1698
  estimatedTokens: number;
1463
1699
  wasReferenced: number;
1700
+ turnIndex: number;
1701
+ };
1702
+
1703
+ export type RecallStatsRow = {
1704
+ docId: number;
1705
+ recallCount: number;
1706
+ uniqueQueries: number;
1707
+ recallDays: number;
1708
+ totalScore: number;
1709
+ maxScore: number;
1710
+ firstRecalledAt: string | null;
1711
+ lastRecalledAt: string | null;
1712
+ diversityScore: number;
1713
+ spacingScore: number;
1714
+ negativeCount: number;
1715
+ updatedAt: string;
1716
+ // Joined from documents (only populated by getRecallStatsAll)
1717
+ collection?: string;
1718
+ path?: string;
1719
+ title?: string;
1464
1720
  };
1465
1721
 
1466
1722
  export type DocumentRow = {
@@ -3647,19 +3903,22 @@ function getRecentSessionsFn(db: Database, limit: number): SessionRecord[] {
3647
3903
  // SAME: Context Usage Tracking
3648
3904
  // =============================================================================
3649
3905
 
3650
- function insertUsageFn(db: Database, usage: UsageRecord): void {
3906
+ function insertUsageFn(db: Database, usage: UsageRecord): number {
3651
3907
  db.prepare(`
3652
- INSERT INTO context_usage (session_id, timestamp, hook_name, injected_paths, estimated_tokens, was_referenced)
3653
- VALUES (?, ?, ?, ?, ?, ?)
3654
- `).run(usage.sessionId, usage.timestamp, usage.hookName, JSON.stringify(usage.injectedPaths), usage.estimatedTokens, usage.wasReferenced);
3908
+ INSERT INTO context_usage (session_id, timestamp, hook_name, injected_paths, estimated_tokens, was_referenced, turn_index)
3909
+ VALUES (?, ?, ?, ?, ?, ?, ?)
3910
+ `).run(usage.sessionId, usage.timestamp, usage.hookName, JSON.stringify(usage.injectedPaths), usage.estimatedTokens, usage.wasReferenced, usage.turnIndex ?? 0);
3911
+ // Return the rowid of the just-inserted row for recall event linkage
3912
+ const row = db.prepare("SELECT last_insert_rowid() as id").get() as { id: number };
3913
+ return row.id;
3655
3914
  }
3656
3915
 
3657
3916
  function getUsageForSessionFn(db: Database, sessionId: string): UsageRow[] {
3658
3917
  return db.prepare(`
3659
3918
  SELECT id, session_id AS sessionId, timestamp, hook_name AS hookName,
3660
3919
  injected_paths AS injectedPaths, estimated_tokens AS estimatedTokens,
3661
- was_referenced AS wasReferenced
3662
- FROM context_usage WHERE session_id = ? ORDER BY timestamp
3920
+ was_referenced AS wasReferenced, turn_index AS turnIndex
3921
+ FROM context_usage WHERE session_id = ? ORDER BY turn_index, timestamp
3663
3922
  `).all(sessionId) as UsageRow[];
3664
3923
  }
3665
3924