clawmem 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Recall Tracking — direct-write recall event recording.
3
+ *
4
+ * Context-surfacing writes recall events directly to SQLite (single transaction,
5
+ * <0.4ms for ~12 rows). This replaces the original in-memory buffer design which
6
+ * failed in Claude Code mode where each hook is a separate process invocation.
7
+ *
8
+ * Per GPT 5.4 High review (Codex turn 1):
9
+ * - Direct INSERT is preferred over buffer for cross-process correctness
10
+ * - WAL mode handles concurrent writes safely (busy_timeout=5000ms)
11
+ * - Negative signals (surfaced but not referenced) marked retroactively by feedback-loop
12
+ */
13
+
14
+ import { createHash } from "crypto";
15
+ import type { Store } from "./store.ts";
16
+
17
+ // =============================================================================
18
+ // Query Hashing
19
+ // =============================================================================
20
+
21
/**
 * Derive the short, stable key under which a search query is tracked.
 *
 * The query is lower-cased and stripped of surrounding whitespace before
 * hashing, so inputs that differ only in case or padding share one key.
 * The key is a SHA-1 digest cut to 12 hex characters (the original note
 * says this mirrors OpenClaw's approach).
 *
 * @param query - Raw query text.
 * @returns The first 12 hexadecimal characters of the SHA-1 digest.
 */
export function hashQuery(query: string): string {
  const normalized = query.toLowerCase().trim();
  const digest = createHash("sha1").update(normalized).digest("hex");
  return digest.substring(0, 12);
}
31
+
32
+ // =============================================================================
33
+ // Direct Write (replaces in-memory buffer)
34
+ // =============================================================================
35
+
36
/**
 * Record surfaced documents as recall events directly to SQLite.
 * Called from the context-surfacing hook; all rows go through a single
 * `store.insertRecallEvents` call.
 *
 * Each `displayPath` is split at its first "/" into a collection name and a
 * document path, then resolved to a document id via `store.findActiveDocument`.
 * Entries without a "/" are skipped silently; entries that no longer resolve
 * (e.g. deleted between search and write) are skipped and reported through
 * `console.debug`.
 *
 * @param store - Store instance with DB access
 * @param sessionId - Current session identifier; an empty value records nothing
 * @param queryHash - Hash of the search query (see `hashQuery`)
 * @param docs - Array of {displayPath, searchScore} for each surfaced result
 * @param usageId - Optional context-usage row id copied onto every event
 * @param turnIndex - Optional turn index copied onto every event
 * @returns Number of events recorded (0 when nothing could be resolved)
 */
export function writeRecallEvents(
  store: Store,
  sessionId: string,
  queryHash: string,
  docs: { displayPath: string; searchScore: number }[],
  usageId?: number,
  turnIndex?: number
): number {
  if (!sessionId || docs.length === 0) return 0;

  // The element type lists usageId/turnIndex so the object literal pushed
  // below passes excess-property checking.
  const resolved: {
    docId: number;
    queryHash: string;
    searchScore: number;
    sessionId: string;
    usageId?: number;
    turnIndex?: number;
  }[] = [];

  for (const doc of docs) {
    // First path segment is the collection; everything after it is the doc path.
    const slash = doc.displayPath.indexOf("/");
    if (slash === -1) continue;
    const collection = doc.displayPath.slice(0, slash);
    const docPath = doc.displayPath.slice(slash + 1);

    const found = store.findActiveDocument(collection, docPath);
    if (!found) {
      console.debug?.(`[recall] skipping unresolvable displayPath: ${doc.displayPath}`);
      continue;
    }

    resolved.push({
      docId: found.id,
      queryHash,
      searchScore: doc.searchScore,
      sessionId,
      usageId,
      turnIndex,
    });
  }

  if (resolved.length === 0) return 0;
  return store.insertRecallEvents(resolved);
}
85
+
package/src/store.ts CHANGED
@@ -301,6 +301,10 @@ function initializeDatabase(db: Database): void {
301
301
  sqliteVec.load(db);
302
302
  db.exec("PRAGMA journal_mode = WAL");
303
303
  db.exec("PRAGMA foreign_keys = ON");
304
+ // Set generous busy_timeout during DDL — concurrent Stop hooks (decision-extractor,
305
+ // handoff-generator, feedback-loop) all run initializeDatabase simultaneously.
306
+ // 15s is well within the 30s Stop hook timeout. Reset to normal after DDL completes.
307
+ db.exec("PRAGMA busy_timeout = 15000");
304
308
 
305
309
  // Drop legacy tables that are now managed in YAML
306
310
  db.exec(`DROP TABLE IF EXISTS path_contexts`);
@@ -491,11 +495,18 @@ function initializeDatabase(db: Database): void {
491
495
  hook_name TEXT NOT NULL,
492
496
  injected_paths TEXT NOT NULL DEFAULT '[]',
493
497
  estimated_tokens INTEGER NOT NULL DEFAULT 0,
494
- was_referenced INTEGER NOT NULL DEFAULT 0
498
+ was_referenced INTEGER NOT NULL DEFAULT 0,
499
+ turn_index INTEGER NOT NULL DEFAULT 0
495
500
  )
496
501
  `);
497
502
  db.exec(`CREATE INDEX IF NOT EXISTS idx_context_usage_session ON context_usage(session_id)`);
498
503
 
504
+ // Migration: add turn_index to existing context_usage
505
+ const cuCols = db.prepare("PRAGMA table_info(context_usage)").all() as { name: string }[];
506
+ if (!cuCols.some(c => c.name === "turn_index")) {
507
+ try { db.exec(`ALTER TABLE context_usage ADD COLUMN turn_index INTEGER NOT NULL DEFAULT 0`); } catch { /* exists */ }
508
+ }
509
+
499
510
  // Hook prompt dedupe: suppress duplicate/heartbeat prompts to reduce GPU churn.
500
511
  db.exec(`
501
512
  CREATE TABLE IF NOT EXISTS hook_dedupe (
@@ -544,6 +555,10 @@ function initializeDatabase(db: Database): void {
544
555
  ["skill_name", "ALTER TABLE documents ADD COLUMN skill_name TEXT"],
545
556
  ["obs_quality_score", "ALTER TABLE documents ADD COLUMN obs_quality_score REAL"],
546
557
  ["failure_reason", "ALTER TABLE documents ADD COLUMN failure_reason TEXT"],
558
+ ["source_doc_ids", "ALTER TABLE documents ADD COLUMN source_doc_ids TEXT"],
559
+ ["embed_state", "ALTER TABLE documents ADD COLUMN embed_state TEXT DEFAULT 'pending'"],
560
+ ["embed_error", "ALTER TABLE documents ADD COLUMN embed_error TEXT"],
561
+ ["embed_attempts", "ALTER TABLE documents ADD COLUMN embed_attempts INTEGER DEFAULT 0"],
547
562
  ];
548
563
  for (const [col, sql] of obsMigrations) {
549
564
  if (!colNames.has(col)) {
@@ -781,6 +796,64 @@ function initializeDatabase(db: Database): void {
781
796
  `);
782
797
 
783
798
  db.exec(`CREATE INDEX IF NOT EXISTS idx_intent_cache_time ON intent_classifications(cached_at)`);
799
+
800
+ // Recall tracking: append-only event log for every doc surfaced by retrieval
801
+ // usage_id is informational (no FK) — links to context_usage.id in the same vault
802
+ // but may reference a different vault's row in cross-vault scenarios.
803
+ // Cross-vault linkage uses session_id + turn_index instead.
804
+ db.exec(`
805
+ CREATE TABLE IF NOT EXISTS recall_events (
806
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
807
+ doc_id INTEGER NOT NULL,
808
+ query_hash TEXT NOT NULL,
809
+ search_score REAL NOT NULL,
810
+ session_id TEXT NOT NULL,
811
+ usage_id INTEGER,
812
+ turn_index INTEGER NOT NULL DEFAULT 0,
813
+ surfaced_at TEXT NOT NULL DEFAULT (datetime('now')),
814
+ was_referenced INTEGER NOT NULL DEFAULT 0,
815
+ FOREIGN KEY (doc_id) REFERENCES documents(id) ON DELETE CASCADE
816
+ )
817
+ `);
818
+ // Migration: add usage_id + turn_index columns to existing recall_events tables
819
+ const reCols = db.prepare("PRAGMA table_info(recall_events)").all() as { name: string }[];
820
+ const reColNames = new Set(reCols.map(c => c.name));
821
+ if (!reColNames.has("usage_id")) {
822
+ try { db.exec(`ALTER TABLE recall_events ADD COLUMN usage_id INTEGER`); } catch { /* exists */ }
823
+ }
824
+ if (!reColNames.has("turn_index")) {
825
+ try { db.exec(`ALTER TABLE recall_events ADD COLUMN turn_index INTEGER NOT NULL DEFAULT 0`); } catch { /* exists */ }
826
+ }
827
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_recall_events_usage ON recall_events(usage_id)`);
828
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_recall_events_doc ON recall_events(doc_id)`);
829
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_recall_events_session ON recall_events(session_id)`);
830
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_recall_events_surfaced ON recall_events(surfaced_at)`);
831
+
832
+ // Recall stats: derived summary recomputed by background worker
833
+ db.exec(`
834
+ CREATE TABLE IF NOT EXISTS recall_stats (
835
+ doc_id INTEGER PRIMARY KEY,
836
+ recall_count INTEGER NOT NULL DEFAULT 0,
837
+ unique_queries INTEGER NOT NULL DEFAULT 0,
838
+ recall_days INTEGER NOT NULL DEFAULT 0,
839
+ total_score REAL NOT NULL DEFAULT 0,
840
+ max_score REAL NOT NULL DEFAULT 0,
841
+ first_recalled_at TEXT,
842
+ last_recalled_at TEXT,
843
+ diversity_score REAL NOT NULL DEFAULT 0,
844
+ spacing_score REAL NOT NULL DEFAULT 0,
845
+ negative_count INTEGER NOT NULL DEFAULT 0,
846
+ updated_at TEXT NOT NULL DEFAULT (datetime('now')),
847
+ FOREIGN KEY (doc_id) REFERENCES documents(id) ON DELETE CASCADE
848
+ )
849
+ `);
850
+
851
+ // Migration: add contradict_confidence to memory_relations
852
+ const mrCols = db.prepare("PRAGMA table_info(memory_relations)").all() as { name: string }[];
853
+ const mrColNames = new Set(mrCols.map(c => c.name));
854
+ if (!mrColNames.has("contradict_confidence")) {
855
+ try { db.exec(`ALTER TABLE memory_relations ADD COLUMN contradict_confidence REAL`); } catch { /* column exists */ }
856
+ }
784
857
  }
785
858
 
786
859
 
@@ -894,7 +967,7 @@ export type Store = {
894
967
  getRecentSessions: (limit: number) => SessionRecord[];
895
968
 
896
969
  // SAME: Context usage tracking
897
- insertUsage: (usage: UsageRecord) => void;
970
+ insertUsage: (usage: UsageRecord) => number;
898
971
  getUsageForSession: (sessionId: string) => UsageRow[];
899
972
  markUsageReferenced: (id: number) => void;
900
973
 
@@ -906,6 +979,11 @@ export type Store = {
906
979
  pinDocument: (collection: string, path: string, pinned: boolean) => void;
907
980
  snoozeDocument: (collection: string, path: string, until: string | null) => void;
908
981
 
982
+ // Embed state tracking
983
+ markEmbedSynced: (hash: string) => void;
984
+ markEmbedFailed: (hash: string, error: string) => void;
985
+ getEmbedStats: () => { pending: number; synced: number; failed: number };
986
+
909
987
  // Beads integration
910
988
  syncBeadsIssues: (projectDir: string) => Promise<{ synced: number; created: number; newDocIds: number[] }>;
911
989
  detectBeadsProject: (cwd: string) => string | null;
@@ -935,6 +1013,13 @@ export type Store = {
935
1013
  queryEntityTriples: (entityId: string, options?: { asOf?: string; direction?: "outgoing" | "incoming" | "both" }) => { id: number; direction: string; subject: string; predicate: string; object: string; validFrom: string | null; validTo: string | null; confidence: number; current: boolean }[];
936
1014
  getTripleStats: () => { totalTriples: number; currentFacts: number; expiredFacts: number; predicateTypes: string[] };
937
1015
 
1016
+ // Recall tracking
1017
+ insertRecallEvents: (events: { docId: number; queryHash: string; searchScore: number; sessionId: string; usageId?: number; turnIndex?: number; wasReferenced?: boolean }[]) => number;
1018
+ recomputeRecallStats: () => number;
1019
+ getRecallStats: (docId: number) => RecallStatsRow | null;
1020
+ getRecallStatsAll: (minRecallCount?: number) => RecallStatsRow[];
1021
+ markRecallEventsReferenced: (sessionId: string, docIds: number[]) => void;
1022
+
938
1023
  // Co-activation tracking
939
1024
  recordCoActivation: (paths: string[]) => void;
940
1025
  getCoActivated: (path: string, limit?: number) => { path: string; count: number }[];
@@ -978,9 +1063,9 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
978
1063
  db.exec("PRAGMA journal_mode = WAL");
979
1064
  db.exec("PRAGMA query_only = ON");
980
1065
  }
981
- if (opts?.busyTimeout !== undefined) {
982
- db.exec(`PRAGMA busy_timeout = ${opts.busyTimeout}`);
983
- }
1066
+ // Reset busy_timeout to operational value after DDL init (which uses 15s).
1067
+ // Default 5000ms for normal operations — callers can override via opts.
1068
+ db.exec(`PRAGMA busy_timeout = ${opts?.busyTimeout ?? 5000}`);
984
1069
 
985
1070
  return {
986
1071
  db,
@@ -1066,7 +1151,7 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
1066
1151
  getRecentSessions: (limit: number) => getRecentSessionsFn(db, limit),
1067
1152
 
1068
1153
  // SAME: Context usage tracking
1069
- insertUsage: (usage: UsageRecord) => insertUsageFn(db, usage),
1154
+ insertUsage: (usage: UsageRecord) => insertUsageFn(db, usage) as number,
1070
1155
  getUsageForSession: (sessionId: string) => getUsageForSessionFn(db, sessionId),
1071
1156
  markUsageReferenced: (id: number) => markUsageReferencedFn(db, id),
1072
1157
 
@@ -1078,6 +1163,24 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
1078
1163
  pinDocument: (collection: string, path: string, pinned: boolean) => pinDocumentFn(db, collection, path, pinned),
1079
1164
  snoozeDocument: (collection: string, path: string, until: string | null) => snoozeDocumentFn(db, collection, path, until),
1080
1165
 
1166
+ // Embed state tracking
1167
markEmbedSynced: (hash: string) => {
  // Flag every active document that carries this content hash as embedded.
  const markSynced = db.prepare(`UPDATE documents SET embed_state = 'synced' WHERE hash = ? AND active = 1`);
  markSynced.run(hash);
},
1170
markEmbedFailed: (hash: string, error: string) => {
  // Store the failure text and bump the attempt counter (a NULL counter counts as 0)
  // on every active document that carries this content hash.
  const markFailed = db.prepare(`UPDATE documents SET embed_state = 'failed', embed_error = ?, embed_attempts = COALESCE(embed_attempts, 0) + 1 WHERE hash = ? AND active = 1`);
  markFailed.run(error, hash);
},
1173
getEmbedStats: () => {
  // Count active documents per embed state; a NULL embed_state is counted as pending.
  const counts = db.prepare(`
    SELECT
      SUM(CASE WHEN embed_state = 'pending' OR embed_state IS NULL THEN 1 ELSE 0 END) as pending,
      SUM(CASE WHEN embed_state = 'synced' THEN 1 ELSE 0 END) as synced,
      SUM(CASE WHEN embed_state = 'failed' THEN 1 ELSE 0 END) as failed
    FROM documents WHERE active = 1
  `).get() as { pending: number; synced: number; failed: number };
  const { pending, synced, failed } = counts;
  // SUM() yields NULL when there are no active rows, so fall back to 0.
  return {
    pending: pending || 0,
    synced: synced || 0,
    failed: failed || 0,
  };
},
1183
+
1081
1184
  // Beads integration
1082
1185
  syncBeadsIssues: (projectDir: string) => syncBeadsIssues(db, projectDir),
1083
1186
  detectBeadsProject,
@@ -1189,6 +1292,165 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
1189
1292
  },
1190
1293
 
1191
1294
  // Recall tracking (the co-activation methods this heading used to introduce start at recordCoActivation, below)
1295
// Recall tracking: write a batch of surfacing events inside one transaction.
// Every row of a batch shares a single surfaced_at timestamp. A missing usageId is
// stored as NULL, a missing turnIndex as 0, and wasReferenced is stored as 0/1.
// Returns the number of events passed in.
insertRecallEvents: (events: { docId: number; queryHash: string; searchScore: number; sessionId: string; usageId?: number; turnIndex?: number; wasReferenced?: boolean }[]) => {
  if (events.length === 0) return 0;
  const insert = db.prepare(`
    INSERT INTO recall_events (doc_id, query_hash, search_score, session_id, usage_id, turn_index, surfaced_at, was_referenced)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `);
  const surfacedAt = new Date().toISOString();
  const writeAll = db.transaction(() => {
    events.forEach(ev => {
      const referencedFlag = ev.wasReferenced ? 1 : 0;
      insert.run(
        ev.docId,
        ev.queryHash,
        ev.searchScore,
        ev.sessionId,
        ev.usageId ?? null,
        ev.turnIndex ?? 0,
        surfacedAt,
        referencedFlag
      );
    });
  });
  writeAll();
  return events.length;
},
1311
+
1312
// Recall tracking: rebuild the recall_stats summary from the raw event log.
// A single GROUP BY query aggregates the events per document; the diversity and
// spacing formulas are then evaluated in JS and every row is upserted inside one
// transaction. Returns the number of documents whose stats were written.
recomputeRecallStats: () => {
  type Aggregate = {
    doc_id: number; recall_count: number; unique_queries: number; recall_days: number;
    total_score: number; max_score: number; negative_count: number;
    first_recalled_at: string; last_recalled_at: string; day_list: string;
  };
  const MS_PER_DAY = 24 * 60 * 60 * 1000;

  const perDoc = db.prepare(`
    SELECT
      doc_id,
      COUNT(*) AS recall_count,
      COUNT(DISTINCT query_hash) AS unique_queries,
      COUNT(DISTINCT date(surfaced_at, 'utc')) AS recall_days,
      SUM(search_score) AS total_score,
      MAX(search_score) AS max_score,
      SUM(CASE WHEN was_referenced = 0 THEN 1 ELSE 0 END) AS negative_count,
      MIN(surfaced_at) AS first_recalled_at,
      MAX(surfaced_at) AS last_recalled_at,
      GROUP_CONCAT(DISTINCT date(surfaced_at, 'utc')) AS day_list
    FROM recall_events
    GROUP BY doc_id
  `).all() as Aggregate[];

  if (perDoc.length === 0) return 0;

  // Diversity: the larger of distinct queries / distinct days, saturating at 5.
  const diversityOf = (agg: Aggregate): number =>
    Math.min(1, Math.max(agg.unique_queries, agg.recall_days) / 5);

  // Spacing: blends how many distinct days there are (log-scaled, saturating at 5 days)
  // with how wide the first-to-last span is (saturating at 7 days).
  // A single recall day scores a flat 0.2; anything else scores 0.
  const spacingOf = (agg: Aggregate): number => {
    if (agg.recall_days > 1 && agg.day_list) {
      const days = agg.day_list.split(",").sort();
      const dayCountTerm = Math.min(1, Math.log1p(days.length - 1) / Math.log1p(4));
      const earliest = Date.parse(`${days[0]}T00:00:00Z`);
      const latest = Date.parse(`${days[days.length - 1]}T00:00:00Z`);
      const spanDays = Math.max(0, (latest - earliest) / MS_PER_DAY);
      const spanTerm = Math.min(1, spanDays / 7);
      return Math.min(1, 0.55 * dayCountTerm + 0.45 * spanTerm);
    }
    return agg.recall_days === 1 ? 0.2 : 0;
  };

  const upsertStats = db.prepare(`
    INSERT INTO recall_stats (doc_id, recall_count, unique_queries, recall_days, total_score, max_score,
      first_recalled_at, last_recalled_at, diversity_score, spacing_score, negative_count, updated_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(doc_id) DO UPDATE SET
      recall_count = excluded.recall_count,
      unique_queries = excluded.unique_queries,
      recall_days = excluded.recall_days,
      total_score = excluded.total_score,
      max_score = excluded.max_score,
      first_recalled_at = excluded.first_recalled_at,
      last_recalled_at = excluded.last_recalled_at,
      diversity_score = excluded.diversity_score,
      spacing_score = excluded.spacing_score,
      negative_count = excluded.negative_count,
      updated_at = excluded.updated_at
  `);

  // One timestamp for the whole recompute pass.
  const updatedAt = new Date().toISOString();
  const writeAll = db.transaction(() => {
    perDoc.forEach(agg => {
      upsertStats.run(
        agg.doc_id,
        agg.recall_count,
        agg.unique_queries,
        agg.recall_days,
        agg.total_score,
        agg.max_score,
        agg.first_recalled_at,
        agg.last_recalled_at,
        diversityOf(agg),
        spacingOf(agg),
        agg.negative_count,
        updatedAt
      );
    });
  });
  writeAll();
  return perDoc.length;
},
1386
+
1387
// Fetch the derived recall summary for one document, converted from the table's
// snake_case columns to camelCase. Returns null when the document has no stats row.
getRecallStats: (docId: number) => {
  const record = db.prepare(`SELECT * FROM recall_stats WHERE doc_id = ?`).get(docId) as any;
  if (!record) return null;
  const {
    doc_id, recall_count, unique_queries, recall_days, total_score, max_score,
    first_recalled_at, last_recalled_at, diversity_score, spacing_score,
    negative_count, updated_at,
  } = record;
  const stats = {
    docId: doc_id,
    recallCount: recall_count,
    uniqueQueries: unique_queries,
    recallDays: recall_days,
    totalScore: total_score,
    maxScore: max_score,
    firstRecalledAt: first_recalled_at,
    lastRecalledAt: last_recalled_at,
    diversityScore: diversity_score,
    spacingScore: spacing_score,
    negativeCount: negative_count,
    updatedAt: updated_at,
  };
  return stats as RecallStatsRow;
},
1405
+
1406
// List recall summaries for active documents with at least `minRecallCount` recalls
// (default 1), most-recalled first. Each entry also carries the owning document's
// collection, path and title from the joined documents row.
getRecallStatsAll: (minRecallCount: number = 1) => {
  const toStats = (record: any) => {
    const stats = {
      docId: record.doc_id,
      recallCount: record.recall_count,
      uniqueQueries: record.unique_queries,
      recallDays: record.recall_days,
      totalScore: record.total_score,
      maxScore: record.max_score,
      firstRecalledAt: record.first_recalled_at,
      lastRecalledAt: record.last_recalled_at,
      diversityScore: record.diversity_score,
      spacingScore: record.spacing_score,
      negativeCount: record.negative_count,
      updatedAt: record.updated_at,
      collection: record.collection,
      path: record.path,
      title: record.title,
    };
    return stats as RecallStatsRow;
  };

  const records = db.prepare(`
    SELECT rs.*, d.collection, d.path, d.title
    FROM recall_stats rs
    JOIN documents d ON rs.doc_id = d.id
    WHERE rs.recall_count >= ? AND d.active = 1
    ORDER BY rs.recall_count DESC
  `).all(minRecallCount) as any[];

  const result: RecallStatsRow[] = [];
  for (const record of records) {
    result.push(toStats(record));
  }
  return result;
},
1431
+
1432
// Flag recall events as referenced for the given documents within one session.
//
// Only the LATEST event per doc in the session is marked, not all events.
// This preserves negative signals: if a doc was surfaced across 5 prompts
// but only cited once, 4 events stay was_referenced=0 (genuine negatives).
//
// "Latest" is ordered by surfaced_at and then by id. insertRecallEvents stamps a
// whole batch with one timestamp, so surfaced_at alone can tie; the AUTOINCREMENT
// id breaks such ties in favour of the most recently inserted row, which makes
// the choice deterministic. Docs with no event in the session are left untouched.
markRecallEventsReferenced: (sessionId: string, docIds: number[]) => {
  if (docIds.length === 0) return;
  const stmt = db.prepare(`
    UPDATE recall_events SET was_referenced = 1
    WHERE id = (
      SELECT id FROM recall_events
      WHERE session_id = ? AND doc_id = ?
      ORDER BY surfaced_at DESC, id DESC
      LIMIT 1
    )
  `);
  // Single transaction so the whole set of docs is marked atomically.
  const tx = db.transaction(() => {
    for (const docId of docIds) {
      stmt.run(sessionId, docId);
    }
  });
  tx();
},
1453
+
1192
1454
  recordCoActivation: (paths: string[]) => {
1193
1455
  if (paths.length < 2) return;
1194
1456
  const now = new Date().toISOString();
@@ -1424,6 +1686,7 @@ export type UsageRecord = {
1424
1686
  injectedPaths: string[];
1425
1687
  estimatedTokens: number;
1426
1688
  wasReferenced: number;
1689
+ turnIndex?: number;
1427
1690
  };
1428
1691
 
1429
1692
  export type UsageRow = {
@@ -1434,6 +1697,26 @@ export type UsageRow = {
1434
1697
  injectedPaths: string;
1435
1698
  estimatedTokens: number;
1436
1699
  wasReferenced: number;
1700
+ turnIndex: number;
1701
+ };
1702
+
1703
/**
 * Derived per-document recall summary: the camelCase view of a `recall_stats`
 * row as returned by `getRecallStats` / `getRecallStatsAll`.
 */
export type RecallStatsRow = {
  /** Id of the document the summary belongs to. */
  docId: number;
  /** Total number of recall events recorded for the document. */
  recallCount: number;
  /** Number of distinct query hashes that surfaced the document. */
  uniqueQueries: number;
  /** Number of distinct calendar days on which the document was surfaced. */
  recallDays: number;
  /** Sum of the search scores over all recall events. */
  totalScore: number;
  /** Highest single search score among the recall events. */
  maxScore: number;
  /** Earliest surfaced_at timestamp, if any. */
  firstRecalledAt: string | null;
  /** Latest surfaced_at timestamp, if any. */
  lastRecalledAt: string | null;
  /** Query/day diversity score, clamped to at most 1. */
  diversityScore: number;
  /** Multi-day spacing score, clamped to at most 1. */
  spacingScore: number;
  /** Number of events where the document was surfaced but not referenced. */
  negativeCount: number;
  /** Timestamp of the last stats recompute that wrote this row. */
  updatedAt: string;
  // Joined from documents (only populated by getRecallStatsAll)
  collection?: string;
  path?: string;
  title?: string;
};
1438
1721
 
1439
1722
  export type DocumentRow = {
@@ -2924,12 +3207,17 @@ export function getHashesForEmbedding(db: Database): { hash: string; body: strin
2924
3207
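  * Returns hashes of active documents that have no content_vectors row with fragment_type set,
  * or that lack the primary (seq = 0) vector. Documents with 3 or more recorded embed attempts are excluded.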
2925
3208
  */
2926
3209
  export function getHashesNeedingFragments(db: Database): { hash: string; body: string; path: string; title: string; collection: string }[] {
3210
+ // Select docs that either have no fragments at all OR are missing the primary (seq=0) fragment.
3211
+ // The seq=0 embedding is critical — surprisal scoring, semantic graph, and health checks depend on it.
2927
3212
  return db.prepare(`
2928
3213
  SELECT d.hash, c.doc as body, MIN(d.path) as path, MIN(d.title) as title, MIN(d.collection) as collection
2929
3214
  FROM documents d
2930
3215
  JOIN content c ON d.hash = c.hash
2931
3216
  LEFT JOIN content_vectors v ON d.hash = v.hash AND v.fragment_type IS NOT NULL
2932
- WHERE d.active = 1 AND v.hash IS NULL
3217
+ LEFT JOIN content_vectors v0 ON d.hash = v0.hash AND v0.seq = 0
3218
+ WHERE d.active = 1
3219
+ AND (v.hash IS NULL OR v0.hash IS NULL)
3220
+ AND COALESCE(d.embed_attempts, 0) < 3
2933
3221
  GROUP BY d.hash
2934
3222
  `).all() as { hash: string; body: string; path: string; title: string; collection: string }[];
2935
3223
  }
@@ -2941,6 +3229,8 @@ export function getHashesNeedingFragments(db: Database): { hash: string; body: s
2941
3229
  export function clearAllEmbeddings(db: Database): void {
2942
3230
  db.exec(`DELETE FROM content_vectors`);
2943
3231
  db.exec(`DROP TABLE IF EXISTS vectors_vec`);
3232
+ // Reset embed state so failed docs get retried after force re-embed
3233
+ try { db.exec(`UPDATE documents SET embed_state = 'pending', embed_error = NULL, embed_attempts = 0 WHERE active = 1`); } catch { /* column may not exist yet */ }
2944
3234
  vecTableDimsCache.delete(db);
2945
3235
  }
2946
3236
 
@@ -3613,19 +3903,22 @@ function getRecentSessionsFn(db: Database, limit: number): SessionRecord[] {
3613
3903
  // SAME: Context Usage Tracking
3614
3904
  // =============================================================================
3615
3905
 
3616
/**
 * Insert one context_usage row and report its id.
 *
 * `injectedPaths` is stored as a JSON string and a missing `turnIndex` is stored as 0.
 *
 * @param db - Open database connection.
 * @param usage - Usage record to persist.
 * @returns The rowid of the just-inserted row, used for recall event linkage.
 */
function insertUsageFn(db: Database, usage: UsageRecord): number {
  const insert = db.prepare(`
    INSERT INTO context_usage (session_id, timestamp, hook_name, injected_paths, estimated_tokens, was_referenced, turn_index)
    VALUES (?, ?, ?, ?, ?, ?, ?)
  `);
  const serializedPaths = JSON.stringify(usage.injectedPaths);
  const turnIndex = usage.turnIndex ?? 0;
  insert.run(
    usage.sessionId,
    usage.timestamp,
    usage.hookName,
    serializedPaths,
    usage.estimatedTokens,
    usage.wasReferenced,
    turnIndex
  );

  // Read back the id assigned by the INSERT above on this same connection.
  const { id } = db.prepare("SELECT last_insert_rowid() as id").get() as { id: number };
  return id;
}
3622
3915
 
3623
3916
  function getUsageForSessionFn(db: Database, sessionId: string): UsageRow[] {
3624
3917
  return db.prepare(`
3625
3918
  SELECT id, session_id AS sessionId, timestamp, hook_name AS hookName,
3626
3919
  injected_paths AS injectedPaths, estimated_tokens AS estimatedTokens,
3627
- was_referenced AS wasReferenced
3628
- FROM context_usage WHERE session_id = ? ORDER BY timestamp
3920
+ was_referenced AS wasReferenced, turn_index AS turnIndex
3921
+ FROM context_usage WHERE session_id = ? ORDER BY turn_index, timestamp
3629
3922
  `).all(sessionId) as UsageRow[];
3630
3923
  }
3631
3924