clawmem 0.5.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +5 -4
- package/CLAUDE.md +5 -4
- package/README.md +16 -2
- package/SKILL.md +1 -1
- package/package.json +1 -1
- package/src/clawmem.ts +17 -0
- package/src/consolidation.ts +323 -1
- package/src/hooks/context-surfacing.ts +104 -13
- package/src/hooks/feedback-loop.ts +40 -0
- package/src/hooks/session-bootstrap.ts +20 -2
- package/src/hooks.ts +8 -3
- package/src/mcp.ts +32 -1
- package/src/memory.ts +5 -3
- package/src/recall-attribution.ts +182 -0
- package/src/recall-buffer.ts +85 -0
- package/src/store.ts +306 -13
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recall Tracking — direct-write recall event recording.
|
|
3
|
+
*
|
|
4
|
+
* Context-surfacing writes recall events directly to SQLite (single transaction,
|
|
5
|
+
* <0.4ms for ~12 rows). This replaces the original in-memory buffer design which
|
|
6
|
+
* failed in Claude Code mode where each hook is a separate process invocation.
|
|
7
|
+
*
|
|
8
|
+
* Per GPT 5.4 High review (Codex turn 1):
|
|
9
|
+
* - Direct INSERT is preferred over buffer for cross-process correctness
|
|
10
|
+
* - WAL mode handles concurrent writes safely (busy_timeout=5000ms)
|
|
11
|
+
* - Negative signals (surfaced but not referenced) marked retroactively by feedback-loop
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { createHash } from "crypto";
|
|
15
|
+
import type { Store } from "./store.ts";
|
|
16
|
+
|
|
17
|
+
// =============================================================================
|
|
18
|
+
// Query Hashing
|
|
19
|
+
// =============================================================================
|
|
20
|
+
|
|
21
|
+
/**
 * Derive the recall-tracking key for a search query.
 *
 * The query is lower-cased and trimmed before hashing, so queries that differ
 * only by case or surrounding whitespace map to the same key. The key is the
 * first 12 hex characters of the SHA1 digest (same as OpenClaw's approach).
 *
 * @param query - Raw search query text
 * @returns 12-character lowercase hex string
 */
export function hashQuery(query: string): string {
  const normalized = query.toLowerCase().trim();
  const fullDigest = createHash("sha1").update(normalized).digest("hex");
  return fullDigest.slice(0, 12);
}
|
|
31
|
+
|
|
32
|
+
// =============================================================================
|
|
33
|
+
// Direct Write (replaces in-memory buffer)
|
|
34
|
+
// =============================================================================
|
|
35
|
+
|
|
36
|
+
/**
 * Record surfaced documents as recall events directly to SQLite.
 * Called from context-surfacing hook — single transaction, ~0.4ms.
 *
 * Resolves displayPath → doc_id inline. A displayPath is split at its first
 * "/" into `collection` and the document path; entries without a "/" and
 * docs that can't be resolved (deleted between search and write) are skipped.
 *
 * @param store - Store instance with DB access
 * @param sessionId - Current session identifier; an empty value records nothing
 * @param queryHash - SHA1 hash of the search query
 * @param docs - Array of {displayPath, searchScore} for each surfaced result
 * @param usageId - Optional context_usage row id to attach to every event
 * @param turnIndex - Optional turn index to attach to every event
 * @returns Number of events recorded
 */
export function writeRecallEvents(
  store: Store,
  sessionId: string,
  queryHash: string,
  docs: { displayPath: string; searchScore: number }[],
  usageId?: number,
  turnIndex?: number
): number {
  if (!sessionId || docs.length === 0) return 0;

  // The element type must list usageId/turnIndex: the literal pushed below
  // carries them, and an object literal with undeclared keys is rejected by
  // the compiler's excess-property check.
  const resolved: {
    docId: number;
    queryHash: string;
    searchScore: number;
    sessionId: string;
    usageId?: number;
    turnIndex?: number;
  }[] = [];

  for (const doc of docs) {
    // "<collection>/<path...>" — only the first "/" separates the collection.
    const separator = doc.displayPath.indexOf("/");
    if (separator === -1) continue;
    const collection = doc.displayPath.slice(0, separator);
    const docPath = doc.displayPath.slice(separator + 1);

    const found = store.findActiveDocument(collection, docPath);
    if (!found) {
      console.debug?.(`[recall] skipping unresolvable displayPath: ${doc.displayPath}`);
      continue;
    }

    resolved.push({
      docId: found.id,
      queryHash,
      searchScore: doc.searchScore,
      sessionId,
      usageId,
      turnIndex,
    });
  }

  if (resolved.length === 0) return 0;
  return store.insertRecallEvents(resolved);
}
|
|
85
|
+
|
package/src/store.ts
CHANGED
|
@@ -301,6 +301,10 @@ function initializeDatabase(db: Database): void {
|
|
|
301
301
|
sqliteVec.load(db);
|
|
302
302
|
db.exec("PRAGMA journal_mode = WAL");
|
|
303
303
|
db.exec("PRAGMA foreign_keys = ON");
|
|
304
|
+
// Set generous busy_timeout during DDL — concurrent Stop hooks (decision-extractor,
|
|
305
|
+
// handoff-generator, feedback-loop) all run initializeDatabase simultaneously.
|
|
306
|
+
// 15s is well within the 30s Stop hook timeout. Reset to normal after DDL completes.
|
|
307
|
+
db.exec("PRAGMA busy_timeout = 15000");
|
|
304
308
|
|
|
305
309
|
// Drop legacy tables that are now managed in YAML
|
|
306
310
|
db.exec(`DROP TABLE IF EXISTS path_contexts`);
|
|
@@ -491,11 +495,18 @@ function initializeDatabase(db: Database): void {
|
|
|
491
495
|
hook_name TEXT NOT NULL,
|
|
492
496
|
injected_paths TEXT NOT NULL DEFAULT '[]',
|
|
493
497
|
estimated_tokens INTEGER NOT NULL DEFAULT 0,
|
|
494
|
-
was_referenced INTEGER NOT NULL DEFAULT 0
|
|
498
|
+
was_referenced INTEGER NOT NULL DEFAULT 0,
|
|
499
|
+
turn_index INTEGER NOT NULL DEFAULT 0
|
|
495
500
|
)
|
|
496
501
|
`);
|
|
497
502
|
db.exec(`CREATE INDEX IF NOT EXISTS idx_context_usage_session ON context_usage(session_id)`);
|
|
498
503
|
|
|
504
|
+
// Migration: add turn_index to existing context_usage
|
|
505
|
+
const cuCols = db.prepare("PRAGMA table_info(context_usage)").all() as { name: string }[];
|
|
506
|
+
if (!cuCols.some(c => c.name === "turn_index")) {
|
|
507
|
+
try { db.exec(`ALTER TABLE context_usage ADD COLUMN turn_index INTEGER NOT NULL DEFAULT 0`); } catch { /* exists */ }
|
|
508
|
+
}
|
|
509
|
+
|
|
499
510
|
// Hook prompt dedupe: suppress duplicate/heartbeat prompts to reduce GPU churn.
|
|
500
511
|
db.exec(`
|
|
501
512
|
CREATE TABLE IF NOT EXISTS hook_dedupe (
|
|
@@ -544,6 +555,10 @@ function initializeDatabase(db: Database): void {
|
|
|
544
555
|
["skill_name", "ALTER TABLE documents ADD COLUMN skill_name TEXT"],
|
|
545
556
|
["obs_quality_score", "ALTER TABLE documents ADD COLUMN obs_quality_score REAL"],
|
|
546
557
|
["failure_reason", "ALTER TABLE documents ADD COLUMN failure_reason TEXT"],
|
|
558
|
+
["source_doc_ids", "ALTER TABLE documents ADD COLUMN source_doc_ids TEXT"],
|
|
559
|
+
["embed_state", "ALTER TABLE documents ADD COLUMN embed_state TEXT DEFAULT 'pending'"],
|
|
560
|
+
["embed_error", "ALTER TABLE documents ADD COLUMN embed_error TEXT"],
|
|
561
|
+
["embed_attempts", "ALTER TABLE documents ADD COLUMN embed_attempts INTEGER DEFAULT 0"],
|
|
547
562
|
];
|
|
548
563
|
for (const [col, sql] of obsMigrations) {
|
|
549
564
|
if (!colNames.has(col)) {
|
|
@@ -781,6 +796,64 @@ function initializeDatabase(db: Database): void {
|
|
|
781
796
|
`);
|
|
782
797
|
|
|
783
798
|
db.exec(`CREATE INDEX IF NOT EXISTS idx_intent_cache_time ON intent_classifications(cached_at)`);
|
|
799
|
+
|
|
800
|
+
// Recall tracking: append-only event log for every doc surfaced by retrieval
|
|
801
|
+
// usage_id is informational (no FK) — links to context_usage.id in the same vault
|
|
802
|
+
// but may reference a different vault's row in cross-vault scenarios.
|
|
803
|
+
// Cross-vault linkage uses session_id + turn_index instead.
|
|
804
|
+
db.exec(`
|
|
805
|
+
CREATE TABLE IF NOT EXISTS recall_events (
|
|
806
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
807
|
+
doc_id INTEGER NOT NULL,
|
|
808
|
+
query_hash TEXT NOT NULL,
|
|
809
|
+
search_score REAL NOT NULL,
|
|
810
|
+
session_id TEXT NOT NULL,
|
|
811
|
+
usage_id INTEGER,
|
|
812
|
+
turn_index INTEGER NOT NULL DEFAULT 0,
|
|
813
|
+
surfaced_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
814
|
+
was_referenced INTEGER NOT NULL DEFAULT 0,
|
|
815
|
+
FOREIGN KEY (doc_id) REFERENCES documents(id) ON DELETE CASCADE
|
|
816
|
+
)
|
|
817
|
+
`);
|
|
818
|
+
// Migration: add usage_id + turn_index columns to existing recall_events tables
|
|
819
|
+
const reCols = db.prepare("PRAGMA table_info(recall_events)").all() as { name: string }[];
|
|
820
|
+
const reColNames = new Set(reCols.map(c => c.name));
|
|
821
|
+
if (!reColNames.has("usage_id")) {
|
|
822
|
+
try { db.exec(`ALTER TABLE recall_events ADD COLUMN usage_id INTEGER`); } catch { /* exists */ }
|
|
823
|
+
}
|
|
824
|
+
if (!reColNames.has("turn_index")) {
|
|
825
|
+
try { db.exec(`ALTER TABLE recall_events ADD COLUMN turn_index INTEGER NOT NULL DEFAULT 0`); } catch { /* exists */ }
|
|
826
|
+
}
|
|
827
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_recall_events_usage ON recall_events(usage_id)`);
|
|
828
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_recall_events_doc ON recall_events(doc_id)`);
|
|
829
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_recall_events_session ON recall_events(session_id)`);
|
|
830
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_recall_events_surfaced ON recall_events(surfaced_at)`);
|
|
831
|
+
|
|
832
|
+
// Recall stats: derived summary recomputed by background worker
|
|
833
|
+
db.exec(`
|
|
834
|
+
CREATE TABLE IF NOT EXISTS recall_stats (
|
|
835
|
+
doc_id INTEGER PRIMARY KEY,
|
|
836
|
+
recall_count INTEGER NOT NULL DEFAULT 0,
|
|
837
|
+
unique_queries INTEGER NOT NULL DEFAULT 0,
|
|
838
|
+
recall_days INTEGER NOT NULL DEFAULT 0,
|
|
839
|
+
total_score REAL NOT NULL DEFAULT 0,
|
|
840
|
+
max_score REAL NOT NULL DEFAULT 0,
|
|
841
|
+
first_recalled_at TEXT,
|
|
842
|
+
last_recalled_at TEXT,
|
|
843
|
+
diversity_score REAL NOT NULL DEFAULT 0,
|
|
844
|
+
spacing_score REAL NOT NULL DEFAULT 0,
|
|
845
|
+
negative_count INTEGER NOT NULL DEFAULT 0,
|
|
846
|
+
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
847
|
+
FOREIGN KEY (doc_id) REFERENCES documents(id) ON DELETE CASCADE
|
|
848
|
+
)
|
|
849
|
+
`);
|
|
850
|
+
|
|
851
|
+
// Migration: add contradict_confidence to memory_relations
|
|
852
|
+
const mrCols = db.prepare("PRAGMA table_info(memory_relations)").all() as { name: string }[];
|
|
853
|
+
const mrColNames = new Set(mrCols.map(c => c.name));
|
|
854
|
+
if (!mrColNames.has("contradict_confidence")) {
|
|
855
|
+
try { db.exec(`ALTER TABLE memory_relations ADD COLUMN contradict_confidence REAL`); } catch { /* column exists */ }
|
|
856
|
+
}
|
|
784
857
|
}
|
|
785
858
|
|
|
786
859
|
|
|
@@ -894,7 +967,7 @@ export type Store = {
|
|
|
894
967
|
getRecentSessions: (limit: number) => SessionRecord[];
|
|
895
968
|
|
|
896
969
|
// SAME: Context usage tracking
|
|
897
|
-
insertUsage: (usage: UsageRecord) =>
|
|
970
|
+
insertUsage: (usage: UsageRecord) => number;
|
|
898
971
|
getUsageForSession: (sessionId: string) => UsageRow[];
|
|
899
972
|
markUsageReferenced: (id: number) => void;
|
|
900
973
|
|
|
@@ -906,6 +979,11 @@ export type Store = {
|
|
|
906
979
|
pinDocument: (collection: string, path: string, pinned: boolean) => void;
|
|
907
980
|
snoozeDocument: (collection: string, path: string, until: string | null) => void;
|
|
908
981
|
|
|
982
|
+
// Embed state tracking
|
|
983
|
+
markEmbedSynced: (hash: string) => void;
|
|
984
|
+
markEmbedFailed: (hash: string, error: string) => void;
|
|
985
|
+
getEmbedStats: () => { pending: number; synced: number; failed: number };
|
|
986
|
+
|
|
909
987
|
// Beads integration
|
|
910
988
|
syncBeadsIssues: (projectDir: string) => Promise<{ synced: number; created: number; newDocIds: number[] }>;
|
|
911
989
|
detectBeadsProject: (cwd: string) => string | null;
|
|
@@ -935,6 +1013,13 @@ export type Store = {
|
|
|
935
1013
|
queryEntityTriples: (entityId: string, options?: { asOf?: string; direction?: "outgoing" | "incoming" | "both" }) => { id: number; direction: string; subject: string; predicate: string; object: string; validFrom: string | null; validTo: string | null; confidence: number; current: boolean }[];
|
|
936
1014
|
getTripleStats: () => { totalTriples: number; currentFacts: number; expiredFacts: number; predicateTypes: string[] };
|
|
937
1015
|
|
|
1016
|
+
// Recall tracking
|
|
1017
|
+
insertRecallEvents: (events: { docId: number; queryHash: string; searchScore: number; sessionId: string; usageId?: number; turnIndex?: number; wasReferenced?: boolean }[]) => number;
|
|
1018
|
+
recomputeRecallStats: () => number;
|
|
1019
|
+
getRecallStats: (docId: number) => RecallStatsRow | null;
|
|
1020
|
+
getRecallStatsAll: (minRecallCount?: number) => RecallStatsRow[];
|
|
1021
|
+
markRecallEventsReferenced: (sessionId: string, docIds: number[]) => void;
|
|
1022
|
+
|
|
938
1023
|
// Co-activation tracking
|
|
939
1024
|
recordCoActivation: (paths: string[]) => void;
|
|
940
1025
|
getCoActivated: (path: string, limit?: number) => { path: string; count: number }[];
|
|
@@ -978,9 +1063,9 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
|
|
|
978
1063
|
db.exec("PRAGMA journal_mode = WAL");
|
|
979
1064
|
db.exec("PRAGMA query_only = ON");
|
|
980
1065
|
}
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
}
|
|
1066
|
+
// Reset busy_timeout to operational value after DDL init (which uses 15s).
|
|
1067
|
+
// Default 5000ms for normal operations — callers can override via opts.
|
|
1068
|
+
db.exec(`PRAGMA busy_timeout = ${opts?.busyTimeout ?? 5000}`);
|
|
984
1069
|
|
|
985
1070
|
return {
|
|
986
1071
|
db,
|
|
@@ -1066,7 +1151,7 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
|
|
|
1066
1151
|
getRecentSessions: (limit: number) => getRecentSessionsFn(db, limit),
|
|
1067
1152
|
|
|
1068
1153
|
// SAME: Context usage tracking
|
|
1069
|
-
insertUsage: (usage: UsageRecord) => insertUsageFn(db, usage),
|
|
1154
|
+
insertUsage: (usage: UsageRecord) => insertUsageFn(db, usage) as number,
|
|
1070
1155
|
getUsageForSession: (sessionId: string) => getUsageForSessionFn(db, sessionId),
|
|
1071
1156
|
markUsageReferenced: (id: number) => markUsageReferencedFn(db, id),
|
|
1072
1157
|
|
|
@@ -1078,6 +1163,24 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
|
|
|
1078
1163
|
pinDocument: (collection: string, path: string, pinned: boolean) => pinDocumentFn(db, collection, path, pinned),
|
|
1079
1164
|
snoozeDocument: (collection: string, path: string, until: string | null) => snoozeDocumentFn(db, collection, path, until),
|
|
1080
1165
|
|
|
1166
|
+
// Embed state tracking
|
|
1167
|
+
markEmbedSynced: (hash: string) => {
  // Set embed_state to 'synced' on every active document row with this content hash.
  const markSynced = db.prepare(
    `UPDATE documents SET embed_state = 'synced' WHERE hash = ? AND active = 1`
  );
  markSynced.run(hash);
},
|
|
1170
|
+
markEmbedFailed: (hash: string, error: string) => {
  // Record the failure on active rows with this hash: store the error text and
  // bump the attempt counter (a NULL counter is treated as 0 before incrementing).
  const markFailed = db.prepare(
    `UPDATE documents SET embed_state = 'failed', embed_error = ?, embed_attempts = COALESCE(embed_attempts, 0) + 1 WHERE hash = ? AND active = 1`
  );
  markFailed.run(error, hash);
},
|
|
1173
|
+
getEmbedStats: () => {
  // Count active documents per embed_state; a NULL state is counted as pending.
  const counts = db.prepare(`
    SELECT
      SUM(CASE WHEN embed_state = 'pending' OR embed_state IS NULL THEN 1 ELSE 0 END) as pending,
      SUM(CASE WHEN embed_state = 'synced' THEN 1 ELSE 0 END) as synced,
      SUM(CASE WHEN embed_state = 'failed' THEN 1 ELSE 0 END) as failed
    FROM documents WHERE active = 1
  `).get() as { pending: number; synced: number; failed: number };

  // Falsy sums (e.g. NULL when no active rows matched) are reported as 0.
  const { pending, synced, failed } = counts;
  return {
    pending: pending || 0,
    synced: synced || 0,
    failed: failed || 0,
  };
},
|
|
1183
|
+
|
|
1081
1184
|
// Beads integration
|
|
1082
1185
|
syncBeadsIssues: (projectDir: string) => syncBeadsIssues(db, projectDir),
|
|
1083
1186
|
detectBeadsProject,
|
|
@@ -1189,6 +1292,165 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
|
|
|
1189
1292
|
},
|
|
1190
1293
|
|
|
1191
1294
|
// Co-activation tracking
|
|
1295
|
+
// Recall tracking: batch insert surfacing events
|
|
1296
|
+
insertRecallEvents: (events: { docId: number; queryHash: string; searchScore: number; sessionId: string; usageId?: number; turnIndex?: number; wasReferenced?: boolean }[]) => {
  const total = events.length;
  if (total === 0) return 0;

  // Every row in the batch shares one surfaced_at timestamp.
  const surfacedAt = new Date().toISOString();
  const insert = db.prepare(`
    INSERT INTO recall_events (doc_id, query_hash, search_score, session_id, usage_id, turn_index, surfaced_at, was_referenced)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `);

  // Single transaction for the whole batch.
  const insertAll = db.transaction(() => {
    events.forEach((event) => {
      insert.run(
        event.docId,
        event.queryHash,
        event.searchScore,
        event.sessionId,
        event.usageId ?? null, // missing usage link is stored as NULL
        event.turnIndex ?? 0,
        surfacedAt,
        event.wasReferenced ? 1 : 0
      );
    });
  });
  insertAll();

  return total;
},
|
|
1311
|
+
|
|
1312
|
+
// Recall tracking: recompute derived stats from events
|
|
1313
|
+
// Uses SQL GROUP BY for aggregation (O(1) queries), then JS for diversity/spacing formulas
|
|
1314
|
+
recomputeRecallStats: () => {
  type AggregateRow = {
    doc_id: number; recall_count: number; unique_queries: number; recall_days: number;
    total_score: number; max_score: number; negative_count: number;
    first_recalled_at: string; last_recalled_at: string; day_list: string;
  };

  // One grouped query produces all per-document aggregates; day_list carries the
  // distinct recall dates (comma-separated) needed by the spacing formula below.
  const perDoc = db.prepare(`
    SELECT
      doc_id,
      COUNT(*) AS recall_count,
      COUNT(DISTINCT query_hash) AS unique_queries,
      COUNT(DISTINCT date(surfaced_at, 'utc')) AS recall_days,
      SUM(search_score) AS total_score,
      MAX(search_score) AS max_score,
      SUM(CASE WHEN was_referenced = 0 THEN 1 ELSE 0 END) AS negative_count,
      MIN(surfaced_at) AS first_recalled_at,
      MAX(surfaced_at) AS last_recalled_at,
      GROUP_CONCAT(DISTINCT date(surfaced_at, 'utc')) AS day_list
    FROM recall_events
    GROUP BY doc_id
  `).all() as AggregateRow[];

  if (perDoc.length === 0) return 0;

  const MS_PER_DAY = 24 * 60 * 60 * 1000;

  // Spacing: 0.2 for a single recall day; for several days, a 0.55/0.45 blend of
  // a log-scaled day count (relative to log1p(4)) and the first-to-last span
  // (relative to 7 days), each term and the result capped at 1. Otherwise 0.
  const spacingFor = (recallDays: number, dayList: string): number => {
    if (recallDays === 1) return 0.2;
    if (!(recallDays > 1) || !dayList) return 0;

    const sortedDays = dayList.split(",").sort();
    const countTerm = Math.min(1, Math.log1p(sortedDays.length - 1) / Math.log1p(4));
    const earliest = new Date(sortedDays[0]! + "T00:00:00Z").getTime();
    const latest = new Date(sortedDays[sortedDays.length - 1]! + "T00:00:00Z").getTime();
    const spanTerm = Math.min(1, Math.max(0, (latest - earliest) / MS_PER_DAY) / 7);
    return Math.min(1, 0.55 * countTerm + 0.45 * spanTerm);
  };

  const writeStats = db.prepare(`
    INSERT INTO recall_stats (doc_id, recall_count, unique_queries, recall_days, total_score, max_score,
      first_recalled_at, last_recalled_at, diversity_score, spacing_score, negative_count, updated_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(doc_id) DO UPDATE SET
      recall_count = excluded.recall_count,
      unique_queries = excluded.unique_queries,
      recall_days = excluded.recall_days,
      total_score = excluded.total_score,
      max_score = excluded.max_score,
      first_recalled_at = excluded.first_recalled_at,
      last_recalled_at = excluded.last_recalled_at,
      diversity_score = excluded.diversity_score,
      spacing_score = excluded.spacing_score,
      negative_count = excluded.negative_count,
      updated_at = excluded.updated_at
  `);

  const updatedAt = new Date().toISOString();
  const applyAll = db.transaction(() => {
    perDoc.forEach((agg) => {
      // Diversity: the larger of unique queries / recall days, scaled by 5 and capped at 1.
      const diversity = Math.min(1, Math.max(agg.unique_queries, agg.recall_days) / 5);
      const spacing = spacingFor(agg.recall_days, agg.day_list);

      writeStats.run(
        agg.doc_id, agg.recall_count, agg.unique_queries, agg.recall_days,
        agg.total_score, agg.max_score,
        agg.first_recalled_at, agg.last_recalled_at,
        diversity, spacing, agg.negative_count, updatedAt
      );
    });
  });
  applyAll();

  // Number of documents whose stats row was written.
  return perDoc.length;
},
|
|
1386
|
+
|
|
1387
|
+
getRecallStats: (docId: number) => {
  // Look up the derived stats row for one document; null when none has been computed.
  const found = db.prepare(`SELECT * FROM recall_stats WHERE doc_id = ?`).get(docId) as any;
  if (!found) return null;

  // Translate snake_case columns into the camelCase RecallStatsRow shape.
  const stats = {
    docId: found.doc_id,
    recallCount: found.recall_count,
    uniqueQueries: found.unique_queries,
    recallDays: found.recall_days,
    totalScore: found.total_score,
    maxScore: found.max_score,
    firstRecalledAt: found.first_recalled_at,
    lastRecalledAt: found.last_recalled_at,
    diversityScore: found.diversity_score,
    spacingScore: found.spacing_score,
    negativeCount: found.negative_count,
    updatedAt: found.updated_at,
  } as RecallStatsRow;
  return stats;
},
|
|
1405
|
+
|
|
1406
|
+
getRecallStatsAll: (minRecallCount: number = 1) => {
  // Stats for active documents with at least `minRecallCount` recalls, most-recalled
  // first, joined with the document's collection, path and title.
  const listStats = db.prepare(`
    SELECT rs.*, d.collection, d.path, d.title
    FROM recall_stats rs
    JOIN documents d ON rs.doc_id = d.id
    WHERE rs.recall_count >= ? AND d.active = 1
    ORDER BY rs.recall_count DESC
  `);
  const rawRows = listStats.all(minRecallCount) as any[];

  // Translate snake_case columns into the camelCase RecallStatsRow shape.
  const result: RecallStatsRow[] = [];
  for (const raw of rawRows) {
    result.push({
      docId: raw.doc_id,
      recallCount: raw.recall_count,
      uniqueQueries: raw.unique_queries,
      recallDays: raw.recall_days,
      totalScore: raw.total_score,
      maxScore: raw.max_score,
      firstRecalledAt: raw.first_recalled_at,
      lastRecalledAt: raw.last_recalled_at,
      diversityScore: raw.diversity_score,
      spacingScore: raw.spacing_score,
      negativeCount: raw.negative_count,
      updatedAt: raw.updated_at,
      collection: raw.collection,
      path: raw.path,
      title: raw.title,
    } as RecallStatsRow);
  }
  return result;
},
|
|
1431
|
+
|
|
1432
|
+
markRecallEventsReferenced: (sessionId: string, docIds: number[]) => {
  if (docIds.length === 0) return;
  // Mark only the LATEST event per doc in this session, not all events.
  // This preserves negative signals: if a doc was surfaced across 5 prompts
  // but only cited once, 4 events stay was_referenced=0 (genuine negatives).
  //
  // surfaced_at alone does not identify "latest": rows written in one batch
  // share a single timestamp, so ties are possible. The autoincrementing id
  // breaks ties in favour of the most recently inserted row, which keeps the
  // choice deterministic.
  const stmt = db.prepare(`
    UPDATE recall_events SET was_referenced = 1
    WHERE id = (
      SELECT id FROM recall_events
      WHERE session_id = ? AND doc_id = ?
      ORDER BY surfaced_at DESC, id DESC
      LIMIT 1
    )
  `);
  // All updates are applied in one transaction.
  const tx = db.transaction(() => {
    for (const docId of docIds) {
      stmt.run(sessionId, docId);
    }
  });
  tx();
},
|
|
1453
|
+
|
|
1192
1454
|
recordCoActivation: (paths: string[]) => {
|
|
1193
1455
|
if (paths.length < 2) return;
|
|
1194
1456
|
const now = new Date().toISOString();
|
|
@@ -1424,6 +1686,7 @@ export type UsageRecord = {
|
|
|
1424
1686
|
injectedPaths: string[];
|
|
1425
1687
|
estimatedTokens: number;
|
|
1426
1688
|
wasReferenced: number;
|
|
1689
|
+
turnIndex?: number;
|
|
1427
1690
|
};
|
|
1428
1691
|
|
|
1429
1692
|
export type UsageRow = {
|
|
@@ -1434,6 +1697,26 @@ export type UsageRow = {
|
|
|
1434
1697
|
injectedPaths: string;
|
|
1435
1698
|
estimatedTokens: number;
|
|
1436
1699
|
wasReferenced: number;
|
|
1700
|
+
turnIndex: number;
|
|
1701
|
+
};
|
|
1702
|
+
|
|
1703
|
+
/** Derived per-document recall summary (one row of `recall_stats`, camelCased). */
export type RecallStatsRow = {
  /** Document the stats belong to. */
  docId: number;
  /** Total number of recall events for the document. */
  recallCount: number;
  /** Number of distinct query hashes that surfaced the document. */
  uniqueQueries: number;
  /** Number of distinct calendar days on which the document was surfaced. */
  recallDays: number;
  /** Sum of search scores across all recall events. */
  totalScore: number;
  /** Highest search score across all recall events. */
  maxScore: number;
  /** Earliest surfaced_at among the document's events. */
  firstRecalledAt: string | null;
  /** Latest surfaced_at among the document's events. */
  lastRecalledAt: string | null;
  /** max(uniqueQueries, recallDays) / 5, capped at 1. */
  diversityScore: number;
  /** Multi-day spread score, capped at 1. */
  spacingScore: number;
  /** Count of events with was_referenced = 0 (surfaced but not referenced). */
  negativeCount: number;
  /** When the stats row was last written. */
  updatedAt: string;
  // Joined from documents (only populated by getRecallStatsAll)
  collection?: string;
  path?: string;
  title?: string;
};
|
|
1438
1721
|
|
|
1439
1722
|
export type DocumentRow = {
|
|
@@ -2924,12 +3207,17 @@ export function getHashesForEmbedding(db: Database): { hash: string; body: strin
|
|
|
2924
3207
|
* Returns hashes that have no content_vectors row with fragment_type set.
|
|
2925
3208
|
*/
|
|
2926
3209
|
export function getHashesNeedingFragments(db: Database): { hash: string; body: string; path: string; title: string; collection: string }[] {
  // A hash qualifies when it has no fragment-typed vector row at all, OR when it
  // lacks the primary (seq=0) row. The seq=0 embedding is critical — surprisal
  // scoring, semantic graph, and health checks depend on it.
  // Only active documents with fewer than 3 recorded embed attempts are returned,
  // one row per content hash.
  const pendingFragments = db.prepare(`
    SELECT d.hash, c.doc as body, MIN(d.path) as path, MIN(d.title) as title, MIN(d.collection) as collection
    FROM documents d
    JOIN content c ON d.hash = c.hash
    LEFT JOIN content_vectors v ON d.hash = v.hash AND v.fragment_type IS NOT NULL
    LEFT JOIN content_vectors v0 ON d.hash = v0.hash AND v0.seq = 0
    WHERE d.active = 1
      AND (v.hash IS NULL OR v0.hash IS NULL)
      AND COALESCE(d.embed_attempts, 0) < 3
    GROUP BY d.hash
  `);
  const rows = pendingFragments.all();
  return rows as { hash: string; body: string; path: string; title: string; collection: string }[];
}
|
|
@@ -2941,6 +3229,8 @@ export function getHashesNeedingFragments(db: Database): { hash: string; body: s
|
|
|
2941
3229
|
export function clearAllEmbeddings(db: Database): void {
  // Remove every stored vector row, then drop the vector table itself.
  const wipeStatements = [`DELETE FROM content_vectors`, `DROP TABLE IF EXISTS vectors_vec`];
  for (const sql of wipeStatements) {
    db.exec(sql);
  }

  // Reset embed state so failed docs get retried after force re-embed
  try {
    db.exec(`UPDATE documents SET embed_state = 'pending', embed_error = NULL, embed_attempts = 0 WHERE active = 1`);
  } catch {
    /* column may not exist yet */
  }

  // Forget the cached entry for this connection.
  vecTableDimsCache.delete(db);
}
|
|
2946
3236
|
|
|
@@ -3613,19 +3903,22 @@ function getRecentSessionsFn(db: Database, limit: number): SessionRecord[] {
|
|
|
3613
3903
|
// SAME: Context Usage Tracking
|
|
3614
3904
|
// =============================================================================
|
|
3615
3905
|
|
|
3616
|
-
function insertUsageFn(db: Database, usage: UsageRecord): number {
  const { sessionId, timestamp, hookName, injectedPaths, estimatedTokens, wasReferenced, turnIndex } = usage;

  // injected_paths is stored as a JSON array string; a missing turnIndex is stored as 0.
  const insert = db.prepare(`
    INSERT INTO context_usage (session_id, timestamp, hook_name, injected_paths, estimated_tokens, was_referenced, turn_index)
    VALUES (?, ?, ?, ?, ?, ?, ?)
  `);
  insert.run(sessionId, timestamp, hookName, JSON.stringify(injectedPaths), estimatedTokens, wasReferenced, turnIndex ?? 0);

  // Return the rowid of the just-inserted row for recall event linkage
  const inserted = db.prepare("SELECT last_insert_rowid() as id").get() as { id: number };
  return inserted.id;
}
|
|
3622
3915
|
|
|
3623
3916
|
function getUsageForSessionFn(db: Database, sessionId: string): UsageRow[] {
  // Usage rows for one session, columns aliased to the camelCase UsageRow fields,
  // ordered by turn and then by timestamp within a turn.
  const selectUsage = db.prepare(`
    SELECT id, session_id AS sessionId, timestamp, hook_name AS hookName,
           injected_paths AS injectedPaths, estimated_tokens AS estimatedTokens,
           was_referenced AS wasReferenced, turn_index AS turnIndex
    FROM context_usage WHERE session_id = ? ORDER BY turn_index, timestamp
  `);
  const rows = selectUsage.all(sessionId);
  return rows as UsageRow[];
}
|
|
3631
3924
|
|