clawmem 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -252,7 +252,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
252
252
  | `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
253
253
  | `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, extracts SPO triples from decision/preference/milestone/problem facts. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
254
254
  | `handoff-generator` | Stop | — | LLM summarizes session → `_clawmem/agent/handoffs/` |
255
- | `feedback-loop` | Stop | — | tracks referenced notes → boosts confidence, records usage relations + co-activations between co-referenced docs, tracks utility signals (surfaced vs referenced ratio for lifecycle automation) |
255
+ | `feedback-loop` | Stop | — | tracks referenced notes → boosts confidence, records usage relations + co-activations between co-referenced docs, tracks utility signals (surfaced vs referenced ratio for lifecycle automation), per-turn recall attribution (marks which surfaced docs were cited in which turn) |
256
256
 
257
257
  **Default behavior:** Read injected `<vault-context>` first. If sufficient, answer immediately.
258
258
 
package/CLAUDE.md CHANGED
@@ -252,7 +252,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
252
252
  | `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
253
253
  | `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, extracts SPO triples from decision/preference/milestone/problem facts. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
254
254
  | `handoff-generator` | Stop | — | LLM summarizes session → `_clawmem/agent/handoffs/` |
255
- | `feedback-loop` | Stop | — | tracks referenced notes → boosts confidence, records usage relations + co-activations between co-referenced docs, tracks utility signals (surfaced vs referenced ratio for lifecycle automation) |
255
+ | `feedback-loop` | Stop | — | tracks referenced notes → boosts confidence, records usage relations + co-activations between co-referenced docs, tracks utility signals (surfaced vs referenced ratio for lifecycle automation), per-turn recall attribution (marks which surfaced docs were cited in which turn) |
256
256
 
257
257
  **Default behavior:** Read injected `<vault-context>` first. If sufficient, answer immediately.
258
258
 
package/README.md CHANGED
@@ -85,7 +85,7 @@ Runs fully local with no API keys and no cloud services. Integrates via Claude C
85
85
  **Optional integrations:**
86
86
 
87
87
  - [Claude Code](https://docs.anthropic.com/en/docs/claude-code) — for hooks + MCP integration
88
- - [OpenClaw](https://github.com/openclawai/openclaw) — for ContextEngine plugin integration
88
+ - [OpenClaw](https://github.com/openclaw/openclaw) — for ContextEngine plugin integration
89
89
  - [Hermes Agent](https://github.com/NousResearch/hermes-agent) — for MemoryProvider plugin integration
90
90
  - [bd CLI](https://github.com/dolthub/dolt) v0.58.0+ — for Beads issue tracker sync (only if using Beads)
91
91
 
@@ -885,6 +885,17 @@ Documents are split into semantic fragments (sections, lists, code blocks, front
885
885
 
886
886
  Uses the LLM server (shared with query expansion and intent classification) to extract structured observations from session transcripts. Observation types: `decision`, `bugfix`, `feature`, `refactor`, `discovery`, `change`, `preference`, `milestone`, `problem`. Each observation includes title, facts, narrative, concepts, and files read/modified. Preferences, milestones, and problems get first-class content_type treatment with dedicated confidence baselines and half-lives instead of being flattened to generic "observation". Falls back to regex patterns if the model is unavailable.
887
887
 
888
+ ### Recall Tracking
889
+
890
+ Empirical tracking of which documents are surfaced by retrieval, which queries surfaced them, and whether the assistant actually cited them. Provides signals beyond raw search relevance for lifecycle decisions:
891
+
892
+ - **Per-query diversity**: docs surfaced by multiple distinct queries have proven cross-domain relevance
893
+ - **Multi-day spacing**: docs surfaced across separate calendar days (spaced frequency) are more valuable than binge recalls in one session
894
+ - **Negative signals**: docs surfaced frequently but rarely referenced are noise candidates for snooze
895
+ - **Per-turn attribution**: feedback-loop segments the transcript into turns and attributes references to specific context-surfacing invocations, not the session globally
896
+
897
+ Data feeds `lifecycle_status` (pin/snooze candidate reports) and `lifecycle_sweep` (recall-based recommendations). Adapted from [OpenClaw](https://github.com/openclaw/openclaw) dreaming promotion patterns.
898
+
888
899
  ### User Profile
889
900
 
890
901
  Two-tier auto-curated profile extracted from your decisions and hub documents:
@@ -1124,6 +1135,7 @@ Built on the shoulders of:
1124
1135
  - [MAGMA](https://arxiv.org/abs/2501.13956) — multi-graph memory agent
1125
1136
  - [MemPalace](https://github.com/milla-jovovich/mempalace) — conversation import patterns, broadened observation taxonomy (preference/milestone/problem), session-bootstrap synthesis
1126
1137
  - [memory-lancedb-pro](https://github.com/CortexReach/memory-lancedb-pro) — retrieval gate, length normalization, MMR diversity, access reinforcement algorithms
1138
+ - [OpenClaw](https://github.com/openclaw/openclaw) — recall tracking patterns (per-query diversity, multi-day spacing, negative signal tracking, promotion scoring) extracted from the dreaming memory consolidation system
1127
1139
  - [OpenViking](https://github.com/volcengine/OpenViking) — query decomposition patterns, collection-scoped retrieval, transaction-safe indexing
1128
1140
  - [QMD](https://github.com/tobi/qmd) — search backend (BM25 + vectors + RRF + reranking)
1129
1141
  - [SAME](https://github.com/sgx-labs/statelessagent) — agent memory concepts (recency decay, confidence scoring, session tracking)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmem",
3
- "version": "0.6.0",
3
+ "version": "0.7.0",
4
4
  "description": "On-device context engine and memory for AI agents. Claude Code and OpenClaw. Hooks + MCP server + hybrid RAG search.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -124,6 +124,17 @@ async function tick(store: Store, llm: LlamaCpp): Promise<void> {
124
124
  if (tickCount % 3 === 0) {
125
125
  await generateDeductiveObservations(store, llm);
126
126
  }
127
+
128
+ // Phase 4: Recall stats recomputation (every tick — lightweight SQL aggregation)
129
+ try {
130
+ const updated = store.recomputeRecallStats();
131
+ if (updated > 0) {
132
+ console.log(`[consolidation] Phase 4: recomputed recall_stats for ${updated} docs`);
133
+ }
134
+ } catch (err) {
135
+ // Non-critical — recall stats are informational, not retrieval-blocking
136
+ console.error("[consolidation] Phase 4 recall stats failed:", err);
137
+ }
127
138
  } catch (err) {
128
139
  console.error("[consolidation] Tick failed:", err);
129
140
  } finally {
@@ -30,6 +30,7 @@ import { enrichResults } from "../search-utils.ts";
30
30
  import { sanitizeSnippet } from "../promptguard.ts";
31
31
  import { shouldSkipRetrieval, isRetrievedNoise } from "../retrieval-gate.ts";
32
32
  import { MAX_QUERY_LENGTH } from "../limits.ts";
33
+ import { writeRecallEvents, hashQuery } from "../recall-buffer.ts";
33
34
 
34
35
  // =============================================================================
35
36
  // Config
@@ -69,18 +70,44 @@ export async function contextSurfacing(
69
70
  input: HookInput
70
71
  ): Promise<HookOutput> {
71
72
  let prompt = input.prompt?.trim();
72
- if (!prompt || prompt.length < MIN_PROMPT_LENGTH) return makeEmptyOutput("context-surfacing");
73
+
74
+ // Compute turn_index FIRST, before any early returns.
75
+ // Every transcript-visible early return must log an empty context_usage row
76
+ // to keep turn_index aligned with transcript turns for per-turn attribution.
77
+ if (input.sessionId) {
78
+ try {
79
+ let turnIndex = 0;
80
+ try {
81
+ const existing = store.db.prepare(
82
+ `SELECT COUNT(*) as cnt FROM context_usage WHERE session_id = ? AND hook_name = 'context-surfacing'`
83
+ ).get(input.sessionId) as { cnt: number };
84
+ turnIndex = existing.cnt;
85
+ } catch { /* fallback to 0 */ }
86
+ (input as any)._turnIndex = turnIndex;
87
+ } catch { /* non-fatal */ }
88
+ }
89
+
90
+ if (!prompt || prompt.length < MIN_PROMPT_LENGTH) {
91
+ logEmptyTurn(store, input);
92
+ return makeEmptyOutput("context-surfacing");
93
+ }
73
94
 
74
95
  // Bound query length to prevent DoS on search indices
75
96
  if (prompt.length > MAX_QUERY_LENGTH) prompt = prompt.slice(0, MAX_QUERY_LENGTH);
76
97
 
77
- // Skip slash commands
78
- if (prompt.startsWith("/")) return makeEmptyOutput("context-surfacing");
98
+ // Skip slash commands — log empty turn for alignment
99
+ if (prompt.startsWith("/")) {
100
+ logEmptyTurn(store, input);
101
+ return makeEmptyOutput("context-surfacing");
102
+ }
79
103
 
80
104
  // Adaptive retrieval gate: skip greetings, shell commands, affirmations, etc.
81
- if (shouldSkipRetrieval(prompt)) return makeEmptyOutput("context-surfacing");
105
+ if (shouldSkipRetrieval(prompt)) {
106
+ logEmptyTurn(store, input);
107
+ return makeEmptyOutput("context-surfacing");
108
+ }
82
109
 
83
- // Heartbeat / duplicate suppression (IO4)
110
+ // Heartbeat / duplicate suppression (IO4) — NOT transcript-visible user turns
84
111
  if (isHeartbeatPrompt(prompt)) return makeEmptyOutput("context-surfacing");
85
112
  if (wasPromptSeenRecently(store, "context-surfacing", prompt)) {
86
113
  return makeEmptyOutput("context-surfacing");
@@ -157,7 +184,7 @@ export async function contextSurfacing(
157
184
  }
158
185
  }
159
186
 
160
- if (results.length === 0) return makeEmptyOutput("context-surfacing");
187
+ if (results.length === 0) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
161
188
 
162
189
  // Budget-aware deep escalation (deep profile only):
163
190
  // If the fast path finished quickly and found results, spend remaining time budget
@@ -215,7 +242,7 @@ export async function contextSurfacing(
215
242
  !FILTERED_PATHS.some(p => r.displayPath.includes(p))
216
243
  );
217
244
 
218
- if (results.length === 0) return makeEmptyOutput("context-surfacing");
245
+ if (results.length === 0) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
219
246
 
220
247
  // Filter out snoozed documents
221
248
  const now = new Date();
@@ -231,7 +258,7 @@ export async function contextSurfacing(
231
258
  return true;
232
259
  });
233
260
 
234
- if (results.length === 0) return makeEmptyOutput("context-surfacing");
261
+ if (results.length === 0) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
235
262
 
236
263
  // Deduplicate by filepath (keep best score per path)
237
264
  const deduped = new Map<string, SearchResult>();
@@ -273,7 +300,7 @@ export async function contextSurfacing(
273
300
  : 0;
274
301
 
275
302
  // Activation floor: if even the best result is too weak, bail entirely
276
- if (bestScore < profile.activationFloor) return makeEmptyOutput("context-surfacing");
303
+ if (bestScore < profile.activationFloor) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
277
304
 
278
305
  const adaptiveMin = Math.max(bestScore * profile.minScoreRatio, profile.absoluteFloor);
279
306
  scored = allScored.filter(r => r.compositeScore >= adaptiveMin);
@@ -282,7 +309,7 @@ export async function contextSurfacing(
282
309
  scored = allScored.filter(r => r.compositeScore >= minScore);
283
310
  }
284
311
 
285
- if (scored.length === 0) return makeEmptyOutput("context-surfacing");
312
+ if (scored.length === 0) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
286
313
 
287
314
  // Spreading activation (E11): boost results co-activated with top HOT results
288
315
  if (scored.length > 3) {
@@ -325,11 +352,62 @@ export async function contextSurfacing(
325
352
  // Build context within token budget (profile-driven)
326
353
  const { context, paths, tokens } = buildContext(scored, prompt, tokenBudget);
327
354
 
328
- if (!context) return makeEmptyOutput("context-surfacing");
355
+ if (!context) {
356
+ logEmptyTurn(store, input);
357
+ return makeEmptyOutput("context-surfacing");
358
+ }
329
359
 
330
- // Log the injection
360
+ // Use pre-computed turn_index from top of function
331
361
  if (input.sessionId) {
332
- logInjection(store, input.sessionId, "context-surfacing", paths, tokens);
362
+ const turnIndex = (input as any)._turnIndex ?? 0;
363
+
364
+ // Log the injection — returns usage_id for recall event linkage
365
+ const usageId = logInjection(store, input.sessionId, "context-surfacing", paths, tokens, turnIndex);
366
+
367
+ // Record recall events ONLY for docs that made it into the injected context
368
+ // (post-budget). Docs trimmed by token budget were never seen by the model.
369
+ // Each event links to its context_usage row via usage_id + turn_index.
370
+ // Multi-vault: route docs to origin vault's store. Mirror context_usage there too.
371
+ try {
372
+ const qHash = hashQuery(prompt);
373
+ const injectedSet = new Set(paths);
374
+ const injectedScored = scored.filter(r => injectedSet.has(r.displayPath));
375
+
376
+ // Group by vault origin (undefined = general vault)
377
+ const byVault = new Map<string | undefined, typeof injectedScored>();
378
+ for (const r of injectedScored) {
379
+ const vault = (r as any)._fromVault as string | undefined;
380
+ let group = byVault.get(vault);
381
+ if (!group) { group = []; byVault.set(vault, group); }
382
+ group.push(r);
383
+ }
384
+
385
+ const validUsageId = usageId > 0 ? usageId : undefined;
386
+ for (const [vault, docs] of byVault) {
387
+ const mappedDocs = docs.map(r => ({ displayPath: r.displayPath, searchScore: r.compositeScore }));
388
+ if (!vault) {
389
+ writeRecallEvents(store, input.sessionId, qHash, mappedDocs, validUsageId, turnIndex);
390
+ } else {
391
+ try {
392
+ const vaultStore = resolveStore(vault);
393
+ // Mirror context_usage row into named vault for correct FK + attribution
394
+ const vaultPaths = docs.map(r => r.displayPath);
395
+ const vaultUsageId = vaultStore.insertUsage({
396
+ sessionId: input.sessionId,
397
+ timestamp: new Date().toISOString(),
398
+ hookName: "context-surfacing",
399
+ injectedPaths: vaultPaths,
400
+ estimatedTokens: 0,
401
+ wasReferenced: 0,
402
+ turnIndex,
403
+ });
404
+ writeRecallEvents(vaultStore, input.sessionId, qHash, mappedDocs, vaultUsageId > 0 ? vaultUsageId : undefined, turnIndex);
405
+ } catch { /* vault unavailable — skip */ }
406
+ }
407
+ }
408
+ } catch {
409
+ // Non-critical — don't block context surfacing on recall tracking errors
410
+ }
333
411
  }
334
412
 
335
413
  // Routing hint: detect query intent signals and prepend a tool routing directive
@@ -351,6 +429,19 @@ export async function contextSurfacing(
351
429
  // Helpers
352
430
  // =============================================================================
353
431
 
432
+ /**
433
+ * Record a context_usage row with no injected paths and 0 tokens for a turn that surfaced nothing.
434
+ * Keeps turn_index aligned with transcript turns so per-turn recall
435
+ * attribution doesn't drift when some prompts are gated. Does nothing when the input has no sessionId; the turn index is read from the `_turnIndex` property on the input (0 when absent).
436
+ */
437
+ function logEmptyTurn(store: Store, input: HookInput): void {
438
+ if (!input.sessionId) return;
439
+ try {
440
+ const turnIndex = (input as any)._turnIndex ?? 0;
441
+ logInjection(store, input.sessionId, "context-surfacing", [], 0, turnIndex);
442
+ } catch { /* non-fatal: a logging failure must not break the hook */ }
443
+ }
444
+
354
445
  /**
355
446
  * Detect causal/temporal/discovery signals in the prompt and return a
356
447
  * routing hint that makes the correct tool choice salient at the moment
@@ -10,12 +10,18 @@
10
10
  */
11
11
 
12
12
  import type { Store } from "../store.ts";
13
+ import { resolveStore } from "../store.ts";
14
+ import { listVaults } from "../config.ts";
13
15
  import type { HookInput, HookOutput } from "../hooks.ts";
14
16
  import {
15
17
  makeEmptyOutput,
16
18
  readTranscript,
17
19
  validateTranscriptPath,
18
20
  } from "../hooks.ts";
21
+ import {
22
+ segmentTranscriptIntoTurns,
23
+ attributeRecallReferences,
24
+ } from "../recall-attribution.ts";
19
25
 
20
26
  // =============================================================================
21
27
  // Handler
@@ -129,6 +135,33 @@ export async function feedbackLoop(
129
135
  // Non-critical — don't block feedback loop on utility tracking errors
130
136
  }
131
137
 
138
+ // Recall tracking: per-turn attribution using transcript segmentation.
139
+ // Reads full transcript, segments into turns, zips with context_usage rows,
140
+ // checks references per-turn rather than session-globally.
141
+ try {
142
+ const allMessages = readTranscript(transcriptPath, 500);
143
+ const turns = segmentTranscriptIntoTurns(allMessages);
144
+ const usages = store.getUsageForSession(sessionId);
145
+
146
+ // General vault attribution
147
+ attributeRecallReferences(store, sessionId, usages, turns);
148
+
149
+ // Cross-vault: attribute recall events in any configured named vaults.
150
+ // Each vault has its own context_usage rows (mirrored during context-surfacing).
151
+ const vaultNames = listVaults();
152
+ for (const vaultName of vaultNames) {
153
+ try {
154
+ const vaultStore = resolveStore(vaultName);
155
+ const vaultUsages = vaultStore.getUsageForSession(sessionId);
156
+ if (vaultUsages.length > 0) {
157
+ attributeRecallReferences(vaultStore, sessionId, vaultUsages, turns);
158
+ }
159
+ } catch { /* vault unavailable — skip */ }
160
+ }
161
+ } catch {
162
+ // Non-critical — don't block feedback loop on recall tracking errors
163
+ }
164
+
132
165
  // Silent return — feedback loop doesn't inject context
133
166
  return makeEmptyOutput("feedback-loop");
134
167
  }
@@ -195,6 +228,13 @@ function trackUtilitySignals(
195
228
  // Reference Detection
196
229
  // =============================================================================
197
230
 
231
+ // Recall attribution logic is in src/recall-attribution.ts
232
+ // (attributeRecallReferences, segmentTranscriptIntoTurns)
233
+
234
+ // =============================================================================
235
+ // Reference Detection
236
+ // =============================================================================
237
+
198
238
  function checkTitleReference(store: Store, path: string, text: string): boolean {
199
239
  try {
200
240
  const parts = path.split("/");
package/src/hooks.ts CHANGED
@@ -385,23 +385,28 @@ export function logInjection(
385
385
  sessionId: string,
386
386
  hookName: string,
387
387
  injectedPaths: string[],
388
- estimatedTokens: number
389
- ): void {
388
+ estimatedTokens: number,
389
+ turnIndex?: number
390
+ ): number {
390
391
  try {
391
- store.insertUsage({
392
+ const usageId = store.insertUsage({
392
393
  sessionId,
393
394
  timestamp: new Date().toISOString(),
394
395
  hookName,
395
396
  injectedPaths,
396
397
  estimatedTokens,
397
398
  wasReferenced: 0,
399
+ turnIndex,
398
400
  });
399
401
 
400
402
  // Record co-activation for all injected paths (E3)
401
403
  if (injectedPaths.length >= 2) {
402
404
  store.recordCoActivation(injectedPaths);
403
405
  }
406
+
407
+ return usageId;
404
408
  } catch {
405
409
  // Non-fatal: don't crash hook if usage logging fails
410
+ return -1;
406
411
  }
407
412
  }
package/src/mcp.ts CHANGED
@@ -2277,6 +2277,11 @@ This is the recommended entry point for ALL memory queries.`,
2277
2277
  const config = loadConfig();
2278
2278
  const policy = config.lifecycle;
2279
2279
 
2280
+ // Recall tracking summary
2281
+ const recallStats = store.getRecallStatsAll(1);
2282
+ const highDiversity = recallStats.filter(r => r.diversityScore >= 0.4 && r.spacingScore >= 0.5 && r.recallCount >= 3);
2283
+ const highNoise = recallStats.filter(r => r.recallCount >= 5 && r.negativeCount > r.recallCount * 0.8);
2284
+
2280
2285
  const lines = [
2281
2286
  `Active: ${stats.active}`,
2282
2287
  `Archived (auto): ${stats.archived}`,
@@ -2286,6 +2291,10 @@ This is the recommended entry point for ALL memory queries.`,
2286
2291
  `Never accessed: ${stats.neverAccessed}`,
2287
2292
  `Oldest access: ${stats.oldestAccess?.slice(0, 10) || "n/a"}`,
2288
2293
  "",
2294
+ `Recall tracking: ${recallStats.length} docs tracked`,
2295
+ ` Pin candidates (high diversity+spacing): ${highDiversity.length}`,
2296
+ ` Snooze candidates (surfaced often, rarely referenced): ${highNoise.length}`,
2297
+ "",
2289
2298
  `Policy: ${policy ? `archive after ${policy.archive_after_days}d, purge after ${policy.purge_after_days ?? "never"}, dry_run=${policy.dry_run}` : "none configured"}`,
2290
2299
  ];
2291
2300
 
@@ -2322,7 +2331,29 @@ This is the recommended entry point for ALL memory queries.`,
2322
2331
  const lines = candidates.map(c =>
2323
2332
  `- ${c.collection}/${c.path} (${c.content_type}, modified ${c.modified_at.slice(0, 10)}, accessed ${c.last_accessed_at?.slice(0, 10) || "never"})`
2324
2333
  );
2325
- return { content: [{ type: "text", text: `Would archive ${candidates.length} document(s):\n${lines.join("\n") || "(none)"}` }] };
2334
+
2335
+ // Recall-based recommendations
2336
+ const recallStats = store.getRecallStatsAll(3);
2337
+ const pinCandidates = recallStats.filter(r => r.diversityScore >= 0.4 && r.spacingScore >= 0.5 && r.recallCount >= 3);
2338
+ const snoozeCandidates = recallStats.filter(r => r.recallCount >= 5 && r.negativeCount > r.recallCount * 0.8);
2339
+
2340
+ const recallLines: string[] = [];
2341
+ if (pinCandidates.length > 0) {
2342
+ recallLines.push("", "Pin candidates (high diversity, multi-day spread, recall≥3):");
2343
+ for (const r of pinCandidates.slice(0, 5)) {
2344
+ const label = r.collection && r.path ? `${r.collection}/${r.path}` : `doc#${r.docId}`;
2345
+ recallLines.push(` - ${label} (recalls=${r.recallCount}, queries=${r.uniqueQueries}, days=${r.recallDays}, diversity=${r.diversityScore.toFixed(2)}, spacing=${r.spacingScore.toFixed(2)})`);
2346
+ }
2347
+ }
2348
+ if (snoozeCandidates.length > 0) {
2349
+ recallLines.push("", "Snooze candidates (surfaced often, rarely referenced):");
2350
+ for (const r of snoozeCandidates.slice(0, 5)) {
2351
+ const label = r.collection && r.path ? `${r.collection}/${r.path}` : `doc#${r.docId}`;
2352
+ recallLines.push(` - ${label} (recalls=${r.recallCount}, referenced=${r.recallCount - r.negativeCount}, noise_ratio=${(r.negativeCount / r.recallCount * 100).toFixed(0)}%)`);
2353
+ }
2354
+ }
2355
+
2356
+ return { content: [{ type: "text", text: `Would archive ${candidates.length} document(s):\n${lines.join("\n") || "(none)"}${recallLines.join("\n")}` }] };
2326
2357
  }
2327
2358
 
2328
2359
  const archived = store.archiveDocuments(candidates.map(c => c.id));
@@ -0,0 +1,182 @@
1
+ /**
2
+ * Recall Attribution — per-turn reference detection for recall tracking.
3
+ *
4
+ * Extracted into a standalone module for testability (per GPT 5.4 High review turn 4).
5
+ *
6
+ * Architecture:
7
+ * 1. Segment the transcript into ordered turns (user → assistant pairs)
8
+ * 2. Zip context_usage rows (by turn_index) with transcript turns (by position)
9
+ * 3. For each pair, detect references in that turn's assistant text only
10
+ * 4. Mark recall_events linked to the usage rows whose turn actually cited the doc
11
+ */
12
+
13
+ import type { Store, UsageRow } from "./store.ts";
14
+
15
+ // =============================================================================
16
+ // Types
17
+ // =============================================================================
18
+
19
/** One conversational turn: a user prompt plus every assistant reply that followed it. */
export type TranscriptTurn = {
  /** Content of the user message that opened the turn ("" for a leading orphan reply). */
  userText: string;
  /** All assistant messages of the turn, joined with "\n" ("" when there was no reply). */
  assistantText: string;
};

// =============================================================================
// Transcript Segmentation
// =============================================================================

/**
 * Segment a flat message array into ordered turns.
 *
 * A turn starts on each "user" message and includes all following "assistant"
 * messages until the next "user" message. Messages with any other role
 * (system, tool, ...) are ignored.
 *
 * Every "user" message yields exactly one turn, even when its content is empty
 * and it received no reply. Callers match turns to per-prompt records by array
 * position, so silently dropping an empty prompt would shift every later turn.
 *
 * Assistant text that appears before the first "user" message is kept as a
 * leading turn with an empty `userText`.
 *
 * @param messages - Ordered array of {role, content} from transcript JSONL
 * @returns Ordered array of turns
 */
export function segmentTranscriptIntoTurns(
  messages: { role: string; content: string }[]
): TranscriptTurn[] {
  const turns: TranscriptTurn[] = [];
  let currentUser = "";
  let currentAssistant = "";
  // True once a user message has opened the turn being accumulated.
  let sawUser = false;

  // Emit the pending turn. Without a user message there is only something to
  // emit if orphan assistant text was collected.
  const flush = (): void => {
    if (sawUser || currentAssistant) {
      turns.push({ userText: currentUser, assistantText: currentAssistant });
    }
  };

  for (const msg of messages) {
    if (msg.role === "user") {
      // A user message closes the previous turn and opens a new one.
      flush();
      currentUser = msg.content;
      currentAssistant = "";
      sawUser = true;
    } else if (msg.role === "assistant") {
      currentAssistant += (currentAssistant ? "\n" : "") + msg.content;
    }
    // Ignore system/tool messages for attribution purposes
  }

  // Flush final turn
  flush();

  return turns;
}
64
+
65
+ // =============================================================================
66
+ // Per-Turn Reference Detection
67
+ // =============================================================================
68
+
69
+ /**
70
+ * Decide whether `text` mentions the document identified by displayPath (collection/path). Returns false when either string is empty.
71
+ * Tried in order: exact full-path substring (case-sensitive); filename with a trailing .md/.txt removed and longer than 3 chars (case-insensitive); title of the document returned by store.findActiveDocument, at least 5 chars (case-insensitive).
72
+ */
73
+ function isPathReferenced(
74
+ store: Store,
75
+ displayPath: string,
76
+ text: string
77
+ ): boolean {
78
+ if (!text || !displayPath) return false;
79
+
80
+ // Full path match (exact, case-sensitive substring)
81
+ if (text.includes(displayPath)) return true;
82
+
83
+ // Filename match: last path segment with only a trailing .md/.txt stripped, min 4 chars, case-insensitive
84
+ const filename = displayPath.split("/").pop()?.replace(/\.(md|txt)$/i, "");
85
+ if (filename && filename.length > 3 && text.toLowerCase().includes(filename.toLowerCase())) {
86
+ return true;
87
+ }
88
+
89
+ // Title match from DB: first path segment is the collection, the rest is the doc path; title needs 5+ chars, case-insensitive
90
+ const parts = displayPath.split("/");
91
+ if (parts.length >= 2) {
92
+ const collection = parts[0]!;
93
+ const docPath = parts.slice(1).join("/");
94
+ const doc = store.findActiveDocument(collection, docPath);
95
+ if (doc?.title && doc.title.length >= 5 && text.toLowerCase().includes(doc.title.toLowerCase())) {
96
+ return true;
97
+ }
98
+ }
99
+
100
+ return false;
101
+ }
102
+
103
+ // =============================================================================
104
+ // Attribution Core
105
+ // =============================================================================
106
+
107
/**
 * Attribute recall events to specific turns using per-turn reference detection.
 *
 * For each context-surfacing usage row, looks up the transcript turn at
 * `usage.turnIndex` and checks which of that row's injected docs were cited in
 * that turn's assistant text. Only recall_events belonging to a turn where the
 * doc was actually referenced are marked `was_referenced = 1`.
 *
 * Rows are skipped (never thrown on) when the turn is missing or has no
 * assistant text, or when `injectedPaths` is not valid JSON, is not an array,
 * or holds no string entries. One malformed row therefore cannot abort
 * attribution for the remaining turns.
 *
 * @param store - Store instance for doc resolution and event marking
 * @param sessionId - Session identifier
 * @param usages - context_usage rows for this session, ordered by turn_index
 * @param turns - Transcript turns, ordered by position
 */
export function attributeRecallReferences(
  store: Store,
  sessionId: string,
  usages: UsageRow[],
  turns: TranscriptTurn[]
): void {
  // Filter to context-surfacing usages only
  const surfacingUsages = usages.filter(u => u.hookName === "context-surfacing");

  for (const usage of surfacingUsages) {
    // Match usage to transcript turn by turn_index
    const turn = turns[usage.turnIndex];
    if (!turn || !turn.assistantText) continue;

    // Parse injected paths for this turn. The column is free-form JSON, so
    // validate the shape instead of trusting a cast: `null`, objects and
    // numbers all parse successfully but are not iterable path lists.
    let parsed: unknown;
    try { parsed = JSON.parse(usage.injectedPaths); }
    catch { continue; }
    if (!Array.isArray(parsed)) continue;
    const injectedPaths = parsed.filter((p): p is string => typeof p === "string");
    if (injectedPaths.length === 0) continue;

    // Check which docs from THIS turn were referenced in THIS turn's assistant text.
    // A Set keeps a path listed twice from being marked twice; the second pass
    // would find no unmarked linked events and fall into the fallback branch.
    const referencedDocIds = new Set<number>();
    for (const path of injectedPaths) {
      if (!isPathReferenced(store, path, turn.assistantText)) continue;

      const parts = path.split("/");
      if (parts.length < 2) continue;
      const collection = parts[0]!;
      const docPath = parts.slice(1).join("/");
      const doc = store.findActiveDocument(collection, docPath);
      if (doc) referencedDocIds.add(doc.id);
    }

    if (referencedDocIds.size === 0) continue;

    // Mark only recall events linked to THIS usage row
    for (const docId of referencedDocIds) {
      // Primary: usage_id-linked events (current schema)
      const linked = store.db.prepare(`
        SELECT id FROM recall_events
        WHERE usage_id = ? AND doc_id = ? AND was_referenced = 0
      `).all(usage.id, docId) as { id: number }[];

      if (linked.length > 0) {
        const ids = linked.map(r => r.id);
        const placeholders = ids.map(() => "?").join(",");
        store.db.prepare(`
          UPDATE recall_events SET was_referenced = 1
          WHERE id IN (${placeholders})
        `).run(...ids);
      } else {
        // Fallback: pre-migration events without usage_id — match by turn_index
        store.db.prepare(`
          UPDATE recall_events SET was_referenced = 1
          WHERE id IN (
            SELECT id FROM recall_events
            WHERE session_id = ? AND doc_id = ? AND turn_index = ? AND was_referenced = 0
          )
        `).run(sessionId, docId, usage.turnIndex);
      }
    }
  }
}
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Recall Tracking — direct-write recall event recording.
3
+ *
4
+ * Context-surfacing writes recall events directly to SQLite (single transaction,
5
+ * <0.4ms for ~12 rows). This replaces the original in-memory buffer design which
6
+ * failed in Claude Code mode where each hook is a separate process invocation.
7
+ *
8
+ * Per GPT 5.4 High review (Codex turn 1):
9
+ * - Direct INSERT is preferred over buffer for cross-process correctness
10
+ * - WAL mode handles concurrent writes safely (busy_timeout=5000ms)
11
+ * - Negative signals (surfaced but not referenced) marked retroactively by feedback-loop
12
+ */
13
+
14
+ import { createHash } from "crypto";
15
+ import type { Store } from "./store.ts";
16
+
17
+ // =============================================================================
18
+ // Query Hashing
19
+ // =============================================================================
20
+
21
/**
 * Derive the recall-tracking key for a search query.
 *
 * The query is lower-cased and trimmed, hashed with SHA-1, and the hex digest
 * is cut down to its first 12 characters.
 *
 * @param query - Raw query text
 * @returns 12-character lowercase hex string
 */
export function hashQuery(query: string): string {
  const normalized = query.toLowerCase().trim();
  const hexDigest = createHash("sha1").update(normalized).digest("hex");
  return hexDigest.slice(0, 12);
}
31
+
32
+ // =============================================================================
33
+ // Direct Write (replaces in-memory buffer)
34
+ // =============================================================================
35
+
36
/**
 * Record surfaced documents as recall events directly to SQLite.
 * Called from the context-surfacing hook; all resolved events are handed to
 * `store.insertRecallEvents` in one batch.
 *
 * Each displayPath is split into `<collection>/<path>` and resolved to a doc id
 * via `store.findActiveDocument`. Paths without a "/" are skipped, and docs that
 * can't be resolved (e.g. deleted between search and write) are skipped with a
 * debug log.
 *
 * @param store - Store instance with DB access
 * @param sessionId - Current session identifier; an empty value records nothing
 * @param queryHash - Hash of the search query (see `hashQuery`)
 * @param docs - Array of {displayPath, searchScore} for each surfaced result
 * @param usageId - Optional id of the context_usage row this surfacing belongs to
 * @param turnIndex - Optional turn index of this surfacing within the session
 * @returns Number of events recorded (0 when nothing was resolvable)
 */
export function writeRecallEvents(
  store: Store,
  sessionId: string,
  queryHash: string,
  docs: { displayPath: string; searchScore: number }[],
  usageId?: number,
  turnIndex?: number
): number {
  if (!sessionId || docs.length === 0) return 0;

  // The element type mirrors Store.insertRecallEvents, including the optional
  // linkage fields, so the object literal below type-checks under strict mode.
  const resolved: {
    docId: number;
    queryHash: string;
    searchScore: number;
    sessionId: string;
    usageId?: number;
    turnIndex?: number;
  }[] = [];

  for (const doc of docs) {
    // displayPath is "<collection>/<path...>"; the path itself may contain slashes.
    const parts = doc.displayPath.split("/");
    if (parts.length < 2) continue;
    const collection = parts[0]!;
    const docPath = parts.slice(1).join("/");
    const found = store.findActiveDocument(collection, docPath);
    if (!found) {
      console.debug?.(`[recall] skipping unresolvable displayPath: ${doc.displayPath}`);
      continue;
    }

    resolved.push({
      docId: found.id,
      queryHash,
      searchScore: doc.searchScore,
      sessionId,
      usageId,
      turnIndex,
    });
  }

  if (resolved.length === 0) return 0;
  return store.insertRecallEvents(resolved);
}
85
+
package/src/store.ts CHANGED
@@ -301,6 +301,10 @@ function initializeDatabase(db: Database): void {
301
301
  sqliteVec.load(db);
302
302
  db.exec("PRAGMA journal_mode = WAL");
303
303
  db.exec("PRAGMA foreign_keys = ON");
304
+ // Set generous busy_timeout during DDL — concurrent Stop hooks (decision-extractor,
305
+ // handoff-generator, feedback-loop) all run initializeDatabase simultaneously.
306
+ // 15s is well within the 30s Stop hook timeout. Reset to normal after DDL completes.
307
+ db.exec("PRAGMA busy_timeout = 15000");
304
308
 
305
309
  // Drop legacy tables that are now managed in YAML
306
310
  db.exec(`DROP TABLE IF EXISTS path_contexts`);
@@ -491,11 +495,18 @@ function initializeDatabase(db: Database): void {
491
495
  hook_name TEXT NOT NULL,
492
496
  injected_paths TEXT NOT NULL DEFAULT '[]',
493
497
  estimated_tokens INTEGER NOT NULL DEFAULT 0,
494
- was_referenced INTEGER NOT NULL DEFAULT 0
498
+ was_referenced INTEGER NOT NULL DEFAULT 0,
499
+ turn_index INTEGER NOT NULL DEFAULT 0
495
500
  )
496
501
  `);
497
502
  db.exec(`CREATE INDEX IF NOT EXISTS idx_context_usage_session ON context_usage(session_id)`);
498
503
 
504
+ // Migration: add turn_index to existing context_usage
505
+ const cuCols = db.prepare("PRAGMA table_info(context_usage)").all() as { name: string }[];
506
+ if (!cuCols.some(c => c.name === "turn_index")) {
507
+ try { db.exec(`ALTER TABLE context_usage ADD COLUMN turn_index INTEGER NOT NULL DEFAULT 0`); } catch { /* exists */ }
508
+ }
509
+
499
510
  // Hook prompt dedupe: suppress duplicate/heartbeat prompts to reduce GPU churn.
500
511
  db.exec(`
501
512
  CREATE TABLE IF NOT EXISTS hook_dedupe (
@@ -785,6 +796,64 @@ function initializeDatabase(db: Database): void {
785
796
  `);
786
797
 
787
798
  db.exec(`CREATE INDEX IF NOT EXISTS idx_intent_cache_time ON intent_classifications(cached_at)`);
799
+
800
+ // Recall tracking: append-only event log for every doc surfaced by retrieval
801
+ // usage_id is informational (no FK) — links to context_usage.id in the same vault
802
+ // but may reference a different vault's row in cross-vault scenarios.
803
+ // Cross-vault linkage uses session_id + turn_index instead.
804
+ db.exec(`
805
+ CREATE TABLE IF NOT EXISTS recall_events (
806
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
807
+ doc_id INTEGER NOT NULL,
808
+ query_hash TEXT NOT NULL,
809
+ search_score REAL NOT NULL,
810
+ session_id TEXT NOT NULL,
811
+ usage_id INTEGER,
812
+ turn_index INTEGER NOT NULL DEFAULT 0,
813
+ surfaced_at TEXT NOT NULL DEFAULT (datetime('now')),
814
+ was_referenced INTEGER NOT NULL DEFAULT 0,
815
+ FOREIGN KEY (doc_id) REFERENCES documents(id) ON DELETE CASCADE
816
+ )
817
+ `);
818
+ // Migration: add usage_id + turn_index columns to existing recall_events tables
819
+ const reCols = db.prepare("PRAGMA table_info(recall_events)").all() as { name: string }[];
820
+ const reColNames = new Set(reCols.map(c => c.name));
821
+ if (!reColNames.has("usage_id")) {
822
+ try { db.exec(`ALTER TABLE recall_events ADD COLUMN usage_id INTEGER`); } catch { /* exists */ }
823
+ }
824
+ if (!reColNames.has("turn_index")) {
825
+ try { db.exec(`ALTER TABLE recall_events ADD COLUMN turn_index INTEGER NOT NULL DEFAULT 0`); } catch { /* exists */ }
826
+ }
827
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_recall_events_usage ON recall_events(usage_id)`);
828
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_recall_events_doc ON recall_events(doc_id)`);
829
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_recall_events_session ON recall_events(session_id)`);
830
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_recall_events_surfaced ON recall_events(surfaced_at)`);
831
+
832
+ // Recall stats: derived summary recomputed by background worker
833
+ db.exec(`
834
+ CREATE TABLE IF NOT EXISTS recall_stats (
835
+ doc_id INTEGER PRIMARY KEY,
836
+ recall_count INTEGER NOT NULL DEFAULT 0,
837
+ unique_queries INTEGER NOT NULL DEFAULT 0,
838
+ recall_days INTEGER NOT NULL DEFAULT 0,
839
+ total_score REAL NOT NULL DEFAULT 0,
840
+ max_score REAL NOT NULL DEFAULT 0,
841
+ first_recalled_at TEXT,
842
+ last_recalled_at TEXT,
843
+ diversity_score REAL NOT NULL DEFAULT 0,
844
+ spacing_score REAL NOT NULL DEFAULT 0,
845
+ negative_count INTEGER NOT NULL DEFAULT 0,
846
+ updated_at TEXT NOT NULL DEFAULT (datetime('now')),
847
+ FOREIGN KEY (doc_id) REFERENCES documents(id) ON DELETE CASCADE
848
+ )
849
+ `);
850
+
851
+ // Migration: add contradict_confidence to memory_relations
852
+ const mrCols = db.prepare("PRAGMA table_info(memory_relations)").all() as { name: string }[];
853
+ const mrColNames = new Set(mrCols.map(c => c.name));
854
+ if (!mrColNames.has("contradict_confidence")) {
855
+ try { db.exec(`ALTER TABLE memory_relations ADD COLUMN contradict_confidence REAL`); } catch { /* column exists */ }
856
+ }
788
857
  }
789
858
 
790
859
 
@@ -898,7 +967,7 @@ export type Store = {
898
967
  getRecentSessions: (limit: number) => SessionRecord[];
899
968
 
900
969
  // SAME: Context usage tracking
901
- insertUsage: (usage: UsageRecord) => void;
970
+ insertUsage: (usage: UsageRecord) => number;
902
971
  getUsageForSession: (sessionId: string) => UsageRow[];
903
972
  markUsageReferenced: (id: number) => void;
904
973
 
@@ -944,6 +1013,13 @@ export type Store = {
944
1013
  queryEntityTriples: (entityId: string, options?: { asOf?: string; direction?: "outgoing" | "incoming" | "both" }) => { id: number; direction: string; subject: string; predicate: string; object: string; validFrom: string | null; validTo: string | null; confidence: number; current: boolean }[];
945
1014
  getTripleStats: () => { totalTriples: number; currentFacts: number; expiredFacts: number; predicateTypes: string[] };
946
1015
 
1016
+ // Recall tracking
1017
+ insertRecallEvents: (events: { docId: number; queryHash: string; searchScore: number; sessionId: string; usageId?: number; turnIndex?: number; wasReferenced?: boolean }[]) => number;
1018
+ recomputeRecallStats: () => number;
1019
+ getRecallStats: (docId: number) => RecallStatsRow | null;
1020
+ getRecallStatsAll: (minRecallCount?: number) => RecallStatsRow[];
1021
+ markRecallEventsReferenced: (sessionId: string, docIds: number[]) => void;
1022
+
947
1023
  // Co-activation tracking
948
1024
  recordCoActivation: (paths: string[]) => void;
949
1025
  getCoActivated: (path: string, limit?: number) => { path: string; count: number }[];
@@ -987,9 +1063,9 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
987
1063
  db.exec("PRAGMA journal_mode = WAL");
988
1064
  db.exec("PRAGMA query_only = ON");
989
1065
  }
990
- if (opts?.busyTimeout !== undefined) {
991
- db.exec(`PRAGMA busy_timeout = ${opts.busyTimeout}`);
992
- }
1066
+ // Reset busy_timeout to operational value after DDL init (which uses 15s).
1067
+ // Default 5000ms for normal operations — callers can override via opts.
1068
+ db.exec(`PRAGMA busy_timeout = ${opts?.busyTimeout ?? 5000}`);
993
1069
 
994
1070
  return {
995
1071
  db,
@@ -1075,7 +1151,7 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
1075
1151
  getRecentSessions: (limit: number) => getRecentSessionsFn(db, limit),
1076
1152
 
1077
1153
  // SAME: Context usage tracking
1078
- insertUsage: (usage: UsageRecord) => insertUsageFn(db, usage),
1154
+ insertUsage: (usage: UsageRecord) => insertUsageFn(db, usage) as number,
1079
1155
  getUsageForSession: (sessionId: string) => getUsageForSessionFn(db, sessionId),
1080
1156
  markUsageReferenced: (id: number) => markUsageReferencedFn(db, id),
1081
1157
 
@@ -1216,6 +1292,165 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
1216
1292
  },
1217
1293
 
1218
1294
  // Co-activation tracking
1295
// Recall tracking: persist a batch of surfacing events inside one transaction.
// Every row in the batch shares one ISO timestamp. A missing usageId is stored
// as NULL, a missing turnIndex as 0, and wasReferenced as 0/1.
// Returns the number of events passed in.
insertRecallEvents: (events: { docId: number; queryHash: string; searchScore: number; sessionId: string; usageId?: number; turnIndex?: number; wasReferenced?: boolean }[]) => {
  if (events.length === 0) return 0;
  const insertEvent = db.prepare(`
    INSERT INTO recall_events (doc_id, query_hash, search_score, session_id, usage_id, turn_index, surfaced_at, was_referenced)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `);
  const surfacedAt = new Date().toISOString();
  const insertAll = db.transaction(() => {
    events.forEach(ev => {
      const usageId = ev.usageId ?? null;
      const turnIndex = ev.turnIndex ?? 0;
      const referencedFlag = ev.wasReferenced ? 1 : 0;
      insertEvent.run(ev.docId, ev.queryHash, ev.searchScore, ev.sessionId, usageId, turnIndex, surfacedAt, referencedFlag);
    });
  });
  insertAll();
  return events.length;
},
1311
+
1312
// Recall tracking: rebuild recall_stats from the recall_events log.
// One GROUP BY query aggregates per document; the diversity and spacing
// formulas are then applied in JS and every row is upserted in one transaction.
// Returns the number of documents whose stats were written.
recomputeRecallStats: () => {
  const perDoc = db.prepare(`
    SELECT
      doc_id,
      COUNT(*) AS recall_count,
      COUNT(DISTINCT query_hash) AS unique_queries,
      COUNT(DISTINCT date(surfaced_at, 'utc')) AS recall_days,
      SUM(search_score) AS total_score,
      MAX(search_score) AS max_score,
      SUM(CASE WHEN was_referenced = 0 THEN 1 ELSE 0 END) AS negative_count,
      MIN(surfaced_at) AS first_recalled_at,
      MAX(surfaced_at) AS last_recalled_at,
      GROUP_CONCAT(DISTINCT date(surfaced_at, 'utc')) AS day_list
    FROM recall_events
    GROUP BY doc_id
  `).all() as {
    doc_id: number; recall_count: number; unique_queries: number; recall_days: number;
    total_score: number; max_score: number; negative_count: number;
    first_recalled_at: string; last_recalled_at: string; day_list: string;
  }[];

  if (perDoc.length === 0) return 0;

  const writeStats = db.prepare(`
    INSERT INTO recall_stats (doc_id, recall_count, unique_queries, recall_days, total_score, max_score,
      first_recalled_at, last_recalled_at, diversity_score, spacing_score, negative_count, updated_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(doc_id) DO UPDATE SET
      recall_count = excluded.recall_count,
      unique_queries = excluded.unique_queries,
      recall_days = excluded.recall_days,
      total_score = excluded.total_score,
      max_score = excluded.max_score,
      first_recalled_at = excluded.first_recalled_at,
      last_recalled_at = excluded.last_recalled_at,
      diversity_score = excluded.diversity_score,
      spacing_score = excluded.spacing_score,
      negative_count = excluded.negative_count,
      updated_at = excluded.updated_at
  `);

  const updatedAt = new Date().toISOString();
  const msPerDay = 24 * 60 * 60 * 1000;

  const writeAll = db.transaction(() => {
    perDoc.forEach(agg => {
      // Diversity: the larger of distinct queries / distinct days, scaled by 5 and capped at 1
      const breadth = Math.max(agg.unique_queries, agg.recall_days);
      const diversityScore = Math.min(1, breadth / 5);

      // Spacing: a single day gets a flat 0.2; several days blend day count and calendar span
      let spacingScore = 0;
      if (agg.recall_days === 1) {
        spacingScore = 0.2;
      } else if (agg.recall_days > 1 && agg.day_list) {
        const days = agg.day_list.split(",").sort();
        const earliest = new Date(days[0]! + "T00:00:00Z").getTime();
        const latest = new Date(days[days.length - 1]! + "T00:00:00Z").getTime();
        const spanDays = Math.max(0, (latest - earliest) / msPerDay);
        const countTerm = Math.min(1, Math.log1p(days.length - 1) / Math.log1p(4));
        const spanTerm = Math.min(1, spanDays / 7);
        spacingScore = Math.min(1, 0.55 * countTerm + 0.45 * spanTerm);
      }

      writeStats.run(
        agg.doc_id, agg.recall_count, agg.unique_queries, agg.recall_days,
        agg.total_score, agg.max_score,
        agg.first_recalled_at, agg.last_recalled_at,
        diversityScore, spacingScore, agg.negative_count, updatedAt
      );
    });
  });
  writeAll();
  return perDoc.length;
},
1386
+
1387
// Recall tracking: load the derived stats row for one document and map its
// snake_case columns to camelCase. Returns null when the doc has no stats row.
getRecallStats: (docId: number) => {
  const lookup = db.prepare(`SELECT * FROM recall_stats WHERE doc_id = ?`);
  const found = lookup.get(docId) as any;
  if (!found) return null;
  const stats = {
    docId: found.doc_id,
    recallCount: found.recall_count,
    uniqueQueries: found.unique_queries,
    recallDays: found.recall_days,
    totalScore: found.total_score,
    maxScore: found.max_score,
    firstRecalledAt: found.first_recalled_at,
    lastRecalledAt: found.last_recalled_at,
    diversityScore: found.diversity_score,
    spacingScore: found.spacing_score,
    negativeCount: found.negative_count,
    updatedAt: found.updated_at,
  };
  return stats as RecallStatsRow;
},
1405
+
1406
// Recall tracking: list stats for all active documents with at least
// `minRecallCount` recalls, most-recalled first. Each row also carries the
// document's collection, path and title from the joined documents table.
getRecallStatsAll: (minRecallCount: number = 1) => {
  const listing = db.prepare(`
    SELECT rs.*, d.collection, d.path, d.title
    FROM recall_stats rs
    JOIN documents d ON rs.doc_id = d.id
    WHERE rs.recall_count >= ? AND d.active = 1
    ORDER BY rs.recall_count DESC
  `);
  const rawRows = listing.all(minRecallCount) as any[];
  return rawRows.map(raw => {
    const stats = {
      docId: raw.doc_id,
      recallCount: raw.recall_count,
      uniqueQueries: raw.unique_queries,
      recallDays: raw.recall_days,
      totalScore: raw.total_score,
      maxScore: raw.max_score,
      firstRecalledAt: raw.first_recalled_at,
      lastRecalledAt: raw.last_recalled_at,
      diversityScore: raw.diversity_score,
      spacingScore: raw.spacing_score,
      negativeCount: raw.negative_count,
      updatedAt: raw.updated_at,
      collection: raw.collection,
      path: raw.path,
      title: raw.title,
    };
    return stats as RecallStatsRow;
  });
},
1431
+
1432
// Recall tracking: flag the most recent recall event of each given doc in this
// session as referenced. Only the latest event per doc is touched, so earlier
// surfacings that were never cited keep was_referenced = 0 and still count as
// negative signals (e.g. surfaced across 5 prompts, cited once → 4 negatives).
markRecallEventsReferenced: (sessionId: string, docIds: number[]) => {
  if (docIds.length === 0) return;
  const markLatest = db.prepare(`
    UPDATE recall_events SET was_referenced = 1
    WHERE id = (
      SELECT id FROM recall_events
      WHERE session_id = ? AND doc_id = ?
      ORDER BY surfaced_at DESC
      LIMIT 1
    )
  `);
  const markAll = db.transaction(() => {
    docIds.forEach(id => {
      markLatest.run(sessionId, id);
    });
  });
  markAll();
},
1453
+
1219
1454
  recordCoActivation: (paths: string[]) => {
1220
1455
  if (paths.length < 2) return;
1221
1456
  const now = new Date().toISOString();
@@ -1451,6 +1686,7 @@ export type UsageRecord = {
1451
1686
  injectedPaths: string[];
1452
1687
  estimatedTokens: number;
1453
1688
  wasReferenced: number;
1689
+ turnIndex?: number;
1454
1690
  };
1455
1691
 
1456
1692
  export type UsageRow = {
@@ -1461,6 +1697,26 @@ export type UsageRow = {
1461
1697
  injectedPaths: string;
1462
1698
  estimatedTokens: number;
1463
1699
  wasReferenced: number;
1700
+ turnIndex: number;
1701
+ };
1702
+
1703
/**
 * Derived per-document recall summary, as stored in the `recall_stats` table
 * and rebuilt from `recall_events` by `recomputeRecallStats`.
 */
export type RecallStatsRow = {
  /** Id of the document these stats describe. */
  docId: number;
  /** Total number of recall events for the document. */
  recallCount: number;
  /** Number of distinct query hashes that surfaced the document. */
  uniqueQueries: number;
  /** Number of distinct calendar days on which the document was surfaced. */
  recallDays: number;
  /** Sum of the search scores over all recall events. */
  totalScore: number;
  /** Highest search score over all recall events. */
  maxScore: number;
  /** Timestamp of the earliest recall event, if any. */
  firstRecalledAt: string | null;
  /** Timestamp of the latest recall event, if any. */
  lastRecalledAt: string | null;
  /** max(uniqueQueries, recallDays) / 5, capped at 1. */
  diversityScore: number;
  /** Multi-day spread score, capped at 1 (0.2 when all recalls fall on one day). */
  spacingScore: number;
  /** Number of recall events that were surfaced but not marked as referenced. */
  negativeCount: number;
  /** Timestamp of the last stats recomputation for this row. */
  updatedAt: string;
  // Joined from documents (only populated by getRecallStatsAll)
  collection?: string;
  path?: string;
  title?: string;
};
1465
1721
 
1466
1722
  export type DocumentRow = {
@@ -3647,19 +3903,22 @@ function getRecentSessionsFn(db: Database, limit: number): SessionRecord[] {
3647
3903
  // SAME: Context Usage Tracking
3648
3904
  // =============================================================================
3649
3905
 
3650
/**
 * Insert one context_usage row and return its rowid.
 *
 * `injectedPaths` is stored as a JSON string and a missing `turnIndex` is
 * stored as 0. The returned id is read back with `last_insert_rowid()` so
 * recall events can be linked to this usage row.
 */
function insertUsageFn(db: Database, usage: UsageRecord): number {
  const insertRow = db.prepare(`
    INSERT INTO context_usage (session_id, timestamp, hook_name, injected_paths, estimated_tokens, was_referenced, turn_index)
    VALUES (?, ?, ?, ?, ?, ?, ?)
  `);
  const serializedPaths = JSON.stringify(usage.injectedPaths);
  const turnIndex = usage.turnIndex ?? 0;
  insertRow.run(usage.sessionId, usage.timestamp, usage.hookName, serializedPaths, usage.estimatedTokens, usage.wasReferenced, turnIndex);

  const inserted = db.prepare("SELECT last_insert_rowid() as id").get() as { id: number };
  return inserted.id;
}
3656
3915
 
3657
3916
  function getUsageForSessionFn(db: Database, sessionId: string): UsageRow[] {
3658
3917
  return db.prepare(`
3659
3918
  SELECT id, session_id AS sessionId, timestamp, hook_name AS hookName,
3660
3919
  injected_paths AS injectedPaths, estimated_tokens AS estimatedTokens,
3661
- was_referenced AS wasReferenced
3662
- FROM context_usage WHERE session_id = ? ORDER BY timestamp
3920
+ was_referenced AS wasReferenced, turn_index AS turnIndex
3921
+ FROM context_usage WHERE session_id = ? ORDER BY turn_index, timestamp
3663
3922
  `).all(sessionId) as UsageRow[];
3664
3923
  }
3665
3924