clawmem 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,6 +30,7 @@ import { enrichResults } from "../search-utils.ts";
30
30
  import { sanitizeSnippet } from "../promptguard.ts";
31
31
  import { shouldSkipRetrieval, isRetrievedNoise } from "../retrieval-gate.ts";
32
32
  import { MAX_QUERY_LENGTH } from "../limits.ts";
33
+ import { writeRecallEvents, hashQuery } from "../recall-buffer.ts";
33
34
 
34
35
  // =============================================================================
35
36
  // Config
@@ -69,18 +70,44 @@ export async function contextSurfacing(
69
70
  input: HookInput
70
71
  ): Promise<HookOutput> {
71
72
  let prompt = input.prompt?.trim();
72
- if (!prompt || prompt.length < MIN_PROMPT_LENGTH) return makeEmptyOutput("context-surfacing");
73
+
74
+ // Compute turn_index FIRST, before any early returns.
75
+ // Every transcript-visible early return must log an empty context_usage row
76
+ // to keep turn_index aligned with transcript turns for per-turn attribution.
77
+ if (input.sessionId) {
78
+ try {
79
+ let turnIndex = 0;
80
+ try {
81
+ const existing = store.db.prepare(
82
+ `SELECT COUNT(*) as cnt FROM context_usage WHERE session_id = ? AND hook_name = 'context-surfacing'`
83
+ ).get(input.sessionId) as { cnt: number };
84
+ turnIndex = existing.cnt;
85
+ } catch { /* fallback to 0 */ }
86
+ (input as any)._turnIndex = turnIndex;
87
+ } catch { /* non-fatal */ }
88
+ }
89
+
90
+ if (!prompt || prompt.length < MIN_PROMPT_LENGTH) {
91
+ logEmptyTurn(store, input);
92
+ return makeEmptyOutput("context-surfacing");
93
+ }
73
94
 
74
95
  // Bound query length to prevent DoS on search indices
75
96
  if (prompt.length > MAX_QUERY_LENGTH) prompt = prompt.slice(0, MAX_QUERY_LENGTH);
76
97
 
77
- // Skip slash commands
78
- if (prompt.startsWith("/")) return makeEmptyOutput("context-surfacing");
98
+ // Skip slash commands — log empty turn for alignment
99
+ if (prompt.startsWith("/")) {
100
+ logEmptyTurn(store, input);
101
+ return makeEmptyOutput("context-surfacing");
102
+ }
79
103
 
80
104
  // Adaptive retrieval gate: skip greetings, shell commands, affirmations, etc.
81
- if (shouldSkipRetrieval(prompt)) return makeEmptyOutput("context-surfacing");
105
+ if (shouldSkipRetrieval(prompt)) {
106
+ logEmptyTurn(store, input);
107
+ return makeEmptyOutput("context-surfacing");
108
+ }
82
109
 
83
- // Heartbeat / duplicate suppression (IO4)
110
+ // Heartbeat / duplicate suppression (IO4) — NOT transcript-visible user turns
84
111
  if (isHeartbeatPrompt(prompt)) return makeEmptyOutput("context-surfacing");
85
112
  if (wasPromptSeenRecently(store, "context-surfacing", prompt)) {
86
113
  return makeEmptyOutput("context-surfacing");
@@ -157,7 +184,7 @@ export async function contextSurfacing(
157
184
  }
158
185
  }
159
186
 
160
- if (results.length === 0) return makeEmptyOutput("context-surfacing");
187
+ if (results.length === 0) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
161
188
 
162
189
  // Budget-aware deep escalation (deep profile only):
163
190
  // If the fast path finished quickly and found results, spend remaining time budget
@@ -215,7 +242,7 @@ export async function contextSurfacing(
215
242
  !FILTERED_PATHS.some(p => r.displayPath.includes(p))
216
243
  );
217
244
 
218
- if (results.length === 0) return makeEmptyOutput("context-surfacing");
245
+ if (results.length === 0) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
219
246
 
220
247
  // Filter out snoozed documents
221
248
  const now = new Date();
@@ -231,7 +258,7 @@ export async function contextSurfacing(
231
258
  return true;
232
259
  });
233
260
 
234
- if (results.length === 0) return makeEmptyOutput("context-surfacing");
261
+ if (results.length === 0) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
235
262
 
236
263
  // Deduplicate by filepath (keep best score per path)
237
264
  const deduped = new Map<string, SearchResult>();
@@ -273,7 +300,7 @@ export async function contextSurfacing(
273
300
  : 0;
274
301
 
275
302
  // Activation floor: if even the best result is too weak, bail entirely
276
- if (bestScore < profile.activationFloor) return makeEmptyOutput("context-surfacing");
303
+ if (bestScore < profile.activationFloor) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
277
304
 
278
305
  const adaptiveMin = Math.max(bestScore * profile.minScoreRatio, profile.absoluteFloor);
279
306
  scored = allScored.filter(r => r.compositeScore >= adaptiveMin);
@@ -282,7 +309,7 @@ export async function contextSurfacing(
282
309
  scored = allScored.filter(r => r.compositeScore >= minScore);
283
310
  }
284
311
 
285
- if (scored.length === 0) return makeEmptyOutput("context-surfacing");
312
+ if (scored.length === 0) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
286
313
 
287
314
  // Spreading activation (E11): boost results co-activated with top HOT results
288
315
  if (scored.length > 3) {
@@ -325,11 +352,62 @@ export async function contextSurfacing(
325
352
  // Build context within token budget (profile-driven)
326
353
  const { context, paths, tokens } = buildContext(scored, prompt, tokenBudget);
327
354
 
328
- if (!context) return makeEmptyOutput("context-surfacing");
355
+ if (!context) {
356
+ logEmptyTurn(store, input);
357
+ return makeEmptyOutput("context-surfacing");
358
+ }
329
359
 
330
- // Log the injection
360
+ // Use pre-computed turn_index from top of function
331
361
  if (input.sessionId) {
332
- logInjection(store, input.sessionId, "context-surfacing", paths, tokens);
362
+ const turnIndex = (input as any)._turnIndex ?? 0;
363
+
364
+ // Log the injection — returns usage_id for recall event linkage
365
+ const usageId = logInjection(store, input.sessionId, "context-surfacing", paths, tokens, turnIndex);
366
+
367
+ // Record recall events ONLY for docs that made it into the injected context
368
+ // (post-budget). Docs trimmed by token budget were never seen by the model.
369
+ // Each event links to its context_usage row via usage_id + turn_index.
370
+ // Multi-vault: route docs to origin vault's store. Mirror context_usage there too.
371
+ try {
372
+ const qHash = hashQuery(prompt);
373
+ const injectedSet = new Set(paths);
374
+ const injectedScored = scored.filter(r => injectedSet.has(r.displayPath));
375
+
376
+ // Group by vault origin (undefined = general vault)
377
+ const byVault = new Map<string | undefined, typeof injectedScored>();
378
+ for (const r of injectedScored) {
379
+ const vault = (r as any)._fromVault as string | undefined;
380
+ let group = byVault.get(vault);
381
+ if (!group) { group = []; byVault.set(vault, group); }
382
+ group.push(r);
383
+ }
384
+
385
+ const validUsageId = usageId > 0 ? usageId : undefined;
386
+ for (const [vault, docs] of byVault) {
387
+ const mappedDocs = docs.map(r => ({ displayPath: r.displayPath, searchScore: r.compositeScore }));
388
+ if (!vault) {
389
+ writeRecallEvents(store, input.sessionId, qHash, mappedDocs, validUsageId, turnIndex);
390
+ } else {
391
+ try {
392
+ const vaultStore = resolveStore(vault);
393
+ // Mirror context_usage row into named vault for correct FK + attribution
394
+ const vaultPaths = docs.map(r => r.displayPath);
395
+ const vaultUsageId = vaultStore.insertUsage({
396
+ sessionId: input.sessionId,
397
+ timestamp: new Date().toISOString(),
398
+ hookName: "context-surfacing",
399
+ injectedPaths: vaultPaths,
400
+ estimatedTokens: 0,
401
+ wasReferenced: 0,
402
+ turnIndex,
403
+ });
404
+ writeRecallEvents(vaultStore, input.sessionId, qHash, mappedDocs, vaultUsageId > 0 ? vaultUsageId : undefined, turnIndex);
405
+ } catch { /* vault unavailable — skip */ }
406
+ }
407
+ }
408
+ } catch {
409
+ // Non-critical — don't block context surfacing on recall tracking errors
410
+ }
333
411
  }
334
412
 
335
413
  // Routing hint: detect query intent signals and prepend a tool routing directive
@@ -351,6 +429,19 @@ export async function contextSurfacing(
351
429
  // Helpers
352
430
  // =============================================================================
353
431
 
432
/**
 * Record an empty `context_usage` row for a turn that surfaced no context.
 *
 * Skipped prompts still need a row: without one, the `turn_index` of later
 * rows stops lining up with the transcript's turns and per-turn recall
 * attribution drifts.
 *
 * No-op when the hook input carries no session id. Failures are swallowed
 * on purpose — usage logging must never break the hook.
 *
 * @param store - Store the empty usage row is written to
 * @param input - Hook input; `contextSurfacing` stashes the pre-computed
 *   turn index on it as `_turnIndex` before any early return
 */
function logEmptyTurn(store: Store, input: HookInput): void {
  const { sessionId } = input;
  if (!sessionId) return;

  // `_turnIndex` is an ad-hoc field attached at runtime, so read it through a
  // narrow structural type instead of `any`. Falls back to 0 when it is absent.
  const turnIndex = (input as HookInput & { _turnIndex?: number })._turnIndex ?? 0;

  try {
    // Empty path list and zero tokens mark the row as "nothing injected".
    logInjection(store, sessionId, "context-surfacing", [], 0, turnIndex);
  } catch {
    /* non-fatal */
  }
}
444
+
354
445
  /**
355
446
  * Detect causal/temporal/discovery signals in the prompt and return a
356
447
  * routing hint that makes the correct tool choice salient at the moment
@@ -10,12 +10,18 @@
10
10
  */
11
11
 
12
12
  import type { Store } from "../store.ts";
13
+ import { resolveStore } from "../store.ts";
14
+ import { listVaults } from "../config.ts";
13
15
  import type { HookInput, HookOutput } from "../hooks.ts";
14
16
  import {
15
17
  makeEmptyOutput,
16
18
  readTranscript,
17
19
  validateTranscriptPath,
18
20
  } from "../hooks.ts";
21
+ import {
22
+ segmentTranscriptIntoTurns,
23
+ attributeRecallReferences,
24
+ } from "../recall-attribution.ts";
19
25
 
20
26
  // =============================================================================
21
27
  // Handler
@@ -129,6 +135,33 @@ export async function feedbackLoop(
129
135
  // Non-critical — don't block feedback loop on utility tracking errors
130
136
  }
131
137
 
138
+ // Recall tracking: per-turn attribution using transcript segmentation.
139
+ // Reads full transcript, segments into turns, zips with context_usage rows,
140
+ // checks references per-turn rather than session-globally.
141
+ try {
142
+ const allMessages = readTranscript(transcriptPath, 500);
143
+ const turns = segmentTranscriptIntoTurns(allMessages);
144
+ const usages = store.getUsageForSession(sessionId);
145
+
146
+ // General vault attribution
147
+ attributeRecallReferences(store, sessionId, usages, turns);
148
+
149
+ // Cross-vault: attribute recall events in any configured named vaults.
150
+ // Each vault has its own context_usage rows (mirrored during context-surfacing).
151
+ const vaultNames = listVaults();
152
+ for (const vaultName of vaultNames) {
153
+ try {
154
+ const vaultStore = resolveStore(vaultName);
155
+ const vaultUsages = vaultStore.getUsageForSession(sessionId);
156
+ if (vaultUsages.length > 0) {
157
+ attributeRecallReferences(vaultStore, sessionId, vaultUsages, turns);
158
+ }
159
+ } catch { /* vault unavailable — skip */ }
160
+ }
161
+ } catch {
162
+ // Non-critical — don't block feedback loop on recall tracking errors
163
+ }
164
+
132
165
  // Silent return — feedback loop doesn't inject context
133
166
  return makeEmptyOutput("feedback-loop");
134
167
  }
@@ -195,6 +228,13 @@ function trackUtilitySignals(
195
228
  // Reference Detection
196
229
  // =============================================================================
197
230
 
231
+ // Recall attribution logic is in src/recall-attribution.ts
232
+ // (attributeRecallReferences, segmentTranscriptIntoTurns)
233
+
234
+ // =============================================================================
235
+ // Reference Detection
236
+ // =============================================================================
237
+
198
238
  function checkTitleReference(store: Store, path: string, text: string): boolean {
199
239
  try {
200
240
  const parts = path.split("/");
@@ -260,10 +260,11 @@ function getCurrentFocus(
260
260
  cutoff.setDate(cutoff.getDate() - DECISION_LOOKBACK_DAYS);
261
261
  const cutoffStr = cutoff.toISOString();
262
262
 
263
- // Gather recent decisions, preferences, and active problems
263
+ // Gather recent decisions, preferences, active problems, and deductive insights
264
264
  const decisions = store.getDocumentsByType("decision", 10);
265
265
  const preferences = store.getDocumentsByType("preference", 5);
266
266
  const problems = store.getDocumentsByType("problem", 5);
267
+ const deductions = store.getDocumentsByType("deductive", 5);
267
268
 
268
269
  // Rank by: pinned first, then recency, then access_count
269
270
  const now = Date.now();
@@ -285,7 +286,11 @@ function getCurrentFocus(
285
286
  // Preferences are durable — no date filter, just rank
286
287
  const rankedPrefs = [...preferences].sort((a, b) => rankDoc(b) - rankDoc(a));
287
288
 
288
- if (recentDecisions.length === 0 && rankedPrefs.length === 0 && activeProblems.length === 0) {
289
+ const recentDeductions = deductions
290
+ .filter(d => d.modifiedAt >= cutoffStr)
291
+ .sort((a, b) => rankDoc(b) - rankDoc(a));
292
+
293
+ if (recentDecisions.length === 0 && rankedPrefs.length === 0 && activeProblems.length === 0 && recentDeductions.length === 0) {
289
294
  return null;
290
295
  }
291
296
 
@@ -338,6 +343,19 @@ function getCurrentFocus(
338
343
  }
339
344
  }
340
345
 
346
+ // Cross-session deductions (derived insights with source provenance)
347
+ if (recentDeductions.length > 0) {
348
+ lines.push("**Derived Insights:**");
349
+ charCount += 24;
350
+ for (const d of recentDeductions) {
351
+ if (charCount >= maxChars) break;
352
+ const entry = `- ${d.title} (${d.modifiedAt.slice(0, 10)})`;
353
+ lines.push(entry);
354
+ paths.push(`${d.collection}/${d.path}`);
355
+ charCount += entry.length + 2;
356
+ }
357
+ }
358
+
341
359
  return lines.length > 1 ? { text: lines.join("\n"), paths } : null;
342
360
  }
343
361
 
package/src/hooks.ts CHANGED
@@ -385,23 +385,28 @@ export function logInjection(
385
385
  sessionId: string,
386
386
  hookName: string,
387
387
  injectedPaths: string[],
388
- estimatedTokens: number
389
- ): void {
388
+ estimatedTokens: number,
389
+ turnIndex?: number
390
+ ): number {
390
391
  try {
391
- store.insertUsage({
392
+ const usageId = store.insertUsage({
392
393
  sessionId,
393
394
  timestamp: new Date().toISOString(),
394
395
  hookName,
395
396
  injectedPaths,
396
397
  estimatedTokens,
397
398
  wasReferenced: 0,
399
+ turnIndex,
398
400
  });
399
401
 
400
402
  // Record co-activation for all injected paths (E3)
401
403
  if (injectedPaths.length >= 2) {
402
404
  store.recordCoActivation(injectedPaths);
403
405
  }
406
+
407
+ return usageId;
404
408
  } catch {
405
409
  // Non-fatal: don't crash hook if usage logging fails
410
+ return -1;
406
411
  }
407
412
  }
package/src/mcp.ts CHANGED
@@ -2277,6 +2277,11 @@ This is the recommended entry point for ALL memory queries.`,
2277
2277
  const config = loadConfig();
2278
2278
  const policy = config.lifecycle;
2279
2279
 
2280
+ // Recall tracking summary
2281
+ const recallStats = store.getRecallStatsAll(1);
2282
+ const highDiversity = recallStats.filter(r => r.diversityScore >= 0.4 && r.spacingScore >= 0.5 && r.recallCount >= 3);
2283
+ const highNoise = recallStats.filter(r => r.recallCount >= 5 && r.negativeCount > r.recallCount * 0.8);
2284
+
2280
2285
  const lines = [
2281
2286
  `Active: ${stats.active}`,
2282
2287
  `Archived (auto): ${stats.archived}`,
@@ -2286,6 +2291,10 @@ This is the recommended entry point for ALL memory queries.`,
2286
2291
  `Never accessed: ${stats.neverAccessed}`,
2287
2292
  `Oldest access: ${stats.oldestAccess?.slice(0, 10) || "n/a"}`,
2288
2293
  "",
2294
+ `Recall tracking: ${recallStats.length} docs tracked`,
2295
+ ` Pin candidates (high diversity+spacing): ${highDiversity.length}`,
2296
+ ` Snooze candidates (surfaced often, rarely referenced): ${highNoise.length}`,
2297
+ "",
2289
2298
  `Policy: ${policy ? `archive after ${policy.archive_after_days}d, purge after ${policy.purge_after_days ?? "never"}, dry_run=${policy.dry_run}` : "none configured"}`,
2290
2299
  ];
2291
2300
 
@@ -2322,7 +2331,29 @@ This is the recommended entry point for ALL memory queries.`,
2322
2331
  const lines = candidates.map(c =>
2323
2332
  `- ${c.collection}/${c.path} (${c.content_type}, modified ${c.modified_at.slice(0, 10)}, accessed ${c.last_accessed_at?.slice(0, 10) || "never"})`
2324
2333
  );
2325
- return { content: [{ type: "text", text: `Would archive ${candidates.length} document(s):\n${lines.join("\n") || "(none)"}` }] };
2334
+
2335
+ // Recall-based recommendations
2336
+ const recallStats = store.getRecallStatsAll(3);
2337
+ const pinCandidates = recallStats.filter(r => r.diversityScore >= 0.4 && r.spacingScore >= 0.5 && r.recallCount >= 3);
2338
+ const snoozeCandidates = recallStats.filter(r => r.recallCount >= 5 && r.negativeCount > r.recallCount * 0.8);
2339
+
2340
+ const recallLines: string[] = [];
2341
+ if (pinCandidates.length > 0) {
2342
+ recallLines.push("", "Pin candidates (high diversity, multi-day spread, recall≥3):");
2343
+ for (const r of pinCandidates.slice(0, 5)) {
2344
+ const label = r.collection && r.path ? `${r.collection}/${r.path}` : `doc#${r.docId}`;
2345
+ recallLines.push(` - ${label} (recalls=${r.recallCount}, queries=${r.uniqueQueries}, days=${r.recallDays}, diversity=${r.diversityScore.toFixed(2)}, spacing=${r.spacingScore.toFixed(2)})`);
2346
+ }
2347
+ }
2348
+ if (snoozeCandidates.length > 0) {
2349
+ recallLines.push("", "Snooze candidates (surfaced often, rarely referenced):");
2350
+ for (const r of snoozeCandidates.slice(0, 5)) {
2351
+ const label = r.collection && r.path ? `${r.collection}/${r.path}` : `doc#${r.docId}`;
2352
+ recallLines.push(` - ${label} (recalls=${r.recallCount}, referenced=${r.recallCount - r.negativeCount}, noise_ratio=${(r.negativeCount / r.recallCount * 100).toFixed(0)}%)`);
2353
+ }
2354
+ }
2355
+
2356
+ return { content: [{ type: "text", text: `Would archive ${candidates.length} document(s):\n${lines.join("\n") || "(none)"}${recallLines.join("\n")}` }] };
2326
2357
  }
2327
2358
 
2328
2359
  const archived = store.archiveDocuments(candidates.map(c => c.id));
package/src/memory.ts CHANGED
@@ -20,6 +20,7 @@ export const HALF_LIVES: Record<string, number> = {
20
20
  project: 120,
21
21
  preference: Infinity,
22
22
  decision: Infinity,
23
+ deductive: Infinity,
23
24
  hub: Infinity,
24
25
  };
25
26
 
@@ -29,6 +30,7 @@ export const HALF_LIVES: Record<string, number> = {
29
30
 
30
31
  export const TYPE_BASELINES: Record<string, number> = {
31
32
  decision: 0.85,
33
+ deductive: 0.85,
32
34
  preference: 0.80,
33
35
  hub: 0.80,
34
36
  problem: 0.75,
@@ -45,7 +47,7 @@ export const TYPE_BASELINES: Record<string, number> = {
45
47
  // Content Type Inference
46
48
  // =============================================================================
47
49
 
48
- export type ContentType = "decision" | "preference" | "hub" | "research" | "project" | "handoff" | "conversation" | "progress" | "milestone" | "problem" | "note";
50
+ export type ContentType = "decision" | "deductive" | "preference" | "hub" | "research" | "project" | "handoff" | "conversation" | "progress" | "milestone" | "problem" | "note";
49
51
 
50
52
  export function inferContentType(path: string, explicitType?: string): ContentType {
51
53
  if (explicitType && explicitType in TYPE_BASELINES) return explicitType as ContentType;
@@ -75,7 +77,7 @@ export type MemoryType = "episodic" | "semantic" | "procedural";
75
77
  */
76
78
  export function inferMemoryType(path: string, contentType: string, body?: string): MemoryType {
77
79
  if (["handoff", "progress", "conversation"].includes(contentType)) return "episodic";
78
- if (["decision", "hub", "research"].includes(contentType)) return "semantic";
80
+ if (["decision", "deductive", "hub", "research"].includes(contentType)) return "semantic";
79
81
  if (body && /\b(step\s+\d|workflow|recipe|how\s+to|procedure|runbook|playbook)\b/i.test(body)) return "procedural";
80
82
  if (path.includes("sop") || path.includes("runbook") || path.includes("playbook")) return "procedural";
81
83
  if (contentType === "antipattern") return "semantic";
@@ -150,7 +152,7 @@ export function confidenceScore(
150
152
  // Attention decay: reduce confidence if not accessed recently (5% per week)
151
153
  // Only apply to episodic/progress content — skip for durable types (decision, hub, research)
152
154
  // Also skip if last_accessed_at was backfilled from modified_at (no real access yet)
153
- const DECAY_EXEMPT_TYPES = new Set(["decision", "hub", "research", "antipattern", "preference"]);
155
+ const DECAY_EXEMPT_TYPES = new Set(["decision", "deductive", "hub", "research", "antipattern", "preference"]);
154
156
  let attentionDecay = 1.0;
155
157
  if (lastAccessedAt && !DECAY_EXEMPT_TYPES.has(contentType)) {
156
158
  const lastAccess = typeof lastAccessedAt === "string" ? new Date(lastAccessedAt) : lastAccessedAt;
@@ -0,0 +1,182 @@
1
+ /**
2
+ * Recall Attribution — per-turn reference detection for recall tracking.
3
+ *
4
+ * Extracted into a standalone module for testability (per GPT 5.4 High review turn 4).
5
+ *
6
+ * Architecture:
7
+ * 1. Segment the transcript into ordered turns (user → assistant pairs)
8
+ * 2. Zip context_usage rows (by turn_index) with transcript turns (by position)
9
+ * 3. For each pair, detect references in that turn's assistant text only
10
+ * 4. Mark recall_events linked to the usage rows whose turn actually cited the doc
11
+ */
12
+
13
+ import type { Store, UsageRow } from "./store.ts";
14
+
15
+ // =============================================================================
16
+ // Types
17
+ // =============================================================================
18
+
19
/** One conversational turn: a user message plus the assistant output that followed it. */
export type TranscriptTurn = {
  /** Content of the user message that opened the turn ("" when the transcript starts with assistant output). */
  userText: string;
  /** Every assistant message of the turn joined with "\n" ("" when the assistant never replied). */
  assistantText: string;
};

// =============================================================================
// Transcript Segmentation
// =============================================================================

/**
 * Segment a flat message array into ordered turns.
 *
 * A turn starts on each "user" message and collects all following "assistant"
 * messages (joined with a newline) until the next "user" message. Messages
 * with any other role (system, tool, ...) are ignored. Assistant output that
 * precedes the first user message forms a leading turn with an empty
 * `userText`.
 *
 * Every user message yields exactly one turn — including a user message with
 * empty content and no reply. Turn positions are matched against
 * `turn_index` values by the attribution step, so silently dropping such a
 * message would shift every later turn by one.
 *
 * @param messages - Ordered array of {role, content} from transcript JSONL
 * @returns Ordered array of turns
 */
export function segmentTranscriptIntoTurns(
  messages: { role: string; content: string }[]
): TranscriptTurn[] {
  const turns: TranscriptTurn[] = [];
  let userText = "";
  let assistantText = "";
  // Tracks whether the pending turn was opened by a user message; the text
  // alone cannot tell "no user message yet" from "user sent an empty message".
  let openedByUser = false;

  const flushPending = (): void => {
    if (openedByUser || assistantText) {
      turns.push({ userText, assistantText });
    }
  };

  for (const msg of messages) {
    if (msg.role === "user") {
      // A user message closes whatever turn is pending and opens the next one.
      flushPending();
      userText = msg.content;
      assistantText = "";
      openedByUser = true;
    } else if (msg.role === "assistant") {
      assistantText += (assistantText ? "\n" : "") + msg.content;
    }
    // Any other role is irrelevant for attribution and skipped.
  }

  // The last turn has no following user message to trigger its flush.
  flushPending();

  return turns;
}
64
+
65
+ // =============================================================================
66
+ // Per-Turn Reference Detection
67
+ // =============================================================================
68
+
69
/**
 * Check if a displayPath (collection/path) is referenced in text.
 *
 * Checks run cheapest first and the first hit wins:
 *   1. the full displayPath appears verbatim (case-sensitive);
 *   2. the filename without a trailing .md/.txt extension appears
 *      (case-insensitive, filename must be at least 4 characters);
 *   3. the title of the active document found in the store appears
 *      (case-insensitive, title must be at least 5 characters).
 *
 * The title lookup is best-effort: if the store lookup throws, the document
 * is treated as not referenced so that one failing lookup cannot abort
 * attribution for the caller's remaining documents.
 *
 * @param store - Store used to resolve the document title
 * @param displayPath - Document path in `collection/path` form
 * @param text - Text to search (the caller passes a turn's assistant text)
 * @returns true when any of the three checks matches
 */
function isPathReferenced(
  store: Store,
  displayPath: string,
  text: string
): boolean {
  if (!text || !displayPath) return false;

  // 1. Full path match
  if (text.includes(displayPath)) return true;

  // Lower-case the haystack once; both remaining checks are case-insensitive.
  const haystack = text.toLowerCase();
  const segments = displayPath.split("/");

  // 2. Filename match (without extension, min 4 chars)
  const filename = segments[segments.length - 1]?.replace(/\.(md|txt)$/i, "");
  if (filename && filename.length > 3 && haystack.includes(filename.toLowerCase())) {
    return true;
  }

  // 3. Title match from DB — needs both a collection and a path component.
  if (segments.length < 2) return false;
  const collection = segments[0] ?? "";
  const docPath = segments.slice(1).join("/");
  try {
    const title = store.findActiveDocument(collection, docPath)?.title;
    return Boolean(title && title.length >= 5 && haystack.includes(title.toLowerCase()));
  } catch {
    // Lookup failure: report "not referenced" rather than propagate.
    return false;
  }
}
102
+
103
+ // =============================================================================
104
+ // Attribution Core
105
+ // =============================================================================
106
+
107
/**
 * Attribute recall events to specific turns using per-turn reference detection.
 *
 * For each context-surfacing usage row, the transcript turn at
 * `usage.turnIndex` is looked up and every path injected on that turn is
 * checked against that turn's assistant text only. For each referenced
 * document, the unreferenced `recall_events` rows linked to the usage row by
 * `usage_id` are set to `was_referenced = 1`. When no such linked rows exist,
 * rows matching session, document and `turn_index` are marked instead (legacy
 * events recorded without a `usage_id`).
 *
 * Usage rows are skipped when they belong to another hook, have no matching
 * turn or no assistant text, or carry an injected-path payload that is not a
 * JSON array of strings.
 *
 * @param store - Store instance for doc resolution and event marking
 * @param sessionId - Session identifier
 * @param usages - context_usage rows for this session
 * @param turns - Transcript turns, ordered by position
 */
export function attributeRecallReferences(
  store: Store,
  sessionId: string,
  usages: UsageRow[],
  turns: TranscriptTurn[]
): void {
  // Statements are prepared on first use and reused across all usages/docs,
  // so a call that marks nothing never touches recall_events at all.
  const selectLinked = prepareOnce(() =>
    store.db.prepare(`
      SELECT id FROM recall_events
      WHERE usage_id = ? AND doc_id = ? AND was_referenced = 0
    `)
  );
  const markById = prepareOnce(() =>
    store.db.prepare(`
      UPDATE recall_events SET was_referenced = 1
      WHERE id = ?
    `)
  );
  const markByTurn = prepareOnce(() =>
    store.db.prepare(`
      UPDATE recall_events SET was_referenced = 1
      WHERE id IN (
        SELECT id FROM recall_events
        WHERE session_id = ? AND doc_id = ? AND turn_index = ? AND was_referenced = 0
      )
    `)
  );

  for (const usage of usages) {
    // Only context-surfacing rows take part in recall attribution.
    if (usage.hookName !== "context-surfacing") continue;

    // Match usage to transcript turn by turn_index
    const turn = turns[usage.turnIndex];
    if (!turn || !turn.assistantText) continue;

    const injectedPaths = parseInjectedPaths(usage.injectedPaths);
    if (injectedPaths.length === 0) continue;

    // Docs from THIS turn that were cited in THIS turn's assistant text.
    // A Set keeps a path listed twice from being processed twice (the second
    // pass would find nothing linked and wrongly take the legacy fallback).
    const referencedDocIds = new Set<number>();
    for (const path of injectedPaths) {
      if (!isPathReferenced(store, path, turn.assistantText)) continue;

      // Split "collection/rest/of/path" at the first slash.
      const slash = path.indexOf("/");
      if (slash === -1) continue;
      const doc = store.findActiveDocument(path.slice(0, slash), path.slice(slash + 1));
      if (doc) referencedDocIds.add(doc.id);
    }

    for (const docId of referencedDocIds) {
      // Primary: usage_id-linked events (current schema)
      const linked = selectLinked().all(usage.id, docId) as { id: number }[];

      if (linked.length > 0) {
        for (const row of linked) markById().run(row.id);
      } else {
        // Fallback: pre-migration events without usage_id — match by turn_index
        markByTurn().run(sessionId, docId, usage.turnIndex);
      }
    }
  }
}

/** Decode a stored injected-paths payload; anything but a JSON array of strings yields []. */
function parseInjectedPaths(raw: string): string[] {
  try {
    const parsed: unknown = JSON.parse(raw);
    if (!Array.isArray(parsed)) return [];
    return parsed.filter((entry): entry is string => typeof entry === "string");
  } catch {
    return [];
  }
}

/** Wrap a factory so it runs at most once; later calls return the first result. */
function prepareOnce<T>(make: () => T): () => T {
  let slot: { value: T } | undefined;
  return () => {
    if (!slot) slot = { value: make() };
    return slot.value;
  };
}