clawmem 0.5.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +5 -4
- package/CLAUDE.md +5 -4
- package/README.md +16 -2
- package/SKILL.md +1 -1
- package/package.json +1 -1
- package/src/clawmem.ts +17 -0
- package/src/consolidation.ts +323 -1
- package/src/hooks/context-surfacing.ts +104 -13
- package/src/hooks/feedback-loop.ts +40 -0
- package/src/hooks/session-bootstrap.ts +20 -2
- package/src/hooks.ts +8 -3
- package/src/mcp.ts +32 -1
- package/src/memory.ts +5 -3
- package/src/recall-attribution.ts +182 -0
- package/src/recall-buffer.ts +85 -0
- package/src/store.ts +306 -13
|
@@ -30,6 +30,7 @@ import { enrichResults } from "../search-utils.ts";
|
|
|
30
30
|
import { sanitizeSnippet } from "../promptguard.ts";
|
|
31
31
|
import { shouldSkipRetrieval, isRetrievedNoise } from "../retrieval-gate.ts";
|
|
32
32
|
import { MAX_QUERY_LENGTH } from "../limits.ts";
|
|
33
|
+
import { writeRecallEvents, hashQuery } from "../recall-buffer.ts";
|
|
33
34
|
|
|
34
35
|
// =============================================================================
|
|
35
36
|
// Config
|
|
@@ -69,18 +70,44 @@ export async function contextSurfacing(
|
|
|
69
70
|
input: HookInput
|
|
70
71
|
): Promise<HookOutput> {
|
|
71
72
|
let prompt = input.prompt?.trim();
|
|
72
|
-
|
|
73
|
+
|
|
74
|
+
// Compute turn_index FIRST, before any early returns.
|
|
75
|
+
// Every transcript-visible early return must log an empty context_usage row
|
|
76
|
+
// to keep turn_index aligned with transcript turns for per-turn attribution.
|
|
77
|
+
if (input.sessionId) {
|
|
78
|
+
try {
|
|
79
|
+
let turnIndex = 0;
|
|
80
|
+
try {
|
|
81
|
+
const existing = store.db.prepare(
|
|
82
|
+
`SELECT COUNT(*) as cnt FROM context_usage WHERE session_id = ? AND hook_name = 'context-surfacing'`
|
|
83
|
+
).get(input.sessionId) as { cnt: number };
|
|
84
|
+
turnIndex = existing.cnt;
|
|
85
|
+
} catch { /* fallback to 0 */ }
|
|
86
|
+
(input as any)._turnIndex = turnIndex;
|
|
87
|
+
} catch { /* non-fatal */ }
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
if (!prompt || prompt.length < MIN_PROMPT_LENGTH) {
|
|
91
|
+
logEmptyTurn(store, input);
|
|
92
|
+
return makeEmptyOutput("context-surfacing");
|
|
93
|
+
}
|
|
73
94
|
|
|
74
95
|
// Bound query length to prevent DoS on search indices
|
|
75
96
|
if (prompt.length > MAX_QUERY_LENGTH) prompt = prompt.slice(0, MAX_QUERY_LENGTH);
|
|
76
97
|
|
|
77
|
-
// Skip slash commands
|
|
78
|
-
if (prompt.startsWith("/"))
|
|
98
|
+
// Skip slash commands — log empty turn for alignment
|
|
99
|
+
if (prompt.startsWith("/")) {
|
|
100
|
+
logEmptyTurn(store, input);
|
|
101
|
+
return makeEmptyOutput("context-surfacing");
|
|
102
|
+
}
|
|
79
103
|
|
|
80
104
|
// Adaptive retrieval gate: skip greetings, shell commands, affirmations, etc.
|
|
81
|
-
if (shouldSkipRetrieval(prompt))
|
|
105
|
+
if (shouldSkipRetrieval(prompt)) {
|
|
106
|
+
logEmptyTurn(store, input);
|
|
107
|
+
return makeEmptyOutput("context-surfacing");
|
|
108
|
+
}
|
|
82
109
|
|
|
83
|
-
// Heartbeat / duplicate suppression (IO4)
|
|
110
|
+
// Heartbeat / duplicate suppression (IO4) — NOT transcript-visible user turns
|
|
84
111
|
if (isHeartbeatPrompt(prompt)) return makeEmptyOutput("context-surfacing");
|
|
85
112
|
if (wasPromptSeenRecently(store, "context-surfacing", prompt)) {
|
|
86
113
|
return makeEmptyOutput("context-surfacing");
|
|
@@ -157,7 +184,7 @@ export async function contextSurfacing(
|
|
|
157
184
|
}
|
|
158
185
|
}
|
|
159
186
|
|
|
160
|
-
if (results.length === 0) return makeEmptyOutput("context-surfacing");
|
|
187
|
+
if (results.length === 0) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
|
|
161
188
|
|
|
162
189
|
// Budget-aware deep escalation (deep profile only):
|
|
163
190
|
// If the fast path finished quickly and found results, spend remaining time budget
|
|
@@ -215,7 +242,7 @@ export async function contextSurfacing(
|
|
|
215
242
|
!FILTERED_PATHS.some(p => r.displayPath.includes(p))
|
|
216
243
|
);
|
|
217
244
|
|
|
218
|
-
if (results.length === 0) return makeEmptyOutput("context-surfacing");
|
|
245
|
+
if (results.length === 0) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
|
|
219
246
|
|
|
220
247
|
// Filter out snoozed documents
|
|
221
248
|
const now = new Date();
|
|
@@ -231,7 +258,7 @@ export async function contextSurfacing(
|
|
|
231
258
|
return true;
|
|
232
259
|
});
|
|
233
260
|
|
|
234
|
-
if (results.length === 0) return makeEmptyOutput("context-surfacing");
|
|
261
|
+
if (results.length === 0) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
|
|
235
262
|
|
|
236
263
|
// Deduplicate by filepath (keep best score per path)
|
|
237
264
|
const deduped = new Map<string, SearchResult>();
|
|
@@ -273,7 +300,7 @@ export async function contextSurfacing(
|
|
|
273
300
|
: 0;
|
|
274
301
|
|
|
275
302
|
// Activation floor: if even the best result is too weak, bail entirely
|
|
276
|
-
if (bestScore < profile.activationFloor) return makeEmptyOutput("context-surfacing");
|
|
303
|
+
if (bestScore < profile.activationFloor) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
|
|
277
304
|
|
|
278
305
|
const adaptiveMin = Math.max(bestScore * profile.minScoreRatio, profile.absoluteFloor);
|
|
279
306
|
scored = allScored.filter(r => r.compositeScore >= adaptiveMin);
|
|
@@ -282,7 +309,7 @@ export async function contextSurfacing(
|
|
|
282
309
|
scored = allScored.filter(r => r.compositeScore >= minScore);
|
|
283
310
|
}
|
|
284
311
|
|
|
285
|
-
if (scored.length === 0) return makeEmptyOutput("context-surfacing");
|
|
312
|
+
if (scored.length === 0) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
|
|
286
313
|
|
|
287
314
|
// Spreading activation (E11): boost results co-activated with top HOT results
|
|
288
315
|
if (scored.length > 3) {
|
|
@@ -325,11 +352,62 @@ export async function contextSurfacing(
|
|
|
325
352
|
// Build context within token budget (profile-driven)
|
|
326
353
|
const { context, paths, tokens } = buildContext(scored, prompt, tokenBudget);
|
|
327
354
|
|
|
328
|
-
if (!context)
|
|
355
|
+
if (!context) {
|
|
356
|
+
logEmptyTurn(store, input);
|
|
357
|
+
return makeEmptyOutput("context-surfacing");
|
|
358
|
+
}
|
|
329
359
|
|
|
330
|
-
//
|
|
360
|
+
// Use pre-computed turn_index from top of function
|
|
331
361
|
if (input.sessionId) {
|
|
332
|
-
|
|
362
|
+
const turnIndex = (input as any)._turnIndex ?? 0;
|
|
363
|
+
|
|
364
|
+
// Log the injection — returns usage_id for recall event linkage
|
|
365
|
+
const usageId = logInjection(store, input.sessionId, "context-surfacing", paths, tokens, turnIndex);
|
|
366
|
+
|
|
367
|
+
// Record recall events ONLY for docs that made it into the injected context
|
|
368
|
+
// (post-budget). Docs trimmed by token budget were never seen by the model.
|
|
369
|
+
// Each event links to its context_usage row via usage_id + turn_index.
|
|
370
|
+
// Multi-vault: route docs to origin vault's store. Mirror context_usage there too.
|
|
371
|
+
try {
|
|
372
|
+
const qHash = hashQuery(prompt);
|
|
373
|
+
const injectedSet = new Set(paths);
|
|
374
|
+
const injectedScored = scored.filter(r => injectedSet.has(r.displayPath));
|
|
375
|
+
|
|
376
|
+
// Group by vault origin (undefined = general vault)
|
|
377
|
+
const byVault = new Map<string | undefined, typeof injectedScored>();
|
|
378
|
+
for (const r of injectedScored) {
|
|
379
|
+
const vault = (r as any)._fromVault as string | undefined;
|
|
380
|
+
let group = byVault.get(vault);
|
|
381
|
+
if (!group) { group = []; byVault.set(vault, group); }
|
|
382
|
+
group.push(r);
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
const validUsageId = usageId > 0 ? usageId : undefined;
|
|
386
|
+
for (const [vault, docs] of byVault) {
|
|
387
|
+
const mappedDocs = docs.map(r => ({ displayPath: r.displayPath, searchScore: r.compositeScore }));
|
|
388
|
+
if (!vault) {
|
|
389
|
+
writeRecallEvents(store, input.sessionId, qHash, mappedDocs, validUsageId, turnIndex);
|
|
390
|
+
} else {
|
|
391
|
+
try {
|
|
392
|
+
const vaultStore = resolveStore(vault);
|
|
393
|
+
// Mirror context_usage row into named vault for correct FK + attribution
|
|
394
|
+
const vaultPaths = docs.map(r => r.displayPath);
|
|
395
|
+
const vaultUsageId = vaultStore.insertUsage({
|
|
396
|
+
sessionId: input.sessionId,
|
|
397
|
+
timestamp: new Date().toISOString(),
|
|
398
|
+
hookName: "context-surfacing",
|
|
399
|
+
injectedPaths: vaultPaths,
|
|
400
|
+
estimatedTokens: 0,
|
|
401
|
+
wasReferenced: 0,
|
|
402
|
+
turnIndex,
|
|
403
|
+
});
|
|
404
|
+
writeRecallEvents(vaultStore, input.sessionId, qHash, mappedDocs, vaultUsageId > 0 ? vaultUsageId : undefined, turnIndex);
|
|
405
|
+
} catch { /* vault unavailable — skip */ }
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
} catch {
|
|
409
|
+
// Non-critical — don't block context surfacing on recall tracking errors
|
|
410
|
+
}
|
|
333
411
|
}
|
|
334
412
|
|
|
335
413
|
// Routing hint: detect query intent signals and prepend a tool routing directive
|
|
@@ -351,6 +429,19 @@ export async function contextSurfacing(
|
|
|
351
429
|
// Helpers
|
|
352
430
|
// =============================================================================
|
|
353
431
|
|
|
432
|
+
/**
 * Record a placeholder context_usage row for a prompt that surfaced no context.
 *
 * The row is logged under the "context-surfacing" hook with an empty path list
 * and zero tokens, using the turn index stashed on `input._turnIndex`
 * (0 when nothing was stashed). Writing a row even for skipped prompts keeps
 * turn_index values in step with transcript turns, so per-turn recall
 * attribution does not drift when some prompts are gated.
 *
 * Does nothing when `input` has no session id. Any error is swallowed:
 * usage logging must never break the hook.
 */
function logEmptyTurn(store: Store, input: HookInput): void {
  const { sessionId } = input;
  if (sessionId) {
    try {
      const stashedTurnIndex = (input as any)._turnIndex;
      logInjection(store, sessionId, "context-surfacing", [], 0, stashedTurnIndex ?? 0);
    } catch {
      /* non-fatal */
    }
  }
}
|
|
444
|
+
|
|
354
445
|
/**
|
|
355
446
|
* Detect causal/temporal/discovery signals in the prompt and return a
|
|
356
447
|
* routing hint that makes the correct tool choice salient at the moment
|
|
@@ -10,12 +10,18 @@
|
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
12
|
import type { Store } from "../store.ts";
|
|
13
|
+
import { resolveStore } from "../store.ts";
|
|
14
|
+
import { listVaults } from "../config.ts";
|
|
13
15
|
import type { HookInput, HookOutput } from "../hooks.ts";
|
|
14
16
|
import {
|
|
15
17
|
makeEmptyOutput,
|
|
16
18
|
readTranscript,
|
|
17
19
|
validateTranscriptPath,
|
|
18
20
|
} from "../hooks.ts";
|
|
21
|
+
import {
|
|
22
|
+
segmentTranscriptIntoTurns,
|
|
23
|
+
attributeRecallReferences,
|
|
24
|
+
} from "../recall-attribution.ts";
|
|
19
25
|
|
|
20
26
|
// =============================================================================
|
|
21
27
|
// Handler
|
|
@@ -129,6 +135,33 @@ export async function feedbackLoop(
|
|
|
129
135
|
// Non-critical — don't block feedback loop on utility tracking errors
|
|
130
136
|
}
|
|
131
137
|
|
|
138
|
+
// Recall tracking: per-turn attribution using transcript segmentation.
|
|
139
|
+
// Reads full transcript, segments into turns, zips with context_usage rows,
|
|
140
|
+
// checks references per-turn rather than session-globally.
|
|
141
|
+
try {
|
|
142
|
+
const allMessages = readTranscript(transcriptPath, 500);
|
|
143
|
+
const turns = segmentTranscriptIntoTurns(allMessages);
|
|
144
|
+
const usages = store.getUsageForSession(sessionId);
|
|
145
|
+
|
|
146
|
+
// General vault attribution
|
|
147
|
+
attributeRecallReferences(store, sessionId, usages, turns);
|
|
148
|
+
|
|
149
|
+
// Cross-vault: attribute recall events in any configured named vaults.
|
|
150
|
+
// Each vault has its own context_usage rows (mirrored during context-surfacing).
|
|
151
|
+
const vaultNames = listVaults();
|
|
152
|
+
for (const vaultName of vaultNames) {
|
|
153
|
+
try {
|
|
154
|
+
const vaultStore = resolveStore(vaultName);
|
|
155
|
+
const vaultUsages = vaultStore.getUsageForSession(sessionId);
|
|
156
|
+
if (vaultUsages.length > 0) {
|
|
157
|
+
attributeRecallReferences(vaultStore, sessionId, vaultUsages, turns);
|
|
158
|
+
}
|
|
159
|
+
} catch { /* vault unavailable — skip */ }
|
|
160
|
+
}
|
|
161
|
+
} catch {
|
|
162
|
+
// Non-critical — don't block feedback loop on recall tracking errors
|
|
163
|
+
}
|
|
164
|
+
|
|
132
165
|
// Silent return — feedback loop doesn't inject context
|
|
133
166
|
return makeEmptyOutput("feedback-loop");
|
|
134
167
|
}
|
|
@@ -195,6 +228,13 @@ function trackUtilitySignals(
|
|
|
195
228
|
// Reference Detection
|
|
196
229
|
// =============================================================================
|
|
197
230
|
|
|
231
|
+
// Recall attribution logic is in src/recall-attribution.ts
|
|
232
|
+
// (attributeRecallReferences, segmentTranscriptIntoTurns)
|
|
233
|
+
|
|
234
|
+
// =============================================================================
|
|
235
|
+
// Reference Detection
|
|
236
|
+
// =============================================================================
|
|
237
|
+
|
|
198
238
|
function checkTitleReference(store: Store, path: string, text: string): boolean {
|
|
199
239
|
try {
|
|
200
240
|
const parts = path.split("/");
|
|
@@ -260,10 +260,11 @@ function getCurrentFocus(
|
|
|
260
260
|
cutoff.setDate(cutoff.getDate() - DECISION_LOOKBACK_DAYS);
|
|
261
261
|
const cutoffStr = cutoff.toISOString();
|
|
262
262
|
|
|
263
|
-
// Gather recent decisions, preferences, and
|
|
263
|
+
// Gather recent decisions, preferences, active problems, and deductive insights
|
|
264
264
|
const decisions = store.getDocumentsByType("decision", 10);
|
|
265
265
|
const preferences = store.getDocumentsByType("preference", 5);
|
|
266
266
|
const problems = store.getDocumentsByType("problem", 5);
|
|
267
|
+
const deductions = store.getDocumentsByType("deductive", 5);
|
|
267
268
|
|
|
268
269
|
// Rank by: pinned first, then recency, then access_count
|
|
269
270
|
const now = Date.now();
|
|
@@ -285,7 +286,11 @@ function getCurrentFocus(
|
|
|
285
286
|
// Preferences are durable — no date filter, just rank
|
|
286
287
|
const rankedPrefs = [...preferences].sort((a, b) => rankDoc(b) - rankDoc(a));
|
|
287
288
|
|
|
288
|
-
|
|
289
|
+
const recentDeductions = deductions
|
|
290
|
+
.filter(d => d.modifiedAt >= cutoffStr)
|
|
291
|
+
.sort((a, b) => rankDoc(b) - rankDoc(a));
|
|
292
|
+
|
|
293
|
+
if (recentDecisions.length === 0 && rankedPrefs.length === 0 && activeProblems.length === 0 && recentDeductions.length === 0) {
|
|
289
294
|
return null;
|
|
290
295
|
}
|
|
291
296
|
|
|
@@ -338,6 +343,19 @@ function getCurrentFocus(
|
|
|
338
343
|
}
|
|
339
344
|
}
|
|
340
345
|
|
|
346
|
+
// Cross-session deductions (derived insights with source provenance)
|
|
347
|
+
if (recentDeductions.length > 0) {
|
|
348
|
+
lines.push("**Derived Insights:**");
|
|
349
|
+
charCount += 24;
|
|
350
|
+
for (const d of recentDeductions) {
|
|
351
|
+
if (charCount >= maxChars) break;
|
|
352
|
+
const entry = `- ${d.title} (${d.modifiedAt.slice(0, 10)})`;
|
|
353
|
+
lines.push(entry);
|
|
354
|
+
paths.push(`${d.collection}/${d.path}`);
|
|
355
|
+
charCount += entry.length + 2;
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
|
|
341
359
|
return lines.length > 1 ? { text: lines.join("\n"), paths } : null;
|
|
342
360
|
}
|
|
343
361
|
|
package/src/hooks.ts
CHANGED
|
@@ -385,23 +385,28 @@ export function logInjection(
|
|
|
385
385
|
sessionId: string,
|
|
386
386
|
hookName: string,
|
|
387
387
|
injectedPaths: string[],
|
|
388
|
-
estimatedTokens: number
|
|
389
|
-
|
|
388
|
+
estimatedTokens: number,
|
|
389
|
+
turnIndex?: number
|
|
390
|
+
): number {
|
|
390
391
|
try {
|
|
391
|
-
store.insertUsage({
|
|
392
|
+
const usageId = store.insertUsage({
|
|
392
393
|
sessionId,
|
|
393
394
|
timestamp: new Date().toISOString(),
|
|
394
395
|
hookName,
|
|
395
396
|
injectedPaths,
|
|
396
397
|
estimatedTokens,
|
|
397
398
|
wasReferenced: 0,
|
|
399
|
+
turnIndex,
|
|
398
400
|
});
|
|
399
401
|
|
|
400
402
|
// Record co-activation for all injected paths (E3)
|
|
401
403
|
if (injectedPaths.length >= 2) {
|
|
402
404
|
store.recordCoActivation(injectedPaths);
|
|
403
405
|
}
|
|
406
|
+
|
|
407
|
+
return usageId;
|
|
404
408
|
} catch {
|
|
405
409
|
// Non-fatal: don't crash hook if usage logging fails
|
|
410
|
+
return -1;
|
|
406
411
|
}
|
|
407
412
|
}
|
package/src/mcp.ts
CHANGED
|
@@ -2277,6 +2277,11 @@ This is the recommended entry point for ALL memory queries.`,
|
|
|
2277
2277
|
const config = loadConfig();
|
|
2278
2278
|
const policy = config.lifecycle;
|
|
2279
2279
|
|
|
2280
|
+
// Recall tracking summary
|
|
2281
|
+
const recallStats = store.getRecallStatsAll(1);
|
|
2282
|
+
const highDiversity = recallStats.filter(r => r.diversityScore >= 0.4 && r.spacingScore >= 0.5 && r.recallCount >= 3);
|
|
2283
|
+
const highNoise = recallStats.filter(r => r.recallCount >= 5 && r.negativeCount > r.recallCount * 0.8);
|
|
2284
|
+
|
|
2280
2285
|
const lines = [
|
|
2281
2286
|
`Active: ${stats.active}`,
|
|
2282
2287
|
`Archived (auto): ${stats.archived}`,
|
|
@@ -2286,6 +2291,10 @@ This is the recommended entry point for ALL memory queries.`,
|
|
|
2286
2291
|
`Never accessed: ${stats.neverAccessed}`,
|
|
2287
2292
|
`Oldest access: ${stats.oldestAccess?.slice(0, 10) || "n/a"}`,
|
|
2288
2293
|
"",
|
|
2294
|
+
`Recall tracking: ${recallStats.length} docs tracked`,
|
|
2295
|
+
` Pin candidates (high diversity+spacing): ${highDiversity.length}`,
|
|
2296
|
+
` Snooze candidates (surfaced often, rarely referenced): ${highNoise.length}`,
|
|
2297
|
+
"",
|
|
2289
2298
|
`Policy: ${policy ? `archive after ${policy.archive_after_days}d, purge after ${policy.purge_after_days ?? "never"}, dry_run=${policy.dry_run}` : "none configured"}`,
|
|
2290
2299
|
];
|
|
2291
2300
|
|
|
@@ -2322,7 +2331,29 @@ This is the recommended entry point for ALL memory queries.`,
|
|
|
2322
2331
|
const lines = candidates.map(c =>
|
|
2323
2332
|
`- ${c.collection}/${c.path} (${c.content_type}, modified ${c.modified_at.slice(0, 10)}, accessed ${c.last_accessed_at?.slice(0, 10) || "never"})`
|
|
2324
2333
|
);
|
|
2325
|
-
|
|
2334
|
+
|
|
2335
|
+
// Recall-based recommendations
|
|
2336
|
+
const recallStats = store.getRecallStatsAll(3);
|
|
2337
|
+
const pinCandidates = recallStats.filter(r => r.diversityScore >= 0.4 && r.spacingScore >= 0.5 && r.recallCount >= 3);
|
|
2338
|
+
const snoozeCandidates = recallStats.filter(r => r.recallCount >= 5 && r.negativeCount > r.recallCount * 0.8);
|
|
2339
|
+
|
|
2340
|
+
const recallLines: string[] = [];
|
|
2341
|
+
if (pinCandidates.length > 0) {
|
|
2342
|
+
recallLines.push("", "Pin candidates (high diversity, multi-day spread, recall≥3):");
|
|
2343
|
+
for (const r of pinCandidates.slice(0, 5)) {
|
|
2344
|
+
const label = r.collection && r.path ? `${r.collection}/${r.path}` : `doc#${r.docId}`;
|
|
2345
|
+
recallLines.push(` - ${label} (recalls=${r.recallCount}, queries=${r.uniqueQueries}, days=${r.recallDays}, diversity=${r.diversityScore.toFixed(2)}, spacing=${r.spacingScore.toFixed(2)})`);
|
|
2346
|
+
}
|
|
2347
|
+
}
|
|
2348
|
+
if (snoozeCandidates.length > 0) {
|
|
2349
|
+
recallLines.push("", "Snooze candidates (surfaced often, rarely referenced):");
|
|
2350
|
+
for (const r of snoozeCandidates.slice(0, 5)) {
|
|
2351
|
+
const label = r.collection && r.path ? `${r.collection}/${r.path}` : `doc#${r.docId}`;
|
|
2352
|
+
recallLines.push(` - ${label} (recalls=${r.recallCount}, referenced=${r.recallCount - r.negativeCount}, noise_ratio=${(r.negativeCount / r.recallCount * 100).toFixed(0)}%)`);
|
|
2353
|
+
}
|
|
2354
|
+
}
|
|
2355
|
+
|
|
2356
|
+
return { content: [{ type: "text", text: `Would archive ${candidates.length} document(s):\n${lines.join("\n") || "(none)"}${recallLines.join("\n")}` }] };
|
|
2326
2357
|
}
|
|
2327
2358
|
|
|
2328
2359
|
const archived = store.archiveDocuments(candidates.map(c => c.id));
|
package/src/memory.ts
CHANGED
|
@@ -20,6 +20,7 @@ export const HALF_LIVES: Record<string, number> = {
|
|
|
20
20
|
project: 120,
|
|
21
21
|
preference: Infinity,
|
|
22
22
|
decision: Infinity,
|
|
23
|
+
deductive: Infinity,
|
|
23
24
|
hub: Infinity,
|
|
24
25
|
};
|
|
25
26
|
|
|
@@ -29,6 +30,7 @@ export const HALF_LIVES: Record<string, number> = {
|
|
|
29
30
|
|
|
30
31
|
export const TYPE_BASELINES: Record<string, number> = {
|
|
31
32
|
decision: 0.85,
|
|
33
|
+
deductive: 0.85,
|
|
32
34
|
preference: 0.80,
|
|
33
35
|
hub: 0.80,
|
|
34
36
|
problem: 0.75,
|
|
@@ -45,7 +47,7 @@ export const TYPE_BASELINES: Record<string, number> = {
|
|
|
45
47
|
// Content Type Inference
|
|
46
48
|
// =============================================================================
|
|
47
49
|
|
|
48
|
-
export type ContentType = "decision" | "preference" | "hub" | "research" | "project" | "handoff" | "conversation" | "progress" | "milestone" | "problem" | "note";
|
|
50
|
+
export type ContentType = "decision" | "deductive" | "preference" | "hub" | "research" | "project" | "handoff" | "conversation" | "progress" | "milestone" | "problem" | "note";
|
|
49
51
|
|
|
50
52
|
export function inferContentType(path: string, explicitType?: string): ContentType {
|
|
51
53
|
if (explicitType && explicitType in TYPE_BASELINES) return explicitType as ContentType;
|
|
@@ -75,7 +77,7 @@ export type MemoryType = "episodic" | "semantic" | "procedural";
|
|
|
75
77
|
*/
|
|
76
78
|
export function inferMemoryType(path: string, contentType: string, body?: string): MemoryType {
|
|
77
79
|
if (["handoff", "progress", "conversation"].includes(contentType)) return "episodic";
|
|
78
|
-
if (["decision", "hub", "research"].includes(contentType)) return "semantic";
|
|
80
|
+
if (["decision", "deductive", "hub", "research"].includes(contentType)) return "semantic";
|
|
79
81
|
if (body && /\b(step\s+\d|workflow|recipe|how\s+to|procedure|runbook|playbook)\b/i.test(body)) return "procedural";
|
|
80
82
|
if (path.includes("sop") || path.includes("runbook") || path.includes("playbook")) return "procedural";
|
|
81
83
|
if (contentType === "antipattern") return "semantic";
|
|
@@ -150,7 +152,7 @@ export function confidenceScore(
|
|
|
150
152
|
// Attention decay: reduce confidence if not accessed recently (5% per week)
|
|
151
153
|
// Only apply to episodic/progress content — skip for durable types (see DECAY_EXEMPT_TYPES below)
|
|
152
154
|
// Also skip if last_accessed_at was backfilled from modified_at (no real access yet)
|
|
153
|
-
const DECAY_EXEMPT_TYPES = new Set(["decision", "hub", "research", "antipattern", "preference"]);
|
|
155
|
+
const DECAY_EXEMPT_TYPES = new Set(["decision", "deductive", "hub", "research", "antipattern", "preference"]);
|
|
154
156
|
let attentionDecay = 1.0;
|
|
155
157
|
if (lastAccessedAt && !DECAY_EXEMPT_TYPES.has(contentType)) {
|
|
156
158
|
const lastAccess = typeof lastAccessedAt === "string" ? new Date(lastAccessedAt) : lastAccessedAt;
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recall Attribution — per-turn reference detection for recall tracking.
|
|
3
|
+
*
|
|
4
|
+
* Extracted into a standalone module for testability (per GPT 5.4 High review turn 4).
|
|
5
|
+
*
|
|
6
|
+
* Architecture:
|
|
7
|
+
* 1. Segment the transcript into ordered turns (user → assistant pairs)
|
|
8
|
+
* 2. Zip context_usage rows (by turn_index) with transcript turns (by position)
|
|
9
|
+
* 3. For each pair, detect references in that turn's assistant text only
|
|
10
|
+
* 4. Mark recall_events linked to the usage rows whose turn actually cited the doc
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import type { Store, UsageRow } from "./store.ts";
|
|
14
|
+
|
|
15
|
+
// =============================================================================
|
|
16
|
+
// Types
|
|
17
|
+
// =============================================================================
|
|
18
|
+
|
|
19
|
+
/** One conversational turn: a user message plus the assistant output that followed it. */
export type TranscriptTurn = {
  /** Content of the user message that opened the turn. */
  userText: string;
  /** Assistant message content belonging to the turn. */
  assistantText: string;
};
|
|
23
|
+
|
|
24
|
+
// =============================================================================
|
|
25
|
+
// Transcript Segmentation
|
|
26
|
+
// =============================================================================
|
|
27
|
+
|
|
28
|
+
/**
 * Segment a flat, ordered message array into conversational turns.
 *
 * Every "user" message opens a new turn. The contents of all "assistant"
 * messages up to the next "user" message are newline-joined into that turn's
 * `assistantText`. Messages with any other role (system, tool, ...) are
 * ignored. Assistant text that appears before the first "user" message is
 * emitted as a leading turn whose `userText` is "".
 *
 * A turn opened by a user message is always emitted, even when the user
 * content is empty and no assistant text followed. Callers look turns up by
 * position, so dropping such a turn would shift every later turn by one.
 *
 * @param messages - Ordered array of {role, content} from transcript JSONL
 * @returns Turns in transcript order
 */
export function segmentTranscriptIntoTurns(
  messages: { role: string; content: string }[]
): TranscriptTurn[] {
  const turns: TranscriptTurn[] = [];
  let userText = "";
  let assistantText = "";
  // True once a "user" message has opened the turn being accumulated.
  let openedByUser = false;

  // Emit the accumulated turn if a user message opened it, or if it holds
  // orphan assistant text that preceded the first user message.
  const flush = (): void => {
    if (openedByUser || assistantText) {
      turns.push({ userText, assistantText });
    }
  };

  for (const msg of messages) {
    if (msg.role === "user") {
      flush();
      userText = msg.content;
      assistantText = "";
      openedByUser = true;
    } else if (msg.role === "assistant") {
      assistantText += (assistantText ? "\n" : "") + msg.content;
    }
    // Other roles (system/tool) carry no attribution signal and are skipped.
  }

  // Flush the final, still-open turn.
  flush();

  return turns;
}
|
|
64
|
+
|
|
65
|
+
// =============================================================================
|
|
66
|
+
// Per-Turn Reference Detection
|
|
67
|
+
// =============================================================================
|
|
68
|
+
|
|
69
|
+
/**
 * Check whether a displayPath ("collection/path") is referenced in `text`.
 *
 * Checks run in this order and stop at the first hit:
 *  1. the full displayPath appears verbatim (case-sensitive);
 *  2. the last path segment, with a trailing `.md`/`.txt` removed, is longer
 *     than 3 characters and appears in the text (case-insensitive);
 *  3. the title of the active document at that collection/path is at least
 *     5 characters long and appears in the text (case-insensitive).
 *
 * @param store - Used only for the title lookup via `findActiveDocument`
 * @param displayPath - Document path in "collection/path" form
 * @param text - Text to search
 * @returns true if any check matches; false for empty `text` or `displayPath`
 */
function isPathReferenced(
  store: Store,
  displayPath: string,
  text: string
): boolean {
  if (!text || !displayPath) return false;

  // Full path match
  if (text.includes(displayPath)) return true;

  // Lowercase the text and split the path once; both later checks reuse them.
  const haystack = text.toLowerCase();
  const segments = displayPath.split("/");

  // Filename match (without extension, min 4 chars)
  const filename = segments[segments.length - 1]?.replace(/\.(md|txt)$/i, "");
  if (filename && filename.length > 3 && haystack.includes(filename.toLowerCase())) {
    return true;
  }

  // Title match from DB — needs both a collection and a path component
  if (segments.length >= 2) {
    const collection = segments[0]!;
    const docPath = segments.slice(1).join("/");
    const doc = store.findActiveDocument(collection, docPath);
    if (doc?.title && doc.title.length >= 5 && haystack.includes(doc.title.toLowerCase())) {
      return true;
    }
  }

  return false;
}
|
|
102
|
+
|
|
103
|
+
// =============================================================================
|
|
104
|
+
// Attribution Core
|
|
105
|
+
// =============================================================================
|
|
106
|
+
|
|
107
|
+
/**
 * Attribute recall events to specific turns using per-turn reference detection.
 *
 * For each "context-surfacing" usage row, looks up the transcript turn at
 * `usage.turnIndex`, determines which of that row's injected docs are
 * referenced in that turn's assistant text, and sets `was_referenced = 1` on
 * the matching recall_events rows. Events linked through `usage_id` are
 * preferred; when none are found, events are matched by
 * session_id + doc_id + turn_index instead.
 *
 * Usage rows are skipped when they have no matching turn, the turn has no
 * assistant text, or `injectedPaths` is not a JSON array containing at least
 * one string. Errors from store or database calls are not caught here.
 *
 * @param store - Store instance for doc resolution and event marking
 * @param sessionId - Session identifier
 * @param usages - context_usage rows for this session
 * @param turns - Transcript turns, ordered by position
 */
export function attributeRecallReferences(
  store: Store,
  sessionId: string,
  usages: UsageRow[],
  turns: TranscriptTurn[]
): void {
  for (const usage of usages) {
    // Only context-surfacing rows take part in recall attribution.
    if (usage.hookName !== "context-surfacing") continue;

    // Match usage to transcript turn by turn_index
    const turn = turns[usage.turnIndex];
    if (!turn || !turn.assistantText) continue;

    // Parse injected paths for this turn. The stored value is validated rather
    // than trusted: valid JSON that is not an array (e.g. `null`) or that has
    // non-string entries must not abort attribution for the remaining rows.
    let parsed: unknown;
    try {
      parsed = JSON.parse(usage.injectedPaths);
    } catch {
      continue;
    }
    if (!Array.isArray(parsed)) continue;
    const injectedPaths = parsed.filter((p): p is string => typeof p === "string");
    if (injectedPaths.length === 0) continue;

    // Check which docs from THIS turn were referenced in THIS turn's assistant
    // text. A Set keeps each doc to one pass, so a repeated path cannot fall
    // through to the turn_index fallback after its linked events were marked.
    const referencedDocIds = new Set<number>();
    for (const path of injectedPaths) {
      if (!isPathReferenced(store, path, turn.assistantText)) continue;

      const parts = path.split("/");
      if (parts.length < 2) continue;
      const collection = parts[0]!;
      const docPath = parts.slice(1).join("/");
      const doc = store.findActiveDocument(collection, docPath);
      if (doc) referencedDocIds.add(doc.id);
    }

    if (referencedDocIds.size === 0) continue;

    // Mark only recall events linked to THIS usage row
    for (const docId of referencedDocIds) {
      // Primary: usage_id-linked events (current schema)
      const linked = store.db.prepare(`
        SELECT id FROM recall_events
        WHERE usage_id = ? AND doc_id = ? AND was_referenced = 0
      `).all(usage.id, docId) as { id: number }[];

      if (linked.length > 0) {
        const ids = linked.map(r => r.id);
        const placeholders = ids.map(() => "?").join(",");
        store.db.prepare(`
          UPDATE recall_events SET was_referenced = 1
          WHERE id IN (${placeholders})
        `).run(...ids);
      } else {
        // Fallback: pre-migration events without usage_id — match by turn_index
        store.db.prepare(`
          UPDATE recall_events SET was_referenced = 1
          WHERE id IN (
            SELECT id FROM recall_events
            WHERE session_id = ? AND doc_id = ? AND turn_index = ? AND was_referenced = 0
          )
        `).run(sessionId, docId, usage.turnIndex);
      }
    }
  }
}
|