context-vault 3.18.0 → 3.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +673 -4
- package/dist/register-tools.d.ts.map +1 -1
- package/dist/register-tools.js +0 -2
- package/dist/register-tools.js.map +1 -1
- package/dist/server.js +78 -1
- package/dist/server.js.map +1 -1
- package/dist/tools/recall.d.ts +1 -1
- package/dist/tools/recall.d.ts.map +1 -1
- package/dist/tools/recall.js +50 -100
- package/dist/tools/recall.js.map +1 -1
- package/node_modules/@context-vault/core/dist/assemble.d.ts +22 -0
- package/node_modules/@context-vault/core/dist/assemble.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/assemble.js +143 -0
- package/node_modules/@context-vault/core/dist/assemble.js.map +1 -0
- package/node_modules/@context-vault/core/dist/capture.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/capture.js +10 -5
- package/node_modules/@context-vault/core/dist/capture.js.map +1 -1
- package/node_modules/@context-vault/core/dist/consolidation.d.ts +40 -0
- package/node_modules/@context-vault/core/dist/consolidation.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/consolidation.js +229 -0
- package/node_modules/@context-vault/core/dist/consolidation.js.map +1 -0
- package/node_modules/@context-vault/core/dist/db.d.ts +25 -1
- package/node_modules/@context-vault/core/dist/db.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/db.js +92 -4
- package/node_modules/@context-vault/core/dist/db.js.map +1 -1
- package/node_modules/@context-vault/core/dist/frontmatter.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/frontmatter.js +26 -3
- package/node_modules/@context-vault/core/dist/frontmatter.js.map +1 -1
- package/node_modules/@context-vault/core/dist/index.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/index.js +225 -184
- package/node_modules/@context-vault/core/dist/index.js.map +1 -1
- package/node_modules/@context-vault/core/dist/main.d.ts +3 -0
- package/node_modules/@context-vault/core/dist/main.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/main.js +4 -0
- package/node_modules/@context-vault/core/dist/main.js.map +1 -1
- package/node_modules/@context-vault/core/dist/search.d.ts +6 -0
- package/node_modules/@context-vault/core/dist/search.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/search.js +106 -5
- package/node_modules/@context-vault/core/dist/search.js.map +1 -1
- package/node_modules/@context-vault/core/dist/search.test.d.ts +2 -0
- package/node_modules/@context-vault/core/dist/search.test.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/search.test.js +49 -0
- package/node_modules/@context-vault/core/dist/search.test.js.map +1 -0
- package/node_modules/@context-vault/core/dist/summarize.d.ts +5 -0
- package/node_modules/@context-vault/core/dist/summarize.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/summarize.js +146 -0
- package/node_modules/@context-vault/core/dist/summarize.js.map +1 -0
- package/node_modules/@context-vault/core/dist/types.d.ts +2 -0
- package/node_modules/@context-vault/core/dist/types.d.ts.map +1 -1
- package/node_modules/@context-vault/core/package.json +13 -1
- package/node_modules/@context-vault/core/src/assemble.ts +187 -0
- package/node_modules/@context-vault/core/src/capture.ts +10 -5
- package/node_modules/@context-vault/core/src/consolidation.ts +356 -0
- package/node_modules/@context-vault/core/src/db.ts +95 -4
- package/node_modules/@context-vault/core/src/frontmatter.ts +25 -4
- package/node_modules/@context-vault/core/src/index.ts +127 -88
- package/node_modules/@context-vault/core/src/main.ts +7 -0
- package/node_modules/@context-vault/core/src/search.test.ts +59 -0
- package/node_modules/@context-vault/core/src/search.ts +112 -5
- package/node_modules/@context-vault/core/src/summarize.ts +157 -0
- package/node_modules/@context-vault/core/src/types.ts +2 -0
- package/package.json +2 -2
- package/scripts/validate-epipe-shutdown.mjs +183 -0
- package/scripts/validate-sqlite-busy-retry.mjs +243 -0
- package/src/register-tools.ts +0 -2
- package/src/server.ts +76 -1
- package/src/tools/recall.ts +51 -110
- package/.claude-plugin/README.md +0 -219
- package/.claude-plugin/plugin.json +0 -11
- package/commands/vault-cleanup.md +0 -43
- package/commands/vault-snapshot.md +0 -43
- package/commands/vault-status.md +0 -35
- package/dist/tools/session-start.d.ts +0 -25
- package/dist/tools/session-start.d.ts.map +0 -1
- package/dist/tools/session-start.js +0 -469
- package/dist/tools/session-start.js.map +0 -1
- package/skills/context-assembly/SKILL.md +0 -308
- package/skills/knowledge-capture/SKILL.md +0 -303
- package/skills/memory-management/SKILL.md +0 -237
- package/src/tools/session-start.ts +0 -527
|
@@ -23,12 +23,34 @@ export function parseFrontmatter(text: string): {
|
|
|
23
23
|
const match = normalized.match(/^---\n([\s\S]*?)\n---\n?([\s\S]*)$/);
|
|
24
24
|
if (!match) return { meta: {}, body: normalized.trim() };
|
|
25
25
|
const meta: Record<string, unknown> = {};
|
|
26
|
-
|
|
26
|
+
const lines = match[1].split('\n');
|
|
27
|
+
|
|
28
|
+
for (let i = 0; i < lines.length; i++) {
|
|
29
|
+
const line = lines[i];
|
|
27
30
|
const idx = line.indexOf(':');
|
|
28
31
|
if (idx === -1) continue;
|
|
29
32
|
const key = line.slice(0, idx).trim();
|
|
30
33
|
let val: unknown = line.slice(idx + 1).trim() as string;
|
|
31
|
-
|
|
34
|
+
|
|
35
|
+
// Handle YAML multiline list format (tags: followed by - items)
|
|
36
|
+
if (typeof val === 'string' && val === '' && key === 'tags') {
|
|
37
|
+
const items: string[] = [];
|
|
38
|
+
let nextIdx = i + 1;
|
|
39
|
+
while (nextIdx < lines.length) {
|
|
40
|
+
const nextLine = lines[nextIdx];
|
|
41
|
+
// Match lines that start with optional whitespace + dash + space
|
|
42
|
+
const dashMatch = nextLine.match(/^\s*-\s+(.+)$/);
|
|
43
|
+
if (!dashMatch) break;
|
|
44
|
+
items.push(dashMatch[1].trim().replace(/^"|"$/g, '').replace(/^'|'$/g, ''));
|
|
45
|
+
nextIdx++;
|
|
46
|
+
}
|
|
47
|
+
if (items.length > 0) {
|
|
48
|
+
val = items;
|
|
49
|
+
i = nextIdx - 1; // Skip processed lines
|
|
50
|
+
} else {
|
|
51
|
+
val = '';
|
|
52
|
+
}
|
|
53
|
+
} else if (
|
|
32
54
|
typeof val === 'string' &&
|
|
33
55
|
val.length >= 2 &&
|
|
34
56
|
val.startsWith('"') &&
|
|
@@ -40,8 +62,7 @@ export function parseFrontmatter(text: string): {
|
|
|
40
62
|
} catch {
|
|
41
63
|
/* keep as-is */
|
|
42
64
|
}
|
|
43
|
-
}
|
|
44
|
-
if (typeof val === 'string' && val.startsWith('[') && val.endsWith(']')) {
|
|
65
|
+
} else if (typeof val === 'string' && val.startsWith('[') && val.endsWith(']')) {
|
|
45
66
|
try {
|
|
46
67
|
val = JSON.parse(val);
|
|
47
68
|
} catch {
|
|
@@ -7,6 +7,9 @@ import { embedBatch } from './embed.js';
|
|
|
7
7
|
import type { BaseCtx, IndexEntryInput, IndexingConfig, ReindexStats } from './types.js';
|
|
8
8
|
import { shouldIndex } from './indexing.js';
|
|
9
9
|
import { DEFAULT_INDEXING } from './constants.js';
|
|
10
|
+
import { generateSummaryTiers } from './summarize.js';
|
|
11
|
+
import { buildEmbeddingText } from './search.js';
|
|
12
|
+
import { withBusyRetry, isBusyError } from './db.js';
|
|
10
13
|
|
|
11
14
|
const EXCLUDED_DIRS = new Set(['projects', '_archive']);
|
|
12
15
|
const EXCLUDED_FILES = new Set(['context.md', 'memory.md', 'README.md']);
|
|
@@ -46,95 +49,117 @@ export async function indexEntry(
|
|
|
46
49
|
const cat = category || categoryFor(kind);
|
|
47
50
|
const effectiveTier = tier || defaultTierFor(kind);
|
|
48
51
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
if (cat === 'entity' && identity_key) {
|
|
52
|
-
const existing = ctx.stmts.getByIdentityKey.get(kind, identity_key) as
|
|
53
|
-
| Record<string, unknown>
|
|
54
|
-
| undefined;
|
|
55
|
-
if (existing) {
|
|
56
|
-
ctx.stmts.upsertByIdentityKey.run(
|
|
57
|
-
title || null,
|
|
58
|
-
body,
|
|
59
|
-
metaJson,
|
|
60
|
-
tagsJson,
|
|
61
|
-
source || 'claude-code',
|
|
62
|
-
cat,
|
|
63
|
-
filePath,
|
|
64
|
-
expires_at || null,
|
|
65
|
-
sourceFilesJson,
|
|
66
|
-
kind,
|
|
67
|
-
identity_key
|
|
68
|
-
);
|
|
69
|
-
wasUpdate = true;
|
|
70
|
-
}
|
|
71
|
-
}
|
|
52
|
+
const rowid = await withBusyRetry(() => {
|
|
53
|
+
let wasUpdate = false;
|
|
72
54
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
title || null,
|
|
80
|
-
body,
|
|
81
|
-
metaJson,
|
|
82
|
-
tagsJson,
|
|
83
|
-
source || 'claude-code',
|
|
84
|
-
filePath,
|
|
85
|
-
identity_key || null,
|
|
86
|
-
expires_at || null,
|
|
87
|
-
createdAt,
|
|
88
|
-
createdAt,
|
|
89
|
-
sourceFilesJson,
|
|
90
|
-
effectiveTier,
|
|
91
|
-
indexed ? 1 : 0
|
|
92
|
-
);
|
|
93
|
-
} catch (e) {
|
|
94
|
-
if ((e as Error).message.includes('UNIQUE constraint')) {
|
|
95
|
-
ctx.stmts.updateEntry.run(
|
|
55
|
+
if (cat === 'entity' && identity_key) {
|
|
56
|
+
const existing = ctx.stmts.getByIdentityKey.get(kind, identity_key) as
|
|
57
|
+
| Record<string, unknown>
|
|
58
|
+
| undefined;
|
|
59
|
+
if (existing) {
|
|
60
|
+
ctx.stmts.upsertByIdentityKey.run(
|
|
96
61
|
title || null,
|
|
97
62
|
body,
|
|
98
63
|
metaJson,
|
|
99
64
|
tagsJson,
|
|
100
65
|
source || 'claude-code',
|
|
101
66
|
cat,
|
|
67
|
+
filePath,
|
|
68
|
+
expires_at || null,
|
|
69
|
+
sourceFilesJson,
|
|
70
|
+
kind,
|
|
71
|
+
identity_key
|
|
72
|
+
);
|
|
73
|
+
wasUpdate = true;
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
if (!wasUpdate) {
|
|
78
|
+
try {
|
|
79
|
+
ctx.stmts.insertEntry.run(
|
|
80
|
+
id,
|
|
81
|
+
kind,
|
|
82
|
+
cat,
|
|
83
|
+
title || null,
|
|
84
|
+
body,
|
|
85
|
+
metaJson,
|
|
86
|
+
tagsJson,
|
|
87
|
+
source || 'claude-code',
|
|
88
|
+
filePath,
|
|
102
89
|
identity_key || null,
|
|
103
90
|
expires_at || null,
|
|
104
|
-
|
|
91
|
+
createdAt,
|
|
92
|
+
createdAt,
|
|
93
|
+
sourceFilesJson,
|
|
94
|
+
effectiveTier,
|
|
95
|
+
indexed ? 1 : 0
|
|
105
96
|
);
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
97
|
+
} catch (e) {
|
|
98
|
+
if (isBusyError(e)) throw e;
|
|
99
|
+
if ((e as Error).message.includes('UNIQUE constraint')) {
|
|
100
|
+
ctx.stmts.updateEntry.run(
|
|
101
|
+
title || null,
|
|
102
|
+
body,
|
|
103
|
+
metaJson,
|
|
104
|
+
tagsJson,
|
|
105
|
+
source || 'claude-code',
|
|
106
|
+
cat,
|
|
107
|
+
identity_key || null,
|
|
108
|
+
expires_at || null,
|
|
109
|
+
filePath
|
|
110
|
+
);
|
|
111
|
+
if (sourceFilesJson !== null && ctx.stmts.updateSourceFiles) {
|
|
112
|
+
const entryRow = ctx.stmts.getRowidByPath.get(filePath) as { rowid: number } | undefined;
|
|
113
|
+
if (entryRow) {
|
|
114
|
+
const idRow = ctx.db
|
|
115
|
+
.prepare('SELECT id FROM vault WHERE file_path = ?')
|
|
116
|
+
.get(filePath) as { id: string } | undefined;
|
|
117
|
+
if (idRow) ctx.stmts.updateSourceFiles.run(sourceFilesJson, idRow.id);
|
|
118
|
+
}
|
|
113
119
|
}
|
|
120
|
+
wasUpdate = true;
|
|
121
|
+
} else {
|
|
122
|
+
throw e;
|
|
114
123
|
}
|
|
115
|
-
wasUpdate = true;
|
|
116
|
-
} else {
|
|
117
|
-
throw e;
|
|
118
124
|
}
|
|
119
125
|
}
|
|
120
|
-
}
|
|
121
126
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
127
|
+
const rowidResult = wasUpdate
|
|
128
|
+
? (ctx.stmts.getRowidByPath.get(filePath) as { rowid: number } | undefined)
|
|
129
|
+
: (ctx.stmts.getRowid.get(id) as { rowid: number } | undefined);
|
|
125
130
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
+
if (!rowidResult || rowidResult.rowid == null) {
|
|
132
|
+
throw new Error(
|
|
133
|
+
`Could not find rowid for entry: ${wasUpdate ? `file_path=${filePath}` : `id=${id}`}`
|
|
134
|
+
);
|
|
135
|
+
}
|
|
131
136
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
137
|
+
const rowidNum = Number(rowidResult.rowid);
|
|
138
|
+
if (!Number.isFinite(rowidNum) || rowidNum < 1) {
|
|
139
|
+
throw new Error(
|
|
140
|
+
`Invalid rowid retrieved: ${rowidResult.rowid} (type: ${typeof rowidResult.rowid})`
|
|
141
|
+
);
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
// Generate and store precomputed summary tiers
|
|
145
|
+
try {
|
|
146
|
+
const { condensed, keypoint } = generateSummaryTiers(body);
|
|
147
|
+
ctx.db
|
|
148
|
+
.prepare('UPDATE vault SET summary_condensed = ?, summary_keypoint = ? WHERE id = ?')
|
|
149
|
+
.run(condensed || null, keypoint || null, id);
|
|
150
|
+
} catch (sumErr) {
|
|
151
|
+
if (isBusyError(sumErr)) throw sumErr;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
return rowidNum;
|
|
155
|
+
});
|
|
156
|
+
|
|
157
|
+
// Observability: log the INSERT outcome so silent-wedge cases surface in error.log.
|
|
158
|
+
// Added 2026-04-19 after 44h of save_context returning ✓ without persisting (runtime/
|
|
159
|
+
// subprocess-state issue, see specs/reindex-and-save-pipeline-bugs.md).
|
|
160
|
+
console.warn(
|
|
161
|
+
`[context-vault] indexEntry: id=${id} rowid=${rowid} cat=${cat} indexed=${indexed ? 1 : 0}`
|
|
162
|
+
);
|
|
138
163
|
|
|
139
164
|
if (indexed && cat !== 'event') {
|
|
140
165
|
let embedding: Float32Array | null = null;
|
|
@@ -142,7 +167,7 @@ export async function indexEntry(
|
|
|
142
167
|
embedding = precomputedEmbedding;
|
|
143
168
|
} else {
|
|
144
169
|
try {
|
|
145
|
-
embedding = await ctx.embed(
|
|
170
|
+
embedding = await ctx.embed(buildEmbeddingText(title, body, tagsJson, kind));
|
|
146
171
|
} catch (embedErr) {
|
|
147
172
|
console.warn(
|
|
148
173
|
`[context-vault] embed() failed for entry ${id} — skipping vec insert: ${(embedErr as Error).message}`
|
|
@@ -151,12 +176,15 @@ export async function indexEntry(
|
|
|
151
176
|
}
|
|
152
177
|
|
|
153
178
|
if (embedding) {
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
179
|
+
await withBusyRetry(() => {
|
|
180
|
+
try {
|
|
181
|
+
ctx.deleteVec(rowid);
|
|
182
|
+
} catch (delErr) {
|
|
183
|
+
if (isBusyError(delErr)) throw delErr;
|
|
184
|
+
/* no-op */
|
|
185
|
+
}
|
|
186
|
+
ctx.insertVec(rowid, embedding);
|
|
187
|
+
});
|
|
160
188
|
}
|
|
161
189
|
}
|
|
162
190
|
}
|
|
@@ -261,6 +289,7 @@ export async function reindex(
|
|
|
261
289
|
return stats;
|
|
262
290
|
}
|
|
263
291
|
|
|
292
|
+
await withBusyRetry(() => {
|
|
264
293
|
ctx.db.exec('BEGIN');
|
|
265
294
|
try {
|
|
266
295
|
for (const { kind, dir } of filteredKindEntries) {
|
|
@@ -336,10 +365,9 @@ export async function reindex(
|
|
|
336
365
|
if (entryIndexed && category !== 'event') {
|
|
337
366
|
const rowidResult = ctx.stmts.getRowid.get(id) as { rowid: number } | undefined;
|
|
338
367
|
if (rowidResult?.rowid) {
|
|
339
|
-
const embeddingText = [parsed.title, parsed.body].filter(Boolean).join(' ');
|
|
340
368
|
pendingEmbeds.push({
|
|
341
369
|
rowid: rowidResult.rowid,
|
|
342
|
-
text:
|
|
370
|
+
text: buildEmbeddingText(parsed.title, parsed.body, tagsJson, kind),
|
|
343
371
|
});
|
|
344
372
|
}
|
|
345
373
|
}
|
|
@@ -384,13 +412,12 @@ export async function reindex(
|
|
|
384
412
|
try { ctx.deleteVec(rowid); stats.embeddingsCleared!++; } catch {}
|
|
385
413
|
}
|
|
386
414
|
stats.skippedIndexing!++;
|
|
387
|
-
} else if ((bodyChanged || titleChanged) && category !== 'event') {
|
|
415
|
+
} else if ((bodyChanged || titleChanged || tagsChanged) && category !== 'event') {
|
|
388
416
|
const rowid = (
|
|
389
417
|
ctx.stmts.getRowid.get(existing.id as string) as { rowid: number } | undefined
|
|
390
418
|
)?.rowid;
|
|
391
419
|
if (rowid) {
|
|
392
|
-
|
|
393
|
-
pendingEmbeds.push({ rowid, text: embeddingText });
|
|
420
|
+
pendingEmbeds.push({ rowid, text: buildEmbeddingText(parsed.title, parsed.body, tagsJson, kind) });
|
|
394
421
|
}
|
|
395
422
|
}
|
|
396
423
|
stats.updated++;
|
|
@@ -465,9 +492,21 @@ export async function reindex(
|
|
|
465
492
|
|
|
466
493
|
ctx.db.exec('COMMIT');
|
|
467
494
|
} catch (e) {
|
|
468
|
-
ctx.db.exec('ROLLBACK');
|
|
495
|
+
try { ctx.db.exec('ROLLBACK'); } catch {}
|
|
496
|
+
// On SQLITE_BUSY, reset stats so the retry is idempotent (counters
|
|
497
|
+
// accumulate inside the tx and would double-count otherwise).
|
|
498
|
+
if (isBusyError(e)) {
|
|
499
|
+
stats.added = 0;
|
|
500
|
+
stats.updated = 0;
|
|
501
|
+
stats.removed = 0;
|
|
502
|
+
stats.unchanged = 0;
|
|
503
|
+
stats.skippedIndexing = 0;
|
|
504
|
+
stats.embeddingsCleared = 0;
|
|
505
|
+
pendingEmbeds.length = 0;
|
|
506
|
+
}
|
|
469
507
|
throw e;
|
|
470
508
|
}
|
|
509
|
+
});
|
|
471
510
|
|
|
472
511
|
if (!skipEmbeddings) {
|
|
473
512
|
for (let i = 0; i < pendingEmbeds.length; i += EMBED_BATCH_SIZE) {
|
|
@@ -487,17 +526,17 @@ export async function reindex(
|
|
|
487
526
|
if (fullSync) {
|
|
488
527
|
const missingVec = ctx.db
|
|
489
528
|
.prepare(
|
|
490
|
-
`SELECT v.rowid, v.title, v.body FROM vault v
|
|
529
|
+
`SELECT v.rowid, v.title, v.body, v.tags, v.kind FROM vault v
|
|
491
530
|
WHERE v.category != 'event'
|
|
492
531
|
AND v.indexed = 1
|
|
493
532
|
AND v.rowid NOT IN (SELECT rowid FROM vault_vec)`
|
|
494
533
|
)
|
|
495
|
-
.all() as { rowid: number; title: string | null; body: string }[];
|
|
534
|
+
.all() as { rowid: number; title: string | null; body: string; tags: string | null; kind: string }[];
|
|
496
535
|
|
|
497
536
|
if (missingVec.length > 0) {
|
|
498
537
|
const missingEmbeds = missingVec.map((r) => ({
|
|
499
538
|
rowid: r.rowid,
|
|
500
|
-
text:
|
|
539
|
+
text: buildEmbeddingText(r.title, r.body, r.tags, r.kind),
|
|
501
540
|
}));
|
|
502
541
|
|
|
503
542
|
for (let i = 0; i < missingEmbeds.length; i += EMBED_BATCH_SIZE) {
|
|
@@ -117,3 +117,10 @@ export { htmlToMarkdown, extractHtmlContent, ingestUrl } from './ingest-url.js';
|
|
|
117
117
|
// Watch
|
|
118
118
|
export { startWatcher } from './watch.js';
|
|
119
119
|
export type { WatcherOptions, VaultWatcher } from './watch.js';
|
|
120
|
+
|
|
121
|
+
// Consolidation
|
|
122
|
+
export { findDuplicates, mergeEntries, computeDecayScores } from './consolidation.js';
|
|
123
|
+
export type { DuplicateGroup, MergeResult, DecayScore } from './consolidation.js';
|
|
124
|
+
|
|
125
|
+
// Assemble
|
|
126
|
+
export { assembleContext } from './assemble.js';
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { kindBoost, recencyBoost } from './search.js';
|
|
3
|
+
|
|
4
|
+
// Unit tests for kindBoost: the 'task' kind is the only one expected to be
// boosted; every other kind (and a missing kind) is expected to be neutral.
describe('kindBoost', () => {
  const NEUTRAL = 1.0;

  // Asserts that each of the given kinds maps to the neutral multiplier.
  const expectNeutral = (kinds: Array<string | null | undefined>): void => {
    kinds.forEach((kind) => {
      expect(kindBoost(kind)).toBe(NEUTRAL);
    });
  };

  it('returns 1.5 for task kind', () => {
    const boost = kindBoost('task');
    expect(boost).toBe(1.5);
  });

  it('returns 1.0 for knowledge kinds', () => {
    expectNeutral(['insight', 'decision', 'pattern', 'reference', 'note']);
  });

  it('returns 1.0 for entity kinds', () => {
    expectNeutral(['bucket', 'project', 'tool', 'contact']);
  });

  it('returns 1.0 for null/undefined', () => {
    expectNeutral([null, undefined]);
  });
});
|
|
26
|
+
|
|
27
|
+
// Ranking-parity tests: combine recencyBoost and kindBoost the same way a
// multiplicative ranking score would, and check that the task boost offsets
// the event-category decay.
describe('task-runs ranking parity', () => {
  const MS_PER_DAY = 86400000;
  // RRF contribution of the top-ranked item in a single list (k = 60).
  const TOP_RANK_RRF = 1 / (60 + 1);
  const DECAY_DAYS = 30;

  // ISO timestamp for an entry created `days` days before "now".
  const isoDaysAgo = (days: number): string =>
    new Date(Date.now() - days * MS_PER_DAY).toISOString();

  it('task entry outscores equivalent insight entry despite event-category decay', () => {
    // Both entries share the same 14-day age; they differ only in category
    // ('knowledge' vs 'event') and in whether kindBoost applies.
    const createdAt = isoDaysAgo(14);

    const knowledgeRecency = recencyBoost(createdAt, 'knowledge', DECAY_DAYS);
    const eventRecency = recencyBoost(createdAt, 'event', DECAY_DAYS);

    const insightScore = TOP_RANK_RRF * knowledgeRecency * 1.0; // insight: neutral kind multiplier
    const taskScore = TOP_RANK_RRF * eventRecency * kindBoost('task');

    // The task boost must more than compensate for the event decay.
    expect(taskScore).toBeGreaterThan(insightScore);
  });

  it('kindBoost reduces the event-decay penalty by ≥ 40% at 30-day age', () => {
    // Author's stated expectation (recencyBoost is not visible here — confirm):
    // at 30 days the event decay is 0.5, so an unboosted task scores half of an
    // equivalent knowledge entry and the 1.5x boost lifts it to 0.75x.
    const createdAt = isoDaysAgo(30);

    const unboostedTaskScore = TOP_RANK_RRF * recencyBoost(createdAt, 'event', DECAY_DAYS) * 1.0;
    const boostedTaskScore =
      TOP_RANK_RRF * recencyBoost(createdAt, 'event', DECAY_DAYS) * kindBoost('task');
    const knowledgeScore = TOP_RANK_RRF * recencyBoost(createdAt, 'knowledge', DECAY_DAYS) * 1.0;

    // Gap to the knowledge baseline, without and with the boost.
    const gapWithoutBoost = knowledgeScore - unboostedTaskScore;
    const gapWithBoost = knowledgeScore - boostedTaskScore;

    // The boosted gap must be below 60% of the unboosted gap.
    expect(gapWithBoost).toBeLessThan(gapWithoutBoost * 0.6);
  });
});
|
|
@@ -3,6 +3,33 @@ import { embedBatch } from './embed.js';
|
|
|
3
3
|
|
|
4
4
|
const NEAR_DUP_THRESHOLD = 0.92;
|
|
5
5
|
const RRF_K = 60;
|
|
6
|
+
|
|
7
|
+
/**
 * Build the text used for embedding generation. Tags and kind are appended
 * after title/body so the vector representation captures metadata, not just
 * content.
 *
 * Output shape: `<title> <body> [tags: a, b] [kind: k]`, with every empty or
 * missing part omitted and the remaining parts joined by a single space.
 *
 * @param title entry title; skipped when null/undefined/empty
 * @param body entry body; skipped when null/undefined/empty
 * @param tags tags as stored, normally a JSON-encoded array of strings. A JSON
 *   array is rendered as a comma-separated list. Any other non-blank value
 *   (not JSON, or JSON that is not an array) is included verbatim instead of
 *   being dropped.
 * @param kind entry kind; skipped when null/undefined/empty
 * @returns the concatenated embedding text (may be the empty string)
 */
export function buildEmbeddingText(
  title: string | null | undefined,
  body: string | null | undefined,
  tags: string | null | undefined,
  kind: string | null | undefined
): string {
  const parts: string[] = [];
  if (title) parts.push(title);
  if (body) parts.push(body);

  if (tags) {
    // Default to the raw value so both "not JSON" and "JSON but not an array"
    // are handled the same way.
    let tagText = tags;
    try {
      const parsed: unknown = JSON.parse(tags);
      if (Array.isArray(parsed)) tagText = parsed.join(', ');
    } catch {
      /* not JSON — keep the raw string */
    }
    // An empty tag list (e.g. "[]") carries no signal; skip the empty marker.
    if (tagText.trim() !== '') parts.push(`[tags: ${tagText}]`);
  }

  if (kind) parts.push(`[kind: ${kind}]`);
  return parts.join(' ');
}
|
|
6
33
|
const RECALL_BOOST_CAP = 2.0;
|
|
7
34
|
const RECALL_HALF_LIFE_DAYS = 30;
|
|
8
35
|
const DISCOVERY_SLOTS = 2;
|
|
@@ -29,7 +56,8 @@ export function buildFtsQuery(query: string): string | null {
|
|
|
29
56
|
const phrase = `"${words.join(' ')}"`;
|
|
30
57
|
const near = `NEAR(${words.map((w) => `"${w}"`).join(' ')}, 10)`;
|
|
31
58
|
const and = words.map((w) => `"${w}"`).join(' AND ');
|
|
32
|
-
|
|
59
|
+
const or = words.map((w) => `"${w}"`).join(' OR ');
|
|
60
|
+
return `${phrase} OR ${near} OR ${and} OR ${or}`;
|
|
33
61
|
}
|
|
34
62
|
|
|
35
63
|
export function recencyBoost(createdAt: string, category: string, decayDays = 30): number {
|
|
@@ -50,6 +78,15 @@ export function recallBoost(recallCount: number, lastRecalledAt: string | null):
|
|
|
50
78
|
return Math.min(boost, RECALL_BOOST_CAP);
|
|
51
79
|
}
|
|
52
80
|
|
|
81
|
+
/**
 * Multiplier applied to entries whose kind is 'task'.
 *
 * Rationale carried over from the original note: task entries (kind='task')
 * are event-category and would otherwise be penalised by recencyBoost's event
 * decay; this boost restores their ranking parity for free-text queries.
 */
const TASK_KIND_BOOST = 1.5;

/**
 * Ranking multiplier for an entry's kind.
 *
 * @param kind entry kind; null/undefined are treated like any non-task kind
 * @returns TASK_KIND_BOOST when kind is exactly 'task', otherwise 1.0
 */
export function kindBoost(kind: string | null | undefined): number {
  return kind === 'task' ? TASK_KIND_BOOST : 1.0;
}
|
|
89
|
+
|
|
53
90
|
export function buildFilterClauses({
|
|
54
91
|
categoryFilter,
|
|
55
92
|
excludeEvents = false,
|
|
@@ -202,7 +239,7 @@ export async function hybridSearch(
|
|
|
202
239
|
if (missing.length > 0) {
|
|
203
240
|
const entries = missing.map((r) => {
|
|
204
241
|
const entry = rowMap.get(r.id);
|
|
205
|
-
return { rowid: r.rowid, text:
|
|
242
|
+
return { rowid: r.rowid, text: buildEmbeddingText(entry?.title, entry?.body, entry?.tags, entry?.kind) };
|
|
206
243
|
});
|
|
207
244
|
const embeddings = await embedBatch(entries.map((e) => e.text));
|
|
208
245
|
for (let i = 0; i < entries.length; i++) {
|
|
@@ -232,7 +269,7 @@ export async function hybridSearch(
|
|
|
232
269
|
if (vecCount > 0) {
|
|
233
270
|
queryVec = await ctx.embed(query);
|
|
234
271
|
if (queryVec) {
|
|
235
|
-
const vecLimit = kindFilter ?
|
|
272
|
+
const vecLimit = kindFilter ? 60 : 40;
|
|
236
273
|
const vecRows = ctx.db
|
|
237
274
|
.prepare(
|
|
238
275
|
`SELECT v.rowid, v.distance FROM vault_vec v WHERE embedding MATCH ? ORDER BY distance LIMIT ?`
|
|
@@ -327,10 +364,73 @@ export async function hybridSearch(
|
|
|
327
364
|
}
|
|
328
365
|
}
|
|
329
366
|
|
|
367
|
+
// Tag-based search lane: match query keywords against tags JSON and kind
|
|
368
|
+
const tagRankedIds: string[] = [];
|
|
369
|
+
try {
|
|
370
|
+
const tagWords = query
|
|
371
|
+
.split(/[\s-]+/)
|
|
372
|
+
.map((w) => w.replace(/[*"():^~{}]/g, '').toLowerCase())
|
|
373
|
+
.filter((w) => w.length > 1);
|
|
374
|
+
if (tagWords.length > 0) {
|
|
375
|
+
const tagWhereParts = ['indexed = 1', "(expires_at IS NULL OR expires_at > datetime('now'))", 'superseded_by IS NULL'];
|
|
376
|
+
const tagParams: (string | number | null)[] = [];
|
|
377
|
+
|
|
378
|
+
const likeClauses = tagWords.map((w) => {
|
|
379
|
+
tagParams.push(`%${w}%`);
|
|
380
|
+
return `tags LIKE ?`;
|
|
381
|
+
});
|
|
382
|
+
const kindClauses = tagWords.map((w) => {
|
|
383
|
+
tagParams.push(`%${w}%`);
|
|
384
|
+
return `kind LIKE ?`;
|
|
385
|
+
});
|
|
386
|
+
tagWhereParts.push(`(${[...likeClauses, ...kindClauses].join(' OR ')})`);
|
|
387
|
+
|
|
388
|
+
if (kindFilter) {
|
|
389
|
+
tagWhereParts.push('kind = ?');
|
|
390
|
+
tagParams.push(kindFilter);
|
|
391
|
+
}
|
|
392
|
+
if (categoryFilter) {
|
|
393
|
+
tagWhereParts.push('category = ?');
|
|
394
|
+
tagParams.push(categoryFilter);
|
|
395
|
+
}
|
|
396
|
+
if (excludeEvents && !categoryFilter) {
|
|
397
|
+
tagWhereParts.push("category != 'event'");
|
|
398
|
+
}
|
|
399
|
+
if (since) {
|
|
400
|
+
tagWhereParts.push('created_at >= ?');
|
|
401
|
+
tagParams.push(since);
|
|
402
|
+
}
|
|
403
|
+
if (until) {
|
|
404
|
+
tagWhereParts.push('created_at <= ?');
|
|
405
|
+
tagParams.push(until);
|
|
406
|
+
}
|
|
407
|
+
if (!includeSuperseeded) {
|
|
408
|
+
// already have superseded_by IS NULL above
|
|
409
|
+
}
|
|
410
|
+
if (!includeEphemeral) {
|
|
411
|
+
tagWhereParts.push("tier != 'ephemeral'");
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
const tagSQL = `SELECT id FROM vault WHERE ${tagWhereParts.join(' AND ')} ORDER BY recall_count DESC LIMIT 20`;
|
|
415
|
+
const tagRows = ctx.db.prepare(tagSQL).all(...tagParams) as { id: string }[];
|
|
416
|
+
|
|
417
|
+
for (const row of tagRows) {
|
|
418
|
+
tagRankedIds.push(row.id);
|
|
419
|
+
if (!rowMap.has(row.id)) {
|
|
420
|
+
const full = ctx.db.prepare('SELECT * FROM vault WHERE id = ?').get(row.id) as VaultEntry | undefined;
|
|
421
|
+
if (full) rowMap.set(full.id, full);
|
|
422
|
+
}
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
} catch (err) {
|
|
426
|
+
console.error(`[retrieve] Tag search error: ${(err as Error).message}`);
|
|
427
|
+
}
|
|
428
|
+
|
|
330
429
|
if (rowMap.size === 0) return [];
|
|
331
430
|
|
|
332
|
-
// Build ranked lists for RRF: content FTS + content vec + optional context vec
|
|
431
|
+
// Build ranked lists for RRF: content FTS + content vec + tags + optional context vec
|
|
333
432
|
const rankedLists = [ftsRankedIds, vecRankedIds];
|
|
433
|
+
if (tagRankedIds.length > 0) rankedLists.push(tagRankedIds);
|
|
334
434
|
if (ctxRankedIds.length > 0) rankedLists.push(ctxRankedIds);
|
|
335
435
|
const rrfScores = reciprocalRankFusion(rankedLists);
|
|
336
436
|
|
|
@@ -341,7 +441,14 @@ export async function hybridSearch(
|
|
|
341
441
|
entry.last_recalled_at ?? null
|
|
342
442
|
);
|
|
343
443
|
const durable = entry.tier === 'durable' ? 1.3 : 1.0;
|
|
344
|
-
|
|
444
|
+
// Heat-tier ranking boost: frequently recalled entries rank higher
|
|
445
|
+
const heatMultiplier =
|
|
446
|
+
entry.heat_tier === 'hot' ? 1.4 :
|
|
447
|
+
entry.heat_tier === 'warm' ? 1.1 :
|
|
448
|
+
entry.heat_tier === 'cold' ? 0.9 :
|
|
449
|
+
entry.heat_tier === 'frozen' ? 0.7 : 1.0;
|
|
450
|
+
const kBoost = kindBoost(entry.kind);
|
|
451
|
+
rrfScores.set(id, (rrfScores.get(id) ?? 0) * boost * recall * durable * heatMultiplier * kBoost);
|
|
345
452
|
}
|
|
346
453
|
|
|
347
454
|
const candidates: SearchResult[] = [...rowMap.values()].map((entry) => ({
|