context-vault 3.17.0 → 3.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +157 -0
- package/dist/register-tools.d.ts.map +1 -1
- package/dist/register-tools.js +0 -2
- package/dist/register-tools.js.map +1 -1
- package/dist/server.js +78 -1
- package/dist/server.js.map +1 -1
- package/dist/tools/recall.d.ts +1 -1
- package/dist/tools/recall.d.ts.map +1 -1
- package/dist/tools/recall.js +50 -100
- package/dist/tools/recall.js.map +1 -1
- package/node_modules/@context-vault/core/dist/assemble.d.ts +22 -0
- package/node_modules/@context-vault/core/dist/assemble.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/assemble.js +143 -0
- package/node_modules/@context-vault/core/dist/assemble.js.map +1 -0
- package/node_modules/@context-vault/core/dist/capture.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/capture.js +10 -5
- package/node_modules/@context-vault/core/dist/capture.js.map +1 -1
- package/node_modules/@context-vault/core/dist/consolidation.d.ts +40 -0
- package/node_modules/@context-vault/core/dist/consolidation.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/consolidation.js +229 -0
- package/node_modules/@context-vault/core/dist/consolidation.js.map +1 -0
- package/node_modules/@context-vault/core/dist/db.d.ts +25 -1
- package/node_modules/@context-vault/core/dist/db.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/db.js +92 -4
- package/node_modules/@context-vault/core/dist/db.js.map +1 -1
- package/node_modules/@context-vault/core/dist/frontmatter.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/frontmatter.js +26 -3
- package/node_modules/@context-vault/core/dist/frontmatter.js.map +1 -1
- package/node_modules/@context-vault/core/dist/index.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/index.js +225 -184
- package/node_modules/@context-vault/core/dist/index.js.map +1 -1
- package/node_modules/@context-vault/core/dist/main.d.ts +2 -0
- package/node_modules/@context-vault/core/dist/main.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/main.js +2 -0
- package/node_modules/@context-vault/core/dist/main.js.map +1 -1
- package/node_modules/@context-vault/core/dist/search.d.ts +5 -0
- package/node_modules/@context-vault/core/dist/search.d.ts.map +1 -1
- package/node_modules/@context-vault/core/dist/search.js +97 -5
- package/node_modules/@context-vault/core/dist/search.js.map +1 -1
- package/node_modules/@context-vault/core/dist/summarize.d.ts +5 -0
- package/node_modules/@context-vault/core/dist/summarize.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/summarize.js +146 -0
- package/node_modules/@context-vault/core/dist/summarize.js.map +1 -0
- package/node_modules/@context-vault/core/dist/types.d.ts +2 -0
- package/node_modules/@context-vault/core/dist/types.d.ts.map +1 -1
- package/node_modules/@context-vault/core/package.json +5 -1
- package/node_modules/@context-vault/core/src/assemble.ts +187 -0
- package/node_modules/@context-vault/core/src/capture.ts +10 -5
- package/node_modules/@context-vault/core/src/consolidation.ts +356 -0
- package/node_modules/@context-vault/core/src/db.ts +95 -4
- package/node_modules/@context-vault/core/src/frontmatter.ts +25 -4
- package/node_modules/@context-vault/core/src/index.ts +127 -88
- package/node_modules/@context-vault/core/src/main.ts +4 -0
- package/node_modules/@context-vault/core/src/search.ts +102 -5
- package/node_modules/@context-vault/core/src/summarize.ts +157 -0
- package/node_modules/@context-vault/core/src/types.ts +2 -0
- package/package.json +2 -2
- package/scripts/validate-epipe-shutdown.mjs +183 -0
- package/scripts/validate-sqlite-busy-retry.mjs +243 -0
- package/src/register-tools.ts +0 -2
- package/src/server.ts +76 -1
- package/src/tools/recall.ts +51 -110
|
@@ -23,12 +23,34 @@ export function parseFrontmatter(text: string): {
|
|
|
23
23
|
const match = normalized.match(/^---\n([\s\S]*?)\n---\n?([\s\S]*)$/);
|
|
24
24
|
if (!match) return { meta: {}, body: normalized.trim() };
|
|
25
25
|
const meta: Record<string, unknown> = {};
|
|
26
|
-
|
|
26
|
+
const lines = match[1].split('\n');
|
|
27
|
+
|
|
28
|
+
for (let i = 0; i < lines.length; i++) {
|
|
29
|
+
const line = lines[i];
|
|
27
30
|
const idx = line.indexOf(':');
|
|
28
31
|
if (idx === -1) continue;
|
|
29
32
|
const key = line.slice(0, idx).trim();
|
|
30
33
|
let val: unknown = line.slice(idx + 1).trim() as string;
|
|
31
|
-
|
|
34
|
+
|
|
35
|
+
// Handle YAML multiline list format (tags: followed by - items)
|
|
36
|
+
if (typeof val === 'string' && val === '' && key === 'tags') {
|
|
37
|
+
const items: string[] = [];
|
|
38
|
+
let nextIdx = i + 1;
|
|
39
|
+
while (nextIdx < lines.length) {
|
|
40
|
+
const nextLine = lines[nextIdx];
|
|
41
|
+
// Match lines that start with optional whitespace + dash + space
|
|
42
|
+
const dashMatch = nextLine.match(/^\s*-\s+(.+)$/);
|
|
43
|
+
if (!dashMatch) break;
|
|
44
|
+
items.push(dashMatch[1].trim().replace(/^"|"$/g, '').replace(/^'|'$/g, ''));
|
|
45
|
+
nextIdx++;
|
|
46
|
+
}
|
|
47
|
+
if (items.length > 0) {
|
|
48
|
+
val = items;
|
|
49
|
+
i = nextIdx - 1; // Skip processed lines
|
|
50
|
+
} else {
|
|
51
|
+
val = '';
|
|
52
|
+
}
|
|
53
|
+
} else if (
|
|
32
54
|
typeof val === 'string' &&
|
|
33
55
|
val.length >= 2 &&
|
|
34
56
|
val.startsWith('"') &&
|
|
@@ -40,8 +62,7 @@ export function parseFrontmatter(text: string): {
|
|
|
40
62
|
} catch {
|
|
41
63
|
/* keep as-is */
|
|
42
64
|
}
|
|
43
|
-
}
|
|
44
|
-
if (typeof val === 'string' && val.startsWith('[') && val.endsWith(']')) {
|
|
65
|
+
} else if (typeof val === 'string' && val.startsWith('[') && val.endsWith(']')) {
|
|
45
66
|
try {
|
|
46
67
|
val = JSON.parse(val);
|
|
47
68
|
} catch {
|
|
@@ -7,6 +7,9 @@ import { embedBatch } from './embed.js';
|
|
|
7
7
|
import type { BaseCtx, IndexEntryInput, IndexingConfig, ReindexStats } from './types.js';
|
|
8
8
|
import { shouldIndex } from './indexing.js';
|
|
9
9
|
import { DEFAULT_INDEXING } from './constants.js';
|
|
10
|
+
import { generateSummaryTiers } from './summarize.js';
|
|
11
|
+
import { buildEmbeddingText } from './search.js';
|
|
12
|
+
import { withBusyRetry, isBusyError } from './db.js';
|
|
10
13
|
|
|
11
14
|
const EXCLUDED_DIRS = new Set(['projects', '_archive']);
|
|
12
15
|
const EXCLUDED_FILES = new Set(['context.md', 'memory.md', 'README.md']);
|
|
@@ -46,95 +49,117 @@ export async function indexEntry(
|
|
|
46
49
|
const cat = category || categoryFor(kind);
|
|
47
50
|
const effectiveTier = tier || defaultTierFor(kind);
|
|
48
51
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
if (cat === 'entity' && identity_key) {
|
|
52
|
-
const existing = ctx.stmts.getByIdentityKey.get(kind, identity_key) as
|
|
53
|
-
| Record<string, unknown>
|
|
54
|
-
| undefined;
|
|
55
|
-
if (existing) {
|
|
56
|
-
ctx.stmts.upsertByIdentityKey.run(
|
|
57
|
-
title || null,
|
|
58
|
-
body,
|
|
59
|
-
metaJson,
|
|
60
|
-
tagsJson,
|
|
61
|
-
source || 'claude-code',
|
|
62
|
-
cat,
|
|
63
|
-
filePath,
|
|
64
|
-
expires_at || null,
|
|
65
|
-
sourceFilesJson,
|
|
66
|
-
kind,
|
|
67
|
-
identity_key
|
|
68
|
-
);
|
|
69
|
-
wasUpdate = true;
|
|
70
|
-
}
|
|
71
|
-
}
|
|
52
|
+
const rowid = await withBusyRetry(() => {
|
|
53
|
+
let wasUpdate = false;
|
|
72
54
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
title || null,
|
|
80
|
-
body,
|
|
81
|
-
metaJson,
|
|
82
|
-
tagsJson,
|
|
83
|
-
source || 'claude-code',
|
|
84
|
-
filePath,
|
|
85
|
-
identity_key || null,
|
|
86
|
-
expires_at || null,
|
|
87
|
-
createdAt,
|
|
88
|
-
createdAt,
|
|
89
|
-
sourceFilesJson,
|
|
90
|
-
effectiveTier,
|
|
91
|
-
indexed ? 1 : 0
|
|
92
|
-
);
|
|
93
|
-
} catch (e) {
|
|
94
|
-
if ((e as Error).message.includes('UNIQUE constraint')) {
|
|
95
|
-
ctx.stmts.updateEntry.run(
|
|
55
|
+
if (cat === 'entity' && identity_key) {
|
|
56
|
+
const existing = ctx.stmts.getByIdentityKey.get(kind, identity_key) as
|
|
57
|
+
| Record<string, unknown>
|
|
58
|
+
| undefined;
|
|
59
|
+
if (existing) {
|
|
60
|
+
ctx.stmts.upsertByIdentityKey.run(
|
|
96
61
|
title || null,
|
|
97
62
|
body,
|
|
98
63
|
metaJson,
|
|
99
64
|
tagsJson,
|
|
100
65
|
source || 'claude-code',
|
|
101
66
|
cat,
|
|
67
|
+
filePath,
|
|
68
|
+
expires_at || null,
|
|
69
|
+
sourceFilesJson,
|
|
70
|
+
kind,
|
|
71
|
+
identity_key
|
|
72
|
+
);
|
|
73
|
+
wasUpdate = true;
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
if (!wasUpdate) {
|
|
78
|
+
try {
|
|
79
|
+
ctx.stmts.insertEntry.run(
|
|
80
|
+
id,
|
|
81
|
+
kind,
|
|
82
|
+
cat,
|
|
83
|
+
title || null,
|
|
84
|
+
body,
|
|
85
|
+
metaJson,
|
|
86
|
+
tagsJson,
|
|
87
|
+
source || 'claude-code',
|
|
88
|
+
filePath,
|
|
102
89
|
identity_key || null,
|
|
103
90
|
expires_at || null,
|
|
104
|
-
|
|
91
|
+
createdAt,
|
|
92
|
+
createdAt,
|
|
93
|
+
sourceFilesJson,
|
|
94
|
+
effectiveTier,
|
|
95
|
+
indexed ? 1 : 0
|
|
105
96
|
);
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
97
|
+
} catch (e) {
|
|
98
|
+
if (isBusyError(e)) throw e;
|
|
99
|
+
if ((e as Error).message.includes('UNIQUE constraint')) {
|
|
100
|
+
ctx.stmts.updateEntry.run(
|
|
101
|
+
title || null,
|
|
102
|
+
body,
|
|
103
|
+
metaJson,
|
|
104
|
+
tagsJson,
|
|
105
|
+
source || 'claude-code',
|
|
106
|
+
cat,
|
|
107
|
+
identity_key || null,
|
|
108
|
+
expires_at || null,
|
|
109
|
+
filePath
|
|
110
|
+
);
|
|
111
|
+
if (sourceFilesJson !== null && ctx.stmts.updateSourceFiles) {
|
|
112
|
+
const entryRow = ctx.stmts.getRowidByPath.get(filePath) as { rowid: number } | undefined;
|
|
113
|
+
if (entryRow) {
|
|
114
|
+
const idRow = ctx.db
|
|
115
|
+
.prepare('SELECT id FROM vault WHERE file_path = ?')
|
|
116
|
+
.get(filePath) as { id: string } | undefined;
|
|
117
|
+
if (idRow) ctx.stmts.updateSourceFiles.run(sourceFilesJson, idRow.id);
|
|
118
|
+
}
|
|
113
119
|
}
|
|
120
|
+
wasUpdate = true;
|
|
121
|
+
} else {
|
|
122
|
+
throw e;
|
|
114
123
|
}
|
|
115
|
-
wasUpdate = true;
|
|
116
|
-
} else {
|
|
117
|
-
throw e;
|
|
118
124
|
}
|
|
119
125
|
}
|
|
120
|
-
}
|
|
121
126
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
127
|
+
const rowidResult = wasUpdate
|
|
128
|
+
? (ctx.stmts.getRowidByPath.get(filePath) as { rowid: number } | undefined)
|
|
129
|
+
: (ctx.stmts.getRowid.get(id) as { rowid: number } | undefined);
|
|
125
130
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
+
if (!rowidResult || rowidResult.rowid == null) {
|
|
132
|
+
throw new Error(
|
|
133
|
+
`Could not find rowid for entry: ${wasUpdate ? `file_path=${filePath}` : `id=${id}`}`
|
|
134
|
+
);
|
|
135
|
+
}
|
|
131
136
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
137
|
+
const rowidNum = Number(rowidResult.rowid);
|
|
138
|
+
if (!Number.isFinite(rowidNum) || rowidNum < 1) {
|
|
139
|
+
throw new Error(
|
|
140
|
+
`Invalid rowid retrieved: ${rowidResult.rowid} (type: ${typeof rowidResult.rowid})`
|
|
141
|
+
);
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
// Generate and store precomputed summary tiers
|
|
145
|
+
try {
|
|
146
|
+
const { condensed, keypoint } = generateSummaryTiers(body);
|
|
147
|
+
ctx.db
|
|
148
|
+
.prepare('UPDATE vault SET summary_condensed = ?, summary_keypoint = ? WHERE id = ?')
|
|
149
|
+
.run(condensed || null, keypoint || null, id);
|
|
150
|
+
} catch (sumErr) {
|
|
151
|
+
if (isBusyError(sumErr)) throw sumErr;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
return rowidNum;
|
|
155
|
+
});
|
|
156
|
+
|
|
157
|
+
// Observability: log the INSERT outcome so silent-wedge cases surface in error.log.
|
|
158
|
+
// Added 2026-04-19 after 44h of save_context returning ✓ without persisting (runtime/
|
|
159
|
+
// subprocess-state issue, see specs/reindex-and-save-pipeline-bugs.md).
|
|
160
|
+
console.warn(
|
|
161
|
+
`[context-vault] indexEntry: id=${id} rowid=${rowid} cat=${cat} indexed=${indexed ? 1 : 0}`
|
|
162
|
+
);
|
|
138
163
|
|
|
139
164
|
if (indexed && cat !== 'event') {
|
|
140
165
|
let embedding: Float32Array | null = null;
|
|
@@ -142,7 +167,7 @@ export async function indexEntry(
|
|
|
142
167
|
embedding = precomputedEmbedding;
|
|
143
168
|
} else {
|
|
144
169
|
try {
|
|
145
|
-
embedding = await ctx.embed(
|
|
170
|
+
embedding = await ctx.embed(buildEmbeddingText(title, body, tagsJson, kind));
|
|
146
171
|
} catch (embedErr) {
|
|
147
172
|
console.warn(
|
|
148
173
|
`[context-vault] embed() failed for entry ${id} — skipping vec insert: ${(embedErr as Error).message}`
|
|
@@ -151,12 +176,15 @@ export async function indexEntry(
|
|
|
151
176
|
}
|
|
152
177
|
|
|
153
178
|
if (embedding) {
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
179
|
+
await withBusyRetry(() => {
|
|
180
|
+
try {
|
|
181
|
+
ctx.deleteVec(rowid);
|
|
182
|
+
} catch (delErr) {
|
|
183
|
+
if (isBusyError(delErr)) throw delErr;
|
|
184
|
+
/* no-op */
|
|
185
|
+
}
|
|
186
|
+
ctx.insertVec(rowid, embedding);
|
|
187
|
+
});
|
|
160
188
|
}
|
|
161
189
|
}
|
|
162
190
|
}
|
|
@@ -261,6 +289,7 @@ export async function reindex(
|
|
|
261
289
|
return stats;
|
|
262
290
|
}
|
|
263
291
|
|
|
292
|
+
await withBusyRetry(() => {
|
|
264
293
|
ctx.db.exec('BEGIN');
|
|
265
294
|
try {
|
|
266
295
|
for (const { kind, dir } of filteredKindEntries) {
|
|
@@ -336,10 +365,9 @@ export async function reindex(
|
|
|
336
365
|
if (entryIndexed && category !== 'event') {
|
|
337
366
|
const rowidResult = ctx.stmts.getRowid.get(id) as { rowid: number } | undefined;
|
|
338
367
|
if (rowidResult?.rowid) {
|
|
339
|
-
const embeddingText = [parsed.title, parsed.body].filter(Boolean).join(' ');
|
|
340
368
|
pendingEmbeds.push({
|
|
341
369
|
rowid: rowidResult.rowid,
|
|
342
|
-
text:
|
|
370
|
+
text: buildEmbeddingText(parsed.title, parsed.body, tagsJson, kind),
|
|
343
371
|
});
|
|
344
372
|
}
|
|
345
373
|
}
|
|
@@ -384,13 +412,12 @@ export async function reindex(
|
|
|
384
412
|
try { ctx.deleteVec(rowid); stats.embeddingsCleared!++; } catch {}
|
|
385
413
|
}
|
|
386
414
|
stats.skippedIndexing!++;
|
|
387
|
-
} else if ((bodyChanged || titleChanged) && category !== 'event') {
|
|
415
|
+
} else if ((bodyChanged || titleChanged || tagsChanged) && category !== 'event') {
|
|
388
416
|
const rowid = (
|
|
389
417
|
ctx.stmts.getRowid.get(existing.id as string) as { rowid: number } | undefined
|
|
390
418
|
)?.rowid;
|
|
391
419
|
if (rowid) {
|
|
392
|
-
|
|
393
|
-
pendingEmbeds.push({ rowid, text: embeddingText });
|
|
420
|
+
pendingEmbeds.push({ rowid, text: buildEmbeddingText(parsed.title, parsed.body, tagsJson, kind) });
|
|
394
421
|
}
|
|
395
422
|
}
|
|
396
423
|
stats.updated++;
|
|
@@ -465,9 +492,21 @@ export async function reindex(
|
|
|
465
492
|
|
|
466
493
|
ctx.db.exec('COMMIT');
|
|
467
494
|
} catch (e) {
|
|
468
|
-
ctx.db.exec('ROLLBACK');
|
|
495
|
+
try { ctx.db.exec('ROLLBACK'); } catch {}
|
|
496
|
+
// On SQLITE_BUSY, reset stats so the retry is idempotent (counters
|
|
497
|
+
// accumulate inside the tx and would double-count otherwise).
|
|
498
|
+
if (isBusyError(e)) {
|
|
499
|
+
stats.added = 0;
|
|
500
|
+
stats.updated = 0;
|
|
501
|
+
stats.removed = 0;
|
|
502
|
+
stats.unchanged = 0;
|
|
503
|
+
stats.skippedIndexing = 0;
|
|
504
|
+
stats.embeddingsCleared = 0;
|
|
505
|
+
pendingEmbeds.length = 0;
|
|
506
|
+
}
|
|
469
507
|
throw e;
|
|
470
508
|
}
|
|
509
|
+
});
|
|
471
510
|
|
|
472
511
|
if (!skipEmbeddings) {
|
|
473
512
|
for (let i = 0; i < pendingEmbeds.length; i += EMBED_BATCH_SIZE) {
|
|
@@ -487,17 +526,17 @@ export async function reindex(
|
|
|
487
526
|
if (fullSync) {
|
|
488
527
|
const missingVec = ctx.db
|
|
489
528
|
.prepare(
|
|
490
|
-
`SELECT v.rowid, v.title, v.body FROM vault v
|
|
529
|
+
`SELECT v.rowid, v.title, v.body, v.tags, v.kind FROM vault v
|
|
491
530
|
WHERE v.category != 'event'
|
|
492
531
|
AND v.indexed = 1
|
|
493
532
|
AND v.rowid NOT IN (SELECT rowid FROM vault_vec)`
|
|
494
533
|
)
|
|
495
|
-
.all() as { rowid: number; title: string | null; body: string }[];
|
|
534
|
+
.all() as { rowid: number; title: string | null; body: string; tags: string | null; kind: string }[];
|
|
496
535
|
|
|
497
536
|
if (missingVec.length > 0) {
|
|
498
537
|
const missingEmbeds = missingVec.map((r) => ({
|
|
499
538
|
rowid: r.rowid,
|
|
500
|
-
text:
|
|
539
|
+
text: buildEmbeddingText(r.title, r.body, r.tags, r.kind),
|
|
501
540
|
}));
|
|
502
541
|
|
|
503
542
|
for (let i = 0; i < missingEmbeds.length; i += EMBED_BATCH_SIZE) {
|
|
@@ -117,3 +117,7 @@ export { htmlToMarkdown, extractHtmlContent, ingestUrl } from './ingest-url.js';
|
|
|
117
117
|
// Watch
|
|
118
118
|
export { startWatcher } from './watch.js';
|
|
119
119
|
export type { WatcherOptions, VaultWatcher } from './watch.js';
|
|
120
|
+
|
|
121
|
+
// Consolidation
|
|
122
|
+
export { findDuplicates, mergeEntries, computeDecayScores } from './consolidation.js';
|
|
123
|
+
export type { DuplicateGroup, MergeResult, DecayScore } from './consolidation.js';
|
|
@@ -3,6 +3,33 @@ import { embedBatch } from './embed.js';
|
|
|
3
3
|
|
|
4
4
|
const NEAR_DUP_THRESHOLD = 0.92;
|
|
5
5
|
const RRF_K = 60;
|
|
6
|
+
|
|
7
|
+
/**
 * Build the text that is fed to the embedding model for a vault entry.
 *
 * The result is the space-joined concatenation of, in order: the title, the
 * body, a `[tags: a, b]` marker and a `[kind: x]` marker. Any part that is
 * missing or empty is omitted entirely, so the output never contains an empty
 * marker such as `[tags: ]`.
 *
 * @param title - Entry title; skipped when null, undefined or empty.
 * @param body - Entry body; skipped when null, undefined or empty.
 * @param tags - Tags as stored: normally a JSON array string. A JSON scalar
 *   (string or number) is treated as a single tag. Text that is not valid JSON
 *   is used verbatim (trimmed) as the tag list.
 * @param kind - Entry kind; skipped when null, undefined or empty.
 * @returns The combined embedding text, or `''` when every part is empty.
 */
export function buildEmbeddingText(
  title: string | null | undefined,
  body: string | null | undefined,
  tags: string | null | undefined,
  kind: string | null | undefined
): string {
  const parts: string[] = [];
  if (title) parts.push(title);
  if (body) parts.push(body);

  if (tags) {
    let tagText: string;
    try {
      const parsed: unknown = JSON.parse(tags);
      // Accept both `["a","b"]` and a bare scalar such as `"a"`.
      const candidates: unknown[] = Array.isArray(parsed) ? parsed : [parsed];
      tagText = candidates
        .filter((t): t is string | number => typeof t === 'string' || typeof t === 'number')
        .map((t) => String(t).trim())
        .filter((t) => t.length > 0)
        .join(', ');
    } catch {
      // Not JSON (e.g. a plain comma-separated list): keep the raw text.
      tagText = tags.trim();
    }
    // Only emit the marker when there is at least one usable tag.
    if (tagText) parts.push(`[tags: ${tagText}]`);
  }

  if (kind) parts.push(`[kind: ${kind}]`);
  return parts.join(' ');
}
|
|
6
33
|
const RECALL_BOOST_CAP = 2.0;
|
|
7
34
|
const RECALL_HALF_LIFE_DAYS = 30;
|
|
8
35
|
const DISCOVERY_SLOTS = 2;
|
|
@@ -29,7 +56,8 @@ export function buildFtsQuery(query: string): string | null {
|
|
|
29
56
|
const phrase = `"${words.join(' ')}"`;
|
|
30
57
|
const near = `NEAR(${words.map((w) => `"${w}"`).join(' ')}, 10)`;
|
|
31
58
|
const and = words.map((w) => `"${w}"`).join(' AND ');
|
|
32
|
-
|
|
59
|
+
const or = words.map((w) => `"${w}"`).join(' OR ');
|
|
60
|
+
return `${phrase} OR ${near} OR ${and} OR ${or}`;
|
|
33
61
|
}
|
|
34
62
|
|
|
35
63
|
export function recencyBoost(createdAt: string, category: string, decayDays = 30): number {
|
|
@@ -202,7 +230,7 @@ export async function hybridSearch(
|
|
|
202
230
|
if (missing.length > 0) {
|
|
203
231
|
const entries = missing.map((r) => {
|
|
204
232
|
const entry = rowMap.get(r.id);
|
|
205
|
-
return { rowid: r.rowid, text:
|
|
233
|
+
return { rowid: r.rowid, text: buildEmbeddingText(entry?.title, entry?.body, entry?.tags, entry?.kind) };
|
|
206
234
|
});
|
|
207
235
|
const embeddings = await embedBatch(entries.map((e) => e.text));
|
|
208
236
|
for (let i = 0; i < entries.length; i++) {
|
|
@@ -232,7 +260,7 @@ export async function hybridSearch(
|
|
|
232
260
|
if (vecCount > 0) {
|
|
233
261
|
queryVec = await ctx.embed(query);
|
|
234
262
|
if (queryVec) {
|
|
235
|
-
const vecLimit = kindFilter ?
|
|
263
|
+
const vecLimit = kindFilter ? 60 : 40;
|
|
236
264
|
const vecRows = ctx.db
|
|
237
265
|
.prepare(
|
|
238
266
|
`SELECT v.rowid, v.distance FROM vault_vec v WHERE embedding MATCH ? ORDER BY distance LIMIT ?`
|
|
@@ -327,10 +355,73 @@ export async function hybridSearch(
|
|
|
327
355
|
}
|
|
328
356
|
}
|
|
329
357
|
|
|
358
|
+
// Tag-based search lane: match query keywords against tags JSON and kind
|
|
359
|
+
const tagRankedIds: string[] = [];
|
|
360
|
+
try {
|
|
361
|
+
const tagWords = query
|
|
362
|
+
.split(/[\s-]+/)
|
|
363
|
+
.map((w) => w.replace(/[*"():^~{}]/g, '').toLowerCase())
|
|
364
|
+
.filter((w) => w.length > 1);
|
|
365
|
+
if (tagWords.length > 0) {
|
|
366
|
+
const tagWhereParts = ['indexed = 1', "(expires_at IS NULL OR expires_at > datetime('now'))", 'superseded_by IS NULL'];
|
|
367
|
+
const tagParams: (string | number | null)[] = [];
|
|
368
|
+
|
|
369
|
+
const likeClauses = tagWords.map((w) => {
|
|
370
|
+
tagParams.push(`%${w}%`);
|
|
371
|
+
return `tags LIKE ?`;
|
|
372
|
+
});
|
|
373
|
+
const kindClauses = tagWords.map((w) => {
|
|
374
|
+
tagParams.push(`%${w}%`);
|
|
375
|
+
return `kind LIKE ?`;
|
|
376
|
+
});
|
|
377
|
+
tagWhereParts.push(`(${[...likeClauses, ...kindClauses].join(' OR ')})`);
|
|
378
|
+
|
|
379
|
+
if (kindFilter) {
|
|
380
|
+
tagWhereParts.push('kind = ?');
|
|
381
|
+
tagParams.push(kindFilter);
|
|
382
|
+
}
|
|
383
|
+
if (categoryFilter) {
|
|
384
|
+
tagWhereParts.push('category = ?');
|
|
385
|
+
tagParams.push(categoryFilter);
|
|
386
|
+
}
|
|
387
|
+
if (excludeEvents && !categoryFilter) {
|
|
388
|
+
tagWhereParts.push("category != 'event'");
|
|
389
|
+
}
|
|
390
|
+
if (since) {
|
|
391
|
+
tagWhereParts.push('created_at >= ?');
|
|
392
|
+
tagParams.push(since);
|
|
393
|
+
}
|
|
394
|
+
if (until) {
|
|
395
|
+
tagWhereParts.push('created_at <= ?');
|
|
396
|
+
tagParams.push(until);
|
|
397
|
+
}
|
|
398
|
+
if (!includeSuperseeded) {
|
|
399
|
+
// already have superseded_by IS NULL above
|
|
400
|
+
}
|
|
401
|
+
if (!includeEphemeral) {
|
|
402
|
+
tagWhereParts.push("tier != 'ephemeral'");
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
const tagSQL = `SELECT id FROM vault WHERE ${tagWhereParts.join(' AND ')} ORDER BY recall_count DESC LIMIT 20`;
|
|
406
|
+
const tagRows = ctx.db.prepare(tagSQL).all(...tagParams) as { id: string }[];
|
|
407
|
+
|
|
408
|
+
for (const row of tagRows) {
|
|
409
|
+
tagRankedIds.push(row.id);
|
|
410
|
+
if (!rowMap.has(row.id)) {
|
|
411
|
+
const full = ctx.db.prepare('SELECT * FROM vault WHERE id = ?').get(row.id) as VaultEntry | undefined;
|
|
412
|
+
if (full) rowMap.set(full.id, full);
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
} catch (err) {
|
|
417
|
+
console.error(`[retrieve] Tag search error: ${(err as Error).message}`);
|
|
418
|
+
}
|
|
419
|
+
|
|
330
420
|
if (rowMap.size === 0) return [];
|
|
331
421
|
|
|
332
|
-
// Build ranked lists for RRF: content FTS + content vec + optional context vec
|
|
422
|
+
// Build ranked lists for RRF: content FTS + content vec + tags + optional context vec
|
|
333
423
|
const rankedLists = [ftsRankedIds, vecRankedIds];
|
|
424
|
+
if (tagRankedIds.length > 0) rankedLists.push(tagRankedIds);
|
|
334
425
|
if (ctxRankedIds.length > 0) rankedLists.push(ctxRankedIds);
|
|
335
426
|
const rrfScores = reciprocalRankFusion(rankedLists);
|
|
336
427
|
|
|
@@ -341,7 +432,13 @@ export async function hybridSearch(
|
|
|
341
432
|
entry.last_recalled_at ?? null
|
|
342
433
|
);
|
|
343
434
|
const durable = entry.tier === 'durable' ? 1.3 : 1.0;
|
|
344
|
-
|
|
435
|
+
// Heat-tier ranking boost: frequently recalled entries rank higher
|
|
436
|
+
const heatMultiplier =
|
|
437
|
+
entry.heat_tier === 'hot' ? 1.4 :
|
|
438
|
+
entry.heat_tier === 'warm' ? 1.1 :
|
|
439
|
+
entry.heat_tier === 'cold' ? 0.9 :
|
|
440
|
+
entry.heat_tier === 'frozen' ? 0.7 : 1.0;
|
|
441
|
+
rrfScores.set(id, (rrfScores.get(id) ?? 0) * boost * recall * durable * heatMultiplier);
|
|
345
442
|
}
|
|
346
443
|
|
|
347
444
|
const candidates: SearchResult[] = [...rowMap.values()].map((entry) => ({
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/** Maximum length, in characters, of the condensed summary tier. */
const CONDENSED_CAP = 300;
/** Maximum length, in characters, of the keypoint summary tier. */
const KEYPOINT_CAP = 150;
/** Bodies shorter than this many characters are used verbatim for both tiers. */
const SHORT_THRESHOLD = 150;

/**
 * Matches text whose final token is a common abbreviation followed by a
 * period (e.g. "Dr.", "e.g.", "etc."), so the period is not mistaken for the
 * end of a sentence.
 *
 * The leading `\b` is required: without it ordinary words that merely end in
 * one of the abbreviations ("best." -> "est.", "items." -> "Ms.",
 * "zinc." -> "inc.") would be treated as abbreviations too.
 */
const ABBREVS = /\b(?:Mr|Mrs|Ms|Dr|Prof|Sr|Jr|vs|etc|i\.e|e\.g|approx|dept|est|inc|ltd|corp)\.\s*$/i;
|
|
6
|
+
|
|
7
|
+
/**
 * Remove a leading YAML-style frontmatter block from `text`.
 *
 * A frontmatter block is an opening line consisting of exactly `---`
 * (trailing spaces/tabs allowed), any number of lines, and a closing line that
 * is again exactly `---`. Both delimiters must occupy a whole line, so lines
 * such as `----` or `---notes` inside the block are not mistaken for the
 * closing fence. LF and CRLF line endings are both accepted.
 *
 * @param text - Raw entry text, possibly starting with frontmatter.
 * @returns The text after the frontmatter with leading whitespace removed, or
 *   `text` unchanged when no complete frontmatter block is present.
 */
function stripFrontmatter(text: string): string {
  const block = /^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/.exec(text);
  if (!block) return text;
  return text.slice(block[0].length).trimStart();
}
|
|
13
|
+
|
|
14
|
+
/**
 * Split markdown-ish text into prose sentences.
 *
 * Rules applied line by line:
 * - Fenced code blocks (delimited by lines of three or more backticks) are
 *   skipped entirely, including their contents, so code never ends up as a
 *   "sentence".
 * - Lines starting with `#` (markdown headers) are skipped.
 * - A blank line ends the sentence currently being accumulated.
 * - Leading list markers (`-`, `*`, `+`, `1.`) are removed, the content kept.
 * - Text is split after `.`, `!` or `?` followed by whitespace, except when
 *   the text before the split ends in an abbreviation matched by `ABBREVS`,
 *   in which case the fragment is re-attached to what follows.
 *
 * @param text - Text to split (frontmatter is expected to be removed already).
 * @returns Non-empty, trimmed sentences in document order.
 */
function splitSentences(text: string): string[] {
  // A real fence line is a run of backticks plus an optional info string that
  // itself contains no backticks; "```inline```" on one line is not a fence.
  const fenceLine = /^`{3,}[^`]*$/;
  const sentences: string[] = [];
  let current = '';
  let inFence = false;

  // Push whatever has been accumulated so far as one sentence.
  const flush = (): void => {
    const pending = current.trim();
    if (pending) sentences.push(pending);
    current = '';
  };

  for (const line of text.split('\n')) {
    const trimmed = line.trim();

    if (trimmed.startsWith('```')) {
      if (fenceLine.test(trimmed)) inFence = !inFence;
      continue;
    }
    // Everything between an opening and closing fence is code, not prose.
    if (inFence) continue;

    if (!trimmed) {
      flush();
      continue;
    }

    // Skip markdown headers
    if (trimmed.startsWith('#')) continue;

    // Drop list markers but keep the item text
    const listContent = trimmed.replace(/^[-*+]\s+/, '').replace(/^\d+\.\s+/, '');
    current += (current ? ' ' : '') + listContent;

    // Split on sentence-ending punctuation; the last part stays pending
    // because the sentence may continue on the next line.
    const parts = current.split(/(?<=[.!?])\s+/);
    if (parts.length > 1) {
      for (let i = 0; i < parts.length - 1; i++) {
        const part = parts[i].trim();
        if (!part) continue;
        if (ABBREVS.test(part)) {
          // "Dr." etc. is not a sentence end: glue it onto the next part.
          parts[i + 1] = part + ' ' + parts[i + 1];
        } else {
          sentences.push(part);
        }
      }
      current = parts[parts.length - 1];
    }
  }

  flush();
  return sentences;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Return the text of the first markdown ATX header (`#` to `######` followed
 * by whitespace) found at the start of a line.
 *
 * Lines inside fenced code blocks are ignored, so a shell or Python comment
 * such as `# install deps` inside a code fence is not reported as a header.
 *
 * @param text - Markdown text to scan.
 * @returns The trimmed header text, or `null` when there is no header.
 */
function firstHeaderText(text: string): string | null {
  // Fence line: three or more backticks plus an optional backtick-free info string.
  const fenceLine = /^`{3,}[^`]*$/;
  let inFence = false;

  for (const line of text.split('\n')) {
    if (fenceLine.test(line.trim())) {
      inFence = !inFence;
      continue;
    }
    if (inFence) continue;

    const match = line.match(/^#{1,6}\s+(.+)/);
    if (match) return match[1].trim();
  }
  return null;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Find the first non-empty comment in a code snippet and return its text.
 *
 * Recognized comment forms, checked per line after trimming:
 * - `//` line comments (any number of leading slashes, so `///` works too);
 * - `#` line comments, excluding `##...` (markdown headers) and `#!` shebangs;
 * - `/* ... *\/` block comments, including `/** ... *\/`; only the text on the
 *   opening line is returned.
 *
 * Comments with no text (a bare `//`, `#` or `/*`) are skipped so that a later
 * comment with real content can still be found.
 *
 * @param text - Source text to scan.
 * @returns The comment text without its delimiters, or `null` if none exists.
 */
function firstCodeComment(text: string): string | null {
  for (const line of text.split('\n')) {
    const trimmed = line.trim();
    let content = '';

    if (trimmed.startsWith('//')) {
      content = trimmed.replace(/^\/{2,}\s*/, '');
    } else if (trimmed.startsWith('#!')) {
      // Shebang line, not a comment.
      continue;
    } else if (trimmed.startsWith('#') && !trimmed.startsWith('##')) {
      content = trimmed.slice(1).trim();
    } else if (trimmed.startsWith('/*')) {
      // Prefer the single-line form so both delimiters are removed together;
      // otherwise strip only the opener of a multi-line block.
      const closed = /^\/\*+\s*(.*?)\s*\*+\/$/.exec(trimmed);
      content = closed ? closed[1] : trimmed.replace(/^\/\*+\s*/, '');
    }

    if (content) return content;
  }
  return null;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Heuristically decide whether `text` consists almost entirely of code.
 *
 * After removing frontmatter and blank lines, each remaining line is counted
 * as code when its trimmed form starts with one of a fixed set of markers
 * (code fence, comment opener, common JS/Python keywords, braces). The text is
 * considered code-only when more than 70% of the non-blank lines qualify.
 *
 * @param text - Entry text to classify.
 * @returns `true` when the code-line ratio exceeds 0.7; `false` otherwise,
 *   including when there are no non-blank lines.
 */
function isCodeOnly(text: string): boolean {
  const codePrefixes = [
    '```',
    '//',
    '/*',
    'import ',
    'export ',
    'const ',
    'let ',
    'function ',
    'class ',
    '{',
    '}',
    'def ',
    'return ',
  ];

  let total = 0;
  let codeCount = 0;
  for (const rawLine of stripFrontmatter(text).split('\n')) {
    const candidate = rawLine.trim();
    if (!candidate) continue; // blank lines do not count either way
    total += 1;
    if (codePrefixes.some((prefix) => candidate.startsWith(prefix))) {
      codeCount += 1;
    }
  }

  if (total === 0) return false;
  return codeCount / total > 0.7;
}
|
|
99
|
+
|
|
100
|
+
/**
 * Truncate `text` so the result is at most `limit` UTF-16 code units long,
 * appending `...` when truncation happens.
 *
 * The cut prefers the last space before the cut point, provided that space
 * lies beyond half of `limit`; otherwise the text is cut mid-word. The cut
 * never lands between the two halves of a surrogate pair, so characters such
 * as emoji are dropped whole rather than split into an invalid half.
 *
 * @param text - Text to shorten.
 * @param limit - Maximum length of the returned string.
 * @returns `text` unchanged when it already fits; otherwise the shortened text
 *   ending in `...`. For limits of 3 or less only (part of) the ellipsis fits,
 *   and a non-positive limit yields `''`.
 */
function cap(text: string, limit: number): string {
  if (text.length <= limit) return text;

  const ellipsis = '...';
  // No room for any text: return as much of the ellipsis as fits instead of
  // letting a negative slice index produce output longer than the limit.
  if (limit <= ellipsis.length) return ellipsis.slice(0, Math.max(0, limit));

  let cut = limit - ellipsis.length;
  // Back off by one if the cut would separate a high surrogate from its pair.
  const lastUnit = text.charCodeAt(cut - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;

  const truncated = text.slice(0, cut);
  const lastSpace = truncated.lastIndexOf(' ');
  const head = lastSpace > limit * 0.5 ? truncated.slice(0, lastSpace) : truncated;
  return head + ellipsis;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Produce two precomputed summaries of an entry body.
 *
 * - `keypoint`: the first markdown header if there is one, otherwise the first
 *   sentence, capped at `KEYPOINT_CAP` characters.
 * - `condensed`: the first and last sentence joined by a space (or the single
 *   sentence when there is only one), capped at `CONDENSED_CAP` characters.
 *
 * Special cases, checked in this order after frontmatter is stripped:
 * 1. Empty body: both summaries are `''`.
 * 2. Body shorter than `SHORT_THRESHOLD`: the body itself is used for both.
 * 3. Code-only body: both summaries are the first code comment, or
 *    `Code block: <first 80 chars of the first non-blank line>` if none exists.
 *
 * @param body - Raw entry body, optionally starting with frontmatter.
 * @returns The `condensed` and `keypoint` summary strings.
 */
export function generateSummaryTiers(body: string): {
  condensed: string;
  keypoint: string;
} {
  const content = stripFrontmatter(body).trim();

  if (content === '') {
    return { condensed: '', keypoint: '' };
  }
  if (content.length < SHORT_THRESHOLD) {
    return { condensed: content, keypoint: content };
  }

  // Apply the per-tier length caps in one place.
  const capped = (condensedText: string, keypointText: string) => ({
    condensed: cap(condensedText, CONDENSED_CAP),
    keypoint: cap(keypointText, KEYPOINT_CAP),
  });

  if (isCodeOnly(content)) {
    let label = firstCodeComment(content);
    if (!label) {
      const firstNonBlank = content.split('\n').find((l) => l.trim());
      const firstLine = firstNonBlank?.trim() || '';
      label = `Code block: ${firstLine.slice(0, 80)}`;
    }
    return capped(label, label);
  }

  const sentences = splitSentences(content);
  const opening = sentences[0];
  const closing = sentences[sentences.length - 1];

  const keypoint = firstHeaderText(content) || opening || content.slice(0, KEYPOINT_CAP);

  let condensed: string;
  if (sentences.length > 1) {
    // Avoid repeating the same sentence twice when first and last coincide.
    condensed = opening === closing ? opening : `${opening} ${closing}`;
  } else {
    condensed = opening || content.slice(0, CONDENSED_CAP);
  }

  return capped(condensed, keypoint);
}
|