context-vault 3.18.0 → 3.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/bin/cli.js +157 -0
  2. package/dist/register-tools.d.ts.map +1 -1
  3. package/dist/register-tools.js +0 -2
  4. package/dist/register-tools.js.map +1 -1
  5. package/dist/server.js +78 -1
  6. package/dist/server.js.map +1 -1
  7. package/dist/tools/recall.d.ts +1 -1
  8. package/dist/tools/recall.d.ts.map +1 -1
  9. package/dist/tools/recall.js +50 -100
  10. package/dist/tools/recall.js.map +1 -1
  11. package/node_modules/@context-vault/core/dist/assemble.d.ts +22 -0
  12. package/node_modules/@context-vault/core/dist/assemble.d.ts.map +1 -0
  13. package/node_modules/@context-vault/core/dist/assemble.js +143 -0
  14. package/node_modules/@context-vault/core/dist/assemble.js.map +1 -0
  15. package/node_modules/@context-vault/core/dist/capture.d.ts.map +1 -1
  16. package/node_modules/@context-vault/core/dist/capture.js +10 -5
  17. package/node_modules/@context-vault/core/dist/capture.js.map +1 -1
  18. package/node_modules/@context-vault/core/dist/consolidation.d.ts +40 -0
  19. package/node_modules/@context-vault/core/dist/consolidation.d.ts.map +1 -0
  20. package/node_modules/@context-vault/core/dist/consolidation.js +229 -0
  21. package/node_modules/@context-vault/core/dist/consolidation.js.map +1 -0
  22. package/node_modules/@context-vault/core/dist/db.d.ts +25 -1
  23. package/node_modules/@context-vault/core/dist/db.d.ts.map +1 -1
  24. package/node_modules/@context-vault/core/dist/db.js +92 -4
  25. package/node_modules/@context-vault/core/dist/db.js.map +1 -1
  26. package/node_modules/@context-vault/core/dist/frontmatter.d.ts.map +1 -1
  27. package/node_modules/@context-vault/core/dist/frontmatter.js +26 -3
  28. package/node_modules/@context-vault/core/dist/frontmatter.js.map +1 -1
  29. package/node_modules/@context-vault/core/dist/index.d.ts.map +1 -1
  30. package/node_modules/@context-vault/core/dist/index.js +225 -184
  31. package/node_modules/@context-vault/core/dist/index.js.map +1 -1
  32. package/node_modules/@context-vault/core/dist/main.d.ts +2 -0
  33. package/node_modules/@context-vault/core/dist/main.d.ts.map +1 -1
  34. package/node_modules/@context-vault/core/dist/main.js +2 -0
  35. package/node_modules/@context-vault/core/dist/main.js.map +1 -1
  36. package/node_modules/@context-vault/core/dist/search.d.ts +5 -0
  37. package/node_modules/@context-vault/core/dist/search.d.ts.map +1 -1
  38. package/node_modules/@context-vault/core/dist/search.js +97 -5
  39. package/node_modules/@context-vault/core/dist/search.js.map +1 -1
  40. package/node_modules/@context-vault/core/dist/summarize.d.ts +5 -0
  41. package/node_modules/@context-vault/core/dist/summarize.d.ts.map +1 -0
  42. package/node_modules/@context-vault/core/dist/summarize.js +146 -0
  43. package/node_modules/@context-vault/core/dist/summarize.js.map +1 -0
  44. package/node_modules/@context-vault/core/dist/types.d.ts +2 -0
  45. package/node_modules/@context-vault/core/dist/types.d.ts.map +1 -1
  46. package/node_modules/@context-vault/core/package.json +5 -1
  47. package/node_modules/@context-vault/core/src/assemble.ts +187 -0
  48. package/node_modules/@context-vault/core/src/capture.ts +10 -5
  49. package/node_modules/@context-vault/core/src/consolidation.ts +356 -0
  50. package/node_modules/@context-vault/core/src/db.ts +95 -4
  51. package/node_modules/@context-vault/core/src/frontmatter.ts +25 -4
  52. package/node_modules/@context-vault/core/src/index.ts +127 -88
  53. package/node_modules/@context-vault/core/src/main.ts +4 -0
  54. package/node_modules/@context-vault/core/src/search.ts +102 -5
  55. package/node_modules/@context-vault/core/src/summarize.ts +157 -0
  56. package/node_modules/@context-vault/core/src/types.ts +2 -0
  57. package/package.json +2 -2
  58. package/scripts/validate-epipe-shutdown.mjs +183 -0
  59. package/scripts/validate-sqlite-busy-retry.mjs +243 -0
  60. package/src/register-tools.ts +0 -2
  61. package/src/server.ts +76 -1
  62. package/src/tools/recall.ts +51 -110
@@ -23,12 +23,34 @@ export function parseFrontmatter(text: string): {
23
23
  const match = normalized.match(/^---\n([\s\S]*?)\n---\n?([\s\S]*)$/);
24
24
  if (!match) return { meta: {}, body: normalized.trim() };
25
25
  const meta: Record<string, unknown> = {};
26
- for (const line of match[1].split('\n')) {
26
+ const lines = match[1].split('\n');
27
+
28
+ for (let i = 0; i < lines.length; i++) {
29
+ const line = lines[i];
27
30
  const idx = line.indexOf(':');
28
31
  if (idx === -1) continue;
29
32
  const key = line.slice(0, idx).trim();
30
33
  let val: unknown = line.slice(idx + 1).trim() as string;
31
- if (
34
+
35
+ // Handle YAML multiline list format (tags: followed by - items)
36
+ if (typeof val === 'string' && val === '' && key === 'tags') {
37
+ const items: string[] = [];
38
+ let nextIdx = i + 1;
39
+ while (nextIdx < lines.length) {
40
+ const nextLine = lines[nextIdx];
41
+ // Match lines that start with optional whitespace + dash + space
42
+ const dashMatch = nextLine.match(/^\s*-\s+(.+)$/);
43
+ if (!dashMatch) break;
44
+ items.push(dashMatch[1].trim().replace(/^"|"$/g, '').replace(/^'|'$/g, ''));
45
+ nextIdx++;
46
+ }
47
+ if (items.length > 0) {
48
+ val = items;
49
+ i = nextIdx - 1; // Skip processed lines
50
+ } else {
51
+ val = '';
52
+ }
53
+ } else if (
32
54
  typeof val === 'string' &&
33
55
  val.length >= 2 &&
34
56
  val.startsWith('"') &&
@@ -40,8 +62,7 @@ export function parseFrontmatter(text: string): {
40
62
  } catch {
41
63
  /* keep as-is */
42
64
  }
43
- }
44
- if (typeof val === 'string' && val.startsWith('[') && val.endsWith(']')) {
65
+ } else if (typeof val === 'string' && val.startsWith('[') && val.endsWith(']')) {
45
66
  try {
46
67
  val = JSON.parse(val);
47
68
  } catch {
@@ -7,6 +7,9 @@ import { embedBatch } from './embed.js';
7
7
  import type { BaseCtx, IndexEntryInput, IndexingConfig, ReindexStats } from './types.js';
8
8
  import { shouldIndex } from './indexing.js';
9
9
  import { DEFAULT_INDEXING } from './constants.js';
10
+ import { generateSummaryTiers } from './summarize.js';
11
+ import { buildEmbeddingText } from './search.js';
12
+ import { withBusyRetry, isBusyError } from './db.js';
10
13
 
11
14
  const EXCLUDED_DIRS = new Set(['projects', '_archive']);
12
15
  const EXCLUDED_FILES = new Set(['context.md', 'memory.md', 'README.md']);
@@ -46,95 +49,117 @@ export async function indexEntry(
46
49
  const cat = category || categoryFor(kind);
47
50
  const effectiveTier = tier || defaultTierFor(kind);
48
51
 
49
- let wasUpdate = false;
50
-
51
- if (cat === 'entity' && identity_key) {
52
- const existing = ctx.stmts.getByIdentityKey.get(kind, identity_key) as
53
- | Record<string, unknown>
54
- | undefined;
55
- if (existing) {
56
- ctx.stmts.upsertByIdentityKey.run(
57
- title || null,
58
- body,
59
- metaJson,
60
- tagsJson,
61
- source || 'claude-code',
62
- cat,
63
- filePath,
64
- expires_at || null,
65
- sourceFilesJson,
66
- kind,
67
- identity_key
68
- );
69
- wasUpdate = true;
70
- }
71
- }
52
+ const rowid = await withBusyRetry(() => {
53
+ let wasUpdate = false;
72
54
 
73
- if (!wasUpdate) {
74
- try {
75
- ctx.stmts.insertEntry.run(
76
- id,
77
- kind,
78
- cat,
79
- title || null,
80
- body,
81
- metaJson,
82
- tagsJson,
83
- source || 'claude-code',
84
- filePath,
85
- identity_key || null,
86
- expires_at || null,
87
- createdAt,
88
- createdAt,
89
- sourceFilesJson,
90
- effectiveTier,
91
- indexed ? 1 : 0
92
- );
93
- } catch (e) {
94
- if ((e as Error).message.includes('UNIQUE constraint')) {
95
- ctx.stmts.updateEntry.run(
55
+ if (cat === 'entity' && identity_key) {
56
+ const existing = ctx.stmts.getByIdentityKey.get(kind, identity_key) as
57
+ | Record<string, unknown>
58
+ | undefined;
59
+ if (existing) {
60
+ ctx.stmts.upsertByIdentityKey.run(
96
61
  title || null,
97
62
  body,
98
63
  metaJson,
99
64
  tagsJson,
100
65
  source || 'claude-code',
101
66
  cat,
67
+ filePath,
68
+ expires_at || null,
69
+ sourceFilesJson,
70
+ kind,
71
+ identity_key
72
+ );
73
+ wasUpdate = true;
74
+ }
75
+ }
76
+
77
+ if (!wasUpdate) {
78
+ try {
79
+ ctx.stmts.insertEntry.run(
80
+ id,
81
+ kind,
82
+ cat,
83
+ title || null,
84
+ body,
85
+ metaJson,
86
+ tagsJson,
87
+ source || 'claude-code',
88
+ filePath,
102
89
  identity_key || null,
103
90
  expires_at || null,
104
- filePath
91
+ createdAt,
92
+ createdAt,
93
+ sourceFilesJson,
94
+ effectiveTier,
95
+ indexed ? 1 : 0
105
96
  );
106
- if (sourceFilesJson !== null && ctx.stmts.updateSourceFiles) {
107
- const entryRow = ctx.stmts.getRowidByPath.get(filePath) as { rowid: number } | undefined;
108
- if (entryRow) {
109
- const idRow = ctx.db
110
- .prepare('SELECT id FROM vault WHERE file_path = ?')
111
- .get(filePath) as { id: string } | undefined;
112
- if (idRow) ctx.stmts.updateSourceFiles.run(sourceFilesJson, idRow.id);
97
+ } catch (e) {
98
+ if (isBusyError(e)) throw e;
99
+ if ((e as Error).message.includes('UNIQUE constraint')) {
100
+ ctx.stmts.updateEntry.run(
101
+ title || null,
102
+ body,
103
+ metaJson,
104
+ tagsJson,
105
+ source || 'claude-code',
106
+ cat,
107
+ identity_key || null,
108
+ expires_at || null,
109
+ filePath
110
+ );
111
+ if (sourceFilesJson !== null && ctx.stmts.updateSourceFiles) {
112
+ const entryRow = ctx.stmts.getRowidByPath.get(filePath) as { rowid: number } | undefined;
113
+ if (entryRow) {
114
+ const idRow = ctx.db
115
+ .prepare('SELECT id FROM vault WHERE file_path = ?')
116
+ .get(filePath) as { id: string } | undefined;
117
+ if (idRow) ctx.stmts.updateSourceFiles.run(sourceFilesJson, idRow.id);
118
+ }
113
119
  }
120
+ wasUpdate = true;
121
+ } else {
122
+ throw e;
114
123
  }
115
- wasUpdate = true;
116
- } else {
117
- throw e;
118
124
  }
119
125
  }
120
- }
121
126
 
122
- const rowidResult = wasUpdate
123
- ? (ctx.stmts.getRowidByPath.get(filePath) as { rowid: number } | undefined)
124
- : (ctx.stmts.getRowid.get(id) as { rowid: number } | undefined);
127
+ const rowidResult = wasUpdate
128
+ ? (ctx.stmts.getRowidByPath.get(filePath) as { rowid: number } | undefined)
129
+ : (ctx.stmts.getRowid.get(id) as { rowid: number } | undefined);
125
130
 
126
- if (!rowidResult || rowidResult.rowid == null) {
127
- throw new Error(
128
- `Could not find rowid for entry: ${wasUpdate ? `file_path=${filePath}` : `id=${id}`}`
129
- );
130
- }
131
+ if (!rowidResult || rowidResult.rowid == null) {
132
+ throw new Error(
133
+ `Could not find rowid for entry: ${wasUpdate ? `file_path=${filePath}` : `id=${id}`}`
134
+ );
135
+ }
131
136
 
132
- const rowid = Number(rowidResult.rowid);
133
- if (!Number.isFinite(rowid) || rowid < 1) {
134
- throw new Error(
135
- `Invalid rowid retrieved: ${rowidResult.rowid} (type: ${typeof rowidResult.rowid})`
136
- );
137
- }
137
+ const rowidNum = Number(rowidResult.rowid);
138
+ if (!Number.isFinite(rowidNum) || rowidNum < 1) {
139
+ throw new Error(
140
+ `Invalid rowid retrieved: ${rowidResult.rowid} (type: ${typeof rowidResult.rowid})`
141
+ );
142
+ }
143
+
144
+ // Generate and store precomputed summary tiers
145
+ try {
146
+ const { condensed, keypoint } = generateSummaryTiers(body);
147
+ ctx.db
148
+ .prepare('UPDATE vault SET summary_condensed = ?, summary_keypoint = ? WHERE id = ?')
149
+ .run(condensed || null, keypoint || null, id);
150
+ } catch (sumErr) {
151
+ if (isBusyError(sumErr)) throw sumErr;
152
+ }
153
+
154
+ return rowidNum;
155
+ });
156
+
157
+ // Observability: log the INSERT outcome so silent-wedge cases surface in error.log.
158
+ // Added 2026-04-19 after 44h of save_context returning ✓ without persisting (runtime/
159
+ // subprocess-state issue, see specs/reindex-and-save-pipeline-bugs.md).
160
+ console.warn(
161
+ `[context-vault] indexEntry: id=${id} rowid=${rowid} cat=${cat} indexed=${indexed ? 1 : 0}`
162
+ );
138
163
 
139
164
  if (indexed && cat !== 'event') {
140
165
  let embedding: Float32Array | null = null;
@@ -142,7 +167,7 @@ export async function indexEntry(
142
167
  embedding = precomputedEmbedding;
143
168
  } else {
144
169
  try {
145
- embedding = await ctx.embed([title, body].filter(Boolean).join(' '));
170
+ embedding = await ctx.embed(buildEmbeddingText(title, body, tagsJson, kind));
146
171
  } catch (embedErr) {
147
172
  console.warn(
148
173
  `[context-vault] embed() failed for entry ${id} — skipping vec insert: ${(embedErr as Error).message}`
@@ -151,12 +176,15 @@ export async function indexEntry(
151
176
  }
152
177
 
153
178
  if (embedding) {
154
- try {
155
- ctx.deleteVec(rowid);
156
- } catch {
157
- /* no-op */
158
- }
159
- ctx.insertVec(rowid, embedding);
179
+ await withBusyRetry(() => {
180
+ try {
181
+ ctx.deleteVec(rowid);
182
+ } catch (delErr) {
183
+ if (isBusyError(delErr)) throw delErr;
184
+ /* no-op */
185
+ }
186
+ ctx.insertVec(rowid, embedding);
187
+ });
160
188
  }
161
189
  }
162
190
  }
@@ -261,6 +289,7 @@ export async function reindex(
261
289
  return stats;
262
290
  }
263
291
 
292
+ await withBusyRetry(() => {
264
293
  ctx.db.exec('BEGIN');
265
294
  try {
266
295
  for (const { kind, dir } of filteredKindEntries) {
@@ -336,10 +365,9 @@ export async function reindex(
336
365
  if (entryIndexed && category !== 'event') {
337
366
  const rowidResult = ctx.stmts.getRowid.get(id) as { rowid: number } | undefined;
338
367
  if (rowidResult?.rowid) {
339
- const embeddingText = [parsed.title, parsed.body].filter(Boolean).join(' ');
340
368
  pendingEmbeds.push({
341
369
  rowid: rowidResult.rowid,
342
- text: embeddingText,
370
+ text: buildEmbeddingText(parsed.title, parsed.body, tagsJson, kind),
343
371
  });
344
372
  }
345
373
  }
@@ -384,13 +412,12 @@ export async function reindex(
384
412
  try { ctx.deleteVec(rowid); stats.embeddingsCleared!++; } catch {}
385
413
  }
386
414
  stats.skippedIndexing!++;
387
- } else if ((bodyChanged || titleChanged) && category !== 'event') {
415
+ } else if ((bodyChanged || titleChanged || tagsChanged) && category !== 'event') {
388
416
  const rowid = (
389
417
  ctx.stmts.getRowid.get(existing.id as string) as { rowid: number } | undefined
390
418
  )?.rowid;
391
419
  if (rowid) {
392
- const embeddingText = [parsed.title, parsed.body].filter(Boolean).join(' ');
393
- pendingEmbeds.push({ rowid, text: embeddingText });
420
+ pendingEmbeds.push({ rowid, text: buildEmbeddingText(parsed.title, parsed.body, tagsJson, kind) });
394
421
  }
395
422
  }
396
423
  stats.updated++;
@@ -465,9 +492,21 @@ export async function reindex(
465
492
 
466
493
  ctx.db.exec('COMMIT');
467
494
  } catch (e) {
468
- ctx.db.exec('ROLLBACK');
495
+ try { ctx.db.exec('ROLLBACK'); } catch {}
496
+ // On SQLITE_BUSY, reset stats so the retry is idempotent (counters
497
+ // accumulate inside the tx and would double-count otherwise).
498
+ if (isBusyError(e)) {
499
+ stats.added = 0;
500
+ stats.updated = 0;
501
+ stats.removed = 0;
502
+ stats.unchanged = 0;
503
+ stats.skippedIndexing = 0;
504
+ stats.embeddingsCleared = 0;
505
+ pendingEmbeds.length = 0;
506
+ }
469
507
  throw e;
470
508
  }
509
+ });
471
510
 
472
511
  if (!skipEmbeddings) {
473
512
  for (let i = 0; i < pendingEmbeds.length; i += EMBED_BATCH_SIZE) {
@@ -487,17 +526,17 @@ export async function reindex(
487
526
  if (fullSync) {
488
527
  const missingVec = ctx.db
489
528
  .prepare(
490
- `SELECT v.rowid, v.title, v.body FROM vault v
529
+ `SELECT v.rowid, v.title, v.body, v.tags, v.kind FROM vault v
491
530
  WHERE v.category != 'event'
492
531
  AND v.indexed = 1
493
532
  AND v.rowid NOT IN (SELECT rowid FROM vault_vec)`
494
533
  )
495
- .all() as { rowid: number; title: string | null; body: string }[];
534
+ .all() as { rowid: number; title: string | null; body: string; tags: string | null; kind: string }[];
496
535
 
497
536
  if (missingVec.length > 0) {
498
537
  const missingEmbeds = missingVec.map((r) => ({
499
538
  rowid: r.rowid,
500
- text: [r.title, r.body].filter(Boolean).join(' '),
539
+ text: buildEmbeddingText(r.title, r.body, r.tags, r.kind),
501
540
  }));
502
541
 
503
542
  for (let i = 0; i < missingEmbeds.length; i += EMBED_BATCH_SIZE) {
@@ -117,3 +117,7 @@ export { htmlToMarkdown, extractHtmlContent, ingestUrl } from './ingest-url.js';
117
117
  // Watch
118
118
  export { startWatcher } from './watch.js';
119
119
  export type { WatcherOptions, VaultWatcher } from './watch.js';
120
+
121
+ // Consolidation
122
+ export { findDuplicates, mergeEntries, computeDecayScores } from './consolidation.js';
123
+ export type { DuplicateGroup, MergeResult, DecayScore } from './consolidation.js';
@@ -3,6 +3,33 @@ import { embedBatch } from './embed.js';
3
3
 
4
4
  const NEAR_DUP_THRESHOLD = 0.92;
5
5
  const RRF_K = 60;
6
+
7
/**
 * Build the text used for embedding generation. Tags and kind are appended
 * after title/body so the vector captures metadata, not just content.
 *
 * @param title - Entry title; skipped when empty or nullish.
 * @param body - Entry body; skipped when empty or nullish.
 * @param tags - Tags as a JSON-encoded array string. A value that is not
 *   valid JSON is included verbatim; valid JSON that is not an array is ignored.
 * @param kind - Entry kind; skipped when empty or nullish.
 * @returns The non-empty parts joined with single spaces.
 */
export function buildEmbeddingText(
  title: string | null | undefined,
  body: string | null | undefined,
  tags: string | null | undefined,
  kind: string | null | undefined
): string {
  const parts: string[] = [];
  if (title) parts.push(title);
  if (body) parts.push(body);
  if (tags) {
    let tagList: string[] = [];
    try {
      const parsed: unknown = JSON.parse(tags);
      if (Array.isArray(parsed)) {
        // Drop null/blank elements so they do not add empty separators.
        tagList = parsed
          .filter((t): t is string | number => typeof t === 'string' || typeof t === 'number')
          .map((t) => String(t).trim())
          .filter((t) => t.length > 0);
      }
    } catch {
      // Not JSON (e.g. a plain comma-separated list): use the raw text as-is.
      tagList = [tags];
    }
    // An empty tag list (e.g. '[]') must not contribute a bare "[tags: ]" token.
    if (tagList.length > 0) parts.push(`[tags: ${tagList.join(', ')}]`);
  }
  if (kind) parts.push(`[kind: ${kind}]`);
  return parts.join(' ');
}
6
33
  const RECALL_BOOST_CAP = 2.0;
7
34
  const RECALL_HALF_LIFE_DAYS = 30;
8
35
  const DISCOVERY_SLOTS = 2;
@@ -29,7 +56,8 @@ export function buildFtsQuery(query: string): string | null {
29
56
  const phrase = `"${words.join(' ')}"`;
30
57
  const near = `NEAR(${words.map((w) => `"${w}"`).join(' ')}, 10)`;
31
58
  const and = words.map((w) => `"${w}"`).join(' AND ');
32
- return `${phrase} OR ${near} OR ${and}`;
59
+ const or = words.map((w) => `"${w}"`).join(' OR ');
60
+ return `${phrase} OR ${near} OR ${and} OR ${or}`;
33
61
  }
34
62
 
35
63
  export function recencyBoost(createdAt: string, category: string, decayDays = 30): number {
@@ -202,7 +230,7 @@ export async function hybridSearch(
202
230
  if (missing.length > 0) {
203
231
  const entries = missing.map((r) => {
204
232
  const entry = rowMap.get(r.id);
205
- return { rowid: r.rowid, text: [entry?.title, entry?.body].filter(Boolean).join(' ') };
233
+ return { rowid: r.rowid, text: buildEmbeddingText(entry?.title, entry?.body, entry?.tags, entry?.kind) };
206
234
  });
207
235
  const embeddings = await embedBatch(entries.map((e) => e.text));
208
236
  for (let i = 0; i < entries.length; i++) {
@@ -232,7 +260,7 @@ export async function hybridSearch(
232
260
  if (vecCount > 0) {
233
261
  queryVec = await ctx.embed(query);
234
262
  if (queryVec) {
235
- const vecLimit = kindFilter ? 30 : 15;
263
+ const vecLimit = kindFilter ? 60 : 40;
236
264
  const vecRows = ctx.db
237
265
  .prepare(
238
266
  `SELECT v.rowid, v.distance FROM vault_vec v WHERE embedding MATCH ? ORDER BY distance LIMIT ?`
@@ -327,10 +355,73 @@ export async function hybridSearch(
327
355
  }
328
356
  }
329
357
 
358
+ // Tag-based search lane: match query keywords against tags JSON and kind
359
+ const tagRankedIds: string[] = [];
360
+ try {
361
+ const tagWords = query
362
+ .split(/[\s-]+/)
363
+ .map((w) => w.replace(/[*"():^~{}]/g, '').toLowerCase())
364
+ .filter((w) => w.length > 1);
365
+ if (tagWords.length > 0) {
366
+ const tagWhereParts = ['indexed = 1', "(expires_at IS NULL OR expires_at > datetime('now'))", 'superseded_by IS NULL'];
367
+ const tagParams: (string | number | null)[] = [];
368
+
369
+ const likeClauses = tagWords.map((w) => {
370
+ tagParams.push(`%${w}%`);
371
+ return `tags LIKE ?`;
372
+ });
373
+ const kindClauses = tagWords.map((w) => {
374
+ tagParams.push(`%${w}%`);
375
+ return `kind LIKE ?`;
376
+ });
377
+ tagWhereParts.push(`(${[...likeClauses, ...kindClauses].join(' OR ')})`);
378
+
379
+ if (kindFilter) {
380
+ tagWhereParts.push('kind = ?');
381
+ tagParams.push(kindFilter);
382
+ }
383
+ if (categoryFilter) {
384
+ tagWhereParts.push('category = ?');
385
+ tagParams.push(categoryFilter);
386
+ }
387
+ if (excludeEvents && !categoryFilter) {
388
+ tagWhereParts.push("category != 'event'");
389
+ }
390
+ if (since) {
391
+ tagWhereParts.push('created_at >= ?');
392
+ tagParams.push(since);
393
+ }
394
+ if (until) {
395
+ tagWhereParts.push('created_at <= ?');
396
+ tagParams.push(until);
397
+ }
398
+ if (!includeSuperseeded) {
399
+ // already have superseded_by IS NULL above
400
+ }
401
+ if (!includeEphemeral) {
402
+ tagWhereParts.push("tier != 'ephemeral'");
403
+ }
404
+
405
+ const tagSQL = `SELECT id FROM vault WHERE ${tagWhereParts.join(' AND ')} ORDER BY recall_count DESC LIMIT 20`;
406
+ const tagRows = ctx.db.prepare(tagSQL).all(...tagParams) as { id: string }[];
407
+
408
+ for (const row of tagRows) {
409
+ tagRankedIds.push(row.id);
410
+ if (!rowMap.has(row.id)) {
411
+ const full = ctx.db.prepare('SELECT * FROM vault WHERE id = ?').get(row.id) as VaultEntry | undefined;
412
+ if (full) rowMap.set(full.id, full);
413
+ }
414
+ }
415
+ }
416
+ } catch (err) {
417
+ console.error(`[retrieve] Tag search error: ${(err as Error).message}`);
418
+ }
419
+
330
420
  if (rowMap.size === 0) return [];
331
421
 
332
- // Build ranked lists for RRF: content FTS + content vec + optional context vec
422
+ // Build ranked lists for RRF: content FTS + content vec + tags + optional context vec
333
423
  const rankedLists = [ftsRankedIds, vecRankedIds];
424
+ if (tagRankedIds.length > 0) rankedLists.push(tagRankedIds);
334
425
  if (ctxRankedIds.length > 0) rankedLists.push(ctxRankedIds);
335
426
  const rrfScores = reciprocalRankFusion(rankedLists);
336
427
 
@@ -341,7 +432,13 @@ export async function hybridSearch(
341
432
  entry.last_recalled_at ?? null
342
433
  );
343
434
  const durable = entry.tier === 'durable' ? 1.3 : 1.0;
344
- rrfScores.set(id, (rrfScores.get(id) ?? 0) * boost * recall * durable);
435
+ // Heat-tier ranking boost: frequently recalled entries rank higher
436
+ const heatMultiplier =
437
+ entry.heat_tier === 'hot' ? 1.4 :
438
+ entry.heat_tier === 'warm' ? 1.1 :
439
+ entry.heat_tier === 'cold' ? 0.9 :
440
+ entry.heat_tier === 'frozen' ? 0.7 : 1.0;
441
+ rrfScores.set(id, (rrfScores.get(id) ?? 0) * boost * recall * durable * heatMultiplier);
345
442
  }
346
443
 
347
444
  const candidates: SearchResult[] = [...rowMap.values()].map((entry) => ({
@@ -0,0 +1,157 @@
1
/** Maximum length, in characters, of the "condensed" summary tier. */
const CONDENSED_CAP = 300;
/** Maximum length, in characters, of the "keypoint" summary tier. */
const KEYPOINT_CAP = 150;
/** Cleaned bodies shorter than this are used verbatim for both tiers. */
const SHORT_THRESHOLD = 150;

/**
 * Matches a text segment that ends with a common abbreviation followed by a
 * period (e.g. "Dr.", "e.g.", "etc."), so the sentence splitter does not
 * treat that period as a sentence end.
 *
 * The leading `\b` is required: without it, any word that merely ends in one
 * of these letter sequences ("best.", "items.", "zinc.") would be taken for
 * an abbreviation and merged into the following sentence.
 */
const ABBREVS = /\b(?:Mr|Mrs|Ms|Dr|Prof|Sr|Jr|vs|etc|i\.e|e\.g|approx|dept|est|inc|ltd|corp)\.\s*$/i;
6
+
7
/**
 * Remove a leading front-matter block from `text`.
 *
 * A block is recognised only when the first line is exactly `---` and a later
 * line is exactly `---` (trailing whitespace and `\r` are tolerated on both).
 * Lines that merely start with three dashes, such as `----` or
 * `--- a/file.ts`, do not open or close the block.
 *
 * @param text - Raw entry text.
 * @returns The text after the closing delimiter with leading whitespace
 *   removed, or `text` unchanged when no complete block is present.
 */
function stripFrontmatter(text: string): string {
  const firstBreak = text.indexOf('\n');
  if (firstBreak === -1) return text;
  if (text.slice(0, firstBreak).trimEnd() !== '---') return text;

  let lineStart = firstBreak + 1;
  while (lineStart <= text.length) {
    const nextBreak = text.indexOf('\n', lineStart);
    const lineEnd = nextBreak === -1 ? text.length : nextBreak;
    if (text.slice(lineStart, lineEnd).trimEnd() === '---') {
      return text.slice(lineEnd).trimStart();
    }
    if (nextBreak === -1) break;
    lineStart = nextBreak + 1;
  }

  // Opening delimiter without a closing one: leave the text untouched.
  return text;
}
13
+
14
/**
 * Split markdown-ish text into sentences.
 *
 * Blank lines end the sentence being accumulated. Heading lines (starting
 * with `#`) are dropped. Fenced code blocks are dropped entirely, both the
 * fence delimiter lines and the lines between them, so code does not end up
 * in prose summaries. Leading list markers (`-`, `*`, `+`, `1.`) are stripped
 * and the item text is kept. Segments that end in a known abbreviation (per
 * `ABBREVS`) are joined to the following segment instead of ending a sentence.
 *
 * @param text - Text to split; front matter should already be removed.
 * @returns Trimmed, non-empty sentences in document order.
 */
function splitSentences(text: string): string[] {
  // A fence delimiter: three or more backticks plus an info string that
  // contains no backtick. A line like "```x```" is inline code, not a fence.
  const fenceDelimiter = /^`{3,}[^`]*$/;
  const sentences: string[] = [];
  let current = '';
  let inFence = false;

  const flush = (): void => {
    const pending = current.trim();
    if (pending) sentences.push(pending);
    current = '';
  };

  for (const line of text.split('\n')) {
    const trimmed = line.trim();

    if (fenceDelimiter.test(trimmed)) {
      inFence = !inFence;
      continue;
    }
    // Everything between an opening and closing fence is code, not prose.
    if (inFence) continue;

    if (!trimmed) {
      flush();
      continue;
    }

    // Skip markdown headers
    if (trimmed.startsWith('#')) continue;
    // Skip remaining lines that start with backticks (inline-code-only lines)
    if (trimmed.startsWith('```')) continue;

    // Drop the list marker but keep the item text
    const listContent = trimmed.replace(/^[-*+]\s+/, '').replace(/^\d+\.\s+/, '');
    current += (current ? ' ' : '') + listContent;

    // Split on whitespace that follows sentence-ending punctuation
    const parts = current.split(/(?<=[.!?])\s+/);
    if (parts.length > 1) {
      for (let i = 0; i < parts.length - 1; i++) {
        const part = parts[i].trim();
        if (part && !ABBREVS.test(part)) {
          sentences.push(part);
        } else if (part) {
          // The period belonged to an abbreviation: reattach to the next part
          parts[i + 1] = part + ' ' + parts[i + 1];
        }
      }
      // The last part may still be an unfinished sentence; keep accumulating
      current = parts[parts.length - 1];
    }
  }

  flush();
  return sentences;
}
61
+
62
/**
 * Return the text of the first markdown ATX heading (`#` to `######`) in
 * `text`.
 *
 * Lines inside fenced code blocks are ignored, so a `# comment` in a shell or
 * Python snippet is not mistaken for the document heading.
 *
 * @param text - Markdown text to scan.
 * @returns The trimmed heading text, or `null` when there is no heading.
 */
function firstHeaderText(text: string): string | null {
  // Fence delimiter: optional indent, 3+ backticks, info string without backticks.
  const fenceDelimiter = /^\s*`{3,}[^`]*$/;
  let inFence = false;

  for (const line of text.split('\n')) {
    if (fenceDelimiter.test(line)) {
      inFence = !inFence;
      continue;
    }
    if (inFence) continue;

    const match = line.match(/^#{1,6}\s+(.+)/);
    if (match) return match[1].trim();
  }
  return null;
}
70
+
71
/**
 * Find the first non-empty single-line comment in a code snippet.
 *
 * Recognised forms are `// ...`, `# ...` (but not `##...`, and not a `#!`
 * shebang), and a `/* ... *​/` or `/** ... *​/` comment that has text on its
 * opening line. Empty comments are skipped so a later, meaningful comment can
 * still be found.
 *
 * @param text - Code text to scan line by line.
 * @returns The comment text without its delimiters, or `null` when no
 *   non-empty comment exists.
 */
function firstCodeComment(text: string): string | null {
  for (const line of text.split('\n')) {
    const trimmed = line.trim();
    let content = '';

    if (trimmed.startsWith('//')) {
      // Also covers "///" doc comments
      content = trimmed.replace(/^\/+/, '').trim();
    } else if (trimmed.startsWith('#!')) {
      // Shebang line: an interpreter directive, not a description
      continue;
    } else if (trimmed.startsWith('#') && !trimmed.startsWith('##')) {
      content = trimmed.slice(1).trim();
    } else if (trimmed.startsWith('/*')) {
      let inner = trimmed.slice(2);
      if (inner.endsWith('*/')) inner = inner.slice(0, -2);
      // Remove leftover stars from "/**" openers and "**/" closers
      content = inner.replace(/^\*+/, '').replace(/\*+$/, '').trim();
    }

    if (content) return content;
  }
  return null;
}
84
+
85
/**
 * Heuristically decide whether `text` is almost entirely code.
 *
 * After front matter is removed, each non-blank line is checked against a
 * fixed list of code-like prefixes. The text counts as code-only when more
 * than 70% of its non-blank lines match.
 *
 * @param text - Entry body, with or without front matter.
 * @returns `true` when the share of code-like lines exceeds 0.7; `false`
 *   otherwise, including when there are no non-blank lines.
 */
function isCodeOnly(text: string): boolean {
  const codePrefixes = [
    '```',
    '//',
    '/*',
    'import ',
    'export ',
    'const ',
    'let ',
    'function ',
    'class ',
    '{',
    '}',
    'def ',
    'return ',
  ];

  const nonBlank = stripFrontmatter(text)
    .split('\n')
    .map((raw) => raw.trim())
    .filter((candidate) => candidate.length > 0);
  if (nonBlank.length === 0) return false;

  let codeLike = 0;
  for (const candidate of nonBlank) {
    if (codePrefixes.some((prefix) => candidate.startsWith(prefix))) codeLike++;
  }
  return codeLike / nonBlank.length > 0.7;
}
99
+
100
/**
 * Truncate `text` to at most `limit` characters, ending with `...` when it
 * had to be shortened.
 *
 * The cut prefers the last space in the kept prefix, provided that space lies
 * past the halfway point of `limit`; otherwise the prefix is cut hard. The
 * cut never leaves half of a UTF-16 surrogate pair (e.g. half an emoji) in
 * front of the ellipsis.
 *
 * @param text - Text to shorten.
 * @param limit - Maximum length in UTF-16 code units. Intended for limits of
 *   at least 3; for smaller limits the result is just the ellipsis.
 * @returns `text` unchanged when it already fits, otherwise the shortened
 *   prefix followed by `...`.
 */
function cap(text: string, limit: number): string {
  if (text.length <= limit) return text;

  // Reserve three characters for the ellipsis; never pass a negative end index.
  let truncated = text.slice(0, Math.max(0, limit - 3));

  // A trailing high surrogate means the cut split a surrogate pair: drop it.
  const lastCode = truncated.charCodeAt(truncated.length - 1);
  if (lastCode >= 0xd800 && lastCode <= 0xdbff) {
    truncated = truncated.slice(0, -1);
  }

  const lastSpace = truncated.lastIndexOf(' ');
  return (lastSpace > limit * 0.5 ? truncated.slice(0, lastSpace) : truncated) + '...';
}
106
+
107
/**
 * Derive two precomputed summaries of an entry body.
 *
 * - Empty body (after front matter removal and trimming): both tiers are `''`.
 * - Body shorter than `SHORT_THRESHOLD`: the body itself is used for both tiers.
 * - Code-only body: both tiers use the first code comment, or
 *   `Code block: <first 80 chars of the first non-blank line>`.
 * - Otherwise: the keypoint is the first heading, else the first sentence; the
 *   condensed tier is the first sentence plus the last sentence when they differ.
 *
 * Both tiers are capped, to `CONDENSED_CAP` and `KEYPOINT_CAP` respectively,
 * in every case except the short-body one.
 *
 * @param body - Raw entry body, optionally starting with front matter.
 * @returns The `condensed` and `keypoint` summary strings.
 */
export function generateSummaryTiers(body: string): {
  condensed: string;
  keypoint: string;
} {
  const content = stripFrontmatter(body).trim();

  if (content.length === 0) {
    return { condensed: '', keypoint: '' };
  }
  if (content.length < SHORT_THRESHOLD) {
    // Already short enough: no summarising needed.
    return { condensed: content, keypoint: content };
  }

  const capTiers = (
    condensedText: string,
    keypointText: string
  ): { condensed: string; keypoint: string } => ({
    condensed: cap(condensedText, CONDENSED_CAP),
    keypoint: cap(keypointText, KEYPOINT_CAP),
  });

  if (isCodeOnly(content)) {
    let label = firstCodeComment(content);
    if (!label) {
      const firstNonBlank = content.split('\n').find((l) => l.trim())?.trim() || '';
      label = `Code block: ${firstNonBlank.slice(0, 80)}`;
    }
    return capTiers(label, label);
  }

  const sentences = splitSentences(content);
  const opening = sentences[0];
  const closing = sentences[sentences.length - 1];

  // Keypoint: the first heading wins, then the first sentence, then a raw slice.
  const keypoint = firstHeaderText(content) || opening || content.slice(0, KEYPOINT_CAP);

  // Condensed: first + last sentence when they differ, otherwise the first
  // sentence, with a raw slice as the last resort.
  const condensed =
    sentences.length > 1 && opening !== closing
      ? `${opening} ${closing}`
      : opening || content.slice(0, CONDENSED_CAP);

  return capTiers(condensed, keypoint);
}
@@ -114,6 +114,8 @@ export interface VaultEntry {
114
114
  recall_sessions: number;
115
115
  last_recalled_at: string | null;
116
116
  heat_tier: string | null;
117
+ summary_condensed: string | null;
118
+ summary_keypoint: string | null;
117
119
  rowid?: number;
118
120
  }
119
121