@context-vault/core 3.1.4 → 3.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -12,12 +12,27 @@ const EMBED_BATCH_SIZE = 32;
12
12
 
13
13
  export async function indexEntry(
14
14
  ctx: BaseCtx,
15
- entry: IndexEntryInput & { supersedes?: string[] | null; related_to?: string[] | null },
15
+ entry: IndexEntryInput & {
16
+ supersedes?: string[] | null;
17
+ related_to?: string[] | null;
18
+ },
16
19
  precomputedEmbedding?: Float32Array | null,
17
20
  ): Promise<void> {
18
21
  const {
19
- id, kind, category, title, body, meta, tags, source,
20
- filePath, createdAt, identity_key, expires_at, source_files, tier,
22
+ id,
23
+ kind,
24
+ category,
25
+ title,
26
+ body,
27
+ meta,
28
+ tags,
29
+ source,
30
+ filePath,
31
+ createdAt,
32
+ identity_key,
33
+ expires_at,
34
+ source_files,
35
+ tier,
21
36
  } = entry;
22
37
 
23
38
  if (expires_at && new Date(expires_at) <= new Date()) return;
@@ -31,13 +46,22 @@ export async function indexEntry(
31
46
  let wasUpdate = false;
32
47
 
33
48
  if (cat === "entity" && identity_key) {
34
- const existing = ctx.stmts.getByIdentityKey.get(kind, identity_key) as Record<string, unknown> | undefined;
49
+ const existing = ctx.stmts.getByIdentityKey.get(kind, identity_key) as
50
+ | Record<string, unknown>
51
+ | undefined;
35
52
  if (existing) {
36
53
  ctx.stmts.upsertByIdentityKey.run(
37
- title || null, body, metaJson, tagsJson,
38
- source || "claude-code", cat, filePath,
39
- expires_at || null, sourceFilesJson,
40
- kind, identity_key,
54
+ title || null,
55
+ body,
56
+ metaJson,
57
+ tagsJson,
58
+ source || "claude-code",
59
+ cat,
60
+ filePath,
61
+ expires_at || null,
62
+ sourceFilesJson,
63
+ kind,
64
+ identity_key,
41
65
  );
42
66
  wasUpdate = true;
43
67
  }
@@ -46,20 +70,39 @@ export async function indexEntry(
46
70
  if (!wasUpdate) {
47
71
  try {
48
72
  ctx.stmts.insertEntry.run(
49
- id, kind, cat, title || null, body, metaJson, tagsJson,
50
- source || "claude-code", filePath,
51
- identity_key || null, expires_at || null,
52
- createdAt, createdAt, sourceFilesJson, effectiveTier,
73
+ id,
74
+ kind,
75
+ cat,
76
+ title || null,
77
+ body,
78
+ metaJson,
79
+ tagsJson,
80
+ source || "claude-code",
81
+ filePath,
82
+ identity_key || null,
83
+ expires_at || null,
84
+ createdAt,
85
+ createdAt,
86
+ sourceFilesJson,
87
+ effectiveTier,
53
88
  );
54
89
  } catch (e) {
55
90
  if ((e as Error).message.includes("UNIQUE constraint")) {
56
91
  ctx.stmts.updateEntry.run(
57
- title || null, body, metaJson, tagsJson,
58
- source || "claude-code", cat,
59
- identity_key || null, expires_at || null, filePath,
92
+ title || null,
93
+ body,
94
+ metaJson,
95
+ tagsJson,
96
+ source || "claude-code",
97
+ cat,
98
+ identity_key || null,
99
+ expires_at || null,
100
+ filePath,
60
101
  );
61
102
  if (sourceFilesJson !== null && ctx.stmts.updateSourceFiles) {
62
- const entryRow = ctx.stmts.getRowidByPath.get(filePath) as { rowid: number } | undefined;
103
+ const entryRow = ctx.stmts.getRowidByPath.get(filePath) as
104
+ | { rowid: number }
105
+ | undefined;
63
106
  if (entryRow) {
64
107
  const idRow = ctx.db
65
108
  .prepare("SELECT id FROM vault WHERE file_path = ?")
@@ -76,8 +119,8 @@ export async function indexEntry(
76
119
  }
77
120
 
78
121
  const rowidResult = wasUpdate
79
- ? ctx.stmts.getRowidByPath.get(filePath) as { rowid: number } | undefined
80
- : ctx.stmts.getRowid.get(id) as { rowid: number } | undefined;
122
+ ? (ctx.stmts.getRowidByPath.get(filePath) as { rowid: number } | undefined)
123
+ : (ctx.stmts.getRowid.get(id) as { rowid: number } | undefined);
81
124
 
82
125
  if (!rowidResult || rowidResult.rowid == null) {
83
126
  throw new Error(
@@ -100,12 +143,18 @@ export async function indexEntry(
100
143
  try {
101
144
  embedding = await ctx.embed([title, body].filter(Boolean).join(" "));
102
145
  } catch (embedErr) {
103
- console.warn(`[context-vault] embed() failed for entry ${id} — skipping vec insert: ${(embedErr as Error).message}`);
146
+ console.warn(
147
+ `[context-vault] embed() failed for entry ${id} — skipping vec insert: ${(embedErr as Error).message}`,
148
+ );
104
149
  }
105
150
  }
106
151
 
107
152
  if (embedding) {
108
- try { ctx.deleteVec(rowid); } catch { /* no-op */ }
153
+ try {
154
+ ctx.deleteVec(rowid);
155
+ } catch {
156
+ /* no-op */
157
+ }
109
158
  ctx.insertVec(rowid, embedding);
110
159
  }
111
160
  }
@@ -120,11 +169,17 @@ export async function pruneExpired(ctx: BaseCtx): Promise<number> {
120
169
 
121
170
  for (const row of expired) {
122
171
  if (row.file_path) {
123
- try { unlinkSync(row.file_path); } catch {}
172
+ try {
173
+ unlinkSync(row.file_path);
174
+ } catch {}
124
175
  }
125
- const vRowid = (ctx.stmts.getRowid.get(row.id) as { rowid: number } | undefined)?.rowid;
176
+ const vRowid = (
177
+ ctx.stmts.getRowid.get(row.id) as { rowid: number } | undefined
178
+ )?.rowid;
126
179
  if (vRowid) {
127
- try { ctx.deleteVec(Number(vRowid)); } catch {}
180
+ try {
181
+ ctx.deleteVec(Number(vRowid));
182
+ } catch {}
128
183
  }
129
184
  ctx.stmts.deleteEntry.run(row.id);
130
185
  }
@@ -137,7 +192,12 @@ export async function reindex(
137
192
  opts: { fullSync?: boolean } = {},
138
193
  ): Promise<ReindexStats> {
139
194
  const { fullSync = true } = opts;
140
- const stats: ReindexStats = { added: 0, updated: 0, removed: 0, unchanged: 0 };
195
+ const stats: ReindexStats = {
196
+ added: 0,
197
+ updated: 0,
198
+ removed: 0,
199
+ unchanged: 0,
200
+ };
141
201
 
142
202
  if (!existsSync(ctx.config.vaultDir)) return stats;
143
203
 
@@ -224,20 +284,32 @@ export async function reindex(
224
284
  if (!existing) {
225
285
  const id = (fmMeta.id as string) || ulid();
226
286
  const tagsJson = fmMeta.tags ? JSON.stringify(fmMeta.tags) : null;
227
- const created = (fmMeta.created as string) || new Date().toISOString();
287
+ const created =
288
+ (fmMeta.created as string) || new Date().toISOString();
228
289
 
229
290
  const result = upsertEntry.run(
230
- id, kind, category, parsed.title || null, parsed.body,
231
- metaJson, tagsJson, (fmMeta.source as string) || "file",
232
- filePath, identity_key, expires_at,
233
- created, (fmMeta.updated as string) || created,
291
+ id,
292
+ kind,
293
+ category,
294
+ parsed.title || null,
295
+ parsed.body,
296
+ metaJson,
297
+ tagsJson,
298
+ (fmMeta.source as string) || "file",
299
+ filePath,
300
+ identity_key,
301
+ expires_at,
302
+ created,
303
+ (fmMeta.updated as string) || created,
234
304
  );
235
305
  if ((result as { changes: number }).changes > 0) {
236
306
  if (relatedToJson && ctx.stmts.updateRelatedTo) {
237
307
  ctx.stmts.updateRelatedTo.run(relatedToJson, id);
238
308
  }
239
309
  if (category !== "event") {
240
- const rowidResult = ctx.stmts.getRowid.get(id) as { rowid: number } | undefined;
310
+ const rowidResult = ctx.stmts.getRowid.get(id) as
311
+ | { rowid: number }
312
+ | undefined;
241
313
  if (rowidResult?.rowid) {
242
314
  const embeddingText = [parsed.title, parsed.body]
243
315
  .filter(Boolean)
@@ -254,24 +326,45 @@ export async function reindex(
254
326
  }
255
327
  } else if (fullSync) {
256
328
  const tagsJson = fmMeta.tags ? JSON.stringify(fmMeta.tags) : null;
257
- const titleChanged = (parsed.title || null) !== ((existing.title as string) || null);
329
+ const titleChanged =
330
+ (parsed.title || null) !== ((existing.title as string) || null);
258
331
  const bodyChanged = (existing.body as string) !== parsed.body;
259
332
  const tagsChanged = tagsJson !== ((existing.tags as string) || null);
260
333
  const metaChanged = metaJson !== ((existing.meta as string) || null);
261
- const relatedToChanged = relatedToJson !== ((existing.related_to as string) || null);
262
-
263
- if (bodyChanged || titleChanged || tagsChanged || metaChanged || relatedToChanged) {
334
+ const relatedToChanged =
335
+ relatedToJson !== ((existing.related_to as string) || null);
336
+
337
+ if (
338
+ bodyChanged ||
339
+ titleChanged ||
340
+ tagsChanged ||
341
+ metaChanged ||
342
+ relatedToChanged
343
+ ) {
264
344
  ctx.stmts.updateEntry.run(
265
- parsed.title || null, parsed.body, metaJson, tagsJson,
266
- (fmMeta.source as string) || "file", category,
267
- identity_key, expires_at, filePath,
345
+ parsed.title || null,
346
+ parsed.body,
347
+ metaJson,
348
+ tagsJson,
349
+ (fmMeta.source as string) || "file",
350
+ category,
351
+ identity_key,
352
+ expires_at,
353
+ filePath,
268
354
  );
269
355
  if (relatedToChanged && ctx.stmts.updateRelatedTo) {
270
- ctx.stmts.updateRelatedTo.run(relatedToJson, existing.id as string);
356
+ ctx.stmts.updateRelatedTo.run(
357
+ relatedToJson,
358
+ existing.id as string,
359
+ );
271
360
  }
272
361
 
273
362
  if ((bodyChanged || titleChanged) && category !== "event") {
274
- const rowid = (ctx.stmts.getRowid.get(existing.id as string) as { rowid: number } | undefined)?.rowid;
363
+ const rowid = (
364
+ ctx.stmts.getRowid.get(existing.id as string) as
365
+ | { rowid: number }
366
+ | undefined
367
+ )?.rowid;
275
368
  if (rowid) {
276
369
  const embeddingText = [parsed.title, parsed.body]
277
370
  .filter(Boolean)
@@ -291,9 +384,15 @@ export async function reindex(
291
384
  if (fullSync) {
292
385
  for (const [dbPath, row] of dbByPath) {
293
386
  if (!diskPaths.has(dbPath)) {
294
- const vRowid = (ctx.stmts.getRowid.get(row.id as string) as { rowid: number } | undefined)?.rowid;
387
+ const vRowid = (
388
+ ctx.stmts.getRowid.get(row.id as string) as
389
+ | { rowid: number }
390
+ | undefined
391
+ )?.rowid;
295
392
  if (vRowid) {
296
- try { ctx.deleteVec(vRowid); } catch {}
393
+ try {
394
+ ctx.deleteVec(vRowid);
395
+ } catch {}
297
396
  }
298
397
  ctx.stmts.deleteEntry.run(row.id as string);
299
398
  stats.removed++;
@@ -313,7 +412,9 @@ export async function reindex(
313
412
  .prepare("SELECT id, rowid FROM vault WHERE kind = ?")
314
413
  .all(kind) as { id: string; rowid: number }[];
315
414
  for (const row of orphaned) {
316
- try { ctx.deleteVec(row.rowid); } catch {}
415
+ try {
416
+ ctx.deleteVec(row.rowid);
417
+ } catch {}
317
418
  ctx.stmts.deleteEntry.run(row.id);
318
419
  stats.removed++;
319
420
  }
@@ -329,11 +430,17 @@ export async function reindex(
329
430
 
330
431
  for (const row of expired) {
331
432
  if (row.file_path) {
332
- try { unlinkSync(row.file_path); } catch {}
433
+ try {
434
+ unlinkSync(row.file_path);
435
+ } catch {}
333
436
  }
334
- const vRowid = (ctx.stmts.getRowid.get(row.id) as { rowid: number } | undefined)?.rowid;
437
+ const vRowid = (
438
+ ctx.stmts.getRowid.get(row.id) as { rowid: number } | undefined
439
+ )?.rowid;
335
440
  if (vRowid) {
336
- try { ctx.deleteVec(Number(vRowid)); } catch {}
441
+ try {
442
+ ctx.deleteVec(Number(vRowid));
443
+ } catch {}
337
444
  }
338
445
  ctx.stmts.deleteEntry.run(row.id);
339
446
  stats.removed++;
@@ -350,7 +457,9 @@ export async function reindex(
350
457
  const embeddings = await embedBatch(batch.map((e) => e.text));
351
458
  for (let j = 0; j < batch.length; j++) {
352
459
  if (embeddings[j]) {
353
- try { ctx.deleteVec(batch[j].rowid); } catch {}
460
+ try {
461
+ ctx.deleteVec(batch[j].rowid);
462
+ } catch {}
354
463
  ctx.insertVec(batch[j].rowid, embeddings[j]!);
355
464
  }
356
465
  }
package/src/ingest-url.ts CHANGED
@@ -6,27 +6,81 @@ export function htmlToMarkdown(html: string): string {
6
6
  md = md.replace(/<header[\s\S]*?<\/header>/gi, "");
7
7
  md = md.replace(/<footer[\s\S]*?<\/footer>/gi, "");
8
8
  md = md.replace(/<aside[\s\S]*?<\/aside>/gi, "");
9
- md = md.replace(/<h1[^>]*>([\s\S]*?)<\/h1>/gi, (_, c: string) => `\n# ${stripTags(c).trim()}\n`);
10
- md = md.replace(/<h2[^>]*>([\s\S]*?)<\/h2>/gi, (_, c: string) => `\n## ${stripTags(c).trim()}\n`);
11
- md = md.replace(/<h3[^>]*>([\s\S]*?)<\/h3>/gi, (_, c: string) => `\n### ${stripTags(c).trim()}\n`);
12
- md = md.replace(/<h4[^>]*>([\s\S]*?)<\/h4>/gi, (_, c: string) => `\n#### ${stripTags(c).trim()}\n`);
13
- md = md.replace(/<h5[^>]*>([\s\S]*?)<\/h5>/gi, (_, c: string) => `\n##### ${stripTags(c).trim()}\n`);
14
- md = md.replace(/<h6[^>]*>([\s\S]*?)<\/h6>/gi, (_, c: string) => `\n###### ${stripTags(c).trim()}\n`);
15
- md = md.replace(/<a[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/gi, (_, href: string, text: string) => {
16
- const cleanText = stripTags(text).trim();
17
- return cleanText ? `[${cleanText}](${href})` : "";
18
- });
19
- md = md.replace(/<pre[^>]*><code[^>]*>([\s\S]*?)<\/code><\/pre>/gi, (_, c: string) => `\n\`\`\`\n${decodeEntities(c).trim()}\n\`\`\`\n`);
20
- md = md.replace(/<pre[^>]*>([\s\S]*?)<\/pre>/gi, (_, c: string) => `\n\`\`\`\n${decodeEntities(stripTags(c)).trim()}\n\`\`\`\n`);
21
- md = md.replace(/<code[^>]*>([\s\S]*?)<\/code>/gi, (_, c: string) => `\`${decodeEntities(c).trim()}\``);
22
- md = md.replace(/<(strong|b)[^>]*>([\s\S]*?)<\/\1>/gi, (_, __: string, c: string) => `**${stripTags(c).trim()}**`);
23
- md = md.replace(/<(em|i)[^>]*>([\s\S]*?)<\/\1>/gi, (_, __: string, c: string) => `*${stripTags(c).trim()}*`);
24
- md = md.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, (_, c: string) => `- ${stripTags(c).trim()}\n`);
9
+ md = md.replace(
10
+ /<h1[^>]*>([\s\S]*?)<\/h1>/gi,
11
+ (_, c: string) => `\n# ${stripTags(c).trim()}\n`,
12
+ );
13
+ md = md.replace(
14
+ /<h2[^>]*>([\s\S]*?)<\/h2>/gi,
15
+ (_, c: string) => `\n## ${stripTags(c).trim()}\n`,
16
+ );
17
+ md = md.replace(
18
+ /<h3[^>]*>([\s\S]*?)<\/h3>/gi,
19
+ (_, c: string) => `\n### ${stripTags(c).trim()}\n`,
20
+ );
21
+ md = md.replace(
22
+ /<h4[^>]*>([\s\S]*?)<\/h4>/gi,
23
+ (_, c: string) => `\n#### ${stripTags(c).trim()}\n`,
24
+ );
25
+ md = md.replace(
26
+ /<h5[^>]*>([\s\S]*?)<\/h5>/gi,
27
+ (_, c: string) => `\n##### ${stripTags(c).trim()}\n`,
28
+ );
29
+ md = md.replace(
30
+ /<h6[^>]*>([\s\S]*?)<\/h6>/gi,
31
+ (_, c: string) => `\n###### ${stripTags(c).trim()}\n`,
32
+ );
33
+ md = md.replace(
34
+ /<a[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/gi,
35
+ (_, href: string, text: string) => {
36
+ const cleanText = stripTags(text).trim();
37
+ return cleanText ? `[${cleanText}](${href})` : "";
38
+ },
39
+ );
40
+ md = md.replace(
41
+ /<pre[^>]*><code[^>]*>([\s\S]*?)<\/code><\/pre>/gi,
42
+ (_, c: string) => `\n\`\`\`\n${decodeEntities(c).trim()}\n\`\`\`\n`,
43
+ );
44
+ md = md.replace(
45
+ /<pre[^>]*>([\s\S]*?)<\/pre>/gi,
46
+ (_, c: string) =>
47
+ `\n\`\`\`\n${decodeEntities(stripTags(c)).trim()}\n\`\`\`\n`,
48
+ );
49
+ md = md.replace(
50
+ /<code[^>]*>([\s\S]*?)<\/code>/gi,
51
+ (_, c: string) => `\`${decodeEntities(c).trim()}\``,
52
+ );
53
+ md = md.replace(
54
+ /<(strong|b)[^>]*>([\s\S]*?)<\/\1>/gi,
55
+ (_, __: string, c: string) => `**${stripTags(c).trim()}**`,
56
+ );
57
+ md = md.replace(
58
+ /<(em|i)[^>]*>([\s\S]*?)<\/\1>/gi,
59
+ (_, __: string, c: string) => `*${stripTags(c).trim()}*`,
60
+ );
61
+ md = md.replace(
62
+ /<li[^>]*>([\s\S]*?)<\/li>/gi,
63
+ (_, c: string) => `- ${stripTags(c).trim()}\n`,
64
+ );
25
65
  md = md.replace(/<br\s*\/?>/gi, "\n");
26
- md = md.replace(/<p[^>]*>([\s\S]*?)<\/p>/gi, (_, c: string) => `\n${stripTags(c).trim()}\n`);
27
- md = md.replace(/<blockquote[^>]*>([\s\S]*?)<\/blockquote>/gi, (_, c: string) => {
28
- return "\n" + stripTags(c).trim().split("\n").map((l: string) => `> ${l}`).join("\n") + "\n";
29
- });
66
+ md = md.replace(
67
+ /<p[^>]*>([\s\S]*?)<\/p>/gi,
68
+ (_, c: string) => `\n${stripTags(c).trim()}\n`,
69
+ );
70
+ md = md.replace(
71
+ /<blockquote[^>]*>([\s\S]*?)<\/blockquote>/gi,
72
+ (_, c: string) => {
73
+ return (
74
+ "\n" +
75
+ stripTags(c)
76
+ .trim()
77
+ .split("\n")
78
+ .map((l: string) => `> ${l}`)
79
+ .join("\n") +
80
+ "\n"
81
+ );
82
+ },
83
+ );
30
84
  md = stripTags(md);
31
85
  md = decodeEntities(md);
32
86
  md = md.replace(/\n{3,}/g, "\n\n").trim();
@@ -39,15 +93,28 @@ function stripTags(html: string): string {
39
93
 
40
94
  function decodeEntities(text: string): string {
41
95
  return text
42
- .replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">")
43
- .replace(/&quot;/g, '"').replace(/&#39;/g, "'").replace(/&nbsp;/g, " ")
44
- .replace(/&#(\d+);/g, (_, n: string) => String.fromCharCode(parseInt(n, 10)))
45
- .replace(/&#x([0-9a-f]+);/gi, (_, n: string) => String.fromCharCode(parseInt(n, 16)));
96
+ .replace(/&amp;/g, "&")
97
+ .replace(/&lt;/g, "<")
98
+ .replace(/&gt;/g, ">")
99
+ .replace(/&quot;/g, '"')
100
+ .replace(/&#39;/g, "'")
101
+ .replace(/&nbsp;/g, " ")
102
+ .replace(/&#(\d+);/g, (_, n: string) =>
103
+ String.fromCharCode(parseInt(n, 10)),
104
+ )
105
+ .replace(/&#x([0-9a-f]+);/gi, (_, n: string) =>
106
+ String.fromCharCode(parseInt(n, 16)),
107
+ );
46
108
  }
47
109
 
48
- export function extractHtmlContent(html: string, _url: string): { title: string; body: string } {
110
+ export function extractHtmlContent(
111
+ html: string,
112
+ _url: string,
113
+ ): { title: string; body: string } {
49
114
  const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
50
- const title = titleMatch ? stripTags(decodeEntities(titleMatch[1])).trim() : "";
115
+ const title = titleMatch
116
+ ? stripTags(decodeEntities(titleMatch[1])).trim()
117
+ : "";
51
118
  let contentHtml = "";
52
119
  const articleMatch = html.match(/<article[^>]*>([\s\S]*?)<\/article>/i);
53
120
  const mainMatch = html.match(/<main[^>]*>([\s\S]*?)<\/main>/i);
@@ -63,37 +130,83 @@ export function extractHtmlContent(html: string, _url: string): { title: string;
63
130
 
64
131
  export async function ingestUrl(
65
132
  url: string,
66
- opts: { kind?: string; tags?: string[]; source?: string; maxBodyLength?: number; timeoutMs?: number } = {},
67
- ): Promise<{ kind: string; title: string; body: string; tags: string[]; meta: Record<string, unknown>; source: string }> {
68
- const { kind = "reference", tags = [], source, maxBodyLength = 50000, timeoutMs = 15000 } = opts;
133
+ opts: {
134
+ kind?: string;
135
+ tags?: string[];
136
+ source?: string;
137
+ maxBodyLength?: number;
138
+ timeoutMs?: number;
139
+ } = {},
140
+ ): Promise<{
141
+ kind: string;
142
+ title: string;
143
+ body: string;
144
+ tags: string[];
145
+ meta: Record<string, unknown>;
146
+ source: string;
147
+ }> {
148
+ const {
149
+ kind = "reference",
150
+ tags = [],
151
+ source,
152
+ maxBodyLength = 50000,
153
+ timeoutMs = 15000,
154
+ } = opts;
69
155
  let domain: string;
70
- try { domain = new URL(url).hostname; } catch { throw new Error(`Invalid URL: ${url}`); }
156
+ try {
157
+ domain = new URL(url).hostname;
158
+ } catch {
159
+ throw new Error(`Invalid URL: ${url}`);
160
+ }
71
161
  const controller = new AbortController();
72
162
  const timeout = setTimeout(() => controller.abort(), timeoutMs);
73
163
  let response: Response;
74
164
  try {
75
165
  response = await fetch(url, {
76
166
  signal: controller.signal,
77
- headers: { "User-Agent": "ContextVault/1.0 (+https://github.com/fellanH/context-vault)", Accept: "text/html,application/xhtml+xml,text/plain,*/*" },
167
+ headers: {
168
+ "User-Agent":
169
+ "ContextVault/1.0 (+https://github.com/fellanH/context-vault)",
170
+ Accept: "text/html,application/xhtml+xml,text/plain,*/*",
171
+ },
78
172
  });
79
173
  } catch (err) {
80
- if ((err as Error).name === "AbortError") throw new Error(`Request timed out after ${timeoutMs}ms`);
174
+ if ((err as Error).name === "AbortError")
175
+ throw new Error(`Request timed out after ${timeoutMs}ms`);
81
176
  throw new Error(`Fetch failed: ${(err as Error).message}`);
82
- } finally { clearTimeout(timeout); }
83
- if (!response.ok) throw new Error(`HTTP ${response.status}: ${response.statusText}`);
177
+ } finally {
178
+ clearTimeout(timeout);
179
+ }
180
+ if (!response.ok)
181
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
84
182
  const contentType = response.headers.get("content-type") || "";
85
183
  const html = await response.text();
86
184
  let title: string, body: string;
87
- if (contentType.includes("text/html") || contentType.includes("application/xhtml")) {
185
+ if (
186
+ contentType.includes("text/html") ||
187
+ contentType.includes("application/xhtml")
188
+ ) {
88
189
  const extracted = extractHtmlContent(html, url);
89
- title = extracted.title; body = extracted.body;
90
- } else { title = domain; body = html; }
91
- if (body.length > maxBodyLength) body = body.slice(0, maxBodyLength) + "\n\n[Content truncated]";
190
+ title = extracted.title;
191
+ body = extracted.body;
192
+ } else {
193
+ title = domain;
194
+ body = html;
195
+ }
196
+ if (body.length > maxBodyLength)
197
+ body = body.slice(0, maxBodyLength) + "\n\n[Content truncated]";
92
198
  if (!body.trim()) throw new Error("No readable content extracted from URL");
93
199
  return {
94
- kind, title: title || domain, body,
200
+ kind,
201
+ title: title || domain,
202
+ body,
95
203
  tags: [...tags, "web-import"],
96
- meta: { url, domain, fetched_at: new Date().toISOString(), content_type: contentType.split(";")[0].trim() || "text/html" },
204
+ meta: {
205
+ url,
206
+ domain,
207
+ fetched_at: new Date().toISOString(),
208
+ content_type: contentType.split(";")[0].trim() || "text/html",
209
+ },
97
210
  source: source || domain,
98
211
  };
99
212
  }
package/src/main.ts CHANGED
@@ -80,7 +80,12 @@ export {
80
80
  } from "./db.js";
81
81
 
82
82
  // Embeddings
83
- export { embed, embedBatch, resetEmbedPipeline, isEmbedAvailable } from "./embed.js";
83
+ export {
84
+ embed,
85
+ embedBatch,
86
+ resetEmbedPipeline,
87
+ isEmbedAvailable,
88
+ } from "./embed.js";
84
89
 
85
90
  // Index (reindex + indexEntry)
86
91
  export { indexEntry, reindex, pruneExpired } from "./index.js";
@@ -97,15 +102,7 @@ export {
97
102
  } from "./search.js";
98
103
 
99
104
  // Capture
100
- export {
101
- writeEntry,
102
- updateEntryFile,
103
- captureAndIndex,
104
- } from "./capture.js";
105
+ export { writeEntry, updateEntryFile, captureAndIndex } from "./capture.js";
105
106
 
106
107
  // Ingest URL
107
- export {
108
- htmlToMarkdown,
109
- extractHtmlContent,
110
- ingestUrl,
111
- } from "./ingest-url.js";
108
+ export { htmlToMarkdown, extractHtmlContent, ingestUrl } from "./ingest-url.js";
package/src/search.ts CHANGED
@@ -1,9 +1,17 @@
1
- import type { BaseCtx, SearchResult, SearchOptions, VaultEntry } from "./types.js";
1
+ import type {
2
+ BaseCtx,
3
+ SearchResult,
4
+ SearchOptions,
5
+ VaultEntry,
6
+ } from "./types.js";
2
7
 
3
8
  const NEAR_DUP_THRESHOLD = 0.92;
4
9
  const RRF_K = 60;
5
10
 
6
- export function recencyDecayScore(updatedAt: string | null | undefined, decayRate = 0.05): number {
11
+ export function recencyDecayScore(
12
+ updatedAt: string | null | undefined,
13
+ decayRate = 0.05,
14
+ ): number {
7
15
  if (updatedAt == null) return 0.5;
8
16
  const ageDays = (Date.now() - new Date(updatedAt).getTime()) / 86400000;
9
17
  return Math.exp(-decayRate * ageDays);
@@ -28,7 +36,11 @@ export function buildFtsQuery(query: string): string | null {
28
36
  return `${phrase} OR ${near} OR ${and}`;
29
37
  }
30
38
 
31
- export function recencyBoost(createdAt: string, category: string, decayDays = 30): number {
39
+ export function recencyBoost(
40
+ createdAt: string,
41
+ category: string,
42
+ decayDays = 30,
43
+ ): number {
32
44
  if (category !== "event") return 1.0;
33
45
  const ageDays = (Date.now() - new Date(createdAt).getTime()) / 86400000;
34
46
  return 1 / (1 + ageDays / decayDays);
@@ -46,9 +58,9 @@ export function buildFilterClauses({
46
58
  since?: string | null;
47
59
  until?: string | null;
48
60
  includeSuperseeded?: boolean;
49
- }): { clauses: string[]; params: unknown[] } {
61
+ }): { clauses: string[]; params: (string | number | null)[] } {
50
62
  const clauses: string[] = [];
51
- const params: unknown[] = [];
63
+ const params: (string | number | null)[] = [];
52
64
  if (categoryFilter) {
53
65
  clauses.push("e.category = ?");
54
66
  params.push(categoryFilter);
@@ -120,7 +132,7 @@ export async function hybridSearch(
120
132
  if (ftsQuery) {
121
133
  try {
122
134
  const whereParts = ["vault_fts MATCH ?"];
123
- const ftsParams: unknown[] = [ftsQuery];
135
+ const ftsParams: (string | number | null)[] = [ftsQuery];
124
136
 
125
137
  if (kindFilter) {
126
138
  whereParts.push("e.kind = ?");
@@ -130,8 +142,9 @@ export async function hybridSearch(
130
142
  ftsParams.push(...extraFilters.params);
131
143
 
132
144
  const ftsSQL = `SELECT e.*, rank FROM vault_fts f JOIN vault e ON f.rowid = e.rowid WHERE ${whereParts.join(" AND ")} ORDER BY rank LIMIT 15`;
133
- // @ts-expect-error -- node:sqlite types are overly strict for dynamic SQL params
134
- const rows = ctx.db.prepare(ftsSQL).all(...ftsParams) as unknown as (VaultEntry & { rank: number })[];
145
+ const rows = ctx.db
146
+ .prepare(ftsSQL)
147
+ .all(...ftsParams) as unknown as (VaultEntry & { rank: number })[];
135
148
 
136
149
  for (const { rank: _rank, ...row } of rows) {
137
150
  ftsRankedIds.push(row.id);
@@ -148,7 +161,11 @@ export async function hybridSearch(
148
161
  const vecSimMap = new Map<string, number>();
149
162
 
150
163
  try {
151
- const vecCount = (ctx.db.prepare("SELECT COUNT(*) as c FROM vault_vec").get() as { c: number }).c;
164
+ const vecCount = (
165
+ ctx.db.prepare("SELECT COUNT(*) as c FROM vault_vec").get() as {
166
+ c: number;
167
+ }
168
+ ).c;
152
169
  if (vecCount > 0) {
153
170
  queryVec = await ctx.embed(query);
154
171
  if (queryVec) {
@@ -196,7 +213,9 @@ export async function hybridSearch(
196
213
  }
197
214
  } catch (err) {
198
215
  if (!(err as Error).message?.includes("no such table")) {
199
- console.error(`[retrieve] Vector search error: ${(err as Error).message}`);
216
+ console.error(
217
+ `[retrieve] Vector search error: ${(err as Error).message}`,
218
+ );
200
219
  }
201
220
  }
202
221