context-vault 2.17.1 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +795 -71
- package/node_modules/@context-vault/core/dist/capture.d.ts +21 -0
- package/node_modules/@context-vault/core/dist/capture.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/capture.js +269 -0
- package/node_modules/@context-vault/core/dist/capture.js.map +1 -0
- package/node_modules/@context-vault/core/dist/categories.d.ts +6 -0
- package/node_modules/@context-vault/core/dist/categories.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/categories.js +50 -0
- package/node_modules/@context-vault/core/dist/categories.js.map +1 -0
- package/node_modules/@context-vault/core/dist/config.d.ts +4 -0
- package/node_modules/@context-vault/core/dist/config.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/config.js +190 -0
- package/node_modules/@context-vault/core/dist/config.js.map +1 -0
- package/node_modules/@context-vault/core/dist/constants.d.ts +33 -0
- package/node_modules/@context-vault/core/dist/constants.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/constants.js +23 -0
- package/node_modules/@context-vault/core/dist/constants.js.map +1 -0
- package/node_modules/@context-vault/core/dist/db.d.ts +13 -0
- package/node_modules/@context-vault/core/dist/db.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/db.js +191 -0
- package/node_modules/@context-vault/core/dist/db.js.map +1 -0
- package/node_modules/@context-vault/core/dist/embed.d.ts +5 -0
- package/node_modules/@context-vault/core/dist/embed.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/embed.js +78 -0
- package/node_modules/@context-vault/core/dist/embed.js.map +1 -0
- package/node_modules/@context-vault/core/dist/files.d.ts +13 -0
- package/node_modules/@context-vault/core/dist/files.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/files.js +66 -0
- package/node_modules/@context-vault/core/dist/files.js.map +1 -0
- package/node_modules/@context-vault/core/dist/formatters.d.ts +8 -0
- package/node_modules/@context-vault/core/dist/formatters.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/formatters.js +18 -0
- package/node_modules/@context-vault/core/dist/formatters.js.map +1 -0
- package/node_modules/@context-vault/core/dist/frontmatter.d.ts +12 -0
- package/node_modules/@context-vault/core/dist/frontmatter.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/frontmatter.js +101 -0
- package/node_modules/@context-vault/core/dist/frontmatter.js.map +1 -0
- package/node_modules/@context-vault/core/dist/index.d.ts +10 -0
- package/node_modules/@context-vault/core/dist/index.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/index.js +297 -0
- package/node_modules/@context-vault/core/dist/index.js.map +1 -0
- package/node_modules/@context-vault/core/dist/ingest-url.d.ts +20 -0
- package/node_modules/@context-vault/core/dist/ingest-url.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/ingest-url.js +113 -0
- package/node_modules/@context-vault/core/dist/ingest-url.js.map +1 -0
- package/node_modules/@context-vault/core/dist/main.d.ts +14 -0
- package/node_modules/@context-vault/core/dist/main.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/main.js +25 -0
- package/node_modules/@context-vault/core/dist/main.js.map +1 -0
- package/node_modules/@context-vault/core/dist/search.d.ts +18 -0
- package/node_modules/@context-vault/core/dist/search.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/search.js +238 -0
- package/node_modules/@context-vault/core/dist/search.js.map +1 -0
- package/node_modules/@context-vault/core/dist/types.d.ts +176 -0
- package/node_modules/@context-vault/core/dist/types.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/types.js +2 -0
- package/node_modules/@context-vault/core/dist/types.js.map +1 -0
- package/node_modules/@context-vault/core/package.json +66 -16
- package/node_modules/@context-vault/core/src/capture.ts +308 -0
- package/node_modules/@context-vault/core/src/categories.ts +54 -0
- package/node_modules/@context-vault/core/src/{core/config.js → config.ts} +34 -33
- package/node_modules/@context-vault/core/src/{constants.js → constants.ts} +6 -3
- package/node_modules/@context-vault/core/src/db.ts +229 -0
- package/node_modules/@context-vault/core/src/{index/embed.js → embed.ts} +10 -35
- package/node_modules/@context-vault/core/src/{core/files.js → files.ts} +15 -20
- package/node_modules/@context-vault/core/src/{capture/formatters.js → formatters.ts} +13 -11
- package/node_modules/@context-vault/core/src/{core/frontmatter.js → frontmatter.ts} +26 -33
- package/node_modules/@context-vault/core/src/index.ts +351 -0
- package/node_modules/@context-vault/core/src/ingest-url.ts +99 -0
- package/node_modules/@context-vault/core/src/main.ts +111 -0
- package/node_modules/@context-vault/core/src/{retrieve/index.js → search.ts} +62 -150
- package/node_modules/@context-vault/core/src/types.ts +166 -0
- package/package.json +12 -7
- package/scripts/postinstall.js +1 -1
- package/{node_modules/@context-vault/core/src/core → src}/error-log.js +1 -15
- package/{node_modules/@context-vault/core/src/server → src}/helpers.js +9 -4
- package/src/linking.js +100 -0
- package/{node_modules/@context-vault/core/src/server/tools.js → src/register-tools.js} +14 -13
- package/src/{server/index.js → server.js} +10 -38
- package/src/status.js +235 -0
- package/{node_modules/@context-vault/core/src/core → src}/telemetry.js +9 -19
- package/src/temporal.js +97 -0
- package/{node_modules/@context-vault/core/src/server → src}/tools/context-status.js +3 -4
- package/{node_modules/@context-vault/core/src/server → src}/tools/create-snapshot.js +6 -7
- package/{node_modules/@context-vault/core/src/server → src}/tools/delete-context.js +0 -2
- package/{node_modules/@context-vault/core/src/server → src}/tools/get-context.js +17 -21
- package/{node_modules/@context-vault/core/src/server → src}/tools/ingest-project.js +5 -6
- package/{node_modules/@context-vault/core/src/server → src}/tools/ingest-url.js +3 -4
- package/{node_modules/@context-vault/core/src/server → src}/tools/list-buckets.js +4 -5
- package/{node_modules/@context-vault/core/src/server → src}/tools/list-context.js +3 -6
- package/{node_modules/@context-vault/core/src/server → src}/tools/save-context.js +17 -20
- package/{node_modules/@context-vault/core/src/server → src}/tools/session-start.js +9 -16
- package/node_modules/@context-vault/core/src/capture/file-ops.js +0 -99
- package/node_modules/@context-vault/core/src/capture/import-pipeline.js +0 -46
- package/node_modules/@context-vault/core/src/capture/importers.js +0 -387
- package/node_modules/@context-vault/core/src/capture/index.js +0 -250
- package/node_modules/@context-vault/core/src/capture/ingest-url.js +0 -252
- package/node_modules/@context-vault/core/src/consolidation/index.js +0 -112
- package/node_modules/@context-vault/core/src/core/categories.js +0 -73
- package/node_modules/@context-vault/core/src/core/linking.js +0 -161
- package/node_modules/@context-vault/core/src/core/migrate-dirs.js +0 -196
- package/node_modules/@context-vault/core/src/core/status.js +0 -350
- package/node_modules/@context-vault/core/src/core/temporal.js +0 -146
- package/node_modules/@context-vault/core/src/index/db.js +0 -586
- package/node_modules/@context-vault/core/src/index/index.js +0 -583
- package/node_modules/@context-vault/core/src/index.js +0 -71
- package/node_modules/@context-vault/core/src/sync/sync.js +0 -235
- package/src/hooks/post-tool-call.mjs +0 -62
- package/src/hooks/session-end.mjs +0 -492
- /package/{node_modules/@context-vault/core/src/server → src}/tools/clear-context.js +0 -0
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
import { readFileSync, readdirSync, existsSync, unlinkSync } from "node:fs";
|
|
2
|
+
import { join, basename } from "node:path";
|
|
3
|
+
import { dirToKind, walkDir, ulid } from "./files.js";
|
|
4
|
+
import { categoryFor, defaultTierFor, CATEGORY_DIRS } from "./categories.js";
|
|
5
|
+
import { parseFrontmatter, parseEntryFromMarkdown } from "./frontmatter.js";
|
|
6
|
+
import { embedBatch } from "./embed.js";
|
|
7
|
+
import type { BaseCtx, IndexEntryInput, ReindexStats } from "./types.js";
|
|
8
|
+
|
|
9
|
+
// Top-level vault directory names that reindex() filters out when listing kind directories.
const EXCLUDED_DIRS = new Set(["projects", "_archive"]);
|
|
10
|
+
// File basenames that reindex() drops from the walked file list before indexing.
const EXCLUDED_FILES = new Set(["context.md", "memory.md", "README.md"]);
|
|
11
|
+
// Number of queued texts reindex() passes to each embedBatch() call.
const EMBED_BATCH_SIZE = 32;
|
|
12
|
+
|
|
13
|
+
/**
 * Write a single entry into the vault table and refresh its vector.
 *
 * Steps, in order:
 *  1. An entry whose `expires_at` is already in the past is ignored entirely.
 *  2. For an "entity" category entry carrying an `identity_key`, an existing
 *     row for the same (kind, identity_key) is updated in place.
 *  3. Otherwise the row is inserted. If the insert fails with a UNIQUE
 *     constraint error, the entry is written through `updateEntry` instead
 *     (which receives `filePath` as its last argument — presumably its key).
 *  4. Unless the category is "event", `title` + `body` are embedded and the
 *     row's vector is replaced (delete, then insert).
 *
 * The `supersedes` and `related_to` fields are accepted by the type but are
 * not read by this function.
 *
 * @param ctx   Database handle, prepared statements, and embed/vector helpers.
 * @param entry The entry to index.
 * @throws Rethrows any insert failure that is not a UNIQUE constraint error
 *         (the original thrown value is preserved, even if it is not an
 *         Error instance), and throws an Error when no valid rowid can be
 *         resolved for the written row.
 */
export async function indexEntry(
  ctx: BaseCtx,
  entry: IndexEntryInput & { supersedes?: string[] | null; related_to?: string[] | null },
): Promise<void> {
  const {
    id, kind, category, title, body, meta, tags, source,
    filePath, createdAt, identity_key, expires_at, source_files, tier,
  } = entry;

  // Never index something that has already expired.
  if (expires_at && new Date(expires_at) <= new Date()) return;

  const tagsJson = tags ? JSON.stringify(tags) : null;
  const metaJson = meta ? JSON.stringify(meta) : null;
  const sourceFilesJson = source_files ? JSON.stringify(source_files) : null;
  const cat = category || categoryFor(kind);
  const effectiveTier = tier || defaultTierFor(kind);

  // Tracks whether an existing row was updated; decides how the rowid is looked up below.
  let wasUpdate = false;

  if (cat === "entity" && identity_key) {
    const existing = ctx.stmts.getByIdentityKey.get(kind, identity_key) as Record<string, unknown> | undefined;
    if (existing) {
      ctx.stmts.upsertByIdentityKey.run(
        title || null, body, metaJson, tagsJson,
        source || "claude-code", cat, filePath,
        expires_at || null, sourceFilesJson,
        kind, identity_key,
      );
      wasUpdate = true;
    }
  }

  if (!wasUpdate) {
    try {
      ctx.stmts.insertEntry.run(
        id, kind, cat, title || null, body, metaJson, tagsJson,
        source || "claude-code", filePath,
        identity_key || null, expires_at || null,
        createdAt, createdAt, sourceFilesJson, effectiveTier,
      );
    } catch (e) {
      // Read `.message` defensively: a thrown string/null must be rethrown
      // as-is rather than being masked by a TypeError from `.includes`.
      const message = (e as { message?: unknown } | null | undefined)?.message;
      const isUniqueViolation =
        typeof message === "string" && message.includes("UNIQUE constraint");
      if (!isUniqueViolation) throw e;

      ctx.stmts.updateEntry.run(
        title || null, body, metaJson, tagsJson,
        source || "claude-code", cat,
        identity_key || null, expires_at || null, filePath,
      );
      // updateEntry does not receive source_files, so it is written separately
      // when the optional statement is available.
      if (sourceFilesJson !== null && ctx.stmts.updateSourceFiles) {
        const entryRow = ctx.stmts.getRowidByPath.get(filePath) as { rowid: number } | undefined;
        if (entryRow) {
          const idRow = ctx.db
            .prepare("SELECT id FROM vault WHERE file_path = ?")
            .get(filePath) as { id: string } | undefined;
          if (idRow) {
            ctx.stmts.updateSourceFiles.run(sourceFilesJson, idRow.id);
          }
        }
      }
      wasUpdate = true;
    }
  }

  // An updated row may carry a different id than `entry.id`, so it is found by path.
  const rowidResult = wasUpdate
    ? ctx.stmts.getRowidByPath.get(filePath) as { rowid: number } | undefined
    : ctx.stmts.getRowid.get(id) as { rowid: number } | undefined;

  if (!rowidResult || rowidResult.rowid == null) {
    throw new Error(
      `Could not find rowid for entry: ${wasUpdate ? `file_path=${filePath}` : `id=${id}`}`,
    );
  }

  const rowid = Number(rowidResult.rowid);
  if (!Number.isFinite(rowid) || rowid < 1) {
    throw new Error(
      `Invalid rowid retrieved: ${rowidResult.rowid} (type: ${typeof rowidResult.rowid})`,
    );
  }

  // Event entries are not embedded.
  if (cat !== "event") {
    const embeddingText = [title, body].filter(Boolean).join(" ");
    const embedding = await ctx.embed(embeddingText);

    // A falsy embedding leaves any previously stored vector untouched.
    if (embedding) {
      try { ctx.deleteVec(rowid); } catch { /* no-op */ }
      ctx.insertVec(rowid, embedding);
    }
  }
}
|
|
104
|
+
|
|
105
|
+
/**
 * Delete every vault row whose `expires_at` is set and not later than the
 * database's current time.
 *
 * For each such row, in order: the backing file (if a path is recorded) is
 * unlinked, the row's vector is deleted when a rowid can be resolved, and the
 * row itself is deleted. Failures while unlinking or deleting the vector are
 * ignored so one bad row does not stop the sweep.
 *
 * @param ctx Database handle, prepared statements, and vector helpers.
 * @returns The number of expired rows that were selected for removal.
 */
export async function pruneExpired(ctx: BaseCtx): Promise<number> {
  const selectExpired = ctx.db.prepare(
    "SELECT id, file_path FROM vault WHERE expires_at IS NOT NULL AND expires_at <= datetime('now')",
  );
  const staleRows = selectExpired.all() as { id: string; file_path: string | null }[];

  staleRows.forEach(({ id, file_path: onDiskPath }) => {
    if (onDiskPath) {
      try {
        unlinkSync(onDiskPath);
      } catch {
        // Unlink failures are ignored; the row is still removed below.
      }
    }

    const located = ctx.stmts.getRowid.get(id) as { rowid: number } | undefined;
    const vectorRowid = located?.rowid;
    if (vectorRowid) {
      try {
        ctx.deleteVec(Number(vectorRowid));
      } catch {
        // Vector deletion failures are ignored.
      }
    }

    ctx.stmts.deleteEntry.run(id);
  });

  return staleRows.length;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Synchronise the vault table (and its vectors) with the markdown files under
 * `ctx.config.vaultDir`.
 *
 * Directory discovery: every top-level directory that is not excluded and
 * does not start with "_" is either a category directory (its non-"_"
 * sub-directories each become a kind) or is itself a kind directory.
 *
 * Inside one transaction, for each kind:
 *  - files not yet in the database are inserted (INSERT OR IGNORE);
 *  - with `fullSync`, files already in the database are updated when their
 *    title, body, tags, meta or related_to differ, and rows whose file is no
 *    longer on disk are deleted;
 *  - without `fullSync`, files already in the database are counted as
 *    unchanged without being read.
 * With `fullSync`, rows whose kind has no directory at all are deleted too.
 * Finally rows whose `expires_at` has passed are deleted and their files
 * unlinked.
 *
 * Embeddings are computed after COMMIT, in batches of EMBED_BATCH_SIZE, for
 * non-event rows that were added or whose title/body changed.
 *
 * @param ctx  Database handle, prepared statements, config and vector helpers.
 * @param opts `fullSync` (default true) enables update and delete detection.
 * @returns Counts of added, updated, removed and unchanged entries.
 * @throws Rethrows any error raised inside the transaction after ROLLBACK.
 */
export async function reindex(
  ctx: BaseCtx,
  opts: { fullSync?: boolean } = {},
): Promise<ReindexStats> {
  const { fullSync = true } = opts;
  const stats: ReindexStats = { added: 0, updated: 0, removed: 0, unchanged: 0 };

  if (!existsSync(ctx.config.vaultDir)) return stats;

  const upsertEntry = ctx.db.prepare(
    `INSERT OR IGNORE INTO vault (id, kind, category, title, body, meta, tags, source, file_path, identity_key, expires_at, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
  );

  // Resolve the list of (kind, directory) pairs to scan.
  const kindEntries: { kind: string; dir: string }[] = [];
  const topDirs = readdirSync(ctx.config.vaultDir, {
    withFileTypes: true,
  }).filter(
    (d) =>
      d.isDirectory() && !EXCLUDED_DIRS.has(d.name) && !d.name.startsWith("_"),
  );

  for (const d of topDirs) {
    if (CATEGORY_DIRS.has(d.name)) {
      // Category directory: each sub-directory is a kind.
      const catDir = join(ctx.config.vaultDir, d.name);
      const subDirs = readdirSync(catDir, { withFileTypes: true }).filter(
        (sd) => sd.isDirectory() && !sd.name.startsWith("_"),
      );
      for (const sd of subDirs) {
        kindEntries.push({
          kind: dirToKind(sd.name),
          dir: join(catDir, sd.name),
        });
      }
    } else {
      kindEntries.push({
        kind: dirToKind(d.name),
        dir: join(ctx.config.vaultDir, d.name),
      });
    }
  }

  // Embedding is deferred until after the transaction commits.
  const pendingEmbeds: { rowid: number; text: string }[] = [];
  // Rowids deleted by the expiry sweep. Only that sweep is tracked: it runs
  // after every insert in the transaction, so these rowids cannot have been
  // reused by a newer row that still needs its embedding.
  const expiredRowids = new Set<number>();

  ctx.db.exec("BEGIN");
  try {
    for (const { kind, dir } of kindEntries) {
      const category = categoryFor(kind);
      const mdFiles = walkDir(dir).filter(
        (f) => !EXCLUDED_FILES.has(basename(f.filePath)),
      );

      const dbRows = ctx.db
        .prepare(
          "SELECT id, file_path, body, title, tags, meta, related_to FROM vault WHERE kind = ?",
        )
        .all(kind) as Record<string, unknown>[];
      const dbByPath = new Map(dbRows.map((r) => [r.file_path as string, r]));
      const diskPaths = new Set(mdFiles.map((e) => e.filePath));

      for (const { filePath, relDir } of mdFiles) {
        const existing = dbByPath.get(filePath);

        // Incremental mode trusts rows that already exist and skips reading the file.
        if (!fullSync && existing) {
          stats.unchanged++;
          continue;
        }

        const raw = readFileSync(filePath, "utf-8");
        if (!raw.startsWith("---\n")) {
          console.error(`[reindex] skipping (no frontmatter): ${filePath}`);
          continue;
        }
        const { meta: fmMeta, body: rawBody } = parseFrontmatter(raw);
        const parsed = parseEntryFromMarkdown(kind, rawBody, fmMeta);

        const identity_key = (fmMeta.identity_key as string) || null;
        const expires_at = (fmMeta.expires_at as string) || null;
        const related_to = Array.isArray(fmMeta.related_to)
          ? (fmMeta.related_to as string[])
          : null;
        const relatedToJson = related_to?.length
          ? JSON.stringify(related_to)
          : null;

        // `meta.folder` mirrors the file's directory relative to the kind dir.
        const meta: Record<string, unknown> = { ...(parsed.meta || {}) };
        if (relDir) meta.folder = relDir;
        else delete meta.folder;
        const metaJson = Object.keys(meta).length ? JSON.stringify(meta) : null;

        if (!existing) {
          const id = (fmMeta.id as string) || ulid();
          const tagsJson = fmMeta.tags ? JSON.stringify(fmMeta.tags) : null;
          const created = (fmMeta.created as string) || new Date().toISOString();

          const result = upsertEntry.run(
            id, kind, category, parsed.title || null, parsed.body,
            metaJson, tagsJson, (fmMeta.source as string) || "file",
            filePath, identity_key, expires_at,
            created, (fmMeta.updated as string) || created,
          );
          // changes === 0 means the INSERT was ignored because of a conflict.
          if ((result as { changes: number }).changes > 0) {
            if (relatedToJson && ctx.stmts.updateRelatedTo) {
              ctx.stmts.updateRelatedTo.run(relatedToJson, id);
            }
            if (category !== "event") {
              const rowidResult = ctx.stmts.getRowid.get(id) as { rowid: number } | undefined;
              if (rowidResult?.rowid) {
                const embeddingText = [parsed.title, parsed.body]
                  .filter(Boolean)
                  .join(" ");
                pendingEmbeds.push({
                  rowid: rowidResult.rowid,
                  text: embeddingText,
                });
              }
            }
            stats.added++;
          } else {
            stats.unchanged++;
          }
        } else if (fullSync) {
          const tagsJson = fmMeta.tags ? JSON.stringify(fmMeta.tags) : null;
          const titleChanged = (parsed.title || null) !== ((existing.title as string) || null);
          const bodyChanged = (existing.body as string) !== parsed.body;
          const tagsChanged = tagsJson !== ((existing.tags as string) || null);
          const metaChanged = metaJson !== ((existing.meta as string) || null);
          const relatedToChanged = relatedToJson !== ((existing.related_to as string) || null);

          if (bodyChanged || titleChanged || tagsChanged || metaChanged || relatedToChanged) {
            ctx.stmts.updateEntry.run(
              parsed.title || null, parsed.body, metaJson, tagsJson,
              (fmMeta.source as string) || "file", category,
              identity_key, expires_at, filePath,
            );
            if (relatedToChanged && ctx.stmts.updateRelatedTo) {
              ctx.stmts.updateRelatedTo.run(relatedToJson, existing.id as string);
            }

            // Only title/body feed the embedding text, so only they trigger re-embedding.
            if ((bodyChanged || titleChanged) && category !== "event") {
              const rowid = (ctx.stmts.getRowid.get(existing.id as string) as { rowid: number } | undefined)?.rowid;
              if (rowid) {
                const embeddingText = [parsed.title, parsed.body]
                  .filter(Boolean)
                  .join(" ");
                pendingEmbeds.push({ rowid, text: embeddingText });
              }
            }
            stats.updated++;
          } else {
            stats.unchanged++;
          }
        } else {
          stats.unchanged++;
        }
      }

      // Rows of this kind whose file is no longer on disk.
      if (fullSync) {
        for (const [dbPath, row] of dbByPath) {
          if (!diskPaths.has(dbPath)) {
            const vRowid = (ctx.stmts.getRowid.get(row.id as string) as { rowid: number } | undefined)?.rowid;
            if (vRowid) {
              try { ctx.deleteVec(vRowid); } catch {}
            }
            ctx.stmts.deleteEntry.run(row.id as string);
            stats.removed++;
          }
        }
      }
    }

    // Rows whose kind has no directory on disk at all.
    if (fullSync) {
      const indexedKinds = new Set(kindEntries.map((ke) => ke.kind));
      const allDbKinds = ctx.db
        .prepare("SELECT DISTINCT kind FROM vault")
        .all() as { kind: string }[];
      for (const { kind } of allDbKinds) {
        if (!indexedKinds.has(kind)) {
          const orphaned = ctx.db
            .prepare("SELECT id, rowid FROM vault WHERE kind = ?")
            .all(kind) as { id: string; rowid: number }[];
          for (const row of orphaned) {
            try { ctx.deleteVec(row.rowid); } catch {}
            ctx.stmts.deleteEntry.run(row.id);
            stats.removed++;
          }
        }
      }
    }

    // Expiry sweep: remove rows (and their files) whose expires_at has passed.
    const expired = ctx.db
      .prepare(
        "SELECT id, file_path FROM vault WHERE expires_at IS NOT NULL AND expires_at <= datetime('now')",
      )
      .all() as { id: string; file_path: string | null }[];

    for (const row of expired) {
      if (row.file_path) {
        try { unlinkSync(row.file_path); } catch {}
      }
      const vRowid = (ctx.stmts.getRowid.get(row.id) as { rowid: number } | undefined)?.rowid;
      if (vRowid) {
        expiredRowids.add(Number(vRowid));
        try { ctx.deleteVec(Number(vRowid)); } catch {}
      }
      ctx.stmts.deleteEntry.run(row.id);
      stats.removed++;
    }

    ctx.db.exec("COMMIT");
  } catch (e) {
    ctx.db.exec("ROLLBACK");
    throw e;
  }

  // A row added or updated above may have been removed again by the expiry
  // sweep. Embedding it would store a vector for a rowid that no longer
  // exists, so those queue items are dropped.
  const embedQueue = pendingEmbeds.filter(
    (item) => !expiredRowids.has(Number(item.rowid)),
  );

  for (let i = 0; i < embedQueue.length; i += EMBED_BATCH_SIZE) {
    const batch = embedQueue.slice(i, i + EMBED_BATCH_SIZE);
    const embeddings = await embedBatch(batch.map((e) => e.text));
    for (let j = 0; j < batch.length; j++) {
      if (embeddings[j]) {
        try { ctx.deleteVec(batch[j].rowid); } catch {}
        ctx.insertVec(batch[j].rowid, embeddings[j]!);
      }
    }
  }

  return stats;
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
 * Convert an HTML fragment to Markdown using a fixed sequence of regex passes.
 *
 * Pass order matters and is kept as follows: drop script/style/nav/header/
 * footer/aside regions, convert h1-h6, links, fenced and inline code,
 * bold/italic, list items, line breaks, paragraphs and blockquotes; then
 * strip any remaining tags, decode entities, and collapse runs of three or
 * more newlines.
 *
 * Every tag pattern ends its tag name with `\b` so that a short name does not
 * match a longer tag (e.g. `b` vs `blockquote`/`body`/`br`, `i` vs `img`,
 * `li` vs `link`, `p` vs `picture`, `a` vs `area`).
 *
 * @param html Raw HTML.
 * @returns Markdown text, trimmed.
 */
export function htmlToMarkdown(html: string): string {
  let md = html;

  // Remove regions that never carry article content.
  for (const tag of ["script", "style", "nav", "header", "footer", "aside"]) {
    md = md.replace(new RegExp(`<${tag}\\b[\\s\\S]*?</${tag}>`, "gi"), "");
  }

  // Headings, one pass per level from h1 to h6.
  for (let level = 1; level <= 6; level++) {
    const headingPattern = new RegExp(`<h${level}\\b[^>]*>([\\s\\S]*?)</h${level}>`, "gi");
    const hashes = "#".repeat(level);
    md = md.replace(headingPattern, (_, c: string) => `\n${hashes} ${stripTags(c).trim()}\n`);
  }

  // Links; href may be double- or single-quoted. Links with no text are dropped.
  md = md.replace(
    /<a\b[^>]*href=(?:"([^"]*)"|'([^']*)')[^>]*>([\s\S]*?)<\/a>/gi,
    (_, doubleQuoted: string | undefined, singleQuoted: string | undefined, text: string) => {
      const cleanText = stripTags(text).trim();
      const href = doubleQuoted ?? singleQuoted ?? "";
      return cleanText ? `[${cleanText}](${href})` : "";
    },
  );

  // Code: <pre><code> first, then bare <pre>, then inline <code>.
  // NOTE(review): entities decoded here can reintroduce "<...>" text that the
  // final stripTags pass below will remove — confirm whether that is intended.
  md = md.replace(/<pre\b[^>]*><code\b[^>]*>([\s\S]*?)<\/code><\/pre>/gi, (_, c: string) => `\n\`\`\`\n${decodeEntities(c).trim()}\n\`\`\`\n`);
  md = md.replace(/<pre\b[^>]*>([\s\S]*?)<\/pre>/gi, (_, c: string) => `\n\`\`\`\n${decodeEntities(stripTags(c)).trim()}\n\`\`\`\n`);
  md = md.replace(/<code\b[^>]*>([\s\S]*?)<\/code>/gi, (_, c: string) => `\`${decodeEntities(c).trim()}\``);

  // Inline emphasis.
  md = md.replace(/<(strong|b)\b[^>]*>([\s\S]*?)<\/\1>/gi, (_, __: string, c: string) => `**${stripTags(c).trim()}**`);
  md = md.replace(/<(em|i)\b[^>]*>([\s\S]*?)<\/\1>/gi, (_, __: string, c: string) => `*${stripTags(c).trim()}*`);

  // Block-level text.
  md = md.replace(/<li\b[^>]*>([\s\S]*?)<\/li>/gi, (_, c: string) => `- ${stripTags(c).trim()}\n`);
  md = md.replace(/<br\b[^>]*>/gi, "\n");
  md = md.replace(/<p\b[^>]*>([\s\S]*?)<\/p>/gi, (_, c: string) => `\n${stripTags(c).trim()}\n`);
  md = md.replace(/<blockquote\b[^>]*>([\s\S]*?)<\/blockquote>/gi, (_, c: string) => {
    return "\n" + stripTags(c).trim().split("\n").map((l: string) => `> ${l}`).join("\n") + "\n";
  });

  // Final cleanup.
  md = stripTags(md);
  md = decodeEntities(md);
  md = md.replace(/\n{3,}/g, "\n\n").trim();
  return md;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Remove every `<...>` sequence containing at least one character from the
 * input. Text outside such sequences is returned unchanged.
 *
 * @param html Text that may contain markup.
 * @returns The input with all matched tag-like sequences deleted.
 */
function stripTags(html: string): string {
  const tagPattern = /<[^>]+>/g;
  const withoutTags = html.replace(tagPattern, "");
  return withoutTags;
}
|
|
39
|
+
|
|
40
|
+
// Named entities this module decodes; names are matched case-sensitively.
const NAMED_ENTITIES = new Map<string, string>([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", " "],
]);

/**
 * Decode HTML character references in `text`.
 *
 * Handles the named entities listed in NAMED_ENTITIES plus decimal
 * (`&#39;`) and hexadecimal (`&#x27;`) numeric references. Decoding is done
 * in a single pass, so an escaped reference such as `&amp;lt;` becomes the
 * literal text `&lt;` rather than being decoded twice into `<`.
 *
 * Unknown named entities and numeric references outside the Unicode range
 * are left in the output unchanged.
 *
 * @param text Text possibly containing character references.
 * @returns The text with recognised references replaced by their characters.
 */
function decodeEntities(text: string): string {
  return text.replace(
    /&(?:#(\d+)|#x([0-9a-f]+)|([a-z]+));/gi,
    (match: string, dec?: string, hex?: string, name?: string) => {
      if (name !== undefined) {
        return NAMED_ENTITIES.get(name) ?? match;
      }
      const codePoint = dec !== undefined ? parseInt(dec, 10) : parseInt(hex as string, 16);
      // String.fromCodePoint throws a RangeError above U+10FFFF, so keep such input verbatim.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    },
  );
}
|
|
47
|
+
|
|
48
|
+
/**
 * Pull a title and a Markdown body out of an HTML document.
 *
 * The title is the text of the first `<title>` element (empty string when
 * absent). Markup inside the title is stripped before entities are decoded,
 * so escaped text such as `&lt;b&gt;` survives as literal `<b>` instead of
 * being mistaken for a tag and removed.
 *
 * The body is taken from the first `<article>`, else the first `<main>`,
 * else `<body>`, else the whole input, and converted with htmlToMarkdown.
 *
 * @param html Full HTML document or fragment.
 * @param _url Unused; kept for interface compatibility.
 * @returns The extracted `title` and Markdown `body`.
 */
export function extractHtmlContent(html: string, _url: string): { title: string; body: string } {
  const titleMatch = html.match(/<title\b[^>]*>([\s\S]*?)<\/title>/i);
  const title = titleMatch ? decodeEntities(stripTags(titleMatch[1])).trim() : "";

  // Prefer the most specific content container that is present.
  const contentMatch =
    html.match(/<article\b[^>]*>([\s\S]*?)<\/article>/i) ??
    html.match(/<main\b[^>]*>([\s\S]*?)<\/main>/i) ??
    html.match(/<body\b[^>]*>([\s\S]*?)<\/body>/i);
  const contentHtml = contentMatch ? contentMatch[1] : html;

  const body = htmlToMarkdown(contentHtml);
  return { title, body };
}
|
|
63
|
+
|
|
64
|
+
/**
 * Fetch a URL and turn the response into an entry-shaped object.
 *
 * HTML and XHTML responses (judged by the Content-Type header) go through
 * extractHtmlContent; any other content type is used verbatim as the body
 * with the hostname as title. Bodies longer than `maxBodyLength` are cut and
 * suffixed with a truncation marker.
 *
 * The timeout covers both the request and the reading of the response body:
 * the abort timer is only cleared once the body has been read (or an error
 * has occurred), so a server that sends headers and then stalls is aborted.
 *
 * @param url  Absolute URL to fetch.
 * @param opts Optional `kind` (default "reference"), extra `tags`, `source`
 *             (default: hostname), `maxBodyLength` (default 50000) and
 *             `timeoutMs` (default 15000).
 * @returns kind, title, body, tags (with "web-import" appended), meta
 *          (url, domain, fetched_at, content_type) and source.
 * @throws Error "Invalid URL: …" when `url` cannot be parsed,
 *         "Request timed out after …ms" on abort,
 *         "Fetch failed: …" on other request or body-read failures,
 *         "HTTP <status>: <statusText>" on a non-OK response, and
 *         "No readable content extracted from URL" when the body is blank.
 */
export async function ingestUrl(
  url: string,
  opts: { kind?: string; tags?: string[]; source?: string; maxBodyLength?: number; timeoutMs?: number } = {},
): Promise<{ kind: string; title: string; body: string; tags: string[]; meta: Record<string, unknown>; source: string }> {
  const { kind = "reference", tags = [], source, maxBodyLength = 50000, timeoutMs = 15000 } = opts;

  let domain: string;
  try { domain = new URL(url).hostname; } catch { throw new Error(`Invalid URL: ${url}`); }

  // Maps a low-level failure to the error messages this function exposes.
  const toRequestError = (err: unknown): Error =>
    (err as Error).name === "AbortError"
      ? new Error(`Request timed out after ${timeoutMs}ms`)
      : new Error(`Fetch failed: ${(err as Error).message}`);

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), timeoutMs);
  let response: Response;
  let html: string;
  try {
    try {
      response = await fetch(url, {
        signal: controller.signal,
        headers: { "User-Agent": "ContextVault/1.0 (+https://github.com/fellanH/context-vault)", Accept: "text/html,application/xhtml+xml,text/plain,*/*" },
      });
    } catch (err) {
      throw toRequestError(err);
    }
    if (!response.ok) throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    try {
      // Still under the abort timer: a stalled body is cancelled like a stalled request.
      html = await response.text();
    } catch (err) {
      throw toRequestError(err);
    }
  } finally { clearTimeout(timeout); }

  const contentType = response.headers.get("content-type") || "";
  let title: string, body: string;
  if (contentType.includes("text/html") || contentType.includes("application/xhtml")) {
    const extracted = extractHtmlContent(html, url);
    title = extracted.title; body = extracted.body;
  } else { title = domain; body = html; }

  if (body.length > maxBodyLength) body = body.slice(0, maxBodyLength) + "\n\n[Content truncated]";
  if (!body.trim()) throw new Error("No readable content extracted from URL");

  return {
    kind, title: title || domain, body,
    tags: [...tags, "web-import"],
    meta: { url, domain, fetched_at: new Date().toISOString(), content_type: contentType.split(";")[0].trim() || "text/html" },
    source: source || domain,
  };
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
// Types
|
|
2
|
+
export type {
|
|
3
|
+
VaultConfig,
|
|
4
|
+
RecallConfig,
|
|
5
|
+
ConsolidationConfig,
|
|
6
|
+
GrowthThresholds,
|
|
7
|
+
PreparedStatements,
|
|
8
|
+
VaultEntry,
|
|
9
|
+
SearchResult,
|
|
10
|
+
CaptureInput,
|
|
11
|
+
CaptureResult,
|
|
12
|
+
IndexEntryInput,
|
|
13
|
+
ReindexStats,
|
|
14
|
+
BaseCtx,
|
|
15
|
+
SearchOptions,
|
|
16
|
+
} from "./types.js";
|
|
17
|
+
|
|
18
|
+
// Constants
|
|
19
|
+
export {
|
|
20
|
+
APP_URL,
|
|
21
|
+
API_URL,
|
|
22
|
+
MARKETING_URL,
|
|
23
|
+
GITHUB_ISSUES_URL,
|
|
24
|
+
MAX_BODY_LENGTH,
|
|
25
|
+
MAX_TITLE_LENGTH,
|
|
26
|
+
MAX_KIND_LENGTH,
|
|
27
|
+
MAX_TAG_LENGTH,
|
|
28
|
+
MAX_TAGS_COUNT,
|
|
29
|
+
MAX_META_LENGTH,
|
|
30
|
+
MAX_SOURCE_LENGTH,
|
|
31
|
+
MAX_IDENTITY_KEY_LENGTH,
|
|
32
|
+
DEFAULT_GROWTH_THRESHOLDS,
|
|
33
|
+
DEFAULT_LIFECYCLE,
|
|
34
|
+
} from "./constants.js";
|
|
35
|
+
|
|
36
|
+
// Categories
|
|
37
|
+
export {
|
|
38
|
+
categoryFor,
|
|
39
|
+
categoryDirFor,
|
|
40
|
+
defaultTierFor,
|
|
41
|
+
CATEGORY_DIRS,
|
|
42
|
+
KIND_STALENESS_DAYS,
|
|
43
|
+
} from "./categories.js";
|
|
44
|
+
|
|
45
|
+
// Config
|
|
46
|
+
export { parseArgs, resolveConfig } from "./config.js";
|
|
47
|
+
|
|
48
|
+
// Files
|
|
49
|
+
export {
|
|
50
|
+
ulid,
|
|
51
|
+
slugify,
|
|
52
|
+
kindToDir,
|
|
53
|
+
dirToKind,
|
|
54
|
+
normalizeKind,
|
|
55
|
+
kindToPath,
|
|
56
|
+
safeJoin,
|
|
57
|
+
walkDir,
|
|
58
|
+
} from "./files.js";
|
|
59
|
+
|
|
60
|
+
// Frontmatter
|
|
61
|
+
export {
|
|
62
|
+
formatFrontmatter,
|
|
63
|
+
parseFrontmatter,
|
|
64
|
+
extractCustomMeta,
|
|
65
|
+
parseEntryFromMarkdown,
|
|
66
|
+
} from "./frontmatter.js";
|
|
67
|
+
|
|
68
|
+
// Formatters
|
|
69
|
+
export { formatBody } from "./formatters.js";
|
|
70
|
+
|
|
71
|
+
// Database
|
|
72
|
+
export {
|
|
73
|
+
SCHEMA_DDL,
|
|
74
|
+
NativeModuleError,
|
|
75
|
+
initDatabase,
|
|
76
|
+
prepareStatements,
|
|
77
|
+
insertVec,
|
|
78
|
+
deleteVec,
|
|
79
|
+
testConnection,
|
|
80
|
+
} from "./db.js";
|
|
81
|
+
|
|
82
|
+
// Embeddings
|
|
83
|
+
export { embed, embedBatch, resetEmbedPipeline, isEmbedAvailable } from "./embed.js";
|
|
84
|
+
|
|
85
|
+
// Index (reindex + indexEntry)
|
|
86
|
+
export { indexEntry, reindex, pruneExpired } from "./index.js";
|
|
87
|
+
|
|
88
|
+
// Search (retrieve)
|
|
89
|
+
export {
|
|
90
|
+
hybridSearch,
|
|
91
|
+
buildFtsQuery,
|
|
92
|
+
buildFilterClauses,
|
|
93
|
+
recencyBoost,
|
|
94
|
+
recencyDecayScore,
|
|
95
|
+
dotProduct,
|
|
96
|
+
reciprocalRankFusion,
|
|
97
|
+
} from "./search.js";
|
|
98
|
+
|
|
99
|
+
// Capture
|
|
100
|
+
export {
|
|
101
|
+
writeEntry,
|
|
102
|
+
updateEntryFile,
|
|
103
|
+
captureAndIndex,
|
|
104
|
+
} from "./capture.js";
|
|
105
|
+
|
|
106
|
+
// Ingest URL
|
|
107
|
+
export {
|
|
108
|
+
htmlToMarkdown,
|
|
109
|
+
extractHtmlContent,
|
|
110
|
+
ingestUrl,
|
|
111
|
+
} from "./ingest-url.js";
|