context-vault 2.17.1 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +795 -71
- package/node_modules/@context-vault/core/dist/capture.d.ts +21 -0
- package/node_modules/@context-vault/core/dist/capture.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/capture.js +269 -0
- package/node_modules/@context-vault/core/dist/capture.js.map +1 -0
- package/node_modules/@context-vault/core/dist/categories.d.ts +6 -0
- package/node_modules/@context-vault/core/dist/categories.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/categories.js +50 -0
- package/node_modules/@context-vault/core/dist/categories.js.map +1 -0
- package/node_modules/@context-vault/core/dist/config.d.ts +4 -0
- package/node_modules/@context-vault/core/dist/config.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/config.js +190 -0
- package/node_modules/@context-vault/core/dist/config.js.map +1 -0
- package/node_modules/@context-vault/core/dist/constants.d.ts +33 -0
- package/node_modules/@context-vault/core/dist/constants.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/constants.js +23 -0
- package/node_modules/@context-vault/core/dist/constants.js.map +1 -0
- package/node_modules/@context-vault/core/dist/db.d.ts +13 -0
- package/node_modules/@context-vault/core/dist/db.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/db.js +191 -0
- package/node_modules/@context-vault/core/dist/db.js.map +1 -0
- package/node_modules/@context-vault/core/dist/embed.d.ts +5 -0
- package/node_modules/@context-vault/core/dist/embed.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/embed.js +78 -0
- package/node_modules/@context-vault/core/dist/embed.js.map +1 -0
- package/node_modules/@context-vault/core/dist/files.d.ts +13 -0
- package/node_modules/@context-vault/core/dist/files.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/files.js +66 -0
- package/node_modules/@context-vault/core/dist/files.js.map +1 -0
- package/node_modules/@context-vault/core/dist/formatters.d.ts +8 -0
- package/node_modules/@context-vault/core/dist/formatters.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/formatters.js +18 -0
- package/node_modules/@context-vault/core/dist/formatters.js.map +1 -0
- package/node_modules/@context-vault/core/dist/frontmatter.d.ts +12 -0
- package/node_modules/@context-vault/core/dist/frontmatter.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/frontmatter.js +101 -0
- package/node_modules/@context-vault/core/dist/frontmatter.js.map +1 -0
- package/node_modules/@context-vault/core/dist/index.d.ts +10 -0
- package/node_modules/@context-vault/core/dist/index.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/index.js +297 -0
- package/node_modules/@context-vault/core/dist/index.js.map +1 -0
- package/node_modules/@context-vault/core/dist/ingest-url.d.ts +20 -0
- package/node_modules/@context-vault/core/dist/ingest-url.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/ingest-url.js +113 -0
- package/node_modules/@context-vault/core/dist/ingest-url.js.map +1 -0
- package/node_modules/@context-vault/core/dist/main.d.ts +14 -0
- package/node_modules/@context-vault/core/dist/main.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/main.js +25 -0
- package/node_modules/@context-vault/core/dist/main.js.map +1 -0
- package/node_modules/@context-vault/core/dist/search.d.ts +18 -0
- package/node_modules/@context-vault/core/dist/search.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/search.js +238 -0
- package/node_modules/@context-vault/core/dist/search.js.map +1 -0
- package/node_modules/@context-vault/core/dist/types.d.ts +176 -0
- package/node_modules/@context-vault/core/dist/types.d.ts.map +1 -0
- package/node_modules/@context-vault/core/dist/types.js +2 -0
- package/node_modules/@context-vault/core/dist/types.js.map +1 -0
- package/node_modules/@context-vault/core/package.json +66 -16
- package/node_modules/@context-vault/core/src/capture.ts +308 -0
- package/node_modules/@context-vault/core/src/categories.ts +54 -0
- package/node_modules/@context-vault/core/src/{core/config.js → config.ts} +34 -33
- package/node_modules/@context-vault/core/src/{constants.js → constants.ts} +6 -3
- package/node_modules/@context-vault/core/src/db.ts +229 -0
- package/node_modules/@context-vault/core/src/{index/embed.js → embed.ts} +10 -35
- package/node_modules/@context-vault/core/src/{core/files.js → files.ts} +15 -20
- package/node_modules/@context-vault/core/src/{capture/formatters.js → formatters.ts} +13 -11
- package/node_modules/@context-vault/core/src/{core/frontmatter.js → frontmatter.ts} +26 -33
- package/node_modules/@context-vault/core/src/index.ts +351 -0
- package/node_modules/@context-vault/core/src/ingest-url.ts +99 -0
- package/node_modules/@context-vault/core/src/main.ts +111 -0
- package/node_modules/@context-vault/core/src/{retrieve/index.js → search.ts} +62 -150
- package/node_modules/@context-vault/core/src/types.ts +166 -0
- package/package.json +12 -7
- package/scripts/postinstall.js +1 -1
- package/{node_modules/@context-vault/core/src/core → src}/error-log.js +1 -15
- package/{node_modules/@context-vault/core/src/server → src}/helpers.js +9 -4
- package/src/linking.js +100 -0
- package/{node_modules/@context-vault/core/src/server/tools.js → src/register-tools.js} +14 -13
- package/src/{server/index.js → server.js} +10 -38
- package/src/status.js +235 -0
- package/{node_modules/@context-vault/core/src/core → src}/telemetry.js +9 -19
- package/src/temporal.js +97 -0
- package/{node_modules/@context-vault/core/src/server → src}/tools/context-status.js +3 -4
- package/{node_modules/@context-vault/core/src/server → src}/tools/create-snapshot.js +6 -7
- package/{node_modules/@context-vault/core/src/server → src}/tools/delete-context.js +0 -2
- package/{node_modules/@context-vault/core/src/server → src}/tools/get-context.js +17 -21
- package/{node_modules/@context-vault/core/src/server → src}/tools/ingest-project.js +5 -6
- package/{node_modules/@context-vault/core/src/server → src}/tools/ingest-url.js +3 -4
- package/{node_modules/@context-vault/core/src/server → src}/tools/list-buckets.js +4 -5
- package/{node_modules/@context-vault/core/src/server → src}/tools/list-context.js +3 -6
- package/{node_modules/@context-vault/core/src/server → src}/tools/save-context.js +17 -20
- package/{node_modules/@context-vault/core/src/server → src}/tools/session-start.js +9 -16
- package/node_modules/@context-vault/core/src/capture/file-ops.js +0 -99
- package/node_modules/@context-vault/core/src/capture/import-pipeline.js +0 -46
- package/node_modules/@context-vault/core/src/capture/importers.js +0 -387
- package/node_modules/@context-vault/core/src/capture/index.js +0 -250
- package/node_modules/@context-vault/core/src/capture/ingest-url.js +0 -252
- package/node_modules/@context-vault/core/src/consolidation/index.js +0 -112
- package/node_modules/@context-vault/core/src/core/categories.js +0 -73
- package/node_modules/@context-vault/core/src/core/linking.js +0 -161
- package/node_modules/@context-vault/core/src/core/migrate-dirs.js +0 -196
- package/node_modules/@context-vault/core/src/core/status.js +0 -350
- package/node_modules/@context-vault/core/src/core/temporal.js +0 -146
- package/node_modules/@context-vault/core/src/index/db.js +0 -586
- package/node_modules/@context-vault/core/src/index/index.js +0 -583
- package/node_modules/@context-vault/core/src/index.js +0 -71
- package/node_modules/@context-vault/core/src/sync/sync.js +0 -235
- package/src/hooks/post-tool-call.mjs +0 -62
- package/src/hooks/session-end.mjs +0 -492
- /package/{node_modules/@context-vault/core/src/server → src}/tools/clear-context.js +0 -0
|
@@ -1,250 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Capture Layer — Public API
|
|
3
|
-
*
|
|
4
|
-
* Writes knowledge entries to vault as .md files and indexes them.
|
|
5
|
-
* captureAndIndex() is the write-through entry point (capture + index + rollback on failure).
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import { existsSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
|
|
9
|
-
import { resolve } from "node:path";
|
|
10
|
-
import { ulid, slugify, kindToPath } from "../core/files.js";
|
|
11
|
-
import { categoryFor } from "../core/categories.js";
|
|
12
|
-
import { parseFrontmatter, formatFrontmatter } from "../core/frontmatter.js";
|
|
13
|
-
import { formatBody } from "./formatters.js";
|
|
14
|
-
import { writeEntryFile } from "./file-ops.js";
|
|
15
|
-
import { indexEntry } from "../index/index.js";
|
|
16
|
-
|
|
17
|
-
/**
 * Validate an entry payload and write it to the vault through writeEntryFile.
 *
 * When the kind maps to the "entity" category and an identity_key is given,
 * an existing file at `<vaultDir>/<kindToPath(kind)>/<slug>.md` is looked up
 * first; if present, its frontmatter `id` and `created` values are reused so
 * the write keeps the original identity. In every other case a fresh ULID and
 * creation timestamp are generated. This function does not index the entry.
 *
 * @param {{ config: { vaultDir: string } }} ctx
 * @param {object} entry — entry fields (kind and body are mandatory)
 * @returns {object} The entry as written, including id, filePath, category and timestamps
 * @throws {Error} If kind/body are missing or tags/meta have the wrong type
 */
export function writeEntry(
  ctx,
  {
    kind,
    title,
    body,
    meta,
    tags,
    source,
    folder,
    identity_key,
    expires_at,
    supersedes,
    related_to,
    source_files,
    tier,
    userId,
  },
) {
  // Guard clauses, checked in the same order as the error messages below.
  const kindOk = typeof kind === "string" && kind.length > 0;
  if (!kindOk) {
    throw new Error("writeEntry: kind is required (non-empty string)");
  }
  const bodyOk = typeof body === "string" && body.trim().length > 0;
  if (!bodyOk) {
    throw new Error("writeEntry: body is required (non-empty string)");
  }
  const tagsGiven = tags !== undefined && tags !== null;
  if (tagsGiven && !Array.isArray(tags)) {
    throw new Error("writeEntry: tags must be an array if provided");
  }
  const metaGiven = meta !== undefined && meta !== null;
  if (metaGiven && typeof meta !== "object") {
    throw new Error("writeEntry: meta must be an object if provided");
  }

  const category = categoryFor(kind);

  // Only entities with an identity key have a path that can be probed for a previous version.
  let upsertPath = null;
  if (category === "entity" && identity_key) {
    const slug = slugify(identity_key);
    const kindDir = resolve(ctx.config.vaultDir, kindToPath(kind));
    upsertPath = resolve(kindDir, `${slug}.md`);
  }

  let id;
  let createdAt;
  let updatedAt;
  if (upsertPath !== null && existsSync(upsertPath)) {
    // Upsert: keep the identity and creation time recorded in the existing file.
    const { meta: previous } = parseFrontmatter(readFileSync(upsertPath, "utf-8"));
    id = previous.id || ulid();
    createdAt = previous.created || new Date().toISOString();
    updatedAt = new Date().toISOString();
  } else {
    id = ulid();
    createdAt = new Date().toISOString();
    updatedAt = createdAt;
  }

  const filePath = writeEntryFile(ctx.config.vaultDir, kind, {
    id,
    title,
    body,
    meta,
    tags,
    source,
    createdAt,
    updatedAt,
    folder,
    category,
    identity_key,
    expires_at,
    supersedes,
    related_to,
  });

  return {
    id,
    filePath,
    kind,
    category,
    title,
    body,
    meta,
    tags,
    source,
    createdAt,
    updatedAt,
    identity_key,
    expires_at,
    supersedes,
    related_to: related_to || null,
    source_files: source_files || null,
    tier: tier || null,
    userId: userId || null,
  };
}
|
|
116
|
-
|
|
117
|
-
/**
 * Rewrite an existing entry's markdown file, overlaying `updates` on the
 * values already stored for it. A field is taken from `updates` whenever it is
 * not `undefined` (so an explicit `null` overrides the stored value); meta is
 * shallow-merged instead of replaced.
 * Does NOT re-index — the caller must call indexEntry afterwards.
 *
 * @param {{ config, stmts }} ctx — not read by this function
 * @param {object} existing — Row from vault table (from getEntryById)
 * @param {{ title?, body?, tags?, meta?, source?, expires_at?, supersedes?, related_to?, source_files? }} updates
 * @returns {object} Entry object suitable for indexEntry
 */
export function updateEntryFile(ctx, existing, updates) {
  const { meta: frontmatter } = parseFrontmatter(
    readFileSync(existing.file_path, "utf-8"),
  );

  // JSON-encoded columns of the DB row.
  const storedMeta = existing.meta ? JSON.parse(existing.meta) : {};
  const storedTags = existing.tags ? JSON.parse(existing.tags) : [];
  const storedRelatedTo = existing.related_to
    ? JSON.parse(existing.related_to)
    : frontmatter.related_to || null;

  const provided = (field) => updates[field] !== undefined;

  const title = provided("title") ? updates.title : existing.title;
  const body = provided("body") ? updates.body : existing.body;
  const tags = provided("tags") ? updates.tags : storedTags;
  const source = provided("source") ? updates.source : existing.source;
  const expires_at = provided("expires_at")
    ? updates.expires_at
    : existing.expires_at;
  const supersedes = provided("supersedes")
    ? updates.supersedes
    : frontmatter.supersedes || null;
  const related_to = provided("related_to")
    ? updates.related_to
    : storedRelatedTo;

  let source_files;
  if (provided("source_files")) {
    source_files = updates.source_files;
  } else if (existing.source_files) {
    source_files = JSON.parse(existing.source_files);
  } else {
    source_files = null;
  }

  // Shallow merge: keys from updates.meta win; a missing/null updates.meta adds nothing.
  const mergedMeta = { ...storedMeta, ...(updates.meta || {}) };

  // Build frontmatter
  const now = new Date().toISOString();
  const fmFields = { id: existing.id };
  for (const key of Object.keys(mergedMeta)) {
    const value = mergedMeta[key];
    const skip = key === "folder" || value === null || value === undefined;
    if (!skip) {
      fmFields[key] = value;
    }
  }
  if (existing.identity_key) {
    fmFields.identity_key = existing.identity_key;
  }
  if (expires_at) {
    fmFields.expires_at = expires_at;
  }
  if (supersedes?.length) {
    fmFields.supersedes = supersedes;
  }
  if (related_to?.length) {
    fmFields.related_to = related_to;
  }
  fmFields.tags = tags;
  fmFields.source = source || "claude-code";
  fmFields.created = frontmatter.created || existing.created_at;
  if (now !== fmFields.created) {
    fmFields.updated = now;
  }

  const markdown =
    formatFrontmatter(fmFields) +
    formatBody(existing.kind, { title, body, meta: mergedMeta });

  writeFileSync(existing.file_path, markdown);

  // An empty merged meta is reported as undefined rather than {}.
  const hasMeta = Object.keys(mergedMeta).length;

  return {
    id: existing.id,
    filePath: existing.file_path,
    kind: existing.kind,
    category: existing.category,
    title,
    body,
    meta: hasMeta ? mergedMeta : undefined,
    tags,
    source,
    createdAt: frontmatter.created || existing.created_at,
    updatedAt: now,
    identity_key: existing.identity_key,
    expires_at,
    supersedes,
    related_to: related_to || null,
    source_files: source_files || null,
    userId: existing.user_id || null,
  };
}
|
|
206
|
-
|
|
207
|
-
/**
 * Write-through entry point: write the entry file, then index it, and undo the
 * file write if indexing (or the follow-up link bookkeeping) throws.
 *
 * @param {{ config: { vaultDir: string }, stmts: object }} ctx
 * @param {object} data — entry fields, passed unchanged to writeEntry
 * @returns {Promise<object>} The entry object returned by writeEntry
 * @throws {Error} If writeEntry rejects the data, or if indexing fails (the
 *   original failure is attached as `cause`)
 */
export async function captureAndIndex(ctx, data) {
  // For entity upserts, preserve previous file content for safe rollback
  let previousContent = null;
  if (categoryFor(data.kind) === "entity" && data.identity_key) {
    const identitySlug = slugify(data.identity_key);
    const dir = resolve(ctx.config.vaultDir, kindToPath(data.kind));
    const existingPath = resolve(dir, `${identitySlug}.md`);
    if (existsSync(existingPath)) {
      previousContent = readFileSync(existingPath, "utf-8");
    }
  }

  const entry = writeEntry(ctx, data);
  try {
    await indexEntry(ctx, entry);

    // Apply supersedes: mark referenced entries as superseded by this entry
    if (entry.supersedes?.length && ctx.stmts.updateSupersededBy) {
      for (const supersededId of entry.supersedes) {
        if (typeof supersededId === "string" && supersededId.trim()) {
          ctx.stmts.updateSupersededBy.run(entry.id, supersededId.trim());
        }
      }
    }

    // Store related_to links in DB
    if (entry.related_to?.length && ctx.stmts.updateRelatedTo) {
      ctx.stmts.updateRelatedTo.run(JSON.stringify(entry.related_to), entry.id);
    }

    return entry;
  } catch (err) {
    // Rollback: restore previous content for entity upserts, delete for new entries.
    // Compare against null rather than truthiness: an existing but empty file
    // reads as "" and must be restored, not deleted.
    if (previousContent !== null) {
      try {
        writeFileSync(entry.filePath, previousContent);
      } catch {
        // Best-effort rollback; the indexing error below is the one reported.
      }
    } else {
      try {
        unlinkSync(entry.filePath);
      } catch {
        // Best-effort rollback; the indexing error below is the one reported.
      }
    }
    throw new Error(
      `Capture succeeded but indexing failed — file rolled back. ${err.message}`,
      { cause: err },
    );
  }
}
|
|
@@ -1,252 +0,0 @@
|
|
|
1
|
-
/**
 * Convert an HTML fragment to markdown using a fixed sequence of regex
 * rewrites: drop non-content containers, convert headings, links, code,
 * emphasis, list items, paragraphs and blockquotes, then strip any remaining
 * tags, decode entities and collapse runs of blank lines.
 *
 * @param {string} html
 * @returns {string} Trimmed markdown text
 */
export function htmlToMarkdown(html) {
  // Text content of a fragment: tags removed, surrounding whitespace trimmed.
  const plain = (fragment) => stripTags(fragment).trim();
  const fenced = (code) => `\n\`\`\`\n${code.trim()}\n\`\`\`\n`;

  let md = html;

  // Remove scripts, styles, nav, header, footer, aside
  for (const tag of ["script", "style", "nav", "header", "footer", "aside"]) {
    md = md.replace(new RegExp(`<${tag}[\\s\\S]*?<\\/${tag}>`, "gi"), "");
  }

  // Convert headings h1..h6, one level per pass in ascending order
  for (let level = 1; level <= 6; level += 1) {
    const marker = "#".repeat(level);
    const headingPattern = new RegExp(
      `<h${level}[^>]*>([\\s\\S]*?)<\\/h${level}>`,
      "gi",
    );
    md = md.replace(headingPattern, (_, inner) => `\n${marker} ${plain(inner)}\n`);
  }

  // Convert links; anchors without visible text are dropped
  md = md.replace(
    /<a[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/gi,
    (_, href, inner) => {
      const label = plain(inner);
      if (!label) {
        return "";
      }
      return `[${label}](${href})`;
    },
  );

  // Convert code blocks: <pre><code> first, then bare <pre>
  md = md.replace(
    /<pre[^>]*><code[^>]*>([\s\S]*?)<\/code><\/pre>/gi,
    (_, code) => fenced(decodeEntities(code)),
  );
  md = md.replace(
    /<pre[^>]*>([\s\S]*?)<\/pre>/gi,
    (_, code) => fenced(decodeEntities(stripTags(code))),
  );

  // Convert inline code
  md = md.replace(/<code[^>]*>([\s\S]*?)<\/code>/gi, (_, code) => {
    const literal = decodeEntities(code).trim();
    return `\`${literal}\``;
  });

  // Convert strong/em
  md = md.replace(
    /<(strong|b)[^>]*>([\s\S]*?)<\/\1>/gi,
    (_, _tag, inner) => `**${plain(inner)}**`,
  );
  md = md.replace(
    /<(em|i)[^>]*>([\s\S]*?)<\/\1>/gi,
    (_, _tag, inner) => `*${plain(inner)}*`,
  );

  // Convert list items
  md = md.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, (_, inner) => `- ${plain(inner)}\n`);

  // Convert paragraphs and line breaks
  md = md.replace(/<br\s*\/?>/gi, "\n");
  md = md.replace(/<p[^>]*>([\s\S]*?)<\/p>/gi, (_, inner) => `\n${plain(inner)}\n`);
  md = md.replace(/<blockquote[^>]*>([\s\S]*?)<\/blockquote>/gi, (_, inner) => {
    const quoted = plain(inner)
      .split("\n")
      .map((line) => `> ${line}`)
      .join("\n");
    return `\n${quoted}\n`;
  });

  // Remove remaining HTML tags, then decode HTML entities
  md = decodeEntities(stripTags(md));

  // Clean up whitespace: at most one blank line in a row
  return md.replace(/\n{3,}/g, "\n\n").trim();
}
|
|
108
|
-
|
|
109
|
-
/**
 * Remove every `<...>` sequence (an opening angle bracket, one or more
 * non-`>` characters, a closing angle bracket) from the given string.
 *
 * @param {string} html
 * @returns {string} The input with tag-like sequences deleted
 */
function stripTags(html) {
  const tagPattern = /<[^>]+>/g;
  return html.replace(tagPattern, "");
}
|
|
112
|
-
|
|
113
|
-
/**
 * Decode HTML character references in a single pass.
 *
 * Handles the named references `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`
 * and `&nbsp;` (decoded to a plain space), plus decimal (`&#39;`) and
 * hexadecimal (`&#x27;`) numeric references. Unknown names and numeric values
 * beyond U+10FFFF are left untouched.
 *
 * Decoding in one pass means text produced by one replacement is never decoded
 * again, so `&amp;lt;` yields the literal text `&lt;` rather than `<`.
 *
 * @param {string} text
 * @returns {string} Text with the supported references decoded
 */
function decodeEntities(text) {
  const named = new Map([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"],
    ["nbsp", " "],
  ]);

  return text.replace(
    /&(?:#(\d+)|#x([0-9a-f]+)|([a-z]+));/gi,
    (match, decimal, hex, name) => {
      if (name !== undefined) {
        return named.has(name) ? named.get(name) : match;
      }
      const codePoint =
        decimal !== undefined
          ? Number.parseInt(decimal, 10)
          : Number.parseInt(hex, 16);
      // fromCodePoint handles astral characters (fromCharCode would truncate
      // them to 16 bits) but throws above U+10FFFF, so keep those as-is.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    },
  );
}
|
|
126
|
-
|
|
127
|
-
/**
 * Pull the page title and the main readable content out of an HTML document.
 * Content is taken from the first <article>, else the first <main>, else
 * <body>, else the whole input, and is converted with htmlToMarkdown.
 *
 * @param {string} html
 * @param {string} url — accepted for interface compatibility; not read here
 * @returns {{ title: string, body: string }}
 */
export function extractHtmlContent(html, url) {
  // Inner text of the first match of `pattern`, or null when it does not match.
  const innerOf = (pattern) => {
    const found = html.match(pattern);
    return found ? found[1] : null;
  };

  // Extract <title>
  const rawTitle = innerOf(/<title[^>]*>([\s\S]*?)<\/title>/i);
  const title =
    rawTitle === null ? "" : stripTags(decodeEntities(rawTitle)).trim();

  // Try the main content areas in order of preference. `??` is used so that an
  // element that matched but is empty ("") is still selected.
  const contentHtml =
    innerOf(/<article[^>]*>([\s\S]*?)<\/article>/i) ??
    innerOf(/<main[^>]*>([\s\S]*?)<\/main>/i) ??
    innerOf(/<body[^>]*>([\s\S]*?)<\/body>/i) ??
    html;

  return { title, body: htmlToMarkdown(contentHtml) };
}
|
|
162
|
-
|
|
163
|
-
/**
 * Fetch a URL, extract readable content, and return an EntryData object.
 *
 * HTML and XHTML responses go through extractHtmlContent; any other content
 * type is used verbatim with the hostname as title. The body is truncated to
 * `maxBodyLength` characters. `timeoutMs` bounds the whole download — both the
 * wait for response headers and the read of the response body.
 *
 * @param {string} url
 * @param {{ kind?: string, tags?: string[], source?: string, maxBodyLength?: number, timeoutMs?: number }} [opts]
 * @returns {Promise<{ kind: string, title: string, body: string, tags: string[], meta: object, source: string }>}
 * @throws {Error} On an invalid URL, timeout, network failure, non-2xx status,
 *   or when no readable content remains
 */
export async function ingestUrl(url, opts = {}) {
  const {
    kind = "reference",
    tags = [],
    source,
    maxBodyLength = 50000,
    timeoutMs = 15000,
  } = opts;

  let domain;
  try {
    domain = new URL(url).hostname;
  } catch {
    throw new Error(`Invalid URL: ${url}`);
  }

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), timeoutMs);

  let response;
  let html;
  try {
    try {
      response = await fetch(url, {
        signal: controller.signal,
        headers: {
          "User-Agent":
            "ContextVault/1.0 (+https://github.com/fellanH/context-vault)",
          Accept: "text/html,application/xhtml+xml,text/plain,*/*",
        },
      });
    } catch (err) {
      if (err.name === "AbortError") {
        throw new Error(`Request timed out after ${timeoutMs}ms`, { cause: err });
      }
      throw new Error(`Fetch failed: ${err.message}`, { cause: err });
    }

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    // The timer stays armed while the body streams in, so a server that sends
    // headers and then stalls is still cut off after timeoutMs.
    try {
      html = await response.text();
    } catch (err) {
      if (err.name === "AbortError") {
        throw new Error(`Request timed out after ${timeoutMs}ms`, { cause: err });
      }
      throw err;
    }
  } finally {
    clearTimeout(timeout);
  }

  const contentType = response.headers.get("content-type") || "";

  let title;
  let body;

  if (
    contentType.includes("text/html") ||
    contentType.includes("application/xhtml")
  ) {
    const extracted = extractHtmlContent(html, url);
    title = extracted.title;
    body = extracted.body;
  } else {
    // Plain text or other — use as-is
    title = domain;
    body = html;
  }

  // Truncate if too long
  if (body.length > maxBodyLength) {
    body = body.slice(0, maxBodyLength) + "\n\n[Content truncated]";
  }

  if (!body.trim()) {
    throw new Error("No readable content extracted from URL");
  }

  return {
    kind,
    title: title || domain,
    body,
    tags: [...tags, "web-import"],
    meta: {
      url,
      domain,
      fetched_at: new Date().toISOString(),
      content_type: contentType.split(";")[0].trim() || "text/html",
    },
    source: source || domain,
  };
}
|
|
@@ -1,112 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Consolidation utilities — identifies tags and entries that warrant maintenance.
|
|
3
|
-
*
|
|
4
|
-
* These are pure DB queries with no LLM calls. The caller decides what to do
|
|
5
|
-
* with the results (e.g. run create_snapshot, archive entries, report to user).
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
/**
 * Identifies tags that have accumulated enough entries to warrant consolidation.
 *
 * A tag is "hot" when it has >= tagThreshold non-superseded entries AND no
 * brief/snapshot was saved for it within the last maxSnapshotAgeDays days.
 * Results are ordered by entry count, highest first.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {{ tagThreshold?: number, maxSnapshotAgeDays?: number }} [opts]
 * @returns {{ tag: string, entryCount: number, lastSnapshotAge: number | null }[]}
 *   lastSnapshotAge is the age in whole days of the newest brief carrying the
 *   tag, or null when no such brief exists.
 */
export function findHotTags(
  db,
  { tagThreshold = 10, maxSnapshotAgeDays = 7 } = {},
) {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;

  const rows = db
    .prepare(
      `SELECT id, tags, kind FROM vault
       WHERE superseded_by IS NULL
         AND tags IS NOT NULL
         AND tags != '[]'`,
    )
    .all();

  // Count entries per tag; rows whose tags column is not a JSON array are skipped.
  const tagCounts = new Map();
  for (const row of rows) {
    let tags;
    try {
      tags = JSON.parse(row.tags);
    } catch {
      continue;
    }
    if (!Array.isArray(tags)) continue;

    for (const tag of tags) {
      if (typeof tag !== "string" || !tag) continue;
      tagCounts.set(tag, (tagCounts.get(tag) ?? 0) + 1);
    }
  }

  const candidates = [...tagCounts].filter(
    ([, count]) => !(count < tagThreshold),
  );
  if (candidates.length === 0) return [];

  // Prepared once and reused for every candidate instead of once per tag.
  const recentBriefStmt = db.prepare(
    `SELECT created_at FROM vault
     WHERE kind = 'brief'
       AND tags LIKE ?
       AND created_at > datetime('now', '-' || ? || ' days')
     ORDER BY created_at DESC
     LIMIT 1`,
  );
  const latestBriefStmt = db.prepare(
    `SELECT created_at FROM vault
     WHERE kind = 'brief'
       AND tags LIKE ?
     ORDER BY created_at DESC
     LIMIT 1`,
  );

  const hotTags = [];
  for (const [tag, count] of candidates) {
    // Tags are matched as a quoted substring of the JSON-encoded tags column.
    const pattern = `%"${tag}"%`;

    // A sufficiently recent brief means the tag was consolidated already.
    if (recentBriefStmt.get(pattern, String(maxSnapshotAgeDays))) continue;

    const latestBrief = latestBriefStmt.get(pattern);
    let lastSnapshotAge = null;
    if (latestBrief) {
      const ms = Date.now() - new Date(latestBrief.created_at).getTime();
      lastSnapshotAge = Math.floor(ms / MS_PER_DAY);
    }

    hotTags.push({ tag, entryCount: count, lastSnapshotAge });
  }

  hotTags.sort((a, b) => b.entryCount - a.entryCount);

  return hotTags;
}
|
|
89
|
-
|
|
90
|
-
/**
 * Lists the IDs of cold entries — candidates for archiving.
 *
 * An entry qualifies when its hit_count is at most maxHitCount, it was created
 * more than maxAgeDays days ago, it has not been superseded, and its kind is
 * none of decision, architecture or brief.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {{ maxAgeDays?: number, maxHitCount?: number }} [opts]
 * @returns {string[]} Entry IDs eligible for archiving
 */
export function findColdEntries(db, { maxAgeDays = 90, maxHitCount = 0 } = {}) {
  const sql = [
    "SELECT id FROM vault",
    "WHERE hit_count <= ?",
    "AND created_at < datetime('now', '-' || ? || ' days')",
    "AND superseded_by IS NULL",
    "AND kind NOT IN ('decision', 'architecture', 'brief')",
  ].join("\n");

  // The day count is bound as text because the query concatenates it into a modifier string.
  const coldRows = db.prepare(sql).all(maxHitCount, String(maxAgeDays));

  const ids = [];
  for (const { id } of coldRows) {
    ids.push(id);
  }
  return ids;
}
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* categories.js — Static kind→category mapping
|
|
3
|
-
*
|
|
4
|
-
* Three categories with distinct write semantics:
|
|
5
|
-
* knowledge — append-only, enduring (default)
|
|
6
|
-
* entity — upsert by identity_key, enduring
|
|
7
|
-
* event — append-only, decaying relevance
|
|
8
|
-
*/
|
|
9
|
-
|
|
10
|
-
const KIND_CATEGORY = {
  // Knowledge — append-only, enduring
  insight: "knowledge",
  decision: "knowledge",
  pattern: "knowledge",
  prompt: "knowledge",
  note: "knowledge",
  document: "knowledge",
  reference: "knowledge",
  // Entity — upsert, enduring
  contact: "entity",
  project: "entity",
  tool: "entity",
  source: "entity",
  bucket: "entity",
  // Event — append-only, decaying
  event: "event",
  conversation: "event",
  message: "event",
  session: "event",
  task: "event",
  log: "event",
  feedback: "event",
};

/** Map category name → directory name on disk */
const CATEGORY_DIR_NAMES = {
  knowledge: "knowledge",
  entity: "entities",
  event: "events",
};

/** Set of valid category directory names (for reindex discovery) */
export const CATEGORY_DIRS = new Set(Object.values(CATEGORY_DIR_NAMES));

/**
 * Staleness thresholds (in days) per knowledge kind.
 * Kinds not listed here are considered enduring (no staleness threshold).
 * Based on updated_at; falls back to created_at if updated_at is null.
 */
export const KIND_STALENESS_DAYS = {
  pattern: 180,
  decision: 365,
  reference: 90,
};

const DURABLE_KINDS = new Set(["decision", "architecture", "pattern"]);
const EPHEMERAL_KINDS = new Set(["session", "observation"]);

/**
 * Returns the category ("knowledge", "entity" or "event") for a kind.
 * Kinds that are not listed in KIND_CATEGORY default to "knowledge".
 *
 * @param {string} kind
 * @returns {string}
 */
export function categoryFor(kind) {
  // Own-property check: a plain `KIND_CATEGORY[kind]` lookup would return
  // inherited Object.prototype members for kinds such as "constructor" or
  // "toString" instead of falling back to the default category.
  return Object.hasOwn(KIND_CATEGORY, kind) ? KIND_CATEGORY[kind] : "knowledge";
}

/**
 * Returns the default tier for a kind: "durable", "ephemeral", or "working"
 * for every kind in neither set.
 *
 * @param {string} kind
 * @returns {string}
 */
export function defaultTierFor(kind) {
  if (DURABLE_KINDS.has(kind)) return "durable";
  if (EPHEMERAL_KINDS.has(kind)) return "ephemeral";
  return "working";
}

/** Returns the category directory name for a given kind (e.g. "insight" → "knowledge") */
export function categoryDirFor(kind) {
  const cat = categoryFor(kind);
  return Object.hasOwn(CATEGORY_DIR_NAMES, cat)
    ? CATEGORY_DIR_NAMES[cat]
    : "knowledge";
}
|