context-vault 2.17.1 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. package/bin/cli.js +795 -71
  2. package/node_modules/@context-vault/core/dist/capture.d.ts +21 -0
  3. package/node_modules/@context-vault/core/dist/capture.d.ts.map +1 -0
  4. package/node_modules/@context-vault/core/dist/capture.js +269 -0
  5. package/node_modules/@context-vault/core/dist/capture.js.map +1 -0
  6. package/node_modules/@context-vault/core/dist/categories.d.ts +6 -0
  7. package/node_modules/@context-vault/core/dist/categories.d.ts.map +1 -0
  8. package/node_modules/@context-vault/core/dist/categories.js +50 -0
  9. package/node_modules/@context-vault/core/dist/categories.js.map +1 -0
  10. package/node_modules/@context-vault/core/dist/config.d.ts +4 -0
  11. package/node_modules/@context-vault/core/dist/config.d.ts.map +1 -0
  12. package/node_modules/@context-vault/core/dist/config.js +190 -0
  13. package/node_modules/@context-vault/core/dist/config.js.map +1 -0
  14. package/node_modules/@context-vault/core/dist/constants.d.ts +33 -0
  15. package/node_modules/@context-vault/core/dist/constants.d.ts.map +1 -0
  16. package/node_modules/@context-vault/core/dist/constants.js +23 -0
  17. package/node_modules/@context-vault/core/dist/constants.js.map +1 -0
  18. package/node_modules/@context-vault/core/dist/db.d.ts +13 -0
  19. package/node_modules/@context-vault/core/dist/db.d.ts.map +1 -0
  20. package/node_modules/@context-vault/core/dist/db.js +191 -0
  21. package/node_modules/@context-vault/core/dist/db.js.map +1 -0
  22. package/node_modules/@context-vault/core/dist/embed.d.ts +5 -0
  23. package/node_modules/@context-vault/core/dist/embed.d.ts.map +1 -0
  24. package/node_modules/@context-vault/core/dist/embed.js +78 -0
  25. package/node_modules/@context-vault/core/dist/embed.js.map +1 -0
  26. package/node_modules/@context-vault/core/dist/files.d.ts +13 -0
  27. package/node_modules/@context-vault/core/dist/files.d.ts.map +1 -0
  28. package/node_modules/@context-vault/core/dist/files.js +66 -0
  29. package/node_modules/@context-vault/core/dist/files.js.map +1 -0
  30. package/node_modules/@context-vault/core/dist/formatters.d.ts +8 -0
  31. package/node_modules/@context-vault/core/dist/formatters.d.ts.map +1 -0
  32. package/node_modules/@context-vault/core/dist/formatters.js +18 -0
  33. package/node_modules/@context-vault/core/dist/formatters.js.map +1 -0
  34. package/node_modules/@context-vault/core/dist/frontmatter.d.ts +12 -0
  35. package/node_modules/@context-vault/core/dist/frontmatter.d.ts.map +1 -0
  36. package/node_modules/@context-vault/core/dist/frontmatter.js +101 -0
  37. package/node_modules/@context-vault/core/dist/frontmatter.js.map +1 -0
  38. package/node_modules/@context-vault/core/dist/index.d.ts +10 -0
  39. package/node_modules/@context-vault/core/dist/index.d.ts.map +1 -0
  40. package/node_modules/@context-vault/core/dist/index.js +297 -0
  41. package/node_modules/@context-vault/core/dist/index.js.map +1 -0
  42. package/node_modules/@context-vault/core/dist/ingest-url.d.ts +20 -0
  43. package/node_modules/@context-vault/core/dist/ingest-url.d.ts.map +1 -0
  44. package/node_modules/@context-vault/core/dist/ingest-url.js +113 -0
  45. package/node_modules/@context-vault/core/dist/ingest-url.js.map +1 -0
  46. package/node_modules/@context-vault/core/dist/main.d.ts +14 -0
  47. package/node_modules/@context-vault/core/dist/main.d.ts.map +1 -0
  48. package/node_modules/@context-vault/core/dist/main.js +25 -0
  49. package/node_modules/@context-vault/core/dist/main.js.map +1 -0
  50. package/node_modules/@context-vault/core/dist/search.d.ts +18 -0
  51. package/node_modules/@context-vault/core/dist/search.d.ts.map +1 -0
  52. package/node_modules/@context-vault/core/dist/search.js +238 -0
  53. package/node_modules/@context-vault/core/dist/search.js.map +1 -0
  54. package/node_modules/@context-vault/core/dist/types.d.ts +176 -0
  55. package/node_modules/@context-vault/core/dist/types.d.ts.map +1 -0
  56. package/node_modules/@context-vault/core/dist/types.js +2 -0
  57. package/node_modules/@context-vault/core/dist/types.js.map +1 -0
  58. package/node_modules/@context-vault/core/package.json +66 -16
  59. package/node_modules/@context-vault/core/src/capture.ts +308 -0
  60. package/node_modules/@context-vault/core/src/categories.ts +54 -0
  61. package/node_modules/@context-vault/core/src/{core/config.js → config.ts} +34 -33
  62. package/node_modules/@context-vault/core/src/{constants.js → constants.ts} +6 -3
  63. package/node_modules/@context-vault/core/src/db.ts +229 -0
  64. package/node_modules/@context-vault/core/src/{index/embed.js → embed.ts} +10 -35
  65. package/node_modules/@context-vault/core/src/{core/files.js → files.ts} +15 -20
  66. package/node_modules/@context-vault/core/src/{capture/formatters.js → formatters.ts} +13 -11
  67. package/node_modules/@context-vault/core/src/{core/frontmatter.js → frontmatter.ts} +26 -33
  68. package/node_modules/@context-vault/core/src/index.ts +351 -0
  69. package/node_modules/@context-vault/core/src/ingest-url.ts +99 -0
  70. package/node_modules/@context-vault/core/src/main.ts +111 -0
  71. package/node_modules/@context-vault/core/src/{retrieve/index.js → search.ts} +62 -150
  72. package/node_modules/@context-vault/core/src/types.ts +166 -0
  73. package/package.json +12 -7
  74. package/scripts/postinstall.js +1 -1
  75. package/{node_modules/@context-vault/core/src/core → src}/error-log.js +1 -15
  76. package/{node_modules/@context-vault/core/src/server → src}/helpers.js +9 -4
  77. package/src/linking.js +100 -0
  78. package/{node_modules/@context-vault/core/src/server/tools.js → src/register-tools.js} +14 -13
  79. package/src/{server/index.js → server.js} +10 -38
  80. package/src/status.js +235 -0
  81. package/{node_modules/@context-vault/core/src/core → src}/telemetry.js +9 -19
  82. package/src/temporal.js +97 -0
  83. package/{node_modules/@context-vault/core/src/server → src}/tools/context-status.js +3 -4
  84. package/{node_modules/@context-vault/core/src/server → src}/tools/create-snapshot.js +6 -7
  85. package/{node_modules/@context-vault/core/src/server → src}/tools/delete-context.js +0 -2
  86. package/{node_modules/@context-vault/core/src/server → src}/tools/get-context.js +17 -21
  87. package/{node_modules/@context-vault/core/src/server → src}/tools/ingest-project.js +5 -6
  88. package/{node_modules/@context-vault/core/src/server → src}/tools/ingest-url.js +3 -4
  89. package/{node_modules/@context-vault/core/src/server → src}/tools/list-buckets.js +4 -5
  90. package/{node_modules/@context-vault/core/src/server → src}/tools/list-context.js +3 -6
  91. package/{node_modules/@context-vault/core/src/server → src}/tools/save-context.js +17 -20
  92. package/{node_modules/@context-vault/core/src/server → src}/tools/session-start.js +9 -16
  93. package/node_modules/@context-vault/core/src/capture/file-ops.js +0 -99
  94. package/node_modules/@context-vault/core/src/capture/import-pipeline.js +0 -46
  95. package/node_modules/@context-vault/core/src/capture/importers.js +0 -387
  96. package/node_modules/@context-vault/core/src/capture/index.js +0 -250
  97. package/node_modules/@context-vault/core/src/capture/ingest-url.js +0 -252
  98. package/node_modules/@context-vault/core/src/consolidation/index.js +0 -112
  99. package/node_modules/@context-vault/core/src/core/categories.js +0 -73
  100. package/node_modules/@context-vault/core/src/core/linking.js +0 -161
  101. package/node_modules/@context-vault/core/src/core/migrate-dirs.js +0 -196
  102. package/node_modules/@context-vault/core/src/core/status.js +0 -350
  103. package/node_modules/@context-vault/core/src/core/temporal.js +0 -146
  104. package/node_modules/@context-vault/core/src/index/db.js +0 -586
  105. package/node_modules/@context-vault/core/src/index/index.js +0 -583
  106. package/node_modules/@context-vault/core/src/index.js +0 -71
  107. package/node_modules/@context-vault/core/src/sync/sync.js +0 -235
  108. package/src/hooks/post-tool-call.mjs +0 -62
  109. package/src/hooks/session-end.mjs +0 -492
  110. /package/{node_modules/@context-vault/core/src/server → src}/tools/clear-context.js +0 -0
@@ -1,250 +0,0 @@
1
- /**
2
- * Capture Layer — Public API
3
- *
4
- * Writes knowledge entries to vault as .md files and indexes them.
5
- * captureAndIndex() is the write-through entry point (capture + index + rollback on failure).
6
- */
7
-
8
- import { existsSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
9
- import { resolve } from "node:path";
10
- import { ulid, slugify, kindToPath } from "../core/files.js";
11
- import { categoryFor } from "../core/categories.js";
12
- import { parseFrontmatter, formatFrontmatter } from "../core/frontmatter.js";
13
- import { formatBody } from "./formatters.js";
14
- import { writeEntryFile } from "./file-ops.js";
15
- import { indexEntry } from "../index/index.js";
16
-
17
/**
 * Write a knowledge entry to the vault as a markdown file. Does NOT index it.
 *
 * When the kind's category is "entity" and an `identity_key` is given, the
 * entry lives at the deterministic path
 * `<vaultDir>/<kindToPath(kind)>/<slugify(identity_key)>.md`. If that file
 * already exists, its frontmatter `id` and `created` values are reused so the
 * write behaves as an update of the same entry; otherwise a fresh id and
 * timestamp are generated.
 *
 * `source_files`, `tier` and `userId` are not handed to `writeEntryFile`; they
 * are only echoed back on the returned object.
 *
 * @param {{ config: { vaultDir: string } }} ctx
 * @param {object} data
 * @param {string} data.kind - Required, non-empty.
 * @param {string} data.body - Required, must contain non-whitespace text.
 * @param {string} [data.title]
 * @param {object} [data.meta] - Plain object (arrays are rejected).
 * @param {string[]} [data.tags]
 * @param {string} [data.source]
 * @param {string} [data.folder]
 * @param {string} [data.identity_key]
 * @param {string} [data.expires_at]
 * @param {string[]} [data.supersedes]
 * @param {string[]} [data.related_to]
 * @param {string[]} [data.source_files]
 * @param {string} [data.tier]
 * @param {string} [data.userId]
 * @returns {object} The entry, including `id`, `filePath`, `category`,
 *   `createdAt` and `updatedAt`.
 * @throws {Error} If `kind`, `body`, `tags` or `meta` fail validation.
 */
export function writeEntry(
  ctx,
  {
    kind,
    title,
    body,
    meta,
    tags,
    source,
    folder,
    identity_key,
    expires_at,
    supersedes,
    related_to,
    source_files,
    tier,
    userId,
  },
) {
  if (!kind || typeof kind !== "string") {
    throw new Error("writeEntry: kind is required (non-empty string)");
  }
  if (!body || typeof body !== "string" || !body.trim()) {
    throw new Error("writeEntry: body is required (non-empty string)");
  }
  if (tags != null && !Array.isArray(tags)) {
    throw new Error("writeEntry: tags must be an array if provided");
  }
  // typeof [] is "object", so arrays have to be excluded explicitly.
  if (meta != null && (typeof meta !== "object" || Array.isArray(meta))) {
    throw new Error("writeEntry: meta must be an object if provided");
  }

  const category = categoryFor(kind);

  // Entity upsert: look for an existing file at the deterministic path and,
  // if present, keep its identity (id + created timestamp).
  let previousMeta = null;
  if (category === "entity" && identity_key) {
    const existingPath = resolve(
      ctx.config.vaultDir,
      kindToPath(kind),
      `${slugify(identity_key)}.md`,
    );
    if (existsSync(existingPath)) {
      previousMeta = parseFrontmatter(readFileSync(existingPath, "utf-8")).meta;
    }
  }

  const now = new Date().toISOString();
  const id = previousMeta?.id || ulid();
  const createdAt = previousMeta?.created || now;
  const updatedAt = now;

  const filePath = writeEntryFile(ctx.config.vaultDir, kind, {
    id,
    title,
    body,
    meta,
    tags,
    source,
    createdAt,
    updatedAt,
    folder,
    category,
    identity_key,
    expires_at,
    supersedes,
    related_to,
  });

  return {
    id,
    filePath,
    kind,
    category,
    title,
    body,
    meta,
    tags,
    source,
    createdAt,
    updatedAt,
    identity_key,
    expires_at,
    supersedes,
    related_to: related_to || null,
    source_files: source_files || null,
    tier: tier || null,
    userId: userId || null,
  };
}
116
-
117
/**
 * Rewrite an existing entry's markdown file, layering `updates` on top of the
 * stored values. Does NOT re-index — the caller must call indexEntry afterwards.
 *
 * A field in `updates` that is `undefined` keeps the stored value; any other
 * value (including null) replaces it. `updates.meta` is shallow-merged over
 * the stored meta rather than replacing it.
 *
 * @param {{ config, stmts }} ctx - Not read by this function.
 * @param {object} existing - Row from the vault table (from getEntryById);
 *   `meta`, `tags`, `related_to` and `source_files` are JSON strings when set.
 * @param {{ title?, body?, tags?, meta?, source?, expires_at?, supersedes?, related_to?, source_files? }} updates
 * @returns {object} Entry object suitable for indexEntry
 */
export function updateEntryFile(ctx, existing, updates) {
  const { meta: frontmatter } = parseFrontmatter(
    readFileSync(existing.file_path, "utf-8"),
  );

  const storedMeta = existing.meta ? JSON.parse(existing.meta) : {};
  const storedTags = existing.tags ? JSON.parse(existing.tags) : [];
  const storedRelatedTo = existing.related_to
    ? JSON.parse(existing.related_to)
    : frontmatter.related_to || null;

  // Fallbacks are thunks so they are only evaluated when the update is absent.
  const pick = (field, fallback) =>
    updates[field] !== undefined ? updates[field] : fallback();

  const title = pick("title", () => existing.title);
  const body = pick("body", () => existing.body);
  const tags = pick("tags", () => storedTags);
  const source = pick("source", () => existing.source);
  const expires_at = pick("expires_at", () => existing.expires_at);
  const supersedes = pick("supersedes", () => frontmatter.supersedes || null);
  const related_to = pick("related_to", () => storedRelatedTo);
  const source_files = pick("source_files", () =>
    existing.source_files ? JSON.parse(existing.source_files) : null,
  );

  const mergedMeta = { ...storedMeta, ...(updates.meta || {}) };

  // Assemble frontmatter; insertion order here is the order written to disk.
  const timestamp = new Date().toISOString();
  const createdAt = frontmatter.created || existing.created_at;
  const fields = { id: existing.id };
  for (const [key, value] of Object.entries(mergedMeta)) {
    // "folder" is never written to frontmatter; null/undefined values are dropped.
    if (key !== "folder" && value != null) {
      fields[key] = value;
    }
  }
  if (existing.identity_key) {
    fields.identity_key = existing.identity_key;
  }
  if (expires_at) {
    fields.expires_at = expires_at;
  }
  if (supersedes?.length) {
    fields.supersedes = supersedes;
  }
  if (related_to?.length) {
    fields.related_to = related_to;
  }
  fields.tags = tags;
  fields.source = source || "claude-code";
  fields.created = createdAt;
  if (timestamp !== createdAt) {
    fields.updated = timestamp;
  }

  const markdown =
    formatFrontmatter(fields) +
    formatBody(existing.kind, { title, body, meta: mergedMeta });
  writeFileSync(existing.file_path, markdown);

  return {
    id: existing.id,
    filePath: existing.file_path,
    kind: existing.kind,
    category: existing.category,
    title,
    body,
    // An empty meta object is reported as undefined.
    meta: Object.keys(mergedMeta).length ? mergedMeta : undefined,
    tags,
    source,
    createdAt,
    updatedAt: timestamp,
    identity_key: existing.identity_key,
    expires_at,
    supersedes,
    related_to: related_to || null,
    source_files: source_files || null,
    userId: existing.user_id || null,
  };
}
206
-
207
/**
 * Write-through entry point: write the entry file, index it, and undo the
 * file write if indexing fails.
 *
 * After a successful index, entries listed in `supersedes` are marked as
 * superseded by the new entry and `related_to` links are stored, when the
 * corresponding prepared statements exist on `ctx.stmts`.
 *
 * @param {{ config: { vaultDir: string }, stmts: object }} ctx
 * @param {object} data - Entry data, passed straight to writeEntry.
 * @returns {Promise<object>} The entry returned by writeEntry.
 * @throws {Error} If validation/writing fails (from writeEntry), or if
 *   indexing fails — in which case the file is rolled back and the original
 *   error is attached as `cause`.
 */
export async function captureAndIndex(ctx, data) {
  // For entity upserts, remember the previous file content so a failed index
  // can restore it instead of deleting the user's existing entry.
  let previousContent = null;
  if (categoryFor(data.kind) === "entity" && data.identity_key) {
    const existingPath = resolve(
      ctx.config.vaultDir,
      kindToPath(data.kind),
      `${slugify(data.identity_key)}.md`,
    );
    if (existsSync(existingPath)) {
      previousContent = readFileSync(existingPath, "utf-8");
    }
  }

  const entry = writeEntry(ctx, data);
  try {
    await indexEntry(ctx, entry);

    // Apply supersedes: mark referenced entries as superseded by this entry.
    if (entry.supersedes?.length && ctx.stmts.updateSupersededBy) {
      for (const supersededId of entry.supersedes) {
        if (typeof supersededId === "string" && supersededId.trim()) {
          ctx.stmts.updateSupersededBy.run(entry.id, supersededId.trim());
        }
      }
    }

    // Store related_to links in the DB.
    if (entry.related_to?.length && ctx.stmts.updateRelatedTo) {
      ctx.stmts.updateRelatedTo.run(JSON.stringify(entry.related_to), entry.id);
    }

    return entry;
  } catch (err) {
    // Rollback is best-effort: a failure here must not mask the indexing error.
    try {
      // Compare against null, not truthiness: an existing file that was empty
      // ("") must be restored, not deleted.
      if (previousContent !== null) {
        writeFileSync(entry.filePath, previousContent);
      } else {
        unlinkSync(entry.filePath);
      }
    } catch {
      // Intentionally ignored — see above.
    }
    throw new Error(
      `Capture succeeded but indexing failed — file rolled back. ${err.message}`,
      { cause: err },
    );
  }
}
@@ -1,252 +0,0 @@
1
/** Elements whose entire content is discarded before conversion. */
const DROPPED_ELEMENTS = ["script", "style", "nav", "header", "footer", "aside"];

/** Named entities understood by decodeEntities (names are case-sensitive). */
const NAMED_ENTITIES = {
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
  nbsp: " ",
};

/**
 * Convert an HTML fragment to Markdown using regex-based rewriting.
 *
 * Handles headings (h1–h6), links, fenced and inline code, bold/italic,
 * list items, paragraphs, line breaks and blockquotes; every other tag is
 * stripped. Rendered code is parked behind placeholders until the very end so
 * that decoded code such as `<div>` is not mistaken for markup by the final
 * tag-stripping pass, and so entities in code are decoded exactly once.
 *
 * @param {string} html
 * @returns {string} Markdown with runs of blank lines collapsed and outer
 *   whitespace trimmed.
 */
export function htmlToMarkdown(html) {
  const protectedCode = [];
  // NUL cannot appear in the working text (removed below), so it is a safe sentinel.
  const protect = (markdown) =>
    `\u0000${protectedCode.push(markdown) - 1}\u0000`;
  const renderCode = (inner) => decodeEntities(stripTags(inner)).trim();

  let md = html.replace(/\u0000/g, "");

  // Remove scripts, styles, nav, header, footer, aside.
  // `\b` keeps e.g. <header> from also matching a longer tag name.
  for (const tag of DROPPED_ELEMENTS) {
    md = md.replace(new RegExp(`<${tag}\\b[\\s\\S]*?<\\/${tag}>`, "gi"), "");
  }

  // Convert headings
  for (let level = 1; level <= 6; level++) {
    const prefix = "#".repeat(level);
    md = md.replace(
      new RegExp(`<h${level}\\b[^>]*>([\\s\\S]*?)<\\/h${level}>`, "gi"),
      (_, inner) => `\n${prefix} ${stripTags(inner).trim()}\n`,
    );
  }

  // Convert links (double- or single-quoted href); links without text are dropped
  md = md.replace(
    /<a\b[^>]*?\shref\s*=\s*(?:"([^"]*)"|'([^']*)')[^>]*>([\s\S]*?)<\/a>/gi,
    (_, doubleQuoted, singleQuoted, text) => {
      const cleanText = stripTags(text).trim();
      return cleanText ? `[${cleanText}](${doubleQuoted ?? singleQuoted})` : "";
    },
  );

  // Convert code blocks
  const fence = (_, inner) =>
    `\n${protect(`\`\`\`\n${renderCode(inner)}\n\`\`\``)}\n`;
  md = md.replace(
    /<pre\b[^>]*><code\b[^>]*>([\s\S]*?)<\/code><\/pre>/gi,
    fence,
  );
  md = md.replace(/<pre\b[^>]*>([\s\S]*?)<\/pre>/gi, fence);

  // Convert inline code
  md = md.replace(/<code\b[^>]*>([\s\S]*?)<\/code>/gi, (_, inner) =>
    protect(`\`${renderCode(inner)}\``),
  );

  // Convert strong/em. `\b` stops <b matching <br>/<blockquote> and <i matching <img>.
  md = md.replace(
    /<(strong|b)\b[^>]*>([\s\S]*?)<\/\1>/gi,
    (_, __, inner) => `**${stripTags(inner).trim()}**`,
  );
  md = md.replace(
    /<(em|i)\b[^>]*>([\s\S]*?)<\/\1>/gi,
    (_, __, inner) => `*${stripTags(inner).trim()}*`,
  );

  // Convert list items
  md = md.replace(
    /<li\b[^>]*>([\s\S]*?)<\/li>/gi,
    (_, inner) => `- ${stripTags(inner).trim()}\n`,
  );

  // Convert paragraphs, line breaks and blockquotes
  md = md.replace(/<br\s*\/?>/gi, "\n");
  md = md.replace(
    /<p\b[^>]*>([\s\S]*?)<\/p>/gi,
    (_, inner) => `\n${stripTags(inner).trim()}\n`,
  );
  md = md.replace(/<blockquote\b[^>]*>([\s\S]*?)<\/blockquote>/gi, (_, inner) => {
    const quoted = stripTags(inner)
      .trim()
      .split("\n")
      .map((line) => `> ${line}`)
      .join("\n");
    return `\n${quoted}\n`;
  });

  // Remove remaining HTML tags, then decode entities in the prose
  md = decodeEntities(stripTags(md));

  // Clean up whitespace
  md = md.replace(/\n{3,}/g, "\n\n").trim();

  // Put the protected code back, untouched by the passes above
  return md.replace(
    /\u0000(\d+)\u0000/g,
    (token, index) => protectedCode[Number(index)] ?? token,
  );
}

/**
 * Remove anything that looks like an HTML tag, keeping the text between tags.
 *
 * @param {string} html
 * @returns {string}
 */
function stripTags(html) {
  return html.replace(/<[^>]+>/g, "");
}

/**
 * Decode the common named entities and all numeric (decimal/hex) character
 * references in a single pass, so already-decoded output is never decoded
 * again (`&amp;lt;` becomes `&lt;`, not `<`).
 *
 * @param {string} text
 * @returns {string}
 */
function decodeEntities(text) {
  return text.replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos|nbsp));/g,
    (match, decimal, hex, name) => {
      if (name !== undefined) {
        return NAMED_ENTITIES[name];
      }
      const codePoint =
        decimal !== undefined
          ? Number.parseInt(decimal, 10)
          : Number.parseInt(hex, 16);
      if (codePoint === 0) {
        return "\uFFFD"; // NUL is replaced, as HTML parsers do
      }
      if (codePoint > 0x10ffff) {
        return match; // not a valid code point: leave the text untouched
      }
      // fromCodePoint (unlike fromCharCode) handles code points above U+FFFF.
      return String.fromCodePoint(codePoint);
    },
  );
}
126
-
127
/**
 * Extract the main readable content from an HTML page.
 * Prefers <article>, then <main>, then <body>; if none is found the whole
 * input is converted.
 *
 * @param {string} html
 * @param {string} url - Currently unused; kept so existing callers keep working.
 * @returns {{ title: string, body: string }} `title` is the text of <title>
 *   ("" when absent); `body` is the content converted to Markdown.
 */
export function extractHtmlContent(html, url) {
  // Strip tags BEFORE decoding entities: decoding first would turn escaped
  // text such as "&lt;div&gt;" into "<div>", which would then be stripped.
  const titleMatch = html.match(/<title\b[^>]*>([\s\S]*?)<\/title>/i);
  const title = titleMatch
    ? decodeEntities(stripTags(titleMatch[1])).trim()
    : "";

  // First matching container wins, in order of preference.
  const container =
    html.match(/<article\b[^>]*>([\s\S]*?)<\/article>/i) ??
    html.match(/<main\b[^>]*>([\s\S]*?)<\/main>/i) ??
    html.match(/<body\b[^>]*>([\s\S]*?)<\/body>/i);
  const contentHtml = container ? container[1] : html;

  return { title, body: htmlToMarkdown(contentHtml) };
}
162
-
163
/**
 * Fetch a URL, extract readable content, and return an EntryData object.
 *
 * HTML/XHTML responses are converted to Markdown via extractHtmlContent;
 * any other content type is used as-is with the hostname as the title.
 * Bodies longer than `maxBodyLength` characters are cut and suffixed with
 * "[Content truncated]". The timeout covers both the request and the body
 * download.
 *
 * @param {string} url
 * @param {{ kind?: string, tags?: string[], source?: string, maxBodyLength?: number, timeoutMs?: number }} [opts]
 * @returns {Promise<{ kind: string, title: string, body: string, tags: string[], meta: object, source: string }>}
 * @throws {Error} On an invalid URL, timeout, network failure, non-2xx
 *   status, or when no readable content could be extracted.
 */
export async function ingestUrl(url, opts = {}) {
  const {
    kind = "reference",
    tags = [],
    source,
    maxBodyLength = 50000,
    timeoutMs = 15000,
  } = opts;

  let domain;
  try {
    domain = new URL(url).hostname;
  } catch {
    throw new Error(`Invalid URL: ${url}`);
  }

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), timeoutMs);

  let response;
  let rawBody;
  try {
    response = await fetch(url, {
      signal: controller.signal,
      headers: {
        "User-Agent":
          "ContextVault/1.0 (+https://github.com/fellanH/context-vault)",
        Accept: "text/html,application/xhtml+xml,text/plain,*/*",
      },
    });
    // Read the body while the timer is still armed: fetch() resolves as soon
    // as headers arrive, so a stalled body would otherwise hang forever.
    if (response.ok) {
      rawBody = await response.text();
    }
  } catch (err) {
    if (err.name === "AbortError") {
      throw new Error(`Request timed out after ${timeoutMs}ms`, { cause: err });
    }
    throw new Error(`Fetch failed: ${err.message}`, { cause: err });
  } finally {
    clearTimeout(timeout);
  }

  if (!response.ok) {
    throw new Error(`HTTP ${response.status}: ${response.statusText}`);
  }

  const contentType = response.headers.get("content-type") || "";
  const isHtml =
    contentType.includes("text/html") ||
    contentType.includes("application/xhtml");

  let title;
  let body;
  if (isHtml) {
    ({ title, body } = extractHtmlContent(rawBody, url));
  } else {
    // Plain text or other — use as-is
    title = domain;
    body = rawBody;
  }

  // Truncate if too long
  if (body.length > maxBodyLength) {
    body = `${body.slice(0, maxBodyLength)}\n\n[Content truncated]`;
  }

  if (!body.trim()) {
    throw new Error("No readable content extracted from URL");
  }

  return {
    kind,
    title: title || domain,
    body,
    tags: [...tags, "web-import"],
    meta: {
      url,
      domain,
      fetched_at: new Date().toISOString(),
      content_type: contentType.split(";")[0].trim() || "text/html",
    },
    source: source || domain,
  };
}
@@ -1,112 +0,0 @@
1
- /**
2
- * Consolidation utilities — identifies tags and entries that warrant maintenance.
3
- *
4
- * These are pure DB queries with no LLM calls. The caller decides what to do
5
- * with the results (e.g. run create_snapshot, archive entries, report to user).
6
- */
7
-
8
/**
 * Identifies tags that have accumulated enough entries to warrant consolidation.
 *
 * A tag is "hot" when at least `tagThreshold` non-superseded entries carry it
 * AND no entry of kind 'brief' with that tag was created within the last
 * `maxSnapshotAgeDays` days. Each entry counts at most once per tag, even if
 * its tag array repeats the tag. Rows whose `tags` column is not a JSON array
 * are skipped.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {{ tagThreshold?: number, maxSnapshotAgeDays?: number }} [opts]
 * @returns {{ tag: string, entryCount: number, lastSnapshotAge: number | null }[]}
 *   Hot tags sorted by entryCount, highest first. `lastSnapshotAge` is the age
 *   in whole days of the newest brief for the tag, or null if there is none
 *   (or its timestamp cannot be parsed).
 */
export function findHotTags(
  db,
  { tagThreshold = 10, maxSnapshotAgeDays = 7 } = {},
) {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;

  const rows = db
    .prepare(
      `SELECT id, tags, kind FROM vault
       WHERE superseded_by IS NULL
         AND tags IS NOT NULL
         AND tags != '[]'`,
    )
    .all();

  const tagCounts = new Map();
  for (const row of rows) {
    let tags;
    try {
      tags = JSON.parse(row.tags);
    } catch {
      continue; // corrupt tags column: ignore this row rather than fail the scan
    }
    if (!Array.isArray(tags)) continue;

    // Set: a tag repeated inside one entry still counts as one entry.
    for (const tag of new Set(tags)) {
      if (typeof tag !== "string" || !tag) continue;
      tagCounts.set(tag, (tagCounts.get(tag) ?? 0) + 1);
    }
  }

  const candidates = [...tagCounts].filter(
    ([, count]) => count >= tagThreshold,
  );
  if (candidates.length === 0) return [];

  // Prepared once and reused for every candidate tag.
  const recentBriefStmt = db.prepare(
    `SELECT created_at FROM vault
     WHERE kind = 'brief'
       AND tags LIKE ? ESCAPE '\\'
       AND created_at > datetime('now', '-' || ? || ' days')
     ORDER BY created_at DESC
     LIMIT 1`,
  );
  const latestBriefStmt = db.prepare(
    `SELECT created_at FROM vault
     WHERE kind = 'brief'
       AND tags LIKE ? ESCAPE '\\'
     ORDER BY created_at DESC
     LIMIT 1`,
  );

  // Match the tag exactly as it is serialized inside the JSON array, with
  // LIKE wildcards (and the escape character itself) neutralized.
  const likePatternFor = (tag) =>
    `%${JSON.stringify(tag).replace(/[\\%_]/g, "\\$&")}%`;

  const hotTags = [];
  for (const [tag, count] of candidates) {
    const pattern = likePatternFor(tag);

    // A recent brief already covers this tag — not hot.
    if (recentBriefStmt.get(pattern, String(maxSnapshotAgeDays))) continue;

    let lastSnapshotAge = null;
    const latestBrief = latestBriefStmt.get(pattern);
    if (latestBrief) {
      const elapsedMs = Date.now() - new Date(latestBrief.created_at).getTime();
      if (!Number.isNaN(elapsedMs)) {
        lastSnapshotAge = Math.floor(elapsedMs / MS_PER_DAY);
      }
    }

    hotTags.push({ tag, entryCount: count, lastSnapshotAge });
  }

  hotTags.sort((a, b) => b.entryCount - a.entryCount);

  return hotTags;
}
89
-
90
/**
 * Lists entries that look safe to archive: created more than `maxAgeDays`
 * days ago, with a hit count of at most `maxHitCount`, not superseded, and
 * not of a permanent kind (decision, architecture, brief).
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {{ maxAgeDays?: number, maxHitCount?: number }} [opts]
 * @returns {string[]} Entry IDs eligible for archiving
 */
export function findColdEntries(db, { maxAgeDays = 90, maxHitCount = 0 } = {}) {
  const coldEntriesStmt = db.prepare(
    `SELECT id FROM vault
     WHERE hit_count <= ?
       AND created_at < datetime('now', '-' || ? || ' days')
       AND superseded_by IS NULL
       AND kind NOT IN ('decision', 'architecture', 'brief')`,
  );

  // The day count is bound as text because the SQL splices it into a modifier string.
  const coldRows = coldEntriesStmt.all(maxHitCount, String(maxAgeDays));

  return coldRows.map(({ id }) => id);
}
@@ -1,73 +0,0 @@
1
- /**
2
- * categories.js — Static kind→category mapping
3
- *
4
- * Three categories with distinct write semantics:
5
- * knowledge — append-only, enduring (default)
6
- * entity — upsert by identity_key, enduring
7
- * event — append-only, decaying relevance
8
- */
9
-
10
/** Static kind → category table. Kinds not listed default to "knowledge". */
const KIND_CATEGORY = {
  // Knowledge — append-only, enduring
  insight: "knowledge",
  decision: "knowledge",
  pattern: "knowledge",
  prompt: "knowledge",
  note: "knowledge",
  document: "knowledge",
  reference: "knowledge",
  // Entity — upsert, enduring
  contact: "entity",
  project: "entity",
  tool: "entity",
  source: "entity",
  bucket: "entity",
  // Event — append-only, decaying
  event: "event",
  conversation: "event",
  message: "event",
  session: "event",
  task: "event",
  log: "event",
  feedback: "event",
};

/** Map category name → directory name on disk */
const CATEGORY_DIR_NAMES = {
  knowledge: "knowledge",
  entity: "entities",
  event: "events",
};

/** Set of valid category directory names (for reindex discovery) */
export const CATEGORY_DIRS = new Set(Object.values(CATEGORY_DIR_NAMES));

/**
 * Staleness thresholds (in days) per knowledge kind.
 * Kinds not listed here are considered enduring (no staleness threshold).
 * Based on updated_at; falls back to created_at if updated_at is null.
 */
export const KIND_STALENESS_DAYS = {
  pattern: 180,
  decision: 365,
  reference: 90,
};

/** Kinds whose default tier is "durable". */
const DURABLE_KINDS = new Set(["decision", "architecture", "pattern"]);
/** Kinds whose default tier is "ephemeral". */
const EPHEMERAL_KINDS = new Set(["session", "observation"]);

/**
 * Returns the category ("knowledge", "entity" or "event") for a kind.
 *
 * Uses an own-property check so that kinds which happen to be named like
 * inherited Object members ("constructor", "toString", "__proto__", …) fall
 * back to "knowledge" instead of returning a function or object.
 *
 * @param {string} kind
 * @returns {string}
 */
export function categoryFor(kind) {
  return Object.hasOwn(KIND_CATEGORY, kind) ? KIND_CATEGORY[kind] : "knowledge";
}

/**
 * Returns the default tier for a kind: "durable", "ephemeral", or "working"
 * for everything else.
 *
 * @param {string} kind
 * @returns {"durable" | "ephemeral" | "working"}
 */
export function defaultTierFor(kind) {
  if (DURABLE_KINDS.has(kind)) return "durable";
  if (EPHEMERAL_KINDS.has(kind)) return "ephemeral";
  return "working";
}

/**
 * Returns the category directory name for a given kind (e.g. "insight" → "knowledge")
 *
 * @param {string} kind
 * @returns {string}
 */
export function categoryDirFor(kind) {
  const category = categoryFor(kind);
  return Object.hasOwn(CATEGORY_DIR_NAMES, category)
    ? CATEGORY_DIR_NAMES[category]
    : "knowledge";
}