@context-vault/core 2.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/package.json +52 -0
- package/src/capture/file-ops.js +93 -0
- package/src/capture/formatters.js +29 -0
- package/src/capture/import-pipeline.js +46 -0
- package/src/capture/importers.js +387 -0
- package/src/capture/index.js +199 -0
- package/src/capture/ingest-url.js +252 -0
- package/src/constants.js +8 -0
- package/src/core/categories.js +51 -0
- package/src/core/config.js +127 -0
- package/src/core/files.js +108 -0
- package/src/core/frontmatter.js +120 -0
- package/src/core/status.js +146 -0
- package/src/index/db.js +268 -0
- package/src/index/embed.js +101 -0
- package/src/index/index.js +451 -0
- package/src/index.js +62 -0
- package/src/retrieve/index.js +219 -0
- package/src/server/helpers.js +31 -0
- package/src/server/tools/context-status.js +104 -0
- package/src/server/tools/delete-context.js +53 -0
- package/src/server/tools/get-context.js +235 -0
- package/src/server/tools/ingest-url.js +99 -0
- package/src/server/tools/list-context.js +134 -0
- package/src/server/tools/save-context.js +297 -0
- package/src/server/tools/submit-feedback.js +55 -0
- package/src/server/tools.js +111 -0
- package/src/sync/sync.js +235 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* embed.js — Text embedding via HuggingFace transformers
|
|
3
|
+
*
|
|
4
|
+
* Graceful degradation: if the embedding model fails to load (offline, first run,
|
|
5
|
+
* disk issues), semantic search is disabled but FTS still works.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { join } from "node:path";
|
|
9
|
+
import { homedir } from "node:os";
|
|
10
|
+
import { mkdirSync } from "node:fs";
|
|
11
|
+
|
|
12
|
+
// Lazily-initialized feature-extraction pipeline singleton (see ensurePipeline()).
let extractor = null;

/** @type {null | true | false} null = unknown, true = working, false = failed */
let embedAvailable = null;
|
|
16
|
+
|
|
17
|
+
/**
 * Lazily load the HuggingFace feature-extraction pipeline (module singleton).
 *
 * Returns the cached extractor on subsequent calls. On load failure it marks
 * embedding unavailable and returns null; that failure short-circuits all
 * future attempts until resetEmbedPipeline() clears the flag.
 *
 * NOTE(review): two concurrent first calls can both reach the load path and
 * initialize the model twice (extractor is only set after the await) —
 * presumably harmless duplicated work; confirm if startup becomes a hot path.
 *
 * @returns {Promise<object | null>} the pipeline instance, or null when embedding is disabled
 */
async function ensurePipeline() {
  if (embedAvailable === false) return null;
  if (extractor) return extractor;

  try {
    // Dynamic import — @huggingface/transformers is optional (its transitive
    // dep `sharp` can fail to install on some platforms). When missing, the
    // server still works with full-text search only.
    const { pipeline, env } = await import("@huggingface/transformers");

    // Redirect model cache to ~/.context-mcp/models/ so it works when the
    // package is installed globally in a root-owned directory (e.g. /usr/lib/node_modules/).
    const modelCacheDir = join(homedir(), ".context-mcp", "models");
    mkdirSync(modelCacheDir, { recursive: true });
    env.cacheDir = modelCacheDir;

    // Logged via console.error so progress goes to stderr, not stdout.
    console.error(
      "[context-vault] Loading embedding model (first run may download ~22MB)...",
    );
    extractor = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
    embedAvailable = true;
    return extractor;
  } catch (e) {
    // Graceful degradation: remember the failure so every later call
    // returns quickly instead of re-attempting the import/download.
    embedAvailable = false;
    console.error(
      `[context-vault] Failed to load embedding model: ${e.message}`,
    );
    console.error(
      `[context-vault] Semantic search disabled. Full-text search still works.`,
    );
    return null;
  }
}
|
|
50
|
+
|
|
51
|
+
/**
 * Embed a single text into a Float32Array vector.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<Float32Array | null>} the embedding, or null when the
 *   pipeline is unavailable (semantic search disabled).
 * @throws {Error} when the pipeline yields an empty result; pipeline state is
 *   reset so the next call re-initializes from scratch.
 */
export async function embed(text) {
  const pipe = await ensurePipeline();
  if (pipe === null) return null;

  const output = await pipe([text], { pooling: "mean", normalize: true });
  const data = output?.data;

  // Health check — an empty result means the pipeline is broken. Drop the
  // cached instance and availability flag to force re-init on the next call.
  if (data == null || !data.length) {
    extractor = null;
    embedAvailable = null;
    throw new Error("Embedding pipeline returned empty result");
  }

  return new Float32Array(data);
}
|
|
64
|
+
|
|
65
|
+
/**
 * Batch embedding — embed multiple texts in a single pipeline call.
 * Returns an array of Float32Array embeddings (one per input text).
 * Returns array of nulls if embedding is unavailable.
 *
 * @param {string[]} texts - Texts to embed; may be empty.
 * @returns {Promise<(Float32Array | null)[]>}
 * @throws {Error} when the pipeline returns an empty result (state is reset so
 *   the next call re-initializes) or a non-integral per-text dimension.
 */
export async function embedBatch(texts) {
  if (!texts.length) return [];
  const ext = await ensurePipeline();
  if (!ext) return texts.map(() => null);

  const result = await ext(texts, { pooling: "mean", normalize: true });
  // Health check — force re-init on empty results
  if (!result?.data?.length) {
    extractor = null;
    embedAvailable = null;
    throw new Error("Embedding pipeline returned empty result");
  }
  const dim = result.data.length / texts.length;
  if (!Number.isInteger(dim) || dim <= 0) {
    throw new Error(
      `Unexpected embedding dimension: ${result.data.length} / ${texts.length} = ${dim}`,
    );
  }
  // Copy each row out of the result instead of aliasing its backing buffer.
  // The previous form — new Float32Array(result.data.buffer, i * dim * 4, dim) —
  // had two defects: it ignored result.data.byteOffset (reads the wrong rows
  // whenever the typed array is an offset view into a larger ArrayBuffer), and
  // the returned arrays shared the tensor's buffer, so any reuse of that buffer
  // by the pipeline would silently corrupt previously returned embeddings.
  // subarray() indexes in elements relative to the view (offset-safe), and the
  // Float32Array(typedArray) constructor makes an independent copy.
  return texts.map(
    (_, i) => new Float32Array(result.data.subarray(i * dim, (i + 1) * dim)),
  );
}
|
|
91
|
+
|
|
92
|
+
/** Force re-initialization on next embed call. */
export function resetEmbedPipeline() {
  // Clearing both the cached pipeline and the availability flag makes
  // ensurePipeline() retry the full load — including after a prior failure,
  // which would otherwise short-circuit to null forever.
  extractor = null;
  embedAvailable = null;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Check if embedding is currently available.
 *
 * @returns {null | true | false} null = load not yet attempted,
 *   true = pipeline loaded and working, false = load failed (FTS-only mode).
 */
export function isEmbedAvailable() {
  return embedAvailable;
}
|
|
@@ -0,0 +1,451 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Index Layer — Public API
|
|
3
|
+
*
|
|
4
|
+
* Owns the database as a derived index. Handles both bulk sync (reindex)
|
|
5
|
+
* and single-entry indexing (indexEntry) for write-through capture.
|
|
6
|
+
*
|
|
7
|
+
* Agent Constraint: Can import ../core. Owns db.js and embed.js.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { readFileSync, readdirSync, existsSync, unlinkSync } from "node:fs";
|
|
11
|
+
import { join, basename } from "node:path";
|
|
12
|
+
import { dirToKind, walkDir, ulid } from "../core/files.js";
|
|
13
|
+
import { categoryFor, CATEGORY_DIRS } from "../core/categories.js";
|
|
14
|
+
import {
|
|
15
|
+
parseFrontmatter,
|
|
16
|
+
parseEntryFromMarkdown,
|
|
17
|
+
} from "../core/frontmatter.js";
|
|
18
|
+
import { embedBatch } from "./embed.js";
|
|
19
|
+
|
|
20
|
+
// Top-level vault directories that are never scanned for indexable entries.
const EXCLUDED_DIRS = new Set(["projects", "_archive"]);
// File names (in any kind directory) that are skipped during reindex.
const EXCLUDED_FILES = new Set(["context.md", "memory.md", "README.md"]);

// Number of texts passed per embedBatch() call during reindex Phase 2.
const EMBED_BATCH_SIZE = 32;
|
|
24
|
+
|
|
25
|
+
/**
 * Index a single entry with idempotent upsert behavior.
 * Called immediately after Capture Layer writes the file.
 *
 * For entities with identity_key: uses upsertByIdentityKey if existing row found.
 *
 * Flow: (1) entity upsert by (kind, identity_key, user_id); (2) otherwise a
 * plain or encrypted insert, with a UNIQUE-constraint fallback that updates
 * the existing row by file_path; (3) resolve the SQLite rowid; (4) embed
 * title+body and upsert the vector used for semantic search.
 *
 * NOTE(review): positional argument order in each stmts.*.run() call must
 * match the prepared statements in index/db.js — confirm against that file
 * before reordering anything here.
 *
 * @param {import('../server/types.js').BaseCtx & Partial<import('../server/types.js').HostedCtxExtensions>} ctx
 * @param {{ id, kind, category, title, body, meta, tags, source, filePath, createdAt, identity_key, expires_at, userId }} entry
 */
export async function indexEntry(
  ctx,
  {
    id,
    kind,
    category,
    title,
    body,
    meta,
    tags,
    source,
    filePath,
    createdAt,
    identity_key,
    expires_at,
    userId,
  },
) {
  // Serialize structured fields once; null (not "null") when absent.
  const tagsJson = tags ? JSON.stringify(tags) : null;
  const metaJson = meta ? JSON.stringify(meta) : null;
  const cat = category || categoryFor(kind);
  const userIdVal = userId || null;

  let wasUpdate = false;

  // Entity upsert: check by (kind, identity_key, user_id) first
  if (cat === "entity" && identity_key) {
    const existing = ctx.stmts.getByIdentityKey.get(
      kind,
      identity_key,
      userIdVal,
    );
    if (existing) {
      ctx.stmts.upsertByIdentityKey.run(
        title || null,
        body,
        metaJson,
        tagsJson,
        source || "claude-code",
        cat,
        filePath,
        expires_at || null,
        kind,
        identity_key,
        userIdVal,
      );
      wasUpdate = true;
    }
  }

  if (!wasUpdate) {
    // Prepare encryption if ctx.encrypt is available
    let encrypted = null;
    if (ctx.encrypt) {
      encrypted = await ctx.encrypt({ title, body, meta });
    }

    try {
      if (encrypted) {
        // Encrypted insert: store preview in body column for FTS, full content in encrypted columns
        const bodyPreview = body.slice(0, 200);
        ctx.stmts.insertEntryEncrypted.run(
          id,
          userIdVal,
          kind,
          cat,
          title || null,
          bodyPreview,
          metaJson,
          tagsJson,
          source || "claude-code",
          filePath,
          identity_key || null,
          expires_at || null,
          createdAt,
          encrypted.body_encrypted,
          encrypted.title_encrypted,
          encrypted.meta_encrypted,
          encrypted.iv,
        );
      } else {
        ctx.stmts.insertEntry.run(
          id,
          userIdVal,
          kind,
          cat,
          title || null,
          body,
          metaJson,
          tagsJson,
          source || "claude-code",
          filePath,
          identity_key || null,
          expires_at || null,
          createdAt,
        );
      }
    } catch (e) {
      // Idempotency fallback: a duplicate id or file_path surfaces as a
      // UNIQUE constraint error — convert the insert into an update keyed
      // by file_path.
      // NOTE(review): assumes e.message is always a string — verify the DB
      // driver never throws without a message.
      if (e.message.includes("UNIQUE constraint")) {
        ctx.stmts.updateEntry.run(
          title || null,
          body,
          metaJson,
          tagsJson,
          source || "claude-code",
          cat,
          identity_key || null,
          expires_at || null,
          filePath,
        );
        wasUpdate = true;
      } else {
        throw e;
      }
    }
  }

  // After update, get rowid by file_path (since id might differ); otherwise by id
  const rowidResult = wasUpdate
    ? ctx.stmts.getRowidByPath.get(filePath)
    : ctx.stmts.getRowid.get(id);

  // == null intentionally matches both null and undefined.
  if (!rowidResult || rowidResult.rowid == null) {
    throw new Error(
      `Could not find rowid for entry: ${wasUpdate ? `file_path=${filePath}` : `id=${id}`}`,
    );
  }

  // SQLite rowids are positive integers; Number() also normalizes a BigInt rowid.
  const rowid = Number(rowidResult.rowid);
  if (!Number.isFinite(rowid) || rowid < 1) {
    throw new Error(
      `Invalid rowid retrieved: ${rowidResult.rowid} (type: ${typeof rowidResult.rowid})`,
    );
  }

  // Embeddings are always generated from plaintext (before encryption)
  const embeddingText = [title, body].filter(Boolean).join(" ");
  const embedding = await ctx.embed(embeddingText);

  // Upsert vec: delete old if exists, then insert new (skip if embedding unavailable)
  if (embedding) {
    try {
      ctx.deleteVec(rowid);
    } catch {
      /* no-op if not found */
    }
    ctx.insertVec(rowid, embedding);
  }
}
|
|
183
|
+
|
|
184
|
+
/**
 * Bulk reindex: sync vault directory into the database.
 * P2: Wrapped in a transaction for atomicity.
 * P3: Detects title/tag/meta changes, not just body.
 * P4: Batches embedding calls for performance.
 *
 * Phase 1 (inside BEGIN/COMMIT): adds, updates, deletions, orphaned-kind
 * cleanup, and expiry pruning. Phase 2 (after COMMIT): vector deletion and
 * batched embedding, so embedding failures cannot roll back the DB.
 *
 * @param {import('../server/types.js').BaseCtx} ctx
 * @param {{ fullSync?: boolean }} opts — fullSync=true adds/updates/deletes; false=add-only
 * @returns {Promise<{added: number, updated: number, removed: number, unchanged: number}>}
 */
export async function reindex(ctx, opts = {}) {
  const { fullSync = true } = opts;
  const stats = { added: 0, updated: 0, removed: 0, unchanged: 0 };

  // Nothing to do (and nothing to delete) when the vault doesn't exist yet.
  if (!existsSync(ctx.config.vaultDir)) return stats;

  // Use INSERT OR IGNORE for reindex — handles files with duplicate frontmatter IDs
  // user_id is NULL for reindex (always local mode)
  const upsertEntry = ctx.db.prepare(
    `INSERT OR IGNORE INTO vault (id, user_id, kind, category, title, body, meta, tags, source, file_path, identity_key, expires_at, created_at) VALUES (?, NULL, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
  );

  // Auto-discover kind directories, supporting both:
  // - Nested: knowledge/insights/, events/sessions/ (category dirs at top level)
  // - Flat: insights/, decisions/ (legacy — kind dirs at top level)
  const kindEntries = []; // { kind, dir }
  const topDirs = readdirSync(ctx.config.vaultDir, {
    withFileTypes: true,
  }).filter(
    (d) =>
      d.isDirectory() && !EXCLUDED_DIRS.has(d.name) && !d.name.startsWith("_"),
  );

  for (const d of topDirs) {
    if (CATEGORY_DIRS.has(d.name)) {
      // Category directory — look one level deeper for kind directories
      const catDir = join(ctx.config.vaultDir, d.name);
      const subDirs = readdirSync(catDir, { withFileTypes: true }).filter(
        (sd) => sd.isDirectory() && !sd.name.startsWith("_"),
      );
      for (const sd of subDirs) {
        kindEntries.push({
          kind: dirToKind(sd.name),
          dir: join(catDir, sd.name),
        });
      }
    } else {
      // Legacy flat structure — top-level dir is a kind dir
      kindEntries.push({
        kind: dirToKind(d.name),
        dir: join(ctx.config.vaultDir, d.name),
      });
    }
  }

  // Phase 1: Sync DB ops in a transaction — FTS is searchable immediately after COMMIT.
  // Phase 2: Async embedding runs post-transaction so it can't hold the write lock
  // or roll back DB state on failure.
  const pendingEmbeds = []; // { rowid, text }
  const staleVecRowids = []; // rowids whose old vectors need deleting before re-embed

  ctx.db.exec("BEGIN");
  try {
    for (const { kind, dir } of kindEntries) {
      const category = categoryFor(kind);
      const mdFiles = walkDir(dir).filter(
        (f) => !EXCLUDED_FILES.has(basename(f.filePath)),
      );

      // P3: Fetch all mutable fields for change detection
      const dbRows = ctx.db
        .prepare(
          "SELECT id, file_path, body, title, tags, meta FROM vault WHERE kind = ?",
        )
        .all(kind);
      const dbByPath = new Map(dbRows.map((r) => [r.file_path, r]));
      const diskPaths = new Set(mdFiles.map((e) => e.filePath));

      for (const { filePath, relDir } of mdFiles) {
        const existing = dbByPath.get(filePath);

        // In add-only mode, skip files already in DB
        if (!fullSync && existing) {
          stats.unchanged++;
          continue;
        }

        const raw = readFileSync(filePath, "utf-8");
        // Only files that begin with a YAML frontmatter fence are entries.
        if (!raw.startsWith("---\n")) {
          console.error(`[reindex] skipping (no frontmatter): ${filePath}`);
          continue;
        }
        const { meta: fmMeta, body: rawBody } = parseFrontmatter(raw);
        const parsed = parseEntryFromMarkdown(kind, rawBody, fmMeta);

        // Extract identity_key and expires_at from frontmatter
        const identity_key = fmMeta.identity_key || null;
        const expires_at = fmMeta.expires_at || null;

        // Derive folder from disk location (source of truth)
        const meta = { ...(parsed.meta || {}) };
        if (relDir) meta.folder = relDir;
        else delete meta.folder;
        const metaJson = Object.keys(meta).length ? JSON.stringify(meta) : null;

        if (!existing) {
          // New file — add to DB (OR IGNORE if ID already exists at another path)
          const id = fmMeta.id || ulid();
          const tagsJson = fmMeta.tags ? JSON.stringify(fmMeta.tags) : null;
          const created = fmMeta.created || new Date().toISOString();

          const result = upsertEntry.run(
            id,
            kind,
            category,
            parsed.title || null,
            parsed.body,
            metaJson,
            tagsJson,
            fmMeta.source || "file",
            filePath,
            identity_key,
            expires_at,
            created,
          );
          // changes === 0 means OR IGNORE hit a duplicate id — count as unchanged.
          if (result.changes > 0) {
            const rowid = ctx.stmts.getRowid.get(id).rowid;
            const embeddingText = [parsed.title, parsed.body]
              .filter(Boolean)
              .join(" ");
            pendingEmbeds.push({ rowid, text: embeddingText });
            stats.added++;
          } else {
            stats.unchanged++;
          }
        } else if (fullSync) {
          // P3: Compare all mutable fields, not just body
          const tagsJson = fmMeta.tags ? JSON.stringify(fmMeta.tags) : null;
          const titleChanged =
            (parsed.title || null) !== (existing.title || null);
          const bodyChanged = existing.body !== parsed.body;
          const tagsChanged = tagsJson !== (existing.tags || null);
          const metaChanged = metaJson !== (existing.meta || null);

          if (bodyChanged || titleChanged || tagsChanged || metaChanged) {
            ctx.stmts.updateEntry.run(
              parsed.title || null,
              parsed.body,
              metaJson,
              tagsJson,
              fmMeta.source || "file",
              category,
              identity_key,
              expires_at,
              filePath,
            );

            // Queue re-embed if title or body changed (vector ops deferred to Phase 2)
            if (bodyChanged || titleChanged) {
              const rowid = ctx.stmts.getRowid.get(existing.id)?.rowid;
              if (rowid) {
                staleVecRowids.push(rowid);
                const embeddingText = [parsed.title, parsed.body]
                  .filter(Boolean)
                  .join(" ");
                pendingEmbeds.push({ rowid, text: embeddingText });
              }
            }
            stats.updated++;
          } else {
            stats.unchanged++;
          }
        } else {
          stats.unchanged++;
        }
      }

      // Find deleted files (in DB but not on disk) — only in fullSync mode
      if (fullSync) {
        for (const [dbPath, row] of dbByPath) {
          if (!diskPaths.has(dbPath)) {
            const vRowid = ctx.stmts.getRowid.get(row.id)?.rowid;
            if (vRowid) {
              // Best-effort vector cleanup; missing vector is not an error.
              try {
                ctx.deleteVec(vRowid);
              } catch {}
            }
            ctx.stmts.deleteEntry.run(row.id);
            stats.removed++;
          }
        }
      }
    }

    // Clean up entries for kinds whose directories no longer exist on disk
    if (fullSync) {
      const indexedKinds = new Set(kindEntries.map((ke) => ke.kind));
      const allDbKinds = ctx.db
        .prepare("SELECT DISTINCT kind FROM vault")
        .all();
      for (const { kind } of allDbKinds) {
        if (!indexedKinds.has(kind)) {
          const orphaned = ctx.db
            .prepare("SELECT id, rowid FROM vault WHERE kind = ?")
            .all(kind);
          for (const row of orphaned) {
            // Best-effort vector cleanup before removing the row.
            try {
              ctx.deleteVec(row.rowid);
            } catch {}
            ctx.stmts.deleteEntry.run(row.id);
            stats.removed++;
          }
        }
      }
    }

    // Prune expired entries
    const expired = ctx.db
      .prepare(
        "SELECT id, file_path FROM vault WHERE expires_at IS NOT NULL AND expires_at <= datetime('now')",
      )
      .all();

    for (const row of expired) {
      if (row.file_path) {
        // Best-effort file deletion; the file may already be gone.
        try {
          unlinkSync(row.file_path);
        } catch {}
      }
      const vRowid = ctx.stmts.getRowid.get(row.id)?.rowid;
      if (vRowid) {
        try {
          ctx.deleteVec(vRowid);
        } catch {}
      }
      ctx.stmts.deleteEntry.run(row.id);
      stats.removed++;
    }

    ctx.db.exec("COMMIT");
  } catch (e) {
    // Any Phase 1 failure rolls back the whole sync, leaving the index as it was.
    ctx.db.exec("ROLLBACK");
    throw e;
  }

  // Phase 2: Async embedding — runs after COMMIT so FTS is already searchable.
  // Failures here are non-fatal; semantic search catches up on next reindex.

  // Delete stale vectors for updated entries before re-embedding
  for (const rowid of staleVecRowids) {
    try {
      ctx.deleteVec(rowid);
    } catch {}
  }

  // Batch embed all pending texts
  for (let i = 0; i < pendingEmbeds.length; i += EMBED_BATCH_SIZE) {
    const batch = pendingEmbeds.slice(i, i + EMBED_BATCH_SIZE);
    const embeddings = await embedBatch(batch.map((e) => e.text));
    for (let j = 0; j < batch.length; j++) {
      // embedBatch returns null per text when embedding is unavailable.
      if (embeddings[j]) {
        ctx.insertVec(batch[j].rowid, embeddings[j]);
      }
    }
  }

  return stats;
}
|
package/src/index.js
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @context-vault/core — Shared core for context-vault
|
|
3
|
+
*
|
|
4
|
+
* Re-exports all public APIs from capture, index, retrieve, server, and core layers.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
// Core utilities
export {
  categoryFor,
  categoryDirFor,
  CATEGORY_DIRS,
} from "./core/categories.js";
export { parseArgs, resolveConfig } from "./core/config.js";
export {
  ulid,
  slugify,
  kindToDir,
  dirToKind,
  normalizeKind,
  kindToPath,
  safeJoin,
  walkDir,
} from "./core/files.js";
export {
  formatFrontmatter,
  parseFrontmatter,
  extractCustomMeta,
  parseEntryFromMarkdown,
} from "./core/frontmatter.js";
export { gatherVaultStatus } from "./core/status.js";

// Capture layer
export {
  writeEntry,
  updateEntryFile,
  captureAndIndex,
} from "./capture/index.js";
export { writeEntryFile } from "./capture/file-ops.js";
export { formatBody } from "./capture/formatters.js";

// Index layer
export {
  SCHEMA_DDL,
  initDatabase,
  prepareStatements,
  insertVec,
  deleteVec,
} from "./index/db.js";
// NOTE(review): index/embed.js also exports isEmbedAvailable, which is not
// re-exported here — confirm whether that omission is intentional.
export { embed, embedBatch, resetEmbedPipeline } from "./index/embed.js";
export { indexEntry, reindex } from "./index/index.js";

// Retrieve layer
export { hybridSearch } from "./retrieve/index.js";

// Server tools & helpers
export { registerTools } from "./server/tools.js";
export {
  ok,
  err,
  ensureVaultExists,
  ensureValidKind,
} from "./server/helpers.js";
|