@context-vault/core 3.1.6 → 3.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/capture.d.ts +1 -1
- package/dist/capture.d.ts.map +1 -1
- package/dist/capture.js +34 -47
- package/dist/capture.js.map +1 -1
- package/dist/categories.js +30 -30
- package/dist/config.d.ts +1 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +37 -43
- package/dist/config.js.map +1 -1
- package/dist/constants.d.ts +1 -1
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +4 -4
- package/dist/constants.js.map +1 -1
- package/dist/db.d.ts +2 -2
- package/dist/db.d.ts.map +1 -1
- package/dist/db.js +21 -20
- package/dist/db.js.map +1 -1
- package/dist/embed.d.ts.map +1 -1
- package/dist/embed.js +11 -11
- package/dist/embed.js.map +1 -1
- package/dist/files.d.ts.map +1 -1
- package/dist/files.js +12 -13
- package/dist/files.js.map +1 -1
- package/dist/formatters.js +5 -5
- package/dist/frontmatter.d.ts.map +1 -1
- package/dist/frontmatter.js +23 -23
- package/dist/frontmatter.js.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +58 -46
- package/dist/index.js.map +1 -1
- package/dist/ingest-url.d.ts.map +1 -1
- package/dist/ingest-url.js +30 -33
- package/dist/ingest-url.js.map +1 -1
- package/dist/main.d.ts +13 -13
- package/dist/main.d.ts.map +1 -1
- package/dist/main.js +12 -12
- package/dist/main.js.map +1 -1
- package/dist/search.d.ts +1 -1
- package/dist/search.d.ts.map +1 -1
- package/dist/search.js +20 -22
- package/dist/search.js.map +1 -1
- package/dist/types.d.ts +1 -1
- package/package.json +1 -1
- package/src/capture.ts +44 -81
- package/src/categories.ts +30 -30
- package/src/config.ts +45 -60
- package/src/constants.ts +8 -10
- package/src/db.ts +37 -56
- package/src/embed.ts +15 -26
- package/src/files.ts +13 -16
- package/src/formatters.ts +5 -5
- package/src/frontmatter.ts +26 -30
- package/src/index.ts +94 -100
- package/src/ingest-url.ts +56 -93
- package/src/main.ts +13 -18
- package/src/search.ts +34 -56
- package/src/types.ts +1 -1
package/src/frontmatter.ts
CHANGED
|
@@ -1,37 +1,35 @@
|
|
|
1
1
|
const NEEDS_QUOTING = /[:#'"{}[\],>|&*?!@`]/;
|
|
2
2
|
|
|
3
3
|
export function formatFrontmatter(meta: Record<string, unknown>): string {
|
|
4
|
-
const lines = [
|
|
4
|
+
const lines = ['---'];
|
|
5
5
|
for (const [k, v] of Object.entries(meta)) {
|
|
6
6
|
if (v === undefined || v === null) continue;
|
|
7
7
|
if (Array.isArray(v)) {
|
|
8
|
-
lines.push(`${k}: [${v.map((i) => JSON.stringify(i)).join(
|
|
8
|
+
lines.push(`${k}: [${v.map((i) => JSON.stringify(i)).join(', ')}]`);
|
|
9
9
|
} else {
|
|
10
10
|
const str = String(v);
|
|
11
|
-
lines.push(
|
|
12
|
-
`${k}: ${NEEDS_QUOTING.test(str) ? JSON.stringify(str) : str}`,
|
|
13
|
-
);
|
|
11
|
+
lines.push(`${k}: ${NEEDS_QUOTING.test(str) ? JSON.stringify(str) : str}`);
|
|
14
12
|
}
|
|
15
13
|
}
|
|
16
|
-
lines.push(
|
|
17
|
-
return lines.join(
|
|
14
|
+
lines.push('---');
|
|
15
|
+
return lines.join('\n');
|
|
18
16
|
}
|
|
19
17
|
|
|
20
18
|
export function parseFrontmatter(text: string): {
|
|
21
19
|
meta: Record<string, unknown>;
|
|
22
20
|
body: string;
|
|
23
21
|
} {
|
|
24
|
-
const normalized = text.replace(/\r\n/g,
|
|
22
|
+
const normalized = text.replace(/\r\n/g, '\n');
|
|
25
23
|
const match = normalized.match(/^---\n([\s\S]*?)\n---\n?([\s\S]*)$/);
|
|
26
24
|
if (!match) return { meta: {}, body: normalized.trim() };
|
|
27
25
|
const meta: Record<string, unknown> = {};
|
|
28
|
-
for (const line of match[1].split(
|
|
29
|
-
const idx = line.indexOf(
|
|
26
|
+
for (const line of match[1].split('\n')) {
|
|
27
|
+
const idx = line.indexOf(':');
|
|
30
28
|
if (idx === -1) continue;
|
|
31
29
|
const key = line.slice(0, idx).trim();
|
|
32
30
|
let val: unknown = line.slice(idx + 1).trim() as string;
|
|
33
31
|
if (
|
|
34
|
-
typeof val ===
|
|
32
|
+
typeof val === 'string' &&
|
|
35
33
|
val.length >= 2 &&
|
|
36
34
|
val.startsWith('"') &&
|
|
37
35
|
val.endsWith('"') &&
|
|
@@ -43,14 +41,14 @@ export function parseFrontmatter(text: string): {
|
|
|
43
41
|
/* keep as-is */
|
|
44
42
|
}
|
|
45
43
|
}
|
|
46
|
-
if (typeof val ===
|
|
44
|
+
if (typeof val === 'string' && val.startsWith('[') && val.endsWith(']')) {
|
|
47
45
|
try {
|
|
48
46
|
val = JSON.parse(val);
|
|
49
47
|
} catch {
|
|
50
48
|
val = (val as string)
|
|
51
49
|
.slice(1, -1)
|
|
52
|
-
.split(
|
|
53
|
-
.map((s: string) => s.trim().replace(/^"|"$/g,
|
|
50
|
+
.split(',')
|
|
51
|
+
.map((s: string) => s.trim().replace(/^"|"$/g, ''));
|
|
54
52
|
}
|
|
55
53
|
}
|
|
56
54
|
meta[key] = val;
|
|
@@ -59,20 +57,18 @@ export function parseFrontmatter(text: string): {
|
|
|
59
57
|
}
|
|
60
58
|
|
|
61
59
|
const RESERVED_FM_KEYS = new Set([
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
60
|
+
'id',
|
|
61
|
+
'tags',
|
|
62
|
+
'source',
|
|
63
|
+
'created',
|
|
64
|
+
'updated',
|
|
65
|
+
'identity_key',
|
|
66
|
+
'expires_at',
|
|
67
|
+
'supersedes',
|
|
68
|
+
'related_to',
|
|
71
69
|
]);
|
|
72
70
|
|
|
73
|
-
export function extractCustomMeta(
|
|
74
|
-
fmMeta: Record<string, unknown>,
|
|
75
|
-
): Record<string, unknown> | null {
|
|
71
|
+
export function extractCustomMeta(fmMeta: Record<string, unknown>): Record<string, unknown> | null {
|
|
76
72
|
const custom: Record<string, unknown> = {};
|
|
77
73
|
for (const [k, v] of Object.entries(fmMeta)) {
|
|
78
74
|
if (!RESERVED_FM_KEYS.has(k)) custom[k] = v;
|
|
@@ -83,17 +79,17 @@ export function extractCustomMeta(
|
|
|
83
79
|
export function parseEntryFromMarkdown(
|
|
84
80
|
kind: string,
|
|
85
81
|
body: string,
|
|
86
|
-
fmMeta: Record<string, unknown
|
|
82
|
+
fmMeta: Record<string, unknown>
|
|
87
83
|
): {
|
|
88
84
|
title: string | null;
|
|
89
85
|
body: string;
|
|
90
86
|
meta: Record<string, unknown> | null;
|
|
91
87
|
} {
|
|
92
|
-
if (kind ===
|
|
88
|
+
if (kind === 'insight') {
|
|
93
89
|
return { title: null, body, meta: extractCustomMeta(fmMeta) };
|
|
94
90
|
}
|
|
95
91
|
|
|
96
|
-
if (kind ===
|
|
92
|
+
if (kind === 'decision') {
|
|
97
93
|
const titleMatch = body.match(/^## Decision\s*\n+([\s\S]*?)(?=\n## |\n*$)/);
|
|
98
94
|
const rationaleMatch = body.match(/## Rationale\s*\n+([\s\S]*?)$/);
|
|
99
95
|
const title = titleMatch ? titleMatch[1].trim() : body.slice(0, 100);
|
|
@@ -101,7 +97,7 @@ export function parseEntryFromMarkdown(
|
|
|
101
97
|
return { title, body: rationale, meta: extractCustomMeta(fmMeta) };
|
|
102
98
|
}
|
|
103
99
|
|
|
104
|
-
if (kind ===
|
|
100
|
+
if (kind === 'pattern') {
|
|
105
101
|
const titleMatch = body.match(/^# (.+)/);
|
|
106
102
|
const title = titleMatch ? titleMatch[1].trim() : body.slice(0, 80);
|
|
107
103
|
const codeMatch = body.match(/```[\w]*\n([\s\S]*?)```/);
|
package/src/index.ts
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
import { readFileSync, readdirSync, existsSync, unlinkSync } from
|
|
2
|
-
import { join, basename } from
|
|
3
|
-
import { dirToKind, walkDir, ulid } from
|
|
4
|
-
import { categoryFor, defaultTierFor, CATEGORY_DIRS } from
|
|
5
|
-
import { parseFrontmatter, parseEntryFromMarkdown } from
|
|
6
|
-
import { embedBatch } from
|
|
7
|
-
import type { BaseCtx, IndexEntryInput, ReindexStats } from
|
|
8
|
-
|
|
9
|
-
const EXCLUDED_DIRS = new Set([
|
|
10
|
-
const EXCLUDED_FILES = new Set([
|
|
1
|
+
import { readFileSync, readdirSync, existsSync, unlinkSync } from 'node:fs';
|
|
2
|
+
import { join, basename } from 'node:path';
|
|
3
|
+
import { dirToKind, walkDir, ulid } from './files.js';
|
|
4
|
+
import { categoryFor, defaultTierFor, CATEGORY_DIRS } from './categories.js';
|
|
5
|
+
import { parseFrontmatter, parseEntryFromMarkdown } from './frontmatter.js';
|
|
6
|
+
import { embedBatch } from './embed.js';
|
|
7
|
+
import type { BaseCtx, IndexEntryInput, ReindexStats } from './types.js';
|
|
8
|
+
|
|
9
|
+
const EXCLUDED_DIRS = new Set(['projects', '_archive']);
|
|
10
|
+
const EXCLUDED_FILES = new Set(['context.md', 'memory.md', 'README.md']);
|
|
11
11
|
const EMBED_BATCH_SIZE = 32;
|
|
12
12
|
|
|
13
13
|
export async function indexEntry(
|
|
@@ -16,7 +16,7 @@ export async function indexEntry(
|
|
|
16
16
|
supersedes?: string[] | null;
|
|
17
17
|
related_to?: string[] | null;
|
|
18
18
|
},
|
|
19
|
-
precomputedEmbedding?: Float32Array | null
|
|
19
|
+
precomputedEmbedding?: Float32Array | null
|
|
20
20
|
): Promise<void> {
|
|
21
21
|
const {
|
|
22
22
|
id,
|
|
@@ -45,7 +45,7 @@ export async function indexEntry(
|
|
|
45
45
|
|
|
46
46
|
let wasUpdate = false;
|
|
47
47
|
|
|
48
|
-
if (cat ===
|
|
48
|
+
if (cat === 'entity' && identity_key) {
|
|
49
49
|
const existing = ctx.stmts.getByIdentityKey.get(kind, identity_key) as
|
|
50
50
|
| Record<string, unknown>
|
|
51
51
|
| undefined;
|
|
@@ -55,13 +55,13 @@ export async function indexEntry(
|
|
|
55
55
|
body,
|
|
56
56
|
metaJson,
|
|
57
57
|
tagsJson,
|
|
58
|
-
source ||
|
|
58
|
+
source || 'claude-code',
|
|
59
59
|
cat,
|
|
60
60
|
filePath,
|
|
61
61
|
expires_at || null,
|
|
62
62
|
sourceFilesJson,
|
|
63
63
|
kind,
|
|
64
|
-
identity_key
|
|
64
|
+
identity_key
|
|
65
65
|
);
|
|
66
66
|
wasUpdate = true;
|
|
67
67
|
}
|
|
@@ -77,38 +77,35 @@ export async function indexEntry(
|
|
|
77
77
|
body,
|
|
78
78
|
metaJson,
|
|
79
79
|
tagsJson,
|
|
80
|
-
source ||
|
|
80
|
+
source || 'claude-code',
|
|
81
81
|
filePath,
|
|
82
82
|
identity_key || null,
|
|
83
83
|
expires_at || null,
|
|
84
84
|
createdAt,
|
|
85
85
|
createdAt,
|
|
86
86
|
sourceFilesJson,
|
|
87
|
-
effectiveTier
|
|
87
|
+
effectiveTier
|
|
88
88
|
);
|
|
89
89
|
} catch (e) {
|
|
90
|
-
if ((e as Error).message.includes(
|
|
90
|
+
if ((e as Error).message.includes('UNIQUE constraint')) {
|
|
91
91
|
ctx.stmts.updateEntry.run(
|
|
92
92
|
title || null,
|
|
93
93
|
body,
|
|
94
94
|
metaJson,
|
|
95
95
|
tagsJson,
|
|
96
|
-
source ||
|
|
96
|
+
source || 'claude-code',
|
|
97
97
|
cat,
|
|
98
98
|
identity_key || null,
|
|
99
99
|
expires_at || null,
|
|
100
|
-
filePath
|
|
100
|
+
filePath
|
|
101
101
|
);
|
|
102
102
|
if (sourceFilesJson !== null && ctx.stmts.updateSourceFiles) {
|
|
103
|
-
const entryRow = ctx.stmts.getRowidByPath.get(filePath) as
|
|
104
|
-
| { rowid: number }
|
|
105
|
-
| undefined;
|
|
103
|
+
const entryRow = ctx.stmts.getRowidByPath.get(filePath) as { rowid: number } | undefined;
|
|
106
104
|
if (entryRow) {
|
|
107
105
|
const idRow = ctx.db
|
|
108
|
-
.prepare(
|
|
106
|
+
.prepare('SELECT id FROM vault WHERE file_path = ?')
|
|
109
107
|
.get(filePath) as { id: string } | undefined;
|
|
110
|
-
if (idRow)
|
|
111
|
-
ctx.stmts.updateSourceFiles.run(sourceFilesJson, idRow.id);
|
|
108
|
+
if (idRow) ctx.stmts.updateSourceFiles.run(sourceFilesJson, idRow.id);
|
|
112
109
|
}
|
|
113
110
|
}
|
|
114
111
|
wasUpdate = true;
|
|
@@ -124,27 +121,27 @@ export async function indexEntry(
|
|
|
124
121
|
|
|
125
122
|
if (!rowidResult || rowidResult.rowid == null) {
|
|
126
123
|
throw new Error(
|
|
127
|
-
`Could not find rowid for entry: ${wasUpdate ? `file_path=${filePath}` : `id=${id}`}
|
|
124
|
+
`Could not find rowid for entry: ${wasUpdate ? `file_path=${filePath}` : `id=${id}`}`
|
|
128
125
|
);
|
|
129
126
|
}
|
|
130
127
|
|
|
131
128
|
const rowid = Number(rowidResult.rowid);
|
|
132
129
|
if (!Number.isFinite(rowid) || rowid < 1) {
|
|
133
130
|
throw new Error(
|
|
134
|
-
`Invalid rowid retrieved: ${rowidResult.rowid} (type: ${typeof rowidResult.rowid})
|
|
131
|
+
`Invalid rowid retrieved: ${rowidResult.rowid} (type: ${typeof rowidResult.rowid})`
|
|
135
132
|
);
|
|
136
133
|
}
|
|
137
134
|
|
|
138
|
-
if (cat !==
|
|
135
|
+
if (cat !== 'event') {
|
|
139
136
|
let embedding: Float32Array | null = null;
|
|
140
137
|
if (precomputedEmbedding !== undefined) {
|
|
141
138
|
embedding = precomputedEmbedding;
|
|
142
139
|
} else {
|
|
143
140
|
try {
|
|
144
|
-
embedding = await ctx.embed([title, body].filter(Boolean).join(
|
|
141
|
+
embedding = await ctx.embed([title, body].filter(Boolean).join(' '));
|
|
145
142
|
} catch (embedErr) {
|
|
146
143
|
console.warn(
|
|
147
|
-
`[context-vault] embed() failed for entry ${id} — skipping vec insert: ${(embedErr as Error).message}
|
|
144
|
+
`[context-vault] embed() failed for entry ${id} — skipping vec insert: ${(embedErr as Error).message}`
|
|
148
145
|
);
|
|
149
146
|
}
|
|
150
147
|
}
|
|
@@ -163,7 +160,7 @@ export async function indexEntry(
|
|
|
163
160
|
export async function pruneExpired(ctx: BaseCtx): Promise<number> {
|
|
164
161
|
const expired = ctx.db
|
|
165
162
|
.prepare(
|
|
166
|
-
"SELECT id, file_path FROM vault WHERE expires_at IS NOT NULL AND expires_at <= datetime('now')"
|
|
163
|
+
"SELECT id, file_path FROM vault WHERE expires_at IS NOT NULL AND expires_at <= datetime('now')"
|
|
167
164
|
)
|
|
168
165
|
.all() as { id: string; file_path: string | null }[];
|
|
169
166
|
|
|
@@ -173,9 +170,7 @@ export async function pruneExpired(ctx: BaseCtx): Promise<number> {
|
|
|
173
170
|
unlinkSync(row.file_path);
|
|
174
171
|
} catch {}
|
|
175
172
|
}
|
|
176
|
-
const vRowid = (
|
|
177
|
-
ctx.stmts.getRowid.get(row.id) as { rowid: number } | undefined
|
|
178
|
-
)?.rowid;
|
|
173
|
+
const vRowid = (ctx.stmts.getRowid.get(row.id) as { rowid: number } | undefined)?.rowid;
|
|
179
174
|
if (vRowid) {
|
|
180
175
|
try {
|
|
181
176
|
ctx.deleteVec(Number(vRowid));
|
|
@@ -189,7 +184,7 @@ export async function pruneExpired(ctx: BaseCtx): Promise<number> {
|
|
|
189
184
|
|
|
190
185
|
export async function reindex(
|
|
191
186
|
ctx: BaseCtx,
|
|
192
|
-
opts: { fullSync?: boolean } = {}
|
|
187
|
+
opts: { fullSync?: boolean } = {}
|
|
193
188
|
): Promise<ReindexStats> {
|
|
194
189
|
const { fullSync = true } = opts;
|
|
195
190
|
const stats: ReindexStats = {
|
|
@@ -202,22 +197,19 @@ export async function reindex(
|
|
|
202
197
|
if (!existsSync(ctx.config.vaultDir)) return stats;
|
|
203
198
|
|
|
204
199
|
const upsertEntry = ctx.db.prepare(
|
|
205
|
-
`INSERT OR IGNORE INTO vault (id, kind, category, title, body, meta, tags, source, file_path, identity_key, expires_at, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
200
|
+
`INSERT OR IGNORE INTO vault (id, kind, category, title, body, meta, tags, source, file_path, identity_key, expires_at, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
206
201
|
);
|
|
207
202
|
|
|
208
203
|
const kindEntries: { kind: string; dir: string }[] = [];
|
|
209
204
|
const topDirs = readdirSync(ctx.config.vaultDir, {
|
|
210
205
|
withFileTypes: true,
|
|
211
|
-
}).filter(
|
|
212
|
-
(d) =>
|
|
213
|
-
d.isDirectory() && !EXCLUDED_DIRS.has(d.name) && !d.name.startsWith("_"),
|
|
214
|
-
);
|
|
206
|
+
}).filter((d) => d.isDirectory() && !EXCLUDED_DIRS.has(d.name) && !d.name.startsWith('_'));
|
|
215
207
|
|
|
216
208
|
for (const d of topDirs) {
|
|
217
209
|
if (CATEGORY_DIRS.has(d.name)) {
|
|
218
210
|
const catDir = join(ctx.config.vaultDir, d.name);
|
|
219
211
|
const subDirs = readdirSync(catDir, { withFileTypes: true }).filter(
|
|
220
|
-
(sd) => sd.isDirectory() && !sd.name.startsWith(
|
|
212
|
+
(sd) => sd.isDirectory() && !sd.name.startsWith('_')
|
|
221
213
|
);
|
|
222
214
|
for (const sd of subDirs) {
|
|
223
215
|
kindEntries.push({
|
|
@@ -235,17 +227,15 @@ export async function reindex(
|
|
|
235
227
|
|
|
236
228
|
const pendingEmbeds: { rowid: number; text: string }[] = [];
|
|
237
229
|
|
|
238
|
-
ctx.db.exec(
|
|
230
|
+
ctx.db.exec('BEGIN');
|
|
239
231
|
try {
|
|
240
232
|
for (const { kind, dir } of kindEntries) {
|
|
241
233
|
const category = categoryFor(kind);
|
|
242
|
-
const mdFiles = walkDir(dir).filter(
|
|
243
|
-
(f) => !EXCLUDED_FILES.has(basename(f.filePath)),
|
|
244
|
-
);
|
|
234
|
+
const mdFiles = walkDir(dir).filter((f) => !EXCLUDED_FILES.has(basename(f.filePath)));
|
|
245
235
|
|
|
246
236
|
const dbRows = ctx.db
|
|
247
237
|
.prepare(
|
|
248
|
-
|
|
238
|
+
'SELECT id, file_path, body, title, tags, meta, related_to FROM vault WHERE kind = ?'
|
|
249
239
|
)
|
|
250
240
|
.all(kind) as Record<string, unknown>[];
|
|
251
241
|
const dbByPath = new Map(dbRows.map((r) => [r.file_path as string, r]));
|
|
@@ -259,8 +249,8 @@ export async function reindex(
|
|
|
259
249
|
continue;
|
|
260
250
|
}
|
|
261
251
|
|
|
262
|
-
const raw = readFileSync(filePath,
|
|
263
|
-
if (!raw.startsWith(
|
|
252
|
+
const raw = readFileSync(filePath, 'utf-8');
|
|
253
|
+
if (!raw.startsWith('---\n')) {
|
|
264
254
|
console.error(`[reindex] skipping (no frontmatter): ${filePath}`);
|
|
265
255
|
continue;
|
|
266
256
|
}
|
|
@@ -272,9 +262,7 @@ export async function reindex(
|
|
|
272
262
|
const related_to = Array.isArray(fmMeta.related_to)
|
|
273
263
|
? (fmMeta.related_to as string[])
|
|
274
264
|
: null;
|
|
275
|
-
const relatedToJson = related_to?.length
|
|
276
|
-
? JSON.stringify(related_to)
|
|
277
|
-
: null;
|
|
265
|
+
const relatedToJson = related_to?.length ? JSON.stringify(related_to) : null;
|
|
278
266
|
|
|
279
267
|
const meta: Record<string, unknown> = { ...(parsed.meta || {}) };
|
|
280
268
|
if (relDir) meta.folder = relDir;
|
|
@@ -284,8 +272,7 @@ export async function reindex(
|
|
|
284
272
|
if (!existing) {
|
|
285
273
|
const id = (fmMeta.id as string) || ulid();
|
|
286
274
|
const tagsJson = fmMeta.tags ? JSON.stringify(fmMeta.tags) : null;
|
|
287
|
-
const created =
|
|
288
|
-
(fmMeta.created as string) || new Date().toISOString();
|
|
275
|
+
const created = (fmMeta.created as string) || new Date().toISOString();
|
|
289
276
|
|
|
290
277
|
const result = upsertEntry.run(
|
|
291
278
|
id,
|
|
@@ -295,25 +282,21 @@ export async function reindex(
|
|
|
295
282
|
parsed.body,
|
|
296
283
|
metaJson,
|
|
297
284
|
tagsJson,
|
|
298
|
-
(fmMeta.source as string) ||
|
|
285
|
+
(fmMeta.source as string) || 'file',
|
|
299
286
|
filePath,
|
|
300
287
|
identity_key,
|
|
301
288
|
expires_at,
|
|
302
289
|
created,
|
|
303
|
-
(fmMeta.updated as string) || created
|
|
290
|
+
(fmMeta.updated as string) || created
|
|
304
291
|
);
|
|
305
292
|
if ((result as { changes: number }).changes > 0) {
|
|
306
293
|
if (relatedToJson && ctx.stmts.updateRelatedTo) {
|
|
307
294
|
ctx.stmts.updateRelatedTo.run(relatedToJson, id);
|
|
308
295
|
}
|
|
309
|
-
if (category !==
|
|
310
|
-
const rowidResult = ctx.stmts.getRowid.get(id) as
|
|
311
|
-
| { rowid: number }
|
|
312
|
-
| undefined;
|
|
296
|
+
if (category !== 'event') {
|
|
297
|
+
const rowidResult = ctx.stmts.getRowid.get(id) as { rowid: number } | undefined;
|
|
313
298
|
if (rowidResult?.rowid) {
|
|
314
|
-
const embeddingText = [parsed.title, parsed.body]
|
|
315
|
-
.filter(Boolean)
|
|
316
|
-
.join(" ");
|
|
299
|
+
const embeddingText = [parsed.title, parsed.body].filter(Boolean).join(' ');
|
|
317
300
|
pendingEmbeds.push({
|
|
318
301
|
rowid: rowidResult.rowid,
|
|
319
302
|
text: embeddingText,
|
|
@@ -326,49 +309,34 @@ export async function reindex(
|
|
|
326
309
|
}
|
|
327
310
|
} else if (fullSync) {
|
|
328
311
|
const tagsJson = fmMeta.tags ? JSON.stringify(fmMeta.tags) : null;
|
|
329
|
-
const titleChanged =
|
|
330
|
-
(parsed.title || null) !== ((existing.title as string) || null);
|
|
312
|
+
const titleChanged = (parsed.title || null) !== ((existing.title as string) || null);
|
|
331
313
|
const bodyChanged = (existing.body as string) !== parsed.body;
|
|
332
314
|
const tagsChanged = tagsJson !== ((existing.tags as string) || null);
|
|
333
315
|
const metaChanged = metaJson !== ((existing.meta as string) || null);
|
|
334
|
-
const relatedToChanged =
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
if (
|
|
338
|
-
bodyChanged ||
|
|
339
|
-
titleChanged ||
|
|
340
|
-
tagsChanged ||
|
|
341
|
-
metaChanged ||
|
|
342
|
-
relatedToChanged
|
|
343
|
-
) {
|
|
316
|
+
const relatedToChanged = relatedToJson !== ((existing.related_to as string) || null);
|
|
317
|
+
|
|
318
|
+
if (bodyChanged || titleChanged || tagsChanged || metaChanged || relatedToChanged) {
|
|
344
319
|
ctx.stmts.updateEntry.run(
|
|
345
320
|
parsed.title || null,
|
|
346
321
|
parsed.body,
|
|
347
322
|
metaJson,
|
|
348
323
|
tagsJson,
|
|
349
|
-
(fmMeta.source as string) ||
|
|
324
|
+
(fmMeta.source as string) || 'file',
|
|
350
325
|
category,
|
|
351
326
|
identity_key,
|
|
352
327
|
expires_at,
|
|
353
|
-
filePath
|
|
328
|
+
filePath
|
|
354
329
|
);
|
|
355
330
|
if (relatedToChanged && ctx.stmts.updateRelatedTo) {
|
|
356
|
-
ctx.stmts.updateRelatedTo.run(
|
|
357
|
-
relatedToJson,
|
|
358
|
-
existing.id as string,
|
|
359
|
-
);
|
|
331
|
+
ctx.stmts.updateRelatedTo.run(relatedToJson, existing.id as string);
|
|
360
332
|
}
|
|
361
333
|
|
|
362
|
-
if ((bodyChanged || titleChanged) && category !==
|
|
334
|
+
if ((bodyChanged || titleChanged) && category !== 'event') {
|
|
363
335
|
const rowid = (
|
|
364
|
-
ctx.stmts.getRowid.get(existing.id as string) as
|
|
365
|
-
| { rowid: number }
|
|
366
|
-
| undefined
|
|
336
|
+
ctx.stmts.getRowid.get(existing.id as string) as { rowid: number } | undefined
|
|
367
337
|
)?.rowid;
|
|
368
338
|
if (rowid) {
|
|
369
|
-
const embeddingText = [parsed.title, parsed.body]
|
|
370
|
-
.filter(Boolean)
|
|
371
|
-
.join(" ");
|
|
339
|
+
const embeddingText = [parsed.title, parsed.body].filter(Boolean).join(' ');
|
|
372
340
|
pendingEmbeds.push({ rowid, text: embeddingText });
|
|
373
341
|
}
|
|
374
342
|
}
|
|
@@ -385,9 +353,7 @@ export async function reindex(
|
|
|
385
353
|
for (const [dbPath, row] of dbByPath) {
|
|
386
354
|
if (!diskPaths.has(dbPath)) {
|
|
387
355
|
const vRowid = (
|
|
388
|
-
ctx.stmts.getRowid.get(row.id as string) as
|
|
389
|
-
| { rowid: number }
|
|
390
|
-
| undefined
|
|
356
|
+
ctx.stmts.getRowid.get(row.id as string) as { rowid: number } | undefined
|
|
391
357
|
)?.rowid;
|
|
392
358
|
if (vRowid) {
|
|
393
359
|
try {
|
|
@@ -403,13 +369,13 @@ export async function reindex(
|
|
|
403
369
|
|
|
404
370
|
if (fullSync) {
|
|
405
371
|
const indexedKinds = new Set(kindEntries.map((ke) => ke.kind));
|
|
406
|
-
const allDbKinds = ctx.db
|
|
407
|
-
|
|
408
|
-
|
|
372
|
+
const allDbKinds = ctx.db.prepare('SELECT DISTINCT kind FROM vault').all() as {
|
|
373
|
+
kind: string;
|
|
374
|
+
}[];
|
|
409
375
|
for (const { kind } of allDbKinds) {
|
|
410
376
|
if (!indexedKinds.has(kind)) {
|
|
411
377
|
const orphaned = ctx.db
|
|
412
|
-
.prepare(
|
|
378
|
+
.prepare('SELECT id, rowid FROM vault WHERE kind = ?')
|
|
413
379
|
.all(kind) as { id: string; rowid: number }[];
|
|
414
380
|
for (const row of orphaned) {
|
|
415
381
|
try {
|
|
@@ -424,7 +390,7 @@ export async function reindex(
|
|
|
424
390
|
|
|
425
391
|
const expired = ctx.db
|
|
426
392
|
.prepare(
|
|
427
|
-
"SELECT id, file_path FROM vault WHERE expires_at IS NOT NULL AND expires_at <= datetime('now')"
|
|
393
|
+
"SELECT id, file_path FROM vault WHERE expires_at IS NOT NULL AND expires_at <= datetime('now')"
|
|
428
394
|
)
|
|
429
395
|
.all() as { id: string; file_path: string | null }[];
|
|
430
396
|
|
|
@@ -434,9 +400,7 @@ export async function reindex(
|
|
|
434
400
|
unlinkSync(row.file_path);
|
|
435
401
|
} catch {}
|
|
436
402
|
}
|
|
437
|
-
const vRowid = (
|
|
438
|
-
ctx.stmts.getRowid.get(row.id) as { rowid: number } | undefined
|
|
439
|
-
)?.rowid;
|
|
403
|
+
const vRowid = (ctx.stmts.getRowid.get(row.id) as { rowid: number } | undefined)?.rowid;
|
|
440
404
|
if (vRowid) {
|
|
441
405
|
try {
|
|
442
406
|
ctx.deleteVec(Number(vRowid));
|
|
@@ -446,9 +410,9 @@ export async function reindex(
|
|
|
446
410
|
stats.removed++;
|
|
447
411
|
}
|
|
448
412
|
|
|
449
|
-
ctx.db.exec(
|
|
413
|
+
ctx.db.exec('COMMIT');
|
|
450
414
|
} catch (e) {
|
|
451
|
-
ctx.db.exec(
|
|
415
|
+
ctx.db.exec('ROLLBACK');
|
|
452
416
|
throw e;
|
|
453
417
|
}
|
|
454
418
|
|
|
@@ -465,5 +429,35 @@ export async function reindex(
|
|
|
465
429
|
}
|
|
466
430
|
}
|
|
467
431
|
|
|
432
|
+
// Detect entries with missing embeddings and regenerate them
|
|
433
|
+
if (fullSync) {
|
|
434
|
+
const missingVec = ctx.db
|
|
435
|
+
.prepare(
|
|
436
|
+
`SELECT v.rowid, v.title, v.body FROM vault v
|
|
437
|
+
WHERE v.category != 'event'
|
|
438
|
+
AND v.rowid NOT IN (SELECT rowid FROM vault_vec)`
|
|
439
|
+
)
|
|
440
|
+
.all() as { rowid: number; title: string | null; body: string }[];
|
|
441
|
+
|
|
442
|
+
if (missingVec.length > 0) {
|
|
443
|
+
const missingEmbeds = missingVec.map((r) => ({
|
|
444
|
+
rowid: r.rowid,
|
|
445
|
+
text: [r.title, r.body].filter(Boolean).join(' '),
|
|
446
|
+
}));
|
|
447
|
+
|
|
448
|
+
for (let i = 0; i < missingEmbeds.length; i += EMBED_BATCH_SIZE) {
|
|
449
|
+
const batch = missingEmbeds.slice(i, i + EMBED_BATCH_SIZE);
|
|
450
|
+
const embeddings = await embedBatch(batch.map((e) => e.text));
|
|
451
|
+
for (let j = 0; j < batch.length; j++) {
|
|
452
|
+
if (embeddings[j]) {
|
|
453
|
+
ctx.insertVec(batch[j].rowid, embeddings[j]!);
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
console.error(`[context-vault] Regenerated ${missingVec.length} missing embeddings`);
|
|
459
|
+
}
|
|
460
|
+
}
|
|
461
|
+
|
|
468
462
|
return stats;
|
|
469
463
|
}
|