@context-vault/core 2.17.0 → 2.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -3
- package/src/capture/file-ops.js +2 -0
- package/src/capture/index.js +14 -0
- package/src/core/categories.js +1 -0
- package/src/core/files.js +6 -29
- package/src/core/frontmatter.js +1 -0
- package/src/core/linking.js +161 -0
- package/src/core/migrate-dirs.js +196 -0
- package/src/core/temporal.js +146 -0
- package/src/index/db.js +178 -8
- package/src/index/index.js +89 -28
- package/src/index.js +5 -0
- package/src/retrieve/index.js +9 -136
- package/src/server/tools/create-snapshot.js +37 -68
- package/src/server/tools/get-context.js +108 -21
- package/src/server/tools/save-context.js +29 -6
- package/src/server/tools.js +0 -2
- package/src/server/tools/submit-feedback.js +0 -55
package/src/index/db.js
CHANGED
|
@@ -42,7 +42,69 @@ function runTransaction(db, fn) {
|
|
|
42
42
|
}
|
|
43
43
|
}
|
|
44
44
|
|
|
45
|
-
|
|
45
|
+
// Local-mode schema: no multi-tenancy or encryption columns.
|
|
46
|
+
// Identity uniqueness is scoped to (kind, identity_key) — no user_id.
|
|
47
|
+
export const LOCAL_SCHEMA_DDL = `
|
|
48
|
+
CREATE TABLE IF NOT EXISTS vault (
|
|
49
|
+
id TEXT PRIMARY KEY,
|
|
50
|
+
kind TEXT NOT NULL,
|
|
51
|
+
category TEXT NOT NULL DEFAULT 'knowledge',
|
|
52
|
+
title TEXT,
|
|
53
|
+
body TEXT NOT NULL,
|
|
54
|
+
meta TEXT,
|
|
55
|
+
tags TEXT,
|
|
56
|
+
source TEXT,
|
|
57
|
+
file_path TEXT UNIQUE,
|
|
58
|
+
identity_key TEXT,
|
|
59
|
+
expires_at TEXT,
|
|
60
|
+
superseded_by TEXT,
|
|
61
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
62
|
+
updated_at TEXT,
|
|
63
|
+
hit_count INTEGER DEFAULT 0,
|
|
64
|
+
last_accessed_at TEXT,
|
|
65
|
+
source_files TEXT,
|
|
66
|
+
tier TEXT DEFAULT 'working' CHECK(tier IN ('ephemeral', 'working', 'durable')),
|
|
67
|
+
related_to TEXT
|
|
68
|
+
);
|
|
69
|
+
|
|
70
|
+
CREATE INDEX IF NOT EXISTS idx_vault_kind ON vault(kind);
|
|
71
|
+
CREATE INDEX IF NOT EXISTS idx_vault_category ON vault(category);
|
|
72
|
+
CREATE INDEX IF NOT EXISTS idx_vault_category_created ON vault(category, created_at DESC);
|
|
73
|
+
CREATE INDEX IF NOT EXISTS idx_vault_updated ON vault(updated_at DESC);
|
|
74
|
+
CREATE UNIQUE INDEX IF NOT EXISTS idx_vault_identity ON vault(kind, identity_key) WHERE identity_key IS NOT NULL AND category = 'entity';
|
|
75
|
+
CREATE INDEX IF NOT EXISTS idx_vault_superseded ON vault(superseded_by) WHERE superseded_by IS NOT NULL;
|
|
76
|
+
CREATE INDEX IF NOT EXISTS idx_vault_tier ON vault(tier);
|
|
77
|
+
|
|
78
|
+
-- Single FTS5 table
|
|
79
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS vault_fts USING fts5(
|
|
80
|
+
title, body, tags, kind,
|
|
81
|
+
content='vault', content_rowid='rowid'
|
|
82
|
+
);
|
|
83
|
+
|
|
84
|
+
-- FTS sync triggers
|
|
85
|
+
CREATE TRIGGER IF NOT EXISTS vault_ai AFTER INSERT ON vault BEGIN
|
|
86
|
+
INSERT INTO vault_fts(rowid, title, body, tags, kind)
|
|
87
|
+
VALUES (new.rowid, new.title, new.body, new.tags, new.kind);
|
|
88
|
+
END;
|
|
89
|
+
CREATE TRIGGER IF NOT EXISTS vault_ad AFTER DELETE ON vault BEGIN
|
|
90
|
+
INSERT INTO vault_fts(vault_fts, rowid, title, body, tags, kind)
|
|
91
|
+
VALUES ('delete', old.rowid, old.title, old.body, old.tags, old.kind);
|
|
92
|
+
END;
|
|
93
|
+
CREATE TRIGGER IF NOT EXISTS vault_au AFTER UPDATE ON vault BEGIN
|
|
94
|
+
INSERT INTO vault_fts(vault_fts, rowid, title, body, tags, kind)
|
|
95
|
+
VALUES ('delete', old.rowid, old.title, old.body, old.tags, old.kind);
|
|
96
|
+
INSERT INTO vault_fts(rowid, title, body, tags, kind)
|
|
97
|
+
VALUES (new.rowid, new.title, new.body, new.tags, new.kind);
|
|
98
|
+
END;
|
|
99
|
+
|
|
100
|
+
-- Single vec table (384-dim float32 for all-MiniLM-L6-v2)
|
|
101
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS vault_vec USING vec0(embedding float[384]);
|
|
102
|
+
`;
|
|
103
|
+
|
|
104
|
+
// Hosted-mode schema: adds multi-tenancy (user_id, team_id) and at-rest
|
|
105
|
+
// encryption columns (body_encrypted, title_encrypted, meta_encrypted, iv).
|
|
106
|
+
// Identity uniqueness is scoped to (user_id, kind, identity_key).
|
|
107
|
+
export const HOSTED_SCHEMA_DDL = `
|
|
46
108
|
CREATE TABLE IF NOT EXISTS vault (
|
|
47
109
|
id TEXT PRIMARY KEY,
|
|
48
110
|
kind TEXT NOT NULL,
|
|
@@ -67,7 +129,8 @@ export const SCHEMA_DDL = `
|
|
|
67
129
|
hit_count INTEGER DEFAULT 0,
|
|
68
130
|
last_accessed_at TEXT,
|
|
69
131
|
source_files TEXT,
|
|
70
|
-
tier TEXT DEFAULT 'working' CHECK(tier IN ('ephemeral', 'working', 'durable'))
|
|
132
|
+
tier TEXT DEFAULT 'working' CHECK(tier IN ('ephemeral', 'working', 'durable')),
|
|
133
|
+
related_to TEXT
|
|
71
134
|
);
|
|
72
135
|
|
|
73
136
|
CREATE INDEX IF NOT EXISTS idx_vault_kind ON vault(kind);
|
|
@@ -106,7 +169,13 @@ export const SCHEMA_DDL = `
|
|
|
106
169
|
CREATE VIRTUAL TABLE IF NOT EXISTS vault_vec USING vec0(embedding float[384]);
|
|
107
170
|
`;
|
|
108
171
|
|
|
109
|
-
|
|
172
|
+
// Backward-compatible alias — kept for external consumers that reference SCHEMA_DDL.
|
|
173
|
+
export const SCHEMA_DDL = HOSTED_SCHEMA_DDL;
|
|
174
|
+
|
|
175
|
+
// Current target schema version. Bump this on every migration.
|
|
176
|
+
const CURRENT_VERSION = 14;
|
|
177
|
+
|
|
178
|
+
export async function initDatabase(dbPath, { mode = "local" } = {}) {
|
|
110
179
|
const sqliteVec = await loadSqliteVec();
|
|
111
180
|
|
|
112
181
|
function createDb(path) {
|
|
@@ -121,6 +190,8 @@ export async function initDatabase(dbPath) {
|
|
|
121
190
|
return db;
|
|
122
191
|
}
|
|
123
192
|
|
|
193
|
+
const schemaDdl = mode === "hosted" ? HOSTED_SCHEMA_DDL : LOCAL_SCHEMA_DDL;
|
|
194
|
+
|
|
124
195
|
const db = createDb(dbPath);
|
|
125
196
|
const version = db.prepare("PRAGMA user_version").get().user_version;
|
|
126
197
|
|
|
@@ -155,14 +226,14 @@ export async function initDatabase(dbPath) {
|
|
|
155
226
|
} catch {}
|
|
156
227
|
|
|
157
228
|
const freshDb = createDb(dbPath);
|
|
158
|
-
freshDb.exec(
|
|
159
|
-
freshDb.exec(
|
|
229
|
+
freshDb.exec(schemaDdl);
|
|
230
|
+
freshDb.exec(`PRAGMA user_version = ${CURRENT_VERSION}`);
|
|
160
231
|
return freshDb;
|
|
161
232
|
}
|
|
162
233
|
|
|
163
234
|
if (version < 5) {
|
|
164
|
-
db.exec(
|
|
165
|
-
db.exec(
|
|
235
|
+
db.exec(schemaDdl);
|
|
236
|
+
db.exec(`PRAGMA user_version = ${CURRENT_VERSION}`);
|
|
166
237
|
} else if (version === 5) {
|
|
167
238
|
// v5 -> v6 migration: add multi-tenancy + encryption columns
|
|
168
239
|
// Wrapped in transaction with duplicate-column guards for idempotent retry
|
|
@@ -344,12 +415,108 @@ export async function initDatabase(dbPath) {
|
|
|
344
415
|
});
|
|
345
416
|
}
|
|
346
417
|
|
|
418
|
+
if (version >= 5 && version <= 12) {
|
|
419
|
+
// v12 -> v13 migration: add related_to column for graph linking
|
|
420
|
+
runTransaction(db, () => {
|
|
421
|
+
try {
|
|
422
|
+
db.exec(`ALTER TABLE vault ADD COLUMN related_to TEXT`);
|
|
423
|
+
} catch (e) {
|
|
424
|
+
if (!e.message.includes("duplicate column")) throw e;
|
|
425
|
+
}
|
|
426
|
+
db.exec("PRAGMA user_version = 13");
|
|
427
|
+
});
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
if (version >= 5 && version <= 13) {
|
|
431
|
+
// v13 -> v14 migration: separate local and hosted schemas.
|
|
432
|
+
// Local mode: drop the 6 hosted-only columns (user_id, team_id,
|
|
433
|
+
// body_encrypted, title_encrypted, meta_encrypted, iv) and rebuild
|
|
434
|
+
// the identity index without user_id.
|
|
435
|
+
// Hosted mode: no structural change — just bump version.
|
|
436
|
+
runTransaction(db, () => {
|
|
437
|
+
if (mode === "local") {
|
|
438
|
+
// Must drop indexes that reference the columns before dropping columns.
|
|
439
|
+
db.exec(`DROP INDEX IF EXISTS idx_vault_user`);
|
|
440
|
+
db.exec(`DROP INDEX IF EXISTS idx_vault_team`);
|
|
441
|
+
db.exec(`DROP INDEX IF EXISTS idx_vault_identity`);
|
|
442
|
+
const dropColumnSafe = (col) => {
|
|
443
|
+
try {
|
|
444
|
+
db.exec(`ALTER TABLE vault DROP COLUMN ${col}`);
|
|
445
|
+
} catch (e) {
|
|
446
|
+
// Column may not exist on older schemas that never had it added.
|
|
447
|
+
if (!e.message.includes("no such column")) throw e;
|
|
448
|
+
}
|
|
449
|
+
};
|
|
450
|
+
dropColumnSafe("user_id");
|
|
451
|
+
dropColumnSafe("team_id");
|
|
452
|
+
dropColumnSafe("body_encrypted");
|
|
453
|
+
dropColumnSafe("title_encrypted");
|
|
454
|
+
dropColumnSafe("meta_encrypted");
|
|
455
|
+
dropColumnSafe("iv");
|
|
456
|
+
// Recreate identity uniqueness index scoped to (kind, identity_key),
|
|
457
|
+
// restricted to entity-category entries only (knowledge/event entries
|
|
458
|
+
// with identity_key are informational and may duplicate).
|
|
459
|
+
db.exec(
|
|
460
|
+
`CREATE UNIQUE INDEX IF NOT EXISTS idx_vault_identity ON vault(kind, identity_key) WHERE identity_key IS NOT NULL AND category = 'entity'`,
|
|
461
|
+
);
|
|
462
|
+
}
|
|
463
|
+
db.exec(`PRAGMA user_version = ${CURRENT_VERSION}`);
|
|
464
|
+
});
|
|
465
|
+
}
|
|
466
|
+
|
|
347
467
|
return db;
|
|
348
468
|
}
|
|
349
469
|
|
|
350
|
-
export function prepareStatements(db) {
|
|
470
|
+
export function prepareStatements(db, mode = "local") {
|
|
351
471
|
try {
|
|
472
|
+
if (mode === "local") {
|
|
473
|
+
// Local mode: no user_id, team_id, or encryption columns.
|
|
474
|
+
// insertEntry has 15 params (no user_id).
|
|
475
|
+
// getByIdentityKey and upsertByIdentityKey have no user_id WHERE clause.
|
|
476
|
+
return {
|
|
477
|
+
_mode: "local",
|
|
478
|
+
insertEntry: db.prepare(
|
|
479
|
+
`INSERT INTO vault (id, kind, category, title, body, meta, tags, source, file_path, identity_key, expires_at, created_at, updated_at, source_files, tier) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
480
|
+
),
|
|
481
|
+
updateEntry: db.prepare(
|
|
482
|
+
`UPDATE vault SET title = ?, body = ?, meta = ?, tags = ?, source = ?, category = ?, identity_key = ?, expires_at = ?, updated_at = datetime('now') WHERE file_path = ?`,
|
|
483
|
+
),
|
|
484
|
+
deleteEntry: db.prepare(`DELETE FROM vault WHERE id = ?`),
|
|
485
|
+
getRowid: db.prepare(`SELECT rowid FROM vault WHERE id = ?`),
|
|
486
|
+
getRowidByPath: db.prepare(
|
|
487
|
+
`SELECT rowid FROM vault WHERE file_path = ?`,
|
|
488
|
+
),
|
|
489
|
+
getEntryById: db.prepare(`SELECT * FROM vault WHERE id = ?`),
|
|
490
|
+
getByIdentityKey: db.prepare(
|
|
491
|
+
`SELECT * FROM vault WHERE kind = ? AND identity_key = ?`,
|
|
492
|
+
),
|
|
493
|
+
upsertByIdentityKey: db.prepare(
|
|
494
|
+
`UPDATE vault SET title = ?, body = ?, meta = ?, tags = ?, source = ?, category = ?, file_path = ?, expires_at = ?, source_files = ?, updated_at = datetime('now') WHERE kind = ? AND identity_key = ?`,
|
|
495
|
+
),
|
|
496
|
+
updateSourceFiles: db.prepare(
|
|
497
|
+
`UPDATE vault SET source_files = ? WHERE id = ?`,
|
|
498
|
+
),
|
|
499
|
+
updateRelatedTo: db.prepare(
|
|
500
|
+
`UPDATE vault SET related_to = ? WHERE id = ?`,
|
|
501
|
+
),
|
|
502
|
+
insertVecStmt: db.prepare(
|
|
503
|
+
`INSERT INTO vault_vec (rowid, embedding) VALUES (?, ?)`,
|
|
504
|
+
),
|
|
505
|
+
deleteVecStmt: db.prepare(`DELETE FROM vault_vec WHERE rowid = ?`),
|
|
506
|
+
updateSupersededBy: db.prepare(
|
|
507
|
+
`UPDATE vault SET superseded_by = ? WHERE id = ?`,
|
|
508
|
+
),
|
|
509
|
+
clearSupersededByRef: db.prepare(
|
|
510
|
+
`UPDATE vault SET superseded_by = NULL WHERE superseded_by = ?`,
|
|
511
|
+
),
|
|
512
|
+
};
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
// Hosted mode: full schema with user_id scoping and encryption support.
|
|
516
|
+
// insertEntry has 16 params (includes user_id).
|
|
517
|
+
// getByIdentityKey and upsertByIdentityKey scope by user_id IS ?.
|
|
352
518
|
return {
|
|
519
|
+
_mode: "hosted",
|
|
353
520
|
insertEntry: db.prepare(
|
|
354
521
|
`INSERT INTO vault (id, user_id, kind, category, title, body, meta, tags, source, file_path, identity_key, expires_at, created_at, updated_at, source_files, tier) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
355
522
|
),
|
|
@@ -372,6 +539,9 @@ export function prepareStatements(db) {
|
|
|
372
539
|
updateSourceFiles: db.prepare(
|
|
373
540
|
`UPDATE vault SET source_files = ? WHERE id = ?`,
|
|
374
541
|
),
|
|
542
|
+
updateRelatedTo: db.prepare(
|
|
543
|
+
`UPDATE vault SET related_to = ? WHERE id = ?`,
|
|
544
|
+
),
|
|
375
545
|
insertVecStmt: db.prepare(
|
|
376
546
|
`INSERT INTO vault_vec (rowid, embedding) VALUES (?, ?)`,
|
|
377
547
|
),
|
package/src/index/index.js
CHANGED
|
@@ -66,37 +66,53 @@ export async function indexEntry(
|
|
|
66
66
|
const cat = category || categoryFor(kind);
|
|
67
67
|
const effectiveTier = tier || defaultTierFor(kind);
|
|
68
68
|
const userIdVal = userId || null;
|
|
69
|
+
const isLocal = ctx.stmts._mode === "local";
|
|
69
70
|
|
|
70
71
|
let wasUpdate = false;
|
|
71
72
|
|
|
72
|
-
// Entity upsert: check by (kind, identity_key, user_id) first
|
|
73
|
+
// Entity upsert: check by (kind, identity_key[, user_id]) first.
|
|
74
|
+
// Local mode omits user_id — all entries are user-agnostic.
|
|
73
75
|
if (cat === "entity" && identity_key) {
|
|
74
|
-
const existing =
|
|
75
|
-
kind,
|
|
76
|
-
identity_key,
|
|
77
|
-
userIdVal,
|
|
78
|
-
);
|
|
76
|
+
const existing = isLocal
|
|
77
|
+
? ctx.stmts.getByIdentityKey.get(kind, identity_key)
|
|
78
|
+
: ctx.stmts.getByIdentityKey.get(kind, identity_key, userIdVal);
|
|
79
79
|
if (existing) {
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
80
|
+
if (isLocal) {
|
|
81
|
+
ctx.stmts.upsertByIdentityKey.run(
|
|
82
|
+
title || null,
|
|
83
|
+
body,
|
|
84
|
+
metaJson,
|
|
85
|
+
tagsJson,
|
|
86
|
+
source || "claude-code",
|
|
87
|
+
cat,
|
|
88
|
+
filePath,
|
|
89
|
+
expires_at || null,
|
|
90
|
+
sourceFilesJson,
|
|
91
|
+
kind,
|
|
92
|
+
identity_key,
|
|
93
|
+
);
|
|
94
|
+
} else {
|
|
95
|
+
ctx.stmts.upsertByIdentityKey.run(
|
|
96
|
+
title || null,
|
|
97
|
+
body,
|
|
98
|
+
metaJson,
|
|
99
|
+
tagsJson,
|
|
100
|
+
source || "claude-code",
|
|
101
|
+
cat,
|
|
102
|
+
filePath,
|
|
103
|
+
expires_at || null,
|
|
104
|
+
sourceFilesJson,
|
|
105
|
+
kind,
|
|
106
|
+
identity_key,
|
|
107
|
+
userIdVal,
|
|
108
|
+
);
|
|
109
|
+
}
|
|
94
110
|
wasUpdate = true;
|
|
95
111
|
}
|
|
96
112
|
}
|
|
97
113
|
|
|
98
114
|
if (!wasUpdate) {
|
|
99
|
-
// Prepare encryption if ctx.encrypt is available
|
|
115
|
+
// Prepare encryption if ctx.encrypt is available (hosted mode only)
|
|
100
116
|
let encrypted = null;
|
|
101
117
|
if (ctx.encrypt) {
|
|
102
118
|
encrypted = await ctx.encrypt({ title, body, meta });
|
|
@@ -104,7 +120,8 @@ export async function indexEntry(
|
|
|
104
120
|
|
|
105
121
|
try {
|
|
106
122
|
if (encrypted) {
|
|
107
|
-
//
|
|
123
|
+
// Hosted-mode encrypted insert: store preview in body for FTS,
|
|
124
|
+
// full content in encrypted columns.
|
|
108
125
|
const bodyPreview = body.slice(0, 200);
|
|
109
126
|
ctx.stmts.insertEntryEncrypted.run(
|
|
110
127
|
id,
|
|
@@ -128,7 +145,27 @@ export async function indexEntry(
|
|
|
128
145
|
sourceFilesJson,
|
|
129
146
|
effectiveTier,
|
|
130
147
|
);
|
|
148
|
+
} else if (isLocal) {
|
|
149
|
+
// Local mode: no user_id column — 15 params.
|
|
150
|
+
ctx.stmts.insertEntry.run(
|
|
151
|
+
id,
|
|
152
|
+
kind,
|
|
153
|
+
cat,
|
|
154
|
+
title || null,
|
|
155
|
+
body,
|
|
156
|
+
metaJson,
|
|
157
|
+
tagsJson,
|
|
158
|
+
source || "claude-code",
|
|
159
|
+
filePath,
|
|
160
|
+
identity_key || null,
|
|
161
|
+
expires_at || null,
|
|
162
|
+
createdAt,
|
|
163
|
+
createdAt,
|
|
164
|
+
sourceFilesJson,
|
|
165
|
+
effectiveTier,
|
|
166
|
+
);
|
|
131
167
|
} else {
|
|
168
|
+
// Hosted mode without encryption: 16 params (includes user_id).
|
|
132
169
|
ctx.stmts.insertEntry.run(
|
|
133
170
|
id,
|
|
134
171
|
userIdVal,
|
|
@@ -262,10 +299,14 @@ export async function reindex(ctx, opts = {}) {
|
|
|
262
299
|
|
|
263
300
|
if (!existsSync(ctx.config.vaultDir)) return stats;
|
|
264
301
|
|
|
265
|
-
// Use INSERT OR IGNORE for reindex — handles files with duplicate frontmatter IDs
|
|
266
|
-
//
|
|
302
|
+
// Use INSERT OR IGNORE for reindex — handles files with duplicate frontmatter IDs.
|
|
303
|
+
// Local mode: no user_id column (13 columns, 13 bound params).
|
|
304
|
+
// Hosted mode: user_id is a NULL literal for file-sourced entries (14 columns, 13 bound params).
|
|
305
|
+
const isLocalReindex = ctx.stmts._mode === "local";
|
|
267
306
|
const upsertEntry = ctx.db.prepare(
|
|
268
|
-
|
|
307
|
+
isLocalReindex
|
|
308
|
+
? `INSERT OR IGNORE INTO vault (id, kind, category, title, body, meta, tags, source, file_path, identity_key, expires_at, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
309
|
+
: `INSERT OR IGNORE INTO vault (id, user_id, kind, category, title, body, meta, tags, source, file_path, identity_key, expires_at, created_at, updated_at) VALUES (?, NULL, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
269
310
|
);
|
|
270
311
|
|
|
271
312
|
// Auto-discover kind directories, supporting both:
|
|
@@ -317,7 +358,7 @@ export async function reindex(ctx, opts = {}) {
|
|
|
317
358
|
// P3: Fetch all mutable fields for change detection
|
|
318
359
|
const dbRows = ctx.db
|
|
319
360
|
.prepare(
|
|
320
|
-
"SELECT id, file_path, body, title, tags, meta FROM vault WHERE kind = ?",
|
|
361
|
+
"SELECT id, file_path, body, title, tags, meta, related_to FROM vault WHERE kind = ?",
|
|
321
362
|
)
|
|
322
363
|
.all(kind);
|
|
323
364
|
const dbByPath = new Map(dbRows.map((r) => [r.file_path, r]));
|
|
@@ -343,6 +384,12 @@ export async function reindex(ctx, opts = {}) {
|
|
|
343
384
|
// Extract identity_key and expires_at from frontmatter
|
|
344
385
|
const identity_key = fmMeta.identity_key || null;
|
|
345
386
|
const expires_at = fmMeta.expires_at || null;
|
|
387
|
+
const related_to = Array.isArray(fmMeta.related_to)
|
|
388
|
+
? fmMeta.related_to
|
|
389
|
+
: null;
|
|
390
|
+
const relatedToJson = related_to?.length
|
|
391
|
+
? JSON.stringify(related_to)
|
|
392
|
+
: null;
|
|
346
393
|
|
|
347
394
|
// Derive folder from disk location (source of truth)
|
|
348
395
|
const meta = { ...(parsed.meta || {}) };
|
|
@@ -372,6 +419,9 @@ export async function reindex(ctx, opts = {}) {
|
|
|
372
419
|
fmMeta.updated || created,
|
|
373
420
|
);
|
|
374
421
|
if (result.changes > 0) {
|
|
422
|
+
if (relatedToJson && ctx.stmts.updateRelatedTo) {
|
|
423
|
+
ctx.stmts.updateRelatedTo.run(relatedToJson, id);
|
|
424
|
+
}
|
|
375
425
|
if (category !== "event") {
|
|
376
426
|
const rowidResult = ctx.stmts.getRowid.get(id);
|
|
377
427
|
if (rowidResult?.rowid) {
|
|
@@ -396,8 +446,16 @@ export async function reindex(ctx, opts = {}) {
|
|
|
396
446
|
const bodyChanged = existing.body !== parsed.body;
|
|
397
447
|
const tagsChanged = tagsJson !== (existing.tags || null);
|
|
398
448
|
const metaChanged = metaJson !== (existing.meta || null);
|
|
399
|
-
|
|
400
|
-
|
|
449
|
+
const relatedToChanged =
|
|
450
|
+
relatedToJson !== (existing.related_to || null);
|
|
451
|
+
|
|
452
|
+
if (
|
|
453
|
+
bodyChanged ||
|
|
454
|
+
titleChanged ||
|
|
455
|
+
tagsChanged ||
|
|
456
|
+
metaChanged ||
|
|
457
|
+
relatedToChanged
|
|
458
|
+
) {
|
|
401
459
|
ctx.stmts.updateEntry.run(
|
|
402
460
|
parsed.title || null,
|
|
403
461
|
parsed.body,
|
|
@@ -409,6 +467,9 @@ export async function reindex(ctx, opts = {}) {
|
|
|
409
467
|
expires_at,
|
|
410
468
|
filePath,
|
|
411
469
|
);
|
|
470
|
+
if (relatedToChanged && ctx.stmts.updateRelatedTo) {
|
|
471
|
+
ctx.stmts.updateRelatedTo.run(relatedToJson, existing.id);
|
|
472
|
+
}
|
|
412
473
|
|
|
413
474
|
// Queue re-embed if title or body changed (vector ops deferred to Phase 2)
|
|
414
475
|
if ((bodyChanged || titleChanged) && category !== "event") {
|
package/src/index.js
CHANGED
|
@@ -29,6 +29,11 @@ export {
|
|
|
29
29
|
parseEntryFromMarkdown,
|
|
30
30
|
} from "./core/frontmatter.js";
|
|
31
31
|
export { gatherVaultStatus } from "./core/status.js";
|
|
32
|
+
export {
|
|
33
|
+
PLURAL_TO_SINGULAR,
|
|
34
|
+
planMigration,
|
|
35
|
+
executeMigration,
|
|
36
|
+
} from "./core/migrate-dirs.js";
|
|
32
37
|
|
|
33
38
|
// Capture layer
|
|
34
39
|
export {
|
package/src/retrieve/index.js
CHANGED
|
@@ -11,8 +11,6 @@ const NEAR_DUP_THRESHOLD = 0.92;
|
|
|
11
11
|
|
|
12
12
|
const RRF_K = 60;
|
|
13
13
|
|
|
14
|
-
const MMR_LAMBDA = 0.7;
|
|
15
|
-
|
|
16
14
|
/**
|
|
17
15
|
* Exponential recency decay score based on updated_at timestamp.
|
|
18
16
|
* Returns e^(-decayRate * ageDays) for valid dates, or 0.5 as a neutral
|
|
@@ -132,108 +130,16 @@ export function reciprocalRankFusion(rankedLists, k = RRF_K) {
|
|
|
132
130
|
return scores;
|
|
133
131
|
}
|
|
134
132
|
|
|
135
|
-
/**
|
|
136
|
-
* Jaccard similarity between two strings based on word sets.
|
|
137
|
-
* Used as a fallback for MMR when embedding vectors are unavailable.
|
|
138
|
-
*
|
|
139
|
-
* @param {string} a
|
|
140
|
-
* @param {string} b
|
|
141
|
-
* @returns {number} Similarity in [0, 1].
|
|
142
|
-
*/
|
|
143
|
-
export function jaccardSimilarity(a, b) {
|
|
144
|
-
const wordsA = new Set((a ?? "").toLowerCase().split(/\W+/).filter(Boolean));
|
|
145
|
-
const wordsB = new Set((b ?? "").toLowerCase().split(/\W+/).filter(Boolean));
|
|
146
|
-
if (wordsA.size === 0 && wordsB.size === 0) return 1;
|
|
147
|
-
if (wordsA.size === 0 || wordsB.size === 0) return 0;
|
|
148
|
-
let intersection = 0;
|
|
149
|
-
for (const w of wordsA) if (wordsB.has(w)) intersection++;
|
|
150
|
-
return intersection / (wordsA.size + wordsB.size - intersection);
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
/**
|
|
154
|
-
* Maximal Marginal Relevance reranking.
|
|
155
|
-
*
|
|
156
|
-
* Selects up to n candidates that balance relevance to the query and
|
|
157
|
-
* diversity from already-selected results.
|
|
158
|
-
*
|
|
159
|
-
* MMR_score = lambda * querySim(doc) - (1 - lambda) * max(sim(doc, selected))
|
|
160
|
-
*
|
|
161
|
-
* @param {Array<object>} candidates - Entries with at least {id, title, body}.
|
|
162
|
-
* @param {Map<string, number>} querySimMap - Map of id -> relevance score.
|
|
163
|
-
* @param {Map<string, Float32Array|null>} embeddingMap - Map of id -> embedding (null if unavailable).
|
|
164
|
-
* @param {number} n - Number of results to select.
|
|
165
|
-
* @param {number} lambda - Trade-off weight (default MMR_LAMBDA = 0.7).
|
|
166
|
-
* @returns {Array<object>} Reranked subset of candidates (length <= n).
|
|
167
|
-
*/
|
|
168
|
-
export function maximalMarginalRelevance(
|
|
169
|
-
candidates,
|
|
170
|
-
querySimMap,
|
|
171
|
-
embeddingMap,
|
|
172
|
-
n,
|
|
173
|
-
lambda = MMR_LAMBDA,
|
|
174
|
-
) {
|
|
175
|
-
if (candidates.length === 0) return [];
|
|
176
|
-
|
|
177
|
-
const remaining = [...candidates];
|
|
178
|
-
const selected = [];
|
|
179
|
-
const selectedVecs = [];
|
|
180
|
-
const selectedEntries = [];
|
|
181
|
-
|
|
182
|
-
while (selected.length < n && remaining.length > 0) {
|
|
183
|
-
let bestIdx = -1;
|
|
184
|
-
let bestScore = -Infinity;
|
|
185
|
-
|
|
186
|
-
for (let i = 0; i < remaining.length; i++) {
|
|
187
|
-
const candidate = remaining[i];
|
|
188
|
-
const relevance = querySimMap.get(candidate.id) ?? 0;
|
|
189
|
-
|
|
190
|
-
let maxRedundancy = 0;
|
|
191
|
-
if (selectedVecs.length > 0) {
|
|
192
|
-
const vec = embeddingMap.get(candidate.id);
|
|
193
|
-
for (let j = 0; j < selectedVecs.length; j++) {
|
|
194
|
-
let sim;
|
|
195
|
-
if (vec && selectedVecs[j]) {
|
|
196
|
-
sim = dotProduct(vec, selectedVecs[j]);
|
|
197
|
-
} else {
|
|
198
|
-
const selEntry = selectedEntries[j];
|
|
199
|
-
sim = jaccardSimilarity(
|
|
200
|
-
`${candidate.title} ${candidate.body}`,
|
|
201
|
-
`${selEntry.title} ${selEntry.body}`,
|
|
202
|
-
);
|
|
203
|
-
}
|
|
204
|
-
if (sim > maxRedundancy) maxRedundancy = sim;
|
|
205
|
-
}
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
const score = lambda * relevance - (1 - lambda) * maxRedundancy;
|
|
209
|
-
if (score > bestScore) {
|
|
210
|
-
bestScore = score;
|
|
211
|
-
bestIdx = i;
|
|
212
|
-
}
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
if (bestIdx === -1) break;
|
|
216
|
-
|
|
217
|
-
const chosen = remaining.splice(bestIdx, 1)[0];
|
|
218
|
-
selected.push(chosen);
|
|
219
|
-
selectedVecs.push(embeddingMap.get(chosen.id) ?? null);
|
|
220
|
-
selectedEntries.push(chosen);
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
return selected;
|
|
224
|
-
}
|
|
225
|
-
|
|
226
133
|
/**
|
|
227
134
|
* Hybrid search combining FTS5 text matching and vector similarity,
|
|
228
|
-
* with RRF merging
|
|
135
|
+
* with RRF merging, recency decay, and near-duplicate suppression.
|
|
229
136
|
*
|
|
230
137
|
* Pipeline:
|
|
231
138
|
* 1. FTS5 ranked list
|
|
232
139
|
* 2. Vector (semantic) ranked list
|
|
233
140
|
* 3. RRF: merge the two ranked lists into a single score
|
|
234
|
-
* 4.
|
|
235
|
-
* 5.
|
|
236
|
-
* 6. Near-duplicate suppression on the final selection
|
|
141
|
+
* 4. Recency decay: penalise old events (knowledge/entity entries unaffected)
|
|
142
|
+
* 5. Near-duplicate suppression (cosine similarity > 0.92 threshold)
|
|
237
143
|
*
|
|
238
144
|
* @param {import('../server/types.js').BaseCtx} ctx
|
|
239
145
|
* @param {string} query
|
|
@@ -383,20 +289,6 @@ export async function hybridSearch(
|
|
|
383
289
|
rrfScores.set(id, (rrfScores.get(id) ?? 0) * boost);
|
|
384
290
|
}
|
|
385
291
|
|
|
386
|
-
// Stage 3b: Frequency signal — log(1 + hit_count) / log(1 + max_hit_count)
|
|
387
|
-
const allRows = [...rowMap.values()];
|
|
388
|
-
const maxHitCount = Math.max(...allRows.map((e) => e.hit_count || 0), 0);
|
|
389
|
-
if (maxHitCount > 0) {
|
|
390
|
-
const logMax = Math.log(1 + maxHitCount);
|
|
391
|
-
for (const entry of allRows) {
|
|
392
|
-
const freqScore = Math.log(1 + (entry.hit_count || 0)) / logMax;
|
|
393
|
-
rrfScores.set(
|
|
394
|
-
entry.id,
|
|
395
|
-
(rrfScores.get(entry.id) ?? 0) + freqScore * 0.13,
|
|
396
|
-
);
|
|
397
|
-
}
|
|
398
|
-
}
|
|
399
|
-
|
|
400
292
|
// Attach final score to each entry and sort by RRF score descending
|
|
401
293
|
const candidates = [...rowMap.values()].map((entry) => ({
|
|
402
294
|
...entry,
|
|
@@ -404,7 +296,7 @@ export async function hybridSearch(
|
|
|
404
296
|
}));
|
|
405
297
|
candidates.sort((a, b) => b.score - a.score);
|
|
406
298
|
|
|
407
|
-
// Stage 4: Fetch embeddings for
|
|
299
|
+
// Stage 4: Fetch embeddings for near-duplicate suppression
|
|
408
300
|
const embeddingMap = new Map();
|
|
409
301
|
if (queryVec && idToRowid.size > 0) {
|
|
410
302
|
const rowidToId = new Map();
|
|
@@ -429,34 +321,15 @@ export async function hybridSearch(
|
|
|
429
321
|
}
|
|
430
322
|
}
|
|
431
323
|
} catch (_) {
|
|
432
|
-
// Embeddings unavailable —
|
|
324
|
+
// Embeddings unavailable — near-dup suppression skipped
|
|
433
325
|
}
|
|
434
326
|
}
|
|
435
327
|
|
|
436
|
-
//
|
|
437
|
-
|
|
438
|
-
for (const candidate of candidates) {
|
|
439
|
-
querySimMap.set(
|
|
440
|
-
candidate.id,
|
|
441
|
-
vecSimMap.has(candidate.id)
|
|
442
|
-
? vecSimMap.get(candidate.id)
|
|
443
|
-
: candidate.score,
|
|
444
|
-
);
|
|
445
|
-
}
|
|
446
|
-
|
|
447
|
-
// Stage 5: MMR — rerank for diversity using embeddings or Jaccard fallback
|
|
448
|
-
const mmrSelected = maximalMarginalRelevance(
|
|
449
|
-
candidates,
|
|
450
|
-
querySimMap,
|
|
451
|
-
embeddingMap,
|
|
452
|
-
offset + limit,
|
|
453
|
-
);
|
|
454
|
-
|
|
455
|
-
// Stage 6: Near-duplicate suppression (hard filter, not reorder)
|
|
456
|
-
if (queryVec && embeddingMap.size > 0 && mmrSelected.length > limit) {
|
|
328
|
+
// Stage 5: Near-duplicate suppression (cosine similarity > 0.92 threshold)
|
|
329
|
+
if (queryVec && embeddingMap.size > 0) {
|
|
457
330
|
const selected = [];
|
|
458
331
|
const selectedVecs = [];
|
|
459
|
-
for (const candidate of
|
|
332
|
+
for (const candidate of candidates) {
|
|
460
333
|
if (selected.length >= offset + limit) break;
|
|
461
334
|
const vec = embeddingMap.get(candidate.id);
|
|
462
335
|
if (vec && selectedVecs.length > 0) {
|
|
@@ -475,7 +348,7 @@ export async function hybridSearch(
|
|
|
475
348
|
return dedupedPage;
|
|
476
349
|
}
|
|
477
350
|
|
|
478
|
-
const finalPage =
|
|
351
|
+
const finalPage = candidates.slice(offset, offset + limit);
|
|
479
352
|
trackAccess(ctx.db, finalPage);
|
|
480
353
|
return finalPage;
|
|
481
354
|
}
|