alvin-bot 4.20.0 → 4.20.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/bin/cli.js +47 -1
- package/dist/services/embeddings-migration.js +100 -21
- package/dist/services/embeddings.js +75 -20
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,21 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to Alvin Bot are documented here.
|
|
4
4
|
|
|
5
|
+
## [4.20.1] — 2026-05-03
|
|
6
|
+
|
|
7
|
+
### 🛡️ Hardening for the v4.20.0 SQLite migration
|
|
8
|
+
|
|
9
|
+
The v4.20 migration is fully automatic on first start, but a few things could go wrong on user installations that the maintainer instance never hits. v4.20.1 plugs each of them.
|
|
10
|
+
|
|
11
|
+
- **Lazy native binary load.** `better-sqlite3` is now `require()`-d inside `embeddings.ts`, not at module import time. If the prebuilt isn't available for the user's platform and a build-from-source fails (exotic Node version, missing toolchain, glibc mismatch), the bot logs a single clear warning with the exact rebuild command, and **keeps running** — only semantic memory search is disabled until the user fixes their install. Previously this would have crashed bot startup.
|
|
12
|
+
- **Pre-flight disk-space check.** Migration refuses to start unless the volume holding `~/.alvin-bot/memory/` has at least 2× the source JSON's size free (covers source + target + WAL during the transaction). Skipped migration leaves the JSON intact for retry on the next boot once space is free.
|
|
13
|
+
- **Progress logging.** On indexes larger than ~5 000 entries, the migration logs `…migrated N / M entries (P %)` every 5 000 rows so the user can see it isn't stuck.
|
|
14
|
+
- **Corrupt JSON recovery.** If `JSON.parse` of `.embeddings.json` throws, the file is moved aside to `.embeddings.json.broken.<timestamp>` and the next bot start treats this as a fresh install (rebuild-from-source on first search). No more boot-loop on a damaged index.
|
|
15
|
+
- **`alvin-bot doctor` shows memory health.** New "Memory:" section reports: native binary loadable, vector-store entry count + size, or — for not-yet-migrated installs — the legacy JSON's size and a hint that the next start will migrate.
|
|
16
|
+
- **Cleanup on failed migration.** WAL/SHM sidecars are removed alongside the half-written `.embeddings.db` so the next attempt starts from a clean slate.
|
|
17
|
+
|
|
18
|
+
No schema or API changes — drop-in over v4.20.0.
|
|
19
|
+
|
|
5
20
|
## [4.20.0] — 2026-05-03
|
|
6
21
|
|
|
7
22
|
### π Embeddings: JSON β SQLite
|
package/bin/cli.js
CHANGED
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
*/
|
|
18
18
|
|
|
19
19
|
import { createInterface } from "readline";
|
|
20
|
-
import { existsSync, writeFileSync, readFileSync, mkdirSync, copyFileSync, readdirSync } from "fs";
|
|
20
|
+
import { existsSync, writeFileSync, readFileSync, mkdirSync, copyFileSync, readdirSync, statSync } from "fs";
|
|
21
21
|
import { resolve, join } from "path";
|
|
22
22
|
import { homedir } from "os";
|
|
23
23
|
import { execSync } from "child_process";
|
|
@@ -1361,6 +1361,52 @@ async function doctor() {
|
|
|
1361
1361
|
console.log(` β ALLOWED_USERS not set (nobody can message the bot)`);
|
|
1362
1362
|
}
|
|
1363
1363
|
|
|
1364
|
+
// ββ Memory (semantic search backend) ββ
|
|
1365
|
+
console.log("\n Memory:");
|
|
1366
|
+
const embJson = resolve(DATA_DIR, "memory", ".embeddings.json");
|
|
1367
|
+
const embDb = resolve(DATA_DIR, "memory", ".embeddings.db");
|
|
1368
|
+
const embBakSqlite = resolve(DATA_DIR, "memory", ".embeddings.json.bak-pre-sqlite");
|
|
1369
|
+
|
|
1370
|
+
// better-sqlite3 native binary loadable?
|
|
1371
|
+
let sqliteOk = false;
|
|
1372
|
+
let sqliteErr = "";
|
|
1373
|
+
try {
|
|
1374
|
+
const req = (await import("module")).createRequire(import.meta.url);
|
|
1375
|
+
req("better-sqlite3");
|
|
1376
|
+
sqliteOk = true;
|
|
1377
|
+
} catch (err) {
|
|
1378
|
+
sqliteErr = err instanceof Error ? err.message : String(err);
|
|
1379
|
+
}
|
|
1380
|
+
if (sqliteOk) {
|
|
1381
|
+
console.log(` β
better-sqlite3 native binary loadable`);
|
|
1382
|
+
} else {
|
|
1383
|
+
console.log(` β better-sqlite3 native binary not loadable β semantic search disabled`);
|
|
1384
|
+
console.log(` Fix: cd $(npm root -g)/alvin-bot && npm rebuild better-sqlite3`);
|
|
1385
|
+
console.log(` Detail: ${sqliteErr.split("\n")[0]}`);
|
|
1386
|
+
}
|
|
1387
|
+
|
|
1388
|
+
if (sqliteOk && existsSync(embDb)) {
|
|
1389
|
+
try {
|
|
1390
|
+
const req = (await import("module")).createRequire(import.meta.url);
|
|
1391
|
+
const Database = req("better-sqlite3");
|
|
1392
|
+
const db = new Database(embDb, { readonly: true });
|
|
1393
|
+
const entries = db.prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
1394
|
+
const files = db.prepare("SELECT COUNT(*) AS c FROM file_mtimes").get().c;
|
|
1395
|
+
const sizeMb = (statSync(embDb).size / 1024 / 1024).toFixed(0);
|
|
1396
|
+
db.close();
|
|
1397
|
+
console.log(` β
Vector store: ${entries} entries across ${files} sources (${sizeMb} MB SQLite)`);
|
|
1398
|
+
} catch (err) {
|
|
1399
|
+
console.log(` β οΈ Vector store exists but unreadable: ${err.message}`);
|
|
1400
|
+
}
|
|
1401
|
+
} else if (existsSync(embJson)) {
|
|
1402
|
+
const sizeMb = (statSync(embJson).size / 1024 / 1024).toFixed(0);
|
|
1403
|
+
console.log(` β οΈ Legacy JSON index found (${sizeMb} MB) β will auto-migrate to SQLite on next bot start`);
|
|
1404
|
+
} else if (existsSync(embBakSqlite)) {
|
|
1405
|
+
console.log(` β
Migration to SQLite already done (legacy JSON kept as .bak-pre-sqlite)`);
|
|
1406
|
+
} else {
|
|
1407
|
+
console.log(` βΉοΈ No vector store yet β will be built on first message`);
|
|
1408
|
+
}
|
|
1409
|
+
|
|
1364
1410
|
// ββ Extras ββ
|
|
1365
1411
|
console.log("\n Extras:");
|
|
1366
1412
|
|
|
package/dist/services/embeddings-migration.js
CHANGED
|
@@ -4,15 +4,25 @@
|
|
|
4
4
|
* Triggered on startup if .embeddings.json exists but .embeddings.db does not.
|
|
5
5
|
* Idempotent: skips silently if the DB is already populated.
|
|
6
6
|
*
|
|
7
|
+
* Hardening (v4.20.1):
|
|
8
|
+
* - Lazy require of better-sqlite3 — missing native binary degrades to a clear
|
|
9
|
+
* warning + skip (bot keeps running with semantic search disabled until
|
|
10
|
+
* the user fixes their install).
|
|
11
|
+
* - Pre-flight disk-space check: refuses to start if free space < 2× source.
|
|
12
|
+
* - Progress logging every 5 000 entries on large (> 5 000 entry) indexes.
|
|
13
|
+
* - Corrupt source JSON is renamed to `.broken.<timestamp>` so the next run
|
|
14
|
+
* doesn't loop on the same parse error.
|
|
15
|
+
*
|
|
7
16
|
* Safety:
|
|
8
17
|
* - Source JSON is renamed to .embeddings.json.bak-pre-sqlite (kept on disk).
|
|
9
|
-
* - Entry counts are compared after import; mismatch β throw, leaving the
|
|
10
|
-
*
|
|
18
|
+
* - Entry counts are compared after import; mismatch → throw, leaving the
|
|
19
|
+
* half-written DB removed and the source JSON untouched.
|
|
11
20
|
*/
|
|
12
21
|
import fs from "fs";
|
|
13
22
|
import path from "path";
|
|
14
|
-
import
|
|
23
|
+
import { createRequire } from "module";
|
|
15
24
|
import { EMBEDDINGS_IDX, EMBEDDINGS_DB } from "../paths.js";
|
|
25
|
+
const cjsRequire = createRequire(import.meta.url);
|
|
16
26
|
function vectorToBlob(v) {
|
|
17
27
|
const f32 = new Float32Array(v);
|
|
18
28
|
return Buffer.from(f32.buffer, f32.byteOffset, f32.byteLength);
|
|
@@ -20,14 +30,58 @@ function vectorToBlob(v) {
|
|
|
20
30
|
/**
 * Reports whether the one-time JSON → SQLite migration still has to run:
 * the legacy JSON index exists and the SQLite database file does not.
 *
 * @returns {boolean} true when the migration should be attempted.
 */
export function shouldMigrateEmbeddingsToSqlite() {
    const hasLegacyIndex = fs.existsSync(EMBEDDINGS_IDX);
    if (!hasLegacyIndex) {
        return false;
    }
    const hasSqliteDb = fs.existsSync(EMBEDDINGS_DB);
    return !hasSqliteDb;
}
|
|
33
|
+
/**
 * Best-effort probe of the free space on the volume that holds `forPath`.
 *
 * When `fs.statfsSync` is missing or throws, the result is Infinity, so the
 * caller's "enough space?" comparison passes and the migration is not
 * blocked by a failed probe. (Per the original note, Node 18.15+ ships
 * statfsSync on all major platforms.)
 *
 * @param {string} forPath - A path on the volume to inspect.
 * @returns {number} Available blocks × block size, or Infinity when unknown.
 */
function freeBytesOnVolume(forPath) {
    if (typeof fs.statfsSync !== "function") {
        return Number.POSITIVE_INFINITY;
    }
    // statfs fields may be bigint; normalise to number before multiplying.
    const asNumber = (field) => (typeof field === "bigint" ? Number(field) : field);
    try {
        const info = fs.statfsSync(forPath);
        const availableBlocks = asNumber(info.bavail);
        const blockSize = asNumber(info.bsize);
        return availableBlocks * blockSize;
    } catch {
        return Number.POSITIVE_INFINITY;
    }
}
|
|
23
52
|
/**
|
|
24
53
|
* Run the migration. Returns the entry count migrated, or null if skipped.
|
|
25
54
|
*/
|
|
26
55
|
export function migrateEmbeddingsToSqlite() {
|
|
27
56
|
if (!shouldMigrateEmbeddingsToSqlite())
|
|
28
57
|
return null;
|
|
29
|
-
|
|
58
|
+
// ββ Pre-flight: better-sqlite3 loadable? βββββββββββββββββββββββββββββββββββ
|
|
59
|
+
let Database;
|
|
60
|
+
try {
|
|
61
|
+
Database = cjsRequire("better-sqlite3");
|
|
62
|
+
}
|
|
63
|
+
catch (err) {
|
|
64
|
+
console.warn("β οΈ Embeddings migration skipped: better-sqlite3 native binary unavailable. " +
|
|
65
|
+
"Bot continues with legacy JSON index. Fix: `npm rebuild better-sqlite3` " +
|
|
66
|
+
"or reinstall alvin-bot. Underlying error:", err instanceof Error ? err.message : err);
|
|
67
|
+
return null;
|
|
68
|
+
}
|
|
30
69
|
const sourceSize = fs.statSync(EMBEDDINGS_IDX).size;
|
|
70
|
+
// ββ Pre-flight: enough free space? βββββββββββββββββββββββββββββββββββββββββ
|
|
71
|
+
const targetDir = path.dirname(EMBEDDINGS_DB);
|
|
72
|
+
fs.mkdirSync(targetDir, { recursive: true });
|
|
73
|
+
const free = freeBytesOnVolume(targetDir);
|
|
74
|
+
// We need source + about half of source for the SQLite file, plus headroom
|
|
75
|
+
// for WAL during the transaction. Demand 2Γ source size to be comfortable.
|
|
76
|
+
const required = sourceSize * 2;
|
|
77
|
+
if (free < required) {
|
|
78
|
+
console.warn(`β οΈ Embeddings migration skipped: insufficient free disk space on ${targetDir}. ` +
|
|
79
|
+
`Need ~${(required / 1024 / 1024).toFixed(0)} MB, have ${(free / 1024 / 1024).toFixed(0)} MB. ` +
|
|
80
|
+
`Free up some space and restart the bot to retry.`);
|
|
81
|
+
return null;
|
|
82
|
+
}
|
|
83
|
+
// ββ Read & parse source ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
84
|
+
const t0 = Date.now();
|
|
31
85
|
console.log(`π¦ Migrating embeddings JSON (${(sourceSize / 1024 / 1024).toFixed(0)} MB) β SQLite...`);
|
|
32
86
|
const raw = fs.readFileSync(EMBEDDINGS_IDX, "utf-8");
|
|
33
87
|
let legacy;
|
|
@@ -35,10 +89,21 @@ export function migrateEmbeddingsToSqlite() {
|
|
|
35
89
|
legacy = JSON.parse(raw);
|
|
36
90
|
}
|
|
37
91
|
catch (err) {
|
|
38
|
-
|
|
92
|
+
// Move the broken JSON aside so we don't try to migrate it again next boot.
|
|
93
|
+
const broken = `${EMBEDDINGS_IDX}.broken.${Date.now()}`;
|
|
94
|
+
try {
|
|
95
|
+
fs.renameSync(EMBEDDINGS_IDX, broken);
|
|
96
|
+
console.error(`β Embeddings migration: source JSON is corrupt β renamed to ${path.basename(broken)} ` +
|
|
97
|
+
`and skipped. The bot will rebuild the index from scratch on first search ` +
|
|
98
|
+
`(this may incur Google API calls). Underlying parse error:`, err);
|
|
99
|
+
}
|
|
100
|
+
catch (renameErr) {
|
|
101
|
+
console.error("β Embeddings migration: source JSON is corrupt AND could not be renamed:", err, "Rename error:", renameErr);
|
|
102
|
+
}
|
|
39
103
|
return null;
|
|
40
104
|
}
|
|
41
|
-
|
|
105
|
+
const validEntries = (legacy.entries ?? []).filter(e => Array.isArray(e.vector) && e.vector.length > 0);
|
|
106
|
+
// ββ Write DB βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
42
107
|
const db = new Database(EMBEDDINGS_DB);
|
|
43
108
|
try {
|
|
44
109
|
db.pragma("journal_mode = WAL");
|
|
@@ -62,9 +127,9 @@ export function migrateEmbeddingsToSqlite() {
|
|
|
62
127
|
CREATE INDEX IF NOT EXISTS idx_entries_source ON entries(source);
|
|
63
128
|
`);
|
|
64
129
|
const setMeta = db.prepare("INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value");
|
|
65
|
-
setMeta.run("model", legacy.model);
|
|
130
|
+
setMeta.run("model", legacy.model || "gemini-embedding-001");
|
|
66
131
|
setMeta.run("schemaVersion", "1");
|
|
67
|
-
setMeta.run("lastReindex", String(legacy.lastReindex));
|
|
132
|
+
setMeta.run("lastReindex", String(legacy.lastReindex || 0));
|
|
68
133
|
setMeta.run("migratedFromJson", String(Date.now()));
|
|
69
134
|
const insMtime = db.prepare("INSERT INTO file_mtimes (source, mtime_ms) VALUES (?, ?) ON CONFLICT(source) DO UPDATE SET mtime_ms = excluded.mtime_ms");
|
|
70
135
|
const writeMtimes = db.transaction((rows) => {
|
|
@@ -73,38 +138,52 @@ export function migrateEmbeddingsToSqlite() {
|
|
|
73
138
|
});
|
|
74
139
|
writeMtimes(Object.entries(legacy.fileMtimes ?? {}));
|
|
75
140
|
const insEntry = db.prepare("INSERT INTO entries (id, source, text, vector, indexed_at) VALUES (?, ?, ?, ?, ?)");
|
|
76
|
-
|
|
141
|
+
// Write entries in chunks of 1 000 so we can log progress on huge indexes.
|
|
142
|
+
const CHUNK = 1000;
|
|
143
|
+
const total = validEntries.length;
|
|
144
|
+
let written = 0;
|
|
145
|
+
const writeChunk = db.transaction((rows) => {
|
|
77
146
|
for (const e of rows) {
|
|
78
|
-
if (!Array.isArray(e.vector) || e.vector.length === 0)
|
|
79
|
-
continue;
|
|
80
147
|
insEntry.run(e.id, e.source, e.text, vectorToBlob(e.vector), e.indexedAt);
|
|
81
148
|
}
|
|
82
149
|
});
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
150
|
+
for (let i = 0; i < total; i += CHUNK) {
|
|
151
|
+
const slice = validEntries.slice(i, i + CHUNK);
|
|
152
|
+
writeChunk(slice);
|
|
153
|
+
written += slice.length;
|
|
154
|
+
if (total > 5000 && (written === total || written % 5000 === 0)) {
|
|
155
|
+
console.log(` β¦migrated ${written} / ${total} entries (${Math.round((written / total) * 100)} %)`);
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
const writtenCount = db.prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
159
|
+
if (writtenCount !== validEntries.length) {
|
|
160
|
+
throw new Error(`Entry-count mismatch after migration: expected ${validEntries.length}, got ${writtenCount}`);
|
|
88
161
|
}
|
|
89
162
|
db.close();
|
|
90
|
-
// Move source JSON aside so we never re-migrate
|
|
163
|
+
// ββ Move source JSON aside so we never re-migrate ββββββββββββββββββββββββ
|
|
91
164
|
const bak = `${EMBEDDINGS_IDX}.bak-pre-sqlite`;
|
|
92
165
|
try {
|
|
93
166
|
fs.renameSync(EMBEDDINGS_IDX, bak);
|
|
94
167
|
}
|
|
95
168
|
catch (err) {
|
|
96
|
-
console.warn("β οΈ Could not rename source JSON:", err);
|
|
169
|
+
console.warn("β οΈ Could not rename source JSON (migration still succeeded):", err);
|
|
97
170
|
}
|
|
98
171
|
const targetSize = fs.statSync(EMBEDDINGS_DB).size;
|
|
99
172
|
const dt = Date.now() - t0;
|
|
100
|
-
console.log(`β
Embeddings migrated: ${
|
|
101
|
-
return { entries:
|
|
173
|
+
console.log(`β
Embeddings migrated: ${writtenCount} entries, ${(sourceSize / 1024 / 1024).toFixed(0)} MB JSON β ${(targetSize / 1024 / 1024).toFixed(0)} MB SQLite in ${dt} ms`);
|
|
174
|
+
return { entries: writtenCount, sourceMb: sourceSize / 1024 / 1024, targetMb: targetSize / 1024 / 1024 };
|
|
102
175
|
}
|
|
103
176
|
catch (err) {
|
|
104
177
|
db.close();
|
|
105
|
-
// Remove half-written DB so the next boot retries cleanly.
|
|
178
|
+
// Remove half-written DB so the next boot retries cleanly with the original JSON intact.
|
|
106
179
|
try {
|
|
107
180
|
fs.unlinkSync(EMBEDDINGS_DB);
|
|
181
|
+
// also unlink WAL/SHM if present
|
|
182
|
+
for (const ext of ["-wal", "-shm"]) {
|
|
183
|
+
const p = `${EMBEDDINGS_DB}${ext}`;
|
|
184
|
+
if (fs.existsSync(p))
|
|
185
|
+
fs.unlinkSync(p);
|
|
186
|
+
}
|
|
108
187
|
}
|
|
109
188
|
catch {
|
|
110
189
|
/* nothing to clean */
|
|
package/dist/services/embeddings.js
CHANGED
|
@@ -19,10 +19,36 @@ import fs from "fs";
|
|
|
19
19
|
import path from "path";
|
|
20
20
|
import { resolve } from "path";
|
|
21
21
|
import os from "os";
|
|
22
|
-
import
|
|
22
|
+
import { createRequire } from "module";
|
|
23
23
|
import { config } from "../config.js";
|
|
24
24
|
import { MEMORY_DIR, MEMORY_FILE, EMBEDDINGS_DB } from "../paths.js";
|
|
25
25
|
import { ASSETS_DIR, ASSETS_INDEX_MD } from "../paths.js";
|
|
26
|
+
// Memoised outcome of the single attempt to load the better-sqlite3 module.
// `sqliteLoadAttempted` makes sure the require (and its warning) runs only once.
let SqliteClass = null;
let sqliteLoadAttempted = false;
let sqliteLoadError = null;
// CommonJS-style require so the native module is loaded lazily on first use
// rather than through a static ESM import.
const cjsRequire = createRequire(import.meta.url);
/**
 * Lazily loads better-sqlite3 and caches the outcome.
 *
 * The first call tries to require the module. On failure the error is stored
 * in `sqliteLoadError`, one warning with the rebuild hint is logged, and null
 * is returned. Later calls return the cached result without retrying or
 * logging again.
 *
 * @returns {*} The better-sqlite3 export, or null when it could not be loaded.
 */
function loadSqlite() {
    if (sqliteLoadAttempted)
        return SqliteClass;
    sqliteLoadAttempted = true;
    try {
        SqliteClass = cjsRequire("better-sqlite3");
        return SqliteClass;
    }
    catch (err) {
        sqliteLoadError = err instanceof Error ? err : new Error(String(err));
        console.warn("⚠️ better-sqlite3 native binary unavailable — embeddings disabled. " +
            "Bot continues without semantic memory search. Fix: rebuild deps with " +
            "`cd $(npm root -g)/alvin-bot && npm rebuild better-sqlite3` or reinstall " +
            "alvin-bot. Underlying error: " +
            sqliteLoadError.message);
        return null;
    }
}
|
|
48
|
+
/**
 * Reports whether the SQLite embeddings backend can be used.
 *
 * Triggers the lazy load first, then reads the cached outcome.
 *
 * @returns {{available: boolean, error: (string|null)}} `available` is true
 *   when the native module was loaded; `error` carries the load failure
 *   message, or null when there is none.
 */
export function getEmbeddingsBackendStatus() {
    loadSqlite();
    const available = SqliteClass !== null;
    let error = null;
    if (sqliteLoadError !== null && sqliteLoadError !== undefined) {
        error = sqliteLoadError.message ?? null;
    }
    return { available, error };
}
|
|
26
52
|
// Hub memory directory (Claude Hub β read-only, additional context)
|
|
27
53
|
const HUB_MEMORY_DIR = resolve(os.homedir(), ".claude", "hub", "MEMORY");
|
|
28
54
|
// ββ Constants βββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -47,9 +73,16 @@ function blobToVector(b) {
|
|
|
47
73
|
}
|
|
48
74
|
// ββ DB lifecycle ββββββββββββββββββββββββββββββββββββββββ
|
|
49
75
|
let dbInstance = null;
|
|
76
|
+
/**
|
|
77
|
+
* Returns the live DB handle, or null when better-sqlite3 isn't loadable.
|
|
78
|
+
* Callers must handle the null case (treat as "search unavailable").
|
|
79
|
+
*/
|
|
50
80
|
function db() {
|
|
51
81
|
if (dbInstance)
|
|
52
82
|
return dbInstance;
|
|
83
|
+
const Database = loadSqlite();
|
|
84
|
+
if (!Database)
|
|
85
|
+
return null;
|
|
53
86
|
// Ensure directory exists (handles fresh installs).
|
|
54
87
|
fs.mkdirSync(path.dirname(EMBEDDINGS_DB), { recursive: true });
|
|
55
88
|
dbInstance = new Database(EMBEDDINGS_DB);
|
|
@@ -88,25 +121,35 @@ export function closeEmbeddingsDb() {
|
|
|
88
121
|
dbInstance = null;
|
|
89
122
|
}
|
|
90
123
|
}
|
|
124
|
+
/**
 * Strict variant of `db()` for helpers that cannot work without a handle.
 *
 * @returns {*} The handle returned by `db()`.
 * @throws {Error} When `db()` returns a falsy value, i.e. no database handle
 *   is available.
 */
function dbOrThrow() {
    const d = db();
    if (!d) {
        throw new Error("Embeddings DB unavailable — better-sqlite3 native module not loaded");
    }
    return d;
}
|
|
91
132
|
// ββ Meta helpers ββββββββββββββββββββββββββββββββββββββββ
|
|
92
133
|
/**
 * Reads one value from the `meta` key/value table.
 *
 * @param {string} key - Meta key to look up.
 * @returns {*} The stored value, or null when the key has no row (or the
 *   row's value is null/undefined).
 * @throws {Error} Propagated from `dbOrThrow()` when no DB handle is available.
 */
function getMeta(key) {
    const lookup = dbOrThrow().prepare("SELECT value FROM meta WHERE key = ?");
    const hit = lookup.get(key);
    if (hit === undefined || hit === null) {
        return null;
    }
    return hit.value ?? null;
}
|
|
96
137
|
/**
 * Upserts one row in the `meta` key/value table: inserts the pair, or
 * overwrites the value when the key already exists.
 *
 * @param {string} key - Meta key.
 * @param {*} value - Value to store under that key.
 * @throws {Error} Propagated from `dbOrThrow()` when no DB handle is available.
 */
function setMeta(key, value) {
    const upsert = dbOrThrow().prepare("INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value");
    upsert.run(key, value);
}
|
|
101
142
|
/**
 * Loads every row of the `file_mtimes` table into a plain object.
 *
 * @returns {Object} Map of `source` → `mtime_ms` for all rows.
 * @throws {Error} Propagated from `dbOrThrow()` when no DB handle is available.
 */
function getFileMtimes() {
    const select = dbOrThrow().prepare("SELECT source, mtime_ms FROM file_mtimes");
    const mtimesBySource = {};
    select.all().forEach((record) => {
        mtimesBySource[record.source] = record.mtime_ms;
    });
    return mtimesBySource;
}
|
|
108
151
|
/**
 * Upserts the modification time recorded for one source in `file_mtimes`:
 * inserts the row, or overwrites `mtime_ms` when the source already exists.
 *
 * @param {string} source - Value for the `source` column.
 * @param {number} mtimeMs - Value for the `mtime_ms` column.
 * @throws {Error} Propagated from `dbOrThrow()` when no DB handle is available.
 */
function setFileMtime(source, mtimeMs) {
    const upsert = dbOrThrow().prepare("INSERT INTO file_mtimes (source, mtime_ms) VALUES (?, ?) ON CONFLICT(source) DO UPDATE SET mtime_ms = excluded.mtime_ms");
    upsert.run(source, mtimeMs);
}
|
|
@@ -310,14 +353,17 @@ function getStaleFiles() {
|
|
|
310
353
|
}
|
|
311
354
|
// ββ Public API ββββββββββββββββββββββββββββββββββββββββββ
|
|
312
355
|
export async function reindexMemory(force = false) {
|
|
356
|
+
if (!loadSqlite()) {
|
|
357
|
+
return { indexed: 0, total: 0 };
|
|
358
|
+
}
|
|
313
359
|
const filesToIndex = force ? getIndexableFiles() : getStaleFiles();
|
|
314
360
|
if (filesToIndex.length === 0) {
|
|
315
|
-
const total =
|
|
361
|
+
const total = dbOrThrow().prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
316
362
|
return { indexed: 0, total };
|
|
317
363
|
}
|
|
318
364
|
// Drop existing entries for files being reindexed (per-source DELETE is O(log n) thanks to idx).
|
|
319
|
-
const delStmt =
|
|
320
|
-
const dropOld =
|
|
365
|
+
const delStmt = dbOrThrow().prepare("DELETE FROM entries WHERE source = ?");
|
|
366
|
+
const dropOld = dbOrThrow().transaction((sources) => {
|
|
321
367
|
for (const s of sources)
|
|
322
368
|
delStmt.run(s);
|
|
323
369
|
});
|
|
@@ -339,7 +385,7 @@ export async function reindexMemory(force = false) {
|
|
|
339
385
|
}
|
|
340
386
|
if (allChunks.length === 0) {
|
|
341
387
|
// Even with zero chunks, keep mtimes in sync so we don't re-walk on next run.
|
|
342
|
-
const updMtime =
|
|
388
|
+
const updMtime = dbOrThrow().transaction((files) => {
|
|
343
389
|
for (const f of files) {
|
|
344
390
|
try {
|
|
345
391
|
setFileMtime(f.relativePath, fs.statSync(f.path).mtimeMs);
|
|
@@ -350,16 +396,16 @@ export async function reindexMemory(force = false) {
|
|
|
350
396
|
}
|
|
351
397
|
});
|
|
352
398
|
updMtime(filesToIndex);
|
|
353
|
-
const total =
|
|
399
|
+
const total = dbOrThrow().prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
354
400
|
return { indexed: 0, total };
|
|
355
401
|
}
|
|
356
402
|
// Get embeddings for all chunks (network).
|
|
357
403
|
const texts = allChunks.map(c => c.text);
|
|
358
404
|
const vectors = await getEmbeddings(texts);
|
|
359
405
|
// Single transaction for all writes.
|
|
360
|
-
const insertStmt =
|
|
406
|
+
const insertStmt = dbOrThrow().prepare("INSERT INTO entries (id, source, text, vector, indexed_at) VALUES (?, ?, ?, ?, ?) " +
|
|
361
407
|
"ON CONFLICT(id) DO UPDATE SET source=excluded.source, text=excluded.text, vector=excluded.vector, indexed_at=excluded.indexed_at");
|
|
362
|
-
const writeAll =
|
|
408
|
+
const writeAll = dbOrThrow().transaction((rows) => {
|
|
363
409
|
for (const r of rows) {
|
|
364
410
|
insertStmt.run(r.id, r.source, r.text, r.vector, r.indexedAt);
|
|
365
411
|
}
|
|
@@ -373,7 +419,7 @@ export async function reindexMemory(force = false) {
|
|
|
373
419
|
indexedAt: now,
|
|
374
420
|
})));
|
|
375
421
|
// Update mtimes for the files we just (re-)indexed.
|
|
376
|
-
const updMtime =
|
|
422
|
+
const updMtime = dbOrThrow().transaction((files) => {
|
|
377
423
|
for (const f of files) {
|
|
378
424
|
try {
|
|
379
425
|
setFileMtime(f.relativePath, fs.statSync(f.path).mtimeMs);
|
|
@@ -385,20 +431,23 @@ export async function reindexMemory(force = false) {
|
|
|
385
431
|
});
|
|
386
432
|
updMtime(filesToIndex);
|
|
387
433
|
setMeta("lastReindex", String(now));
|
|
388
|
-
const total =
|
|
434
|
+
const total = dbOrThrow().prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
389
435
|
return { indexed: allChunks.length, total };
|
|
390
436
|
}
|
|
391
437
|
export async function searchMemory(query, topK = 5, minScore = 0.3) {
|
|
438
|
+
if (!loadSqlite()) {
|
|
439
|
+
return [];
|
|
440
|
+
}
|
|
392
441
|
// Auto-index if empty.
|
|
393
|
-
const total =
|
|
442
|
+
const total = dbOrThrow().prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
394
443
|
if (total === 0) {
|
|
395
444
|
await reindexMemory();
|
|
396
|
-
const after =
|
|
445
|
+
const after = dbOrThrow().prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
397
446
|
if (after === 0)
|
|
398
447
|
return [];
|
|
399
448
|
}
|
|
400
449
|
const queryVector = Float32Array.from(await getQueryEmbedding(query));
|
|
401
|
-
const rows =
|
|
450
|
+
const rows = dbOrThrow().prepare("SELECT id, source, text, vector FROM entries").all();
|
|
402
451
|
const scored = [];
|
|
403
452
|
for (const row of rows) {
|
|
404
453
|
const v = blobToVector(row.vector);
|
|
@@ -411,11 +460,14 @@ export async function searchMemory(query, topK = 5, minScore = 0.3) {
|
|
|
411
460
|
return scored.slice(0, topK);
|
|
412
461
|
}
|
|
413
462
|
export async function initEmbeddings() {
|
|
463
|
+
if (!loadSqlite()) {
|
|
464
|
+
return; // already warned via loadSqlite
|
|
465
|
+
}
|
|
414
466
|
try {
|
|
415
467
|
db(); // Open & migrate schema.
|
|
416
468
|
const stale = getStaleFiles();
|
|
417
469
|
if (stale.length === 0) {
|
|
418
|
-
const total =
|
|
470
|
+
const total = dbOrThrow().prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
419
471
|
if (total > 0)
|
|
420
472
|
return;
|
|
421
473
|
}
|
|
@@ -433,9 +485,12 @@ export function getIndexStats() {
|
|
|
433
485
|
let files = 0;
|
|
434
486
|
let lastReindex = 0;
|
|
435
487
|
let sizeBytes = 0;
|
|
488
|
+
if (!loadSqlite()) {
|
|
489
|
+
return { entries, files, lastReindex, sizeBytes };
|
|
490
|
+
}
|
|
436
491
|
try {
|
|
437
|
-
entries =
|
|
438
|
-
files =
|
|
492
|
+
entries = dbOrThrow().prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
493
|
+
files = dbOrThrow().prepare("SELECT COUNT(*) AS c FROM file_mtimes").get().c;
|
|
439
494
|
const meta = getMeta("lastReindex");
|
|
440
495
|
if (meta)
|
|
441
496
|
lastReindex = Number(meta);
|