alvin-bot 4.20.0 → 4.20.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/bin/cli.js +78 -1
- package/dist/config.js +14 -0
- package/dist/platforms/slack.js +41 -0
- package/dist/services/embeddings-migration.js +100 -21
- package/dist/services/embeddings.js +75 -20
- package/dist/web/server.js +13 -2
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,35 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to Alvin Bot are documented here.
|
|
4
4
|
|
|
5
|
+
## [4.20.2] — 2026-05-04
|
|
6
|
+
|
|
7
|
+
### 🛡️ Security: Web UI loopback by default + Slack caller allowlist
|
|
8
|
+
|
|
9
|
+
Two real attack surfaces closed.
|
|
10
|
+
|
|
11
|
+
**Web UI binds to 127.0.0.1 by default.** Previous versions called `server.listen(port)` with no host argument, which Node interprets as "listen on all interfaces". Combined with an empty `WEB_PASSWORD` (which the login route silently treats as "anyone can log in"), this meant any device on the same LAN could log into the bot's Web UI and reach every authenticated endpoint — user list, memory contents, model switch, the WebSocket chat, etc. New default: bind to `127.0.0.1`. To restore LAN access, set `WEB_HOST=0.0.0.0` explicitly in `.env`. If both `WEB_HOST=0.0.0.0` and an empty `WEB_PASSWORD` are present, the bot logs a loud warning on startup.
|
|
12
|
+
|
|
13
|
+
**Slack caller allowlist.** New `SLACK_ALLOWED_USERS` env var: comma-separated list of Slack user IDs allowed to talk to the bot (DMs, @mentions, slash commands). Empty list keeps the legacy behaviour — any workspace member can interact, which is safe iff the workspace is private to the operator. To find your Slack user ID: open your profile in Slack → "..." → "Copy member ID", or just message the bot once and read the line `[slack] caller discovered: user=U… — to lock the bot to specific users, add to .env: SLACK_ALLOWED_USERS=U…` from the logs (we log each unique caller once when the allowlist is empty).
|
|
14
|
+
|
|
15
|
+
**`alvin-bot doctor` now reports both.** New `Web UI:` and `Slack:` sections flag insecure combos and show whether an allowlist is active.
|
|
16
|
+
|
|
17
|
+
No schema or behaviour changes for users who already have `WEB_PASSWORD` set or only use the bot via Telegram. Telegram allowlist (`ALLOWED_USERS`) is unchanged.
|
|
18
|
+
|
|
19
|
+
## [4.20.1] — 2026-05-03
|
|
20
|
+
|
|
21
|
+
### 🛡️ Hardening for the v4.20.0 SQLite migration
|
|
22
|
+
|
|
23
|
+
The v4.20 migration is fully automatic on first start, but a few things could go wrong on user installations that the maintainer instance never hits. v4.20.1 plugs each of them.
|
|
24
|
+
|
|
25
|
+
- **Lazy native binary load.** `better-sqlite3` is now `require()`-d inside `embeddings.ts`, not at module import time. If the prebuilt isn't available for the user's platform and a build-from-source fails (exotic Node version, missing toolchain, glibc mismatch), the bot logs a single clear warning with the exact rebuild command, and **keeps running** — only semantic memory search is disabled until the user fixes their install. Previously this would have crashed bot startup.
|
|
26
|
+
- **Pre-flight disk-space check.** Migration refuses to start unless the volume holding `~/.alvin-bot/memory/` has at least 2× the source JSON's size free (covers source + target + WAL during the transaction). Skipped migration leaves the JSON intact for retry on the next boot once space is free.
|
|
27
|
+
- **Progress logging.** On indexes larger than ~5 000 entries, the migration logs `…migrated N / M entries (P %)` every 5 000 rows so the user can see it isn't stuck.
|
|
28
|
+
- **Corrupt JSON recovery.** If `JSON.parse` of `.embeddings.json` throws, the file is moved aside to `.embeddings.json.broken.<timestamp>` and the next bot start treats this as a fresh install (rebuild-from-source on first search). No more boot-loop on a damaged index.
|
|
29
|
+
- **`alvin-bot doctor` shows memory health.** New "Memory:" section reports: native binary loadable, vector-store entry count + size, or — for not-yet-migrated installs — the legacy JSON's size and a hint that the next start will migrate.
|
|
30
|
+
- **Cleanup on failed migration.** WAL/SHM sidecars are removed alongside the half-written `.embeddings.db` so the next attempt starts from a clean slate.
|
|
31
|
+
|
|
32
|
+
No schema or API changes — drop-in over v4.20.0.
|
|
33
|
+
|
|
5
34
|
## [4.20.0] — 2026-05-03
|
|
6
35
|
|
|
7
36
|
### π Embeddings: JSON β SQLite
|
package/bin/cli.js
CHANGED
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
*/
|
|
18
18
|
|
|
19
19
|
import { createInterface } from "readline";
|
|
20
|
-
import { existsSync, writeFileSync, readFileSync, mkdirSync, copyFileSync, readdirSync } from "fs";
|
|
20
|
+
import { existsSync, writeFileSync, readFileSync, mkdirSync, copyFileSync, readdirSync, statSync } from "fs";
|
|
21
21
|
import { resolve, join } from "path";
|
|
22
22
|
import { homedir } from "os";
|
|
23
23
|
import { execSync } from "child_process";
|
|
@@ -1361,6 +1361,83 @@ async function doctor() {
|
|
|
1361
1361
|
console.log(` β ALLOWED_USERS not set (nobody can message the bot)`);
|
|
1362
1362
|
}
|
|
1363
1363
|
|
|
1364
|
+
// ββ Web UI security ββ
|
|
1365
|
+
console.log("\n Web UI:");
|
|
1366
|
+
const webHost = getEnv("WEB_HOST") || "127.0.0.1";
|
|
1367
|
+
const webPw = getEnv("WEB_PASSWORD");
|
|
1368
|
+
if (webHost === "127.0.0.1" || webHost === "::1") {
|
|
1369
|
+
console.log(` β
WEB_HOST=${webHost} β loopback only (LAN unreachable)`);
|
|
1370
|
+
} else if (webHost === "0.0.0.0" || webHost === "*") {
|
|
1371
|
+
if (webPw) {
|
|
1372
|
+
console.log(` β
WEB_HOST=${webHost} (LAN-reachable) + WEB_PASSWORD set`);
|
|
1373
|
+
} else {
|
|
1374
|
+
console.log(` β WEB_HOST=${webHost} (LAN-reachable) WITHOUT WEB_PASSWORD β anyone on LAN can log in`);
|
|
1375
|
+
console.log(` Fix: set WEB_PASSWORD in .env, or set WEB_HOST=127.0.0.1`);
|
|
1376
|
+
}
|
|
1377
|
+
} else {
|
|
1378
|
+
console.log(` βΉοΈ WEB_HOST=${webHost}${webPw ? " + WEB_PASSWORD set" : " β WEB_PASSWORD empty"}`);
|
|
1379
|
+
}
|
|
1380
|
+
|
|
1381
|
+
// ββ Slack caller allowlist ββ
|
|
1382
|
+
if (getEnv("SLACK_BOT_TOKEN")) {
|
|
1383
|
+
console.log("\n Slack:");
|
|
1384
|
+
const slackAllow = getEnv("SLACK_ALLOWED_USERS");
|
|
1385
|
+
if (slackAllow) {
|
|
1386
|
+
const ids = slackAllow.split(",").map(s => s.trim()).filter(Boolean);
|
|
1387
|
+
console.log(` β
SLACK_ALLOWED_USERS: ${ids.length} user${ids.length === 1 ? "" : "s"} (caller allowlist active)`);
|
|
1388
|
+
} else {
|
|
1389
|
+
console.log(` β οΈ SLACK_ALLOWED_USERS not set β any workspace member can talk to the bot`);
|
|
1390
|
+
console.log(` Safe iff the Slack workspace is private to you. Otherwise add e.g.:`);
|
|
1391
|
+
console.log(` SLACK_ALLOWED_USERS=U0ABC123,U0DEF456`);
|
|
1392
|
+
}
|
|
1393
|
+
}
|
|
1394
|
+
|
|
1395
|
+
// ββ Memory (semantic search backend) ββ
|
|
1396
|
+
console.log("\n Memory:");
|
|
1397
|
+
const embJson = resolve(DATA_DIR, "memory", ".embeddings.json");
|
|
1398
|
+
const embDb = resolve(DATA_DIR, "memory", ".embeddings.db");
|
|
1399
|
+
const embBakSqlite = resolve(DATA_DIR, "memory", ".embeddings.json.bak-pre-sqlite");
|
|
1400
|
+
|
|
1401
|
+
// better-sqlite3 native binary loadable?
|
|
1402
|
+
let sqliteOk = false;
|
|
1403
|
+
let sqliteErr = "";
|
|
1404
|
+
try {
|
|
1405
|
+
const req = (await import("module")).createRequire(import.meta.url);
|
|
1406
|
+
req("better-sqlite3");
|
|
1407
|
+
sqliteOk = true;
|
|
1408
|
+
} catch (err) {
|
|
1409
|
+
sqliteErr = err instanceof Error ? err.message : String(err);
|
|
1410
|
+
}
|
|
1411
|
+
if (sqliteOk) {
|
|
1412
|
+
console.log(` β
better-sqlite3 native binary loadable`);
|
|
1413
|
+
} else {
|
|
1414
|
+
console.log(` β better-sqlite3 native binary not loadable β semantic search disabled`);
|
|
1415
|
+
console.log(` Fix: cd $(npm root -g)/alvin-bot && npm rebuild better-sqlite3`);
|
|
1416
|
+
console.log(` Detail: ${sqliteErr.split("\n")[0]}`);
|
|
1417
|
+
}
|
|
1418
|
+
|
|
1419
|
+
if (sqliteOk && existsSync(embDb)) {
|
|
1420
|
+
try {
|
|
1421
|
+
const req = (await import("module")).createRequire(import.meta.url);
|
|
1422
|
+
const Database = req("better-sqlite3");
|
|
1423
|
+
const db = new Database(embDb, { readonly: true });
|
|
1424
|
+
const entries = db.prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
1425
|
+
const files = db.prepare("SELECT COUNT(*) AS c FROM file_mtimes").get().c;
|
|
1426
|
+
const sizeMb = (statSync(embDb).size / 1024 / 1024).toFixed(0);
|
|
1427
|
+
db.close();
|
|
1428
|
+
console.log(` β
Vector store: ${entries} entries across ${files} sources (${sizeMb} MB SQLite)`);
|
|
1429
|
+
} catch (err) {
|
|
1430
|
+
console.log(` β οΈ Vector store exists but unreadable: ${err.message}`);
|
|
1431
|
+
}
|
|
1432
|
+
} else if (existsSync(embJson)) {
|
|
1433
|
+
const sizeMb = (statSync(embJson).size / 1024 / 1024).toFixed(0);
|
|
1434
|
+
console.log(` β οΈ Legacy JSON index found (${sizeMb} MB) β will auto-migrate to SQLite on next bot start`);
|
|
1435
|
+
} else if (existsSync(embBakSqlite)) {
|
|
1436
|
+
console.log(` β
Migration to SQLite already done (legacy JSON kept as .bak-pre-sqlite)`);
|
|
1437
|
+
} else {
|
|
1438
|
+
console.log(` βΉοΈ No vector store yet β will be built on first message`);
|
|
1439
|
+
}
|
|
1440
|
+
|
|
1364
1441
|
// ββ Extras ββ
|
|
1365
1442
|
console.log("\n Extras:");
|
|
1366
1443
|
|
package/dist/config.js
CHANGED
|
@@ -63,6 +63,20 @@ export const config = {
|
|
|
63
63
|
sessionMode: (process.env.SESSION_MODE || "per-user"),
|
|
64
64
|
webhookEnabled: process.env.WEBHOOK_ENABLED === "true",
|
|
65
65
|
webhookToken: process.env.WEBHOOK_TOKEN || "",
|
|
66
|
+
// Web UI bind host. Default is 127.0.0.1 (loopback only) — set to "0.0.0.0"
|
|
67
|
+
// explicitly if you want LAN/external access. Combined with WEB_PASSWORD
|
|
68
|
+
// this is the safe default since v4.20.2; previous versions defaulted to
|
|
69
|
+
// listening on all interfaces with no auth required when WEB_PASSWORD was
|
|
70
|
+
// empty.
|
|
71
|
+
webHost: process.env.WEB_HOST || "127.0.0.1",
|
|
72
|
+
// Slack caller allowlist. Comma-separated Slack user IDs (e.g. "U0ABC123,U0DEF456").
|
|
73
|
+
// When non-empty, only these users can talk to the bot in Slack DMs and via @mention.
|
|
74
|
+
// When empty, the bot accepts any Slack workspace member (legacy behavior; safe iff
|
|
75
|
+
// the workspace is private to you).
|
|
76
|
+
slackAllowedUsers: (process.env.SLACK_ALLOWED_USERS || "")
|
|
77
|
+
.split(",")
|
|
78
|
+
.map(s => s.trim())
|
|
79
|
+
.filter(Boolean),
|
|
66
80
|
// Browser
|
|
67
81
|
cdpUrl: process.env.CDP_URL || "",
|
|
68
82
|
browseServerPort: Number(process.env.BROWSE_SERVER_PORT) || 3800,
|
package/dist/platforms/slack.js
CHANGED
|
@@ -18,6 +18,32 @@
|
|
|
18
18
|
*/
|
|
19
19
|
import fs from "fs";
|
|
20
20
|
import { parseSlackSlashCommand } from "./slack-slash-parser.js";
|
|
21
|
+
import { config } from "../config.js";
|
|
22
|
+
// Caller IDs already reported while no allowlist is configured; each ID is
// logged only once so the log is not flooded by repeat callers.
const discoveredCallers = new Set();
/**
 * v4.20.2 — Slack caller allowlist gate.
 *
 * When SLACK_ALLOWED_USERS is set in the environment (comma-separated Slack
 * user IDs, exposed as `config.slackAllowedUsers`), only those users get past
 * this gate. When the list is empty, the legacy behaviour applies: every
 * caller is accepted, which is safe iff the workspace is private to the
 * operator. In that mode each distinct non-empty caller ID is logged once so
 * the operator can copy it into SLACK_ALLOWED_USERS and lock the bot down.
 *
 * Slack user IDs are workspace-scoped (e.g. "U0ABC123"); rotate the list if
 * you migrate workspaces.
 *
 * @param {string} userId - Slack user ID of the caller (may be "").
 * @returns {boolean} true when the caller may talk to the bot.
 */
function isSlackUserAllowed(userId) {
    // Allowlist configured: strict membership check.
    if (config.slackAllowedUsers.length > 0) {
        return config.slackAllowedUsers.includes(userId);
    }
    // No allowlist: accept everyone, but surface each new caller ID once.
    if (userId && !discoveredCallers.has(userId)) {
        discoveredCallers.add(userId);
        const multiHint = discoveredCallers.size > 1 ? ` (or comma-separate multiple)` : "";
        console.warn(`[slack] caller discovered: user=${userId} — to lock the bot to specific users, ` +
            `add to .env: SLACK_ALLOWED_USERS=${userId}` +
            multiHint);
    }
    return true;
}
|
|
21
47
|
let _slackState = {
|
|
22
48
|
status: "disconnected",
|
|
23
49
|
botName: null,
|
|
@@ -148,6 +174,13 @@ export class SlackAdapter {
|
|
|
148
174
|
const userId = message.user || "";
|
|
149
175
|
const channelId = message.channel || "";
|
|
150
176
|
const messageId = message.ts || "";
|
|
177
|
+
// v4.20.2 β caller allowlist. If SLACK_ALLOWED_USERS is set, silently
|
|
178
|
+
// ignore anyone not on the list. Empty list = legacy behaviour
|
|
179
|
+
// (any workspace member can talk to the bot β safe iff the workspace
|
|
180
|
+
// is private to the operator).
|
|
181
|
+
if (!isSlackUserAllowed(userId)) {
|
|
182
|
+
return;
|
|
183
|
+
}
|
|
151
184
|
// Determine channel type
|
|
152
185
|
// DMs (im) have channel_type "im", group DMs are "mpim", channels are "channel"/"group"
|
|
153
186
|
const channelType = message.channel_type || "";
|
|
@@ -222,6 +255,10 @@ export class SlackAdapter {
|
|
|
222
255
|
const channelId = command.channel_id || "";
|
|
223
256
|
const userId = command.user_id || "";
|
|
224
257
|
const userName = command.user_name || userId;
|
|
258
|
+
// v4.20.2 β caller allowlist for slash commands.
|
|
259
|
+
if (!isSlackUserAllowed(userId)) {
|
|
260
|
+
return;
|
|
261
|
+
}
|
|
225
262
|
const incoming = {
|
|
226
263
|
platform: "slack",
|
|
227
264
|
messageId: `cmd-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
|
|
@@ -247,6 +284,10 @@ export class SlackAdapter {
|
|
|
247
284
|
const userId = event.user || "";
|
|
248
285
|
const channelId = event.channel || "";
|
|
249
286
|
const messageId = event.ts || "";
|
|
287
|
+
// v4.20.2 β same caller allowlist as DMs.
|
|
288
|
+
if (!isSlackUserAllowed(userId)) {
|
|
289
|
+
return;
|
|
290
|
+
}
|
|
250
291
|
// Strip the @mention from text
|
|
251
292
|
text = text.replace(new RegExp(`<@${this.botUserId}>`, "g"), "").trim();
|
|
252
293
|
if (!text)
|
|
@@ -4,15 +4,25 @@
|
|
|
4
4
|
* Triggered on startup if .embeddings.json exists but .embeddings.db does not.
|
|
5
5
|
* Idempotent: skips silently if the DB is already populated.
|
|
6
6
|
*
|
|
7
|
+
* Hardening (v4.20.1):
|
|
8
|
+
* - Lazy require of better-sqlite3 — missing native binary degrades to a clear
|
|
9
|
+
* warning + skip (bot keeps running, falls back to legacy JSON path until
|
|
10
|
+
* the user fixes their install).
|
|
11
|
+
* - Pre-flight disk-space check: refuses to start if free space < 2× source.
|
|
12
|
+
* - Progress logging every 5 000 entries (only on indexes larger than 5 000 entries).
|
|
13
|
+
* - Corrupt source JSON is renamed to `.broken.<timestamp>` so the next run
|
|
14
|
+
* doesn't loop on the same parse error.
|
|
15
|
+
*
|
|
7
16
|
* Safety:
|
|
8
17
|
* - Source JSON is renamed to .embeddings.json.bak-pre-sqlite (kept on disk).
|
|
9
|
-
* - Entry counts are compared after import; mismatch β throw, leaving the
|
|
10
|
-
*
|
|
18
|
+
* - Entry counts are compared after import; mismatch → throw, leaving the
|
|
19
|
+
* half-written DB removed and the source JSON untouched.
|
|
11
20
|
*/
|
|
12
21
|
import fs from "fs";
|
|
13
22
|
import path from "path";
|
|
14
|
-
import
|
|
23
|
+
import { createRequire } from "module";
|
|
15
24
|
import { EMBEDDINGS_IDX, EMBEDDINGS_DB } from "../paths.js";
|
|
25
|
+
const cjsRequire = createRequire(import.meta.url);
|
|
16
26
|
function vectorToBlob(v) {
|
|
17
27
|
const f32 = new Float32Array(v);
|
|
18
28
|
return Buffer.from(f32.buffer, f32.byteOffset, f32.byteLength);
|
|
@@ -20,14 +30,58 @@ function vectorToBlob(v) {
|
|
|
20
30
|
/**
 * Tells whether the JSON-to-SQLite migration still has to run.
 *
 * @returns {boolean} true only when the legacy JSON index file exists and the
 *   SQLite database file does not.
 */
export function shouldMigrateEmbeddingsToSqlite() {
    const hasLegacyJson = fs.existsSync(EMBEDDINGS_IDX);
    if (!hasLegacyJson) {
        return false;
    }
    // The DB file is only probed when a legacy index is actually present.
    return !fs.existsSync(EMBEDDINGS_DB);
}
|
|
33
|
+
/**
 * Best-effort free-space probe for the volume that holds `forPath`.
 *
 * Node 18.15+ ships statfsSync on all major platforms; on a runtime without
 * it, or when the call throws, the probe reports Infinity so the caller
 * proceeds without the safety check instead of blocking.
 *
 * @param {string} forPath - A path located on the volume to inspect.
 * @returns {number} `bavail * bsize` from `fs.statfsSync`, or
 *   `Number.POSITIVE_INFINITY` when the value cannot be determined.
 */
function freeBytesOnVolume(forPath) {
    if (typeof fs.statfsSync !== "function") {
        return Number.POSITIVE_INFINITY;
    }
    // statfs fields may be bigint or number; normalise to number.
    const asNumber = (value) => (typeof value === "bigint" ? Number(value) : value);
    try {
        const volume = fs.statfsSync(forPath);
        return asNumber(volume.bavail) * asNumber(volume.bsize);
    }
    catch {
        return Number.POSITIVE_INFINITY;
    }
}
|
|
23
52
|
/**
|
|
24
53
|
* Run the migration. Returns the entry count migrated, or null if skipped.
|
|
25
54
|
*/
|
|
26
55
|
export function migrateEmbeddingsToSqlite() {
|
|
27
56
|
if (!shouldMigrateEmbeddingsToSqlite())
|
|
28
57
|
return null;
|
|
29
|
-
|
|
58
|
+
// ββ Pre-flight: better-sqlite3 loadable? βββββββββββββββββββββββββββββββββββ
|
|
59
|
+
let Database;
|
|
60
|
+
try {
|
|
61
|
+
Database = cjsRequire("better-sqlite3");
|
|
62
|
+
}
|
|
63
|
+
catch (err) {
|
|
64
|
+
console.warn("β οΈ Embeddings migration skipped: better-sqlite3 native binary unavailable. " +
|
|
65
|
+
"Bot continues with legacy JSON index. Fix: `npm rebuild better-sqlite3` " +
|
|
66
|
+
"or reinstall alvin-bot. Underlying error:", err instanceof Error ? err.message : err);
|
|
67
|
+
return null;
|
|
68
|
+
}
|
|
30
69
|
const sourceSize = fs.statSync(EMBEDDINGS_IDX).size;
|
|
70
|
+
// ββ Pre-flight: enough free space? βββββββββββββββββββββββββββββββββββββββββ
|
|
71
|
+
const targetDir = path.dirname(EMBEDDINGS_DB);
|
|
72
|
+
fs.mkdirSync(targetDir, { recursive: true });
|
|
73
|
+
const free = freeBytesOnVolume(targetDir);
|
|
74
|
+
// We need source + about half of source for the SQLite file, plus headroom
|
|
75
|
+
// for WAL during the transaction. Demand 2× source size to be comfortable.
|
|
76
|
+
const required = sourceSize * 2;
|
|
77
|
+
if (free < required) {
|
|
78
|
+
console.warn(`β οΈ Embeddings migration skipped: insufficient free disk space on ${targetDir}. ` +
|
|
79
|
+
`Need ~${(required / 1024 / 1024).toFixed(0)} MB, have ${(free / 1024 / 1024).toFixed(0)} MB. ` +
|
|
80
|
+
`Free up some space and restart the bot to retry.`);
|
|
81
|
+
return null;
|
|
82
|
+
}
|
|
83
|
+
// ββ Read & parse source ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
84
|
+
const t0 = Date.now();
|
|
31
85
|
console.log(`π¦ Migrating embeddings JSON (${(sourceSize / 1024 / 1024).toFixed(0)} MB) β SQLite...`);
|
|
32
86
|
const raw = fs.readFileSync(EMBEDDINGS_IDX, "utf-8");
|
|
33
87
|
let legacy;
|
|
@@ -35,10 +89,21 @@ export function migrateEmbeddingsToSqlite() {
|
|
|
35
89
|
legacy = JSON.parse(raw);
|
|
36
90
|
}
|
|
37
91
|
catch (err) {
|
|
38
|
-
|
|
92
|
+
// Move the broken JSON aside so we don't try to migrate it again next boot.
|
|
93
|
+
const broken = `${EMBEDDINGS_IDX}.broken.${Date.now()}`;
|
|
94
|
+
try {
|
|
95
|
+
fs.renameSync(EMBEDDINGS_IDX, broken);
|
|
96
|
+
console.error(`β Embeddings migration: source JSON is corrupt β renamed to ${path.basename(broken)} ` +
|
|
97
|
+
`and skipped. The bot will rebuild the index from scratch on first search ` +
|
|
98
|
+
`(this may incur Google API calls). Underlying parse error:`, err);
|
|
99
|
+
}
|
|
100
|
+
catch (renameErr) {
|
|
101
|
+
console.error("β Embeddings migration: source JSON is corrupt AND could not be renamed:", err, "Rename error:", renameErr);
|
|
102
|
+
}
|
|
39
103
|
return null;
|
|
40
104
|
}
|
|
41
|
-
|
|
105
|
+
const validEntries = (legacy.entries ?? []).filter(e => Array.isArray(e.vector) && e.vector.length > 0);
|
|
106
|
+
// ββ Write DB βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
42
107
|
const db = new Database(EMBEDDINGS_DB);
|
|
43
108
|
try {
|
|
44
109
|
db.pragma("journal_mode = WAL");
|
|
@@ -62,9 +127,9 @@ export function migrateEmbeddingsToSqlite() {
|
|
|
62
127
|
CREATE INDEX IF NOT EXISTS idx_entries_source ON entries(source);
|
|
63
128
|
`);
|
|
64
129
|
const setMeta = db.prepare("INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value");
|
|
65
|
-
setMeta.run("model", legacy.model);
|
|
130
|
+
setMeta.run("model", legacy.model || "gemini-embedding-001");
|
|
66
131
|
setMeta.run("schemaVersion", "1");
|
|
67
|
-
setMeta.run("lastReindex", String(legacy.lastReindex));
|
|
132
|
+
setMeta.run("lastReindex", String(legacy.lastReindex || 0));
|
|
68
133
|
setMeta.run("migratedFromJson", String(Date.now()));
|
|
69
134
|
const insMtime = db.prepare("INSERT INTO file_mtimes (source, mtime_ms) VALUES (?, ?) ON CONFLICT(source) DO UPDATE SET mtime_ms = excluded.mtime_ms");
|
|
70
135
|
const writeMtimes = db.transaction((rows) => {
|
|
@@ -73,38 +138,52 @@ export function migrateEmbeddingsToSqlite() {
|
|
|
73
138
|
});
|
|
74
139
|
writeMtimes(Object.entries(legacy.fileMtimes ?? {}));
|
|
75
140
|
const insEntry = db.prepare("INSERT INTO entries (id, source, text, vector, indexed_at) VALUES (?, ?, ?, ?, ?)");
|
|
76
|
-
|
|
141
|
+
// Write entries in chunks of 1 000 so we can log progress on huge indexes.
|
|
142
|
+
const CHUNK = 1000;
|
|
143
|
+
const total = validEntries.length;
|
|
144
|
+
let written = 0;
|
|
145
|
+
const writeChunk = db.transaction((rows) => {
|
|
77
146
|
for (const e of rows) {
|
|
78
|
-
if (!Array.isArray(e.vector) || e.vector.length === 0)
|
|
79
|
-
continue;
|
|
80
147
|
insEntry.run(e.id, e.source, e.text, vectorToBlob(e.vector), e.indexedAt);
|
|
81
148
|
}
|
|
82
149
|
});
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
150
|
+
for (let i = 0; i < total; i += CHUNK) {
|
|
151
|
+
const slice = validEntries.slice(i, i + CHUNK);
|
|
152
|
+
writeChunk(slice);
|
|
153
|
+
written += slice.length;
|
|
154
|
+
if (total > 5000 && (written === total || written % 5000 === 0)) {
|
|
155
|
+
console.log(` β¦migrated ${written} / ${total} entries (${Math.round((written / total) * 100)} %)`);
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
const writtenCount = db.prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
159
|
+
if (writtenCount !== validEntries.length) {
|
|
160
|
+
throw new Error(`Entry-count mismatch after migration: expected ${validEntries.length}, got ${writtenCount}`);
|
|
88
161
|
}
|
|
89
162
|
db.close();
|
|
90
|
-
// Move source JSON aside so we never re-migrate
|
|
163
|
+
// ββ Move source JSON aside so we never re-migrate ββββββββββββββββββββββββ
|
|
91
164
|
const bak = `${EMBEDDINGS_IDX}.bak-pre-sqlite`;
|
|
92
165
|
try {
|
|
93
166
|
fs.renameSync(EMBEDDINGS_IDX, bak);
|
|
94
167
|
}
|
|
95
168
|
catch (err) {
|
|
96
|
-
console.warn("β οΈ Could not rename source JSON:", err);
|
|
169
|
+
console.warn("β οΈ Could not rename source JSON (migration still succeeded):", err);
|
|
97
170
|
}
|
|
98
171
|
const targetSize = fs.statSync(EMBEDDINGS_DB).size;
|
|
99
172
|
const dt = Date.now() - t0;
|
|
100
|
-
console.log(`β
Embeddings migrated: ${
|
|
101
|
-
return { entries:
|
|
173
|
+
console.log(`β
Embeddings migrated: ${writtenCount} entries, ${(sourceSize / 1024 / 1024).toFixed(0)} MB JSON β ${(targetSize / 1024 / 1024).toFixed(0)} MB SQLite in ${dt} ms`);
|
|
174
|
+
return { entries: writtenCount, sourceMb: sourceSize / 1024 / 1024, targetMb: targetSize / 1024 / 1024 };
|
|
102
175
|
}
|
|
103
176
|
catch (err) {
|
|
104
177
|
db.close();
|
|
105
|
-
// Remove half-written DB so the next boot retries cleanly.
|
|
178
|
+
// Remove half-written DB so the next boot retries cleanly with the original JSON intact.
|
|
106
179
|
try {
|
|
107
180
|
fs.unlinkSync(EMBEDDINGS_DB);
|
|
181
|
+
// also unlink WAL/SHM if present
|
|
182
|
+
for (const ext of ["-wal", "-shm"]) {
|
|
183
|
+
const p = `${EMBEDDINGS_DB}${ext}`;
|
|
184
|
+
if (fs.existsSync(p))
|
|
185
|
+
fs.unlinkSync(p);
|
|
186
|
+
}
|
|
108
187
|
}
|
|
109
188
|
catch {
|
|
110
189
|
/* nothing to clean */
|
|
@@ -19,10 +19,36 @@ import fs from "fs";
|
|
|
19
19
|
import path from "path";
|
|
20
20
|
import { resolve } from "path";
|
|
21
21
|
import os from "os";
|
|
22
|
-
import
|
|
22
|
+
import { createRequire } from "module";
|
|
23
23
|
import { config } from "../config.js";
|
|
24
24
|
import { MEMORY_DIR, MEMORY_FILE, EMBEDDINGS_DB } from "../paths.js";
|
|
25
25
|
import { ASSETS_DIR, ASSETS_INDEX_MD } from "../paths.js";
|
|
26
|
+
// Lazy-load state for the better-sqlite3 native module.
let SqliteClass = null; // the loaded constructor, or null when not (yet) loaded
let sqliteLoadAttempted = false; // guarantees a single load attempt and a single warning
let sqliteLoadError = null; // Error captured from the failed load attempt, if any
const cjsRequire = createRequire(import.meta.url);
/**
 * Loads better-sqlite3 on first use and caches the outcome.
 *
 * The module is required lazily so that a missing or unbuildable native
 * binary does not fail at import time: the failure is logged once and every
 * later call returns the cached null.
 *
 * @returns {Function | null} the better-sqlite3 constructor, or null when the
 *   module could not be loaded.
 */
function loadSqlite() {
    if (sqliteLoadAttempted) {
        return SqliteClass;
    }
    sqliteLoadAttempted = true;
    try {
        SqliteClass = cjsRequire("better-sqlite3");
    }
    catch (err) {
        sqliteLoadError = err instanceof Error ? err : new Error(String(err));
        console.warn("⚠️ better-sqlite3 native binary unavailable — embeddings disabled. " +
            "Bot continues without semantic memory search. Fix: rebuild deps with " +
            "`cd $(npm root -g)/alvin-bot && npm rebuild better-sqlite3` or reinstall " +
            "alvin-bot. Underlying error: " +
            sqliteLoadError.message);
    }
    // Still null when the require above threw.
    return SqliteClass;
}
|
|
48
|
+
/**
 * Reports whether the SQLite embeddings backend is usable, triggering the
 * lazy load of better-sqlite3 if it has not been attempted yet.
 *
 * @returns {{ available: boolean, error: string | null }} `available` is true
 *   when the native module is loaded; `error` carries the load error message
 *   or null.
 */
export function getEmbeddingsBackendStatus() {
    loadSqlite();
    const available = SqliteClass !== null;
    const error = sqliteLoadError?.message ?? null;
    return { available, error };
}
|
|
26
52
|
// Hub memory directory (Claude Hub β read-only, additional context)
|
|
27
53
|
const HUB_MEMORY_DIR = resolve(os.homedir(), ".claude", "hub", "MEMORY");
|
|
28
54
|
// ββ Constants βββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -47,9 +73,16 @@ function blobToVector(b) {
|
|
|
47
73
|
}
|
|
48
74
|
// ββ DB lifecycle ββββββββββββββββββββββββββββββββββββββββ
|
|
49
75
|
let dbInstance = null;
|
|
76
|
+
/**
|
|
77
|
+
* Returns the live DB handle, or null when better-sqlite3 isn't loadable.
|
|
78
|
+
* Callers must handle the null case (treat as "search unavailable").
|
|
79
|
+
*/
|
|
50
80
|
function db() {
|
|
51
81
|
if (dbInstance)
|
|
52
82
|
return dbInstance;
|
|
83
|
+
const Database = loadSqlite();
|
|
84
|
+
if (!Database)
|
|
85
|
+
return null;
|
|
53
86
|
// Ensure directory exists (handles fresh installs).
|
|
54
87
|
fs.mkdirSync(path.dirname(EMBEDDINGS_DB), { recursive: true });
|
|
55
88
|
dbInstance = new Database(EMBEDDINGS_DB);
|
|
@@ -88,25 +121,35 @@ export function closeEmbeddingsDb() {
|
|
|
88
121
|
dbInstance = null;
|
|
89
122
|
}
|
|
90
123
|
}
|
|
124
|
+
/**
 * Strict accessor for helpers that cannot work without an open DB.
 *
 * @returns the handle returned by db().
 * @throws {Error} when db() yields no handle (better-sqlite3 not loaded).
 */
function dbOrThrow() {
    const handle = db();
    if (handle) {
        return handle;
    }
    throw new Error("Embeddings DB unavailable — better-sqlite3 native module not loaded");
}
|
|
91
132
|
// ββ Meta helpers ββββββββββββββββββββββββββββββββββββββββ
|
|
92
133
|
/**
 * Reads one value from the `meta` table.
 *
 * @param {string} key - Meta key to look up.
 * @returns the stored value, or null when no row matches or the value is nullish.
 * @throws {Error} when the embeddings DB is unavailable (via dbOrThrow).
 */
function getMeta(key) {
    const lookup = dbOrThrow().prepare("SELECT value FROM meta WHERE key = ?");
    const match = lookup.get(key);
    return match?.value ?? null;
}
|
|
96
137
|
/**
 * Inserts or overwrites one key/value pair in the `meta` table.
 *
 * @param {string} key - Meta key.
 * @param {string} value - Value to store under the key.
 * @throws {Error} when the embeddings DB is unavailable (via dbOrThrow).
 */
function setMeta(key, value) {
    const upsert = dbOrThrow().prepare("INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value");
    upsert.run(key, value);
}
|
|
101
142
|
/**
 * Loads every row of the `file_mtimes` table into a plain object.
 *
 * @returns {Object} map of `source` to its recorded `mtime_ms`.
 * @throws {Error} when the embeddings DB is unavailable (via dbOrThrow).
 */
function getFileMtimes() {
    const select = dbOrThrow().prepare("SELECT source, mtime_ms FROM file_mtimes");
    return select.all().reduce((bySource, record) => {
        bySource[record.source] = record.mtime_ms;
        return bySource;
    }, {});
}
|
|
108
151
|
/**
 * Inserts or overwrites the recorded modification time of one source file in
 * the `file_mtimes` table.
 *
 * @param {string} source - Source identifier (the table's `source` column).
 * @param {number} mtimeMs - Modification time to record in `mtime_ms`.
 * @throws {Error} when the embeddings DB is unavailable (via dbOrThrow).
 */
function setFileMtime(source, mtimeMs) {
    const upsert = dbOrThrow().prepare("INSERT INTO file_mtimes (source, mtime_ms) VALUES (?, ?) ON CONFLICT(source) DO UPDATE SET mtime_ms = excluded.mtime_ms");
    upsert.run(source, mtimeMs);
}
|
|
@@ -310,14 +353,17 @@ function getStaleFiles() {
|
|
|
310
353
|
}
|
|
311
354
|
// ββ Public API ββββββββββββββββββββββββββββββββββββββββββ
|
|
312
355
|
export async function reindexMemory(force = false) {
|
|
356
|
+
if (!loadSqlite()) {
|
|
357
|
+
return { indexed: 0, total: 0 };
|
|
358
|
+
}
|
|
313
359
|
const filesToIndex = force ? getIndexableFiles() : getStaleFiles();
|
|
314
360
|
if (filesToIndex.length === 0) {
|
|
315
|
-
const total =
|
|
361
|
+
const total = dbOrThrow().prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
316
362
|
return { indexed: 0, total };
|
|
317
363
|
}
|
|
318
364
|
// Drop existing entries for files being reindexed (per-source DELETE is O(log n) thanks to idx).
|
|
319
|
-
const delStmt =
|
|
320
|
-
const dropOld =
|
|
365
|
+
const delStmt = dbOrThrow().prepare("DELETE FROM entries WHERE source = ?");
|
|
366
|
+
const dropOld = dbOrThrow().transaction((sources) => {
|
|
321
367
|
for (const s of sources)
|
|
322
368
|
delStmt.run(s);
|
|
323
369
|
});
|
|
@@ -339,7 +385,7 @@ export async function reindexMemory(force = false) {
|
|
|
339
385
|
}
|
|
340
386
|
if (allChunks.length === 0) {
|
|
341
387
|
// Even with zero chunks, keep mtimes in sync so we don't re-walk on next run.
|
|
342
|
-
const updMtime =
|
|
388
|
+
const updMtime = dbOrThrow().transaction((files) => {
|
|
343
389
|
for (const f of files) {
|
|
344
390
|
try {
|
|
345
391
|
setFileMtime(f.relativePath, fs.statSync(f.path).mtimeMs);
|
|
@@ -350,16 +396,16 @@ export async function reindexMemory(force = false) {
|
|
|
350
396
|
}
|
|
351
397
|
});
|
|
352
398
|
updMtime(filesToIndex);
|
|
353
|
-
const total =
|
|
399
|
+
const total = dbOrThrow().prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
354
400
|
return { indexed: 0, total };
|
|
355
401
|
}
|
|
356
402
|
// Get embeddings for all chunks (network).
|
|
357
403
|
const texts = allChunks.map(c => c.text);
|
|
358
404
|
const vectors = await getEmbeddings(texts);
|
|
359
405
|
// Single transaction for all writes.
|
|
360
|
-
const insertStmt =
|
|
406
|
+
const insertStmt = dbOrThrow().prepare("INSERT INTO entries (id, source, text, vector, indexed_at) VALUES (?, ?, ?, ?, ?) " +
|
|
361
407
|
"ON CONFLICT(id) DO UPDATE SET source=excluded.source, text=excluded.text, vector=excluded.vector, indexed_at=excluded.indexed_at");
|
|
362
|
-
const writeAll =
|
|
408
|
+
const writeAll = dbOrThrow().transaction((rows) => {
|
|
363
409
|
for (const r of rows) {
|
|
364
410
|
insertStmt.run(r.id, r.source, r.text, r.vector, r.indexedAt);
|
|
365
411
|
}
|
|
@@ -373,7 +419,7 @@ export async function reindexMemory(force = false) {
|
|
|
373
419
|
indexedAt: now,
|
|
374
420
|
})));
|
|
375
421
|
// Update mtimes for the files we just (re-)indexed.
|
|
376
|
-
const updMtime =
|
|
422
|
+
const updMtime = dbOrThrow().transaction((files) => {
|
|
377
423
|
for (const f of files) {
|
|
378
424
|
try {
|
|
379
425
|
setFileMtime(f.relativePath, fs.statSync(f.path).mtimeMs);
|
|
@@ -385,20 +431,23 @@ export async function reindexMemory(force = false) {
|
|
|
385
431
|
});
|
|
386
432
|
updMtime(filesToIndex);
|
|
387
433
|
setMeta("lastReindex", String(now));
|
|
388
|
-
const total =
|
|
434
|
+
const total = dbOrThrow().prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
389
435
|
return { indexed: allChunks.length, total };
|
|
390
436
|
}
|
|
391
437
|
export async function searchMemory(query, topK = 5, minScore = 0.3) {
|
|
438
|
+
if (!loadSqlite()) {
|
|
439
|
+
return [];
|
|
440
|
+
}
|
|
392
441
|
// Auto-index if empty.
|
|
393
|
-
const total =
|
|
442
|
+
const total = dbOrThrow().prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
394
443
|
if (total === 0) {
|
|
395
444
|
await reindexMemory();
|
|
396
|
-
const after =
|
|
445
|
+
const after = dbOrThrow().prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
397
446
|
if (after === 0)
|
|
398
447
|
return [];
|
|
399
448
|
}
|
|
400
449
|
const queryVector = Float32Array.from(await getQueryEmbedding(query));
|
|
401
|
-
const rows =
|
|
450
|
+
const rows = dbOrThrow().prepare("SELECT id, source, text, vector FROM entries").all();
|
|
402
451
|
const scored = [];
|
|
403
452
|
for (const row of rows) {
|
|
404
453
|
const v = blobToVector(row.vector);
|
|
@@ -411,11 +460,14 @@ export async function searchMemory(query, topK = 5, minScore = 0.3) {
|
|
|
411
460
|
return scored.slice(0, topK);
|
|
412
461
|
}
|
|
413
462
|
export async function initEmbeddings() {
|
|
463
|
+
if (!loadSqlite()) {
|
|
464
|
+
return; // already warned via loadSqlite
|
|
465
|
+
}
|
|
414
466
|
try {
|
|
415
467
|
db(); // Open & migrate schema.
|
|
416
468
|
const stale = getStaleFiles();
|
|
417
469
|
if (stale.length === 0) {
|
|
418
|
-
const total =
|
|
470
|
+
const total = dbOrThrow().prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
419
471
|
if (total > 0)
|
|
420
472
|
return;
|
|
421
473
|
}
|
|
@@ -433,9 +485,12 @@ export function getIndexStats() {
|
|
|
433
485
|
let files = 0;
|
|
434
486
|
let lastReindex = 0;
|
|
435
487
|
let sizeBytes = 0;
|
|
488
|
+
if (!loadSqlite()) {
|
|
489
|
+
return { entries, files, lastReindex, sizeBytes };
|
|
490
|
+
}
|
|
436
491
|
try {
|
|
437
|
-
entries =
|
|
438
|
-
files =
|
|
492
|
+
entries = dbOrThrow().prepare("SELECT COUNT(*) AS c FROM entries").get().c;
|
|
493
|
+
files = dbOrThrow().prepare("SELECT COUNT(*) AS c FROM file_mtimes").get().c;
|
|
439
494
|
const meta = getMeta("lastReindex");
|
|
440
495
|
if (meta)
|
|
441
496
|
lastReindex = Number(meta);
|
package/dist/web/server.js
CHANGED
|
@@ -1566,7 +1566,11 @@ function scheduleBindAttempt(port, attempt) {
|
|
|
1566
1566
|
// invalid backlog, kernel hiccup) can throw synchronously. Catch here
|
|
1567
1567
|
// so the main routine never crashes during web-UI bind.
|
|
1568
1568
|
try {
|
|
1569
|
-
|
|
1569
|
+
// v4.20.2 — bind to config.webHost (default 127.0.0.1) so the Web UI
|
|
1570
|
+
// is loopback-only unless the operator opts in by setting WEB_HOST=0.0.0.0.
|
|
1571
|
+
// Empty/"*" maps to all interfaces.
|
|
1572
|
+
const bindHost = (config.webHost === "*" || config.webHost === "") ? undefined : config.webHost;
|
|
1573
|
+
server.listen(port, bindHost, () => {
|
|
1570
1574
|
if (handled)
|
|
1571
1575
|
return; // Should be impossible; paranoia.
|
|
1572
1576
|
handled = true;
|
|
@@ -1587,10 +1591,17 @@ function scheduleBindAttempt(port, attempt) {
|
|
|
1587
1591
|
server.on("error", (err) => {
|
|
1588
1592
|
console.warn(`[web] post-bind server error (ignored): ${err.message}`);
|
|
1589
1593
|
});
|
|
1590
|
-
|
|
1594
|
+
const bindLabel = bindHost && bindHost !== "127.0.0.1" && bindHost !== "::1"
|
|
1595
|
+
? `http://${bindHost}:${actualWebPort}` + (bindHost === "0.0.0.0" ? " (LAN-reachable)" : "")
|
|
1596
|
+
: `http://localhost:${actualWebPort}`;
|
|
1597
|
+
console.log(`π Web UI: ${bindLabel}`);
|
|
1591
1598
|
if (actualWebPort !== originalPort) {
|
|
1592
1599
|
console.log(` (Port ${originalPort} was busy, using ${actualWebPort} instead)`);
|
|
1593
1600
|
}
|
|
1601
|
+
if (bindHost === "0.0.0.0" && !process.env.WEB_PASSWORD) {
|
|
1602
|
+
console.warn("⚠️ Web UI is bound to 0.0.0.0 but WEB_PASSWORD is empty — anyone on the LAN can log in. " +
|
|
1603
|
+
"Set WEB_PASSWORD in ~/.alvin-bot/.env or set WEB_HOST=127.0.0.1.");
|
|
1604
|
+
}
|
|
1594
1605
|
});
|
|
1595
1606
|
}
|
|
1596
1607
|
catch (err) {
|