alvin-bot 4.16.1 → 4.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +47 -0
- package/dist/handlers/commands.js +31 -0
- package/dist/index.js +14 -1
- package/dist/platforms/whatsapp.js +13 -0
- package/dist/services/async-agent-watcher.js +23 -0
- package/dist/services/browser-manager.js +11 -0
- package/dist/services/cdp-bootstrap.js +6 -0
- package/dist/services/disk-cleanup.js +203 -0
- package/dist/services/embeddings.js +24 -1
- package/dist/services/heartbeat.js +4 -0
- package/dist/services/mcp.js +11 -0
- package/dist/services/skills.js +4 -2
- package/dist/services/subagents.js +38 -0
- package/dist/services/updater.js +1 -0
- package/dist/services/users.js +82 -11
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,53 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to Alvin Bot are documented here.
|
|
4
4
|
|
|
5
|
+
## [4.18.0] — 2026-04-20
|
|
6
|
+
|
|
7
|
+
### ⚡ Performance + Hardening: medium-priority cleanups from the stability audit
|
|
8
|
+
|
|
9
|
+
Completes the audit work started in 4.17.0 by addressing the remaining medium-severity findings.
|
|
10
|
+
|
|
11
|
+
**Performance (hot path):**
|
|
12
|
+
- **User profiles now cached in memory** (`src/services/users.ts`). Previously `touchProfile` — called on every inbound message — did a sync `readFileSync` + `writeFileSync` on disk. Now it updates an in-memory cache and schedules a debounced flush (2s batch window). A final flush runs on graceful shutdown so nothing is lost. Drops 2 blocking fs operations per message.
|
|
13
|
+
- **Embeddings index now cached** (`src/services/embeddings.ts`). Semantic search previously re-read + re-parsed the full on-disk index on every query (100+ MB for large memories). Now cached in memory with mtime-based invalidation — external reindexers are still picked up without a restart.
|
|
14
|
+
- **Skills no longer force-reload every 5 minutes** (`src/services/skills.ts`). `getSkills()` used to re-scan the disk after 5min even though `fs.watch` already triggers hot-reload on change. Cache is now authoritative.
|
|
15
|
+
|
|
16
|
+
**Hardening (unbounded growth):**
|
|
17
|
+
- **Sub-agents map capped at 1000** (`src/services/subagents.ts`). Hits the 90%-target on overflow and evicts oldest delivered/terminated entries first. Running agents are never evicted.
|
|
18
|
+
- **Async-agent pending map capped at 500** (`src/services/async-agent-watcher.ts`). Same oldest-first (by start time) eviction strategy for orphaned `registerPending` entries.
|
|
19
|
+
- **Browser gateway + MCP subprocess stderr now have error handlers** (`browser-manager.ts`, `mcp.ts`). Previously a stream error would throw unhandled and could crash the node process.
|
|
20
|
+
|
|
21
|
+
**Net effect:** message path now does zero blocking fs reads/writes on the profile/skills/embeddings side. Long-running installs can't grow the in-memory state beyond the caps. No API changes.
|
|
22
|
+
|
|
23
|
+
## [4.17.0] — 2026-04-20
|
|
24
|
+
|
|
25
|
+
### 🛡️ Hardening: long-running stability audit + leak fixes
|
|
26
|
+
|
|
27
|
+
Ran a full audit of leak/stability hazards for 24/7 operation. Fixed the critical findings and added a disk-cleanup service so the bot stays lean over months of uptime.
|
|
28
|
+
|
|
29
|
+
**Fixes:**
|
|
30
|
+
- **WhatsApp event-listener leak on reconnect** (`src/platforms/whatsapp.ts`): Before every new socket, the previous socket's listeners are now removed and the old socket is ended. Without this, every reconnect stacked new listeners on top of old ones — causing memory growth and duplicate message processing after long sessions.
|
|
31
|
+
- **CDP file-descriptor leak** (`src/services/cdp-bootstrap.ts`): The log-file fd passed to the detached Chromium spawn is now closed in the parent after the child inherits it. Previously leaked one fd per browser bootstrap.
|
|
32
|
+
- **Heartbeat + auto-update timers now `.unref()`'d** and explicitly stopped in the shutdown handler. Prevents timers from keeping the process alive during graceful exit.
|
|
33
|
+
|
|
34
|
+
### 🧹 Feature: disk-cleanup service
|
|
35
|
+
|
|
36
|
+
New service (`src/services/disk-cleanup.ts`) that runs automatically once a day. Deletes transient files that grow without bound on long-running installs:
|
|
37
|
+
- Bot log rotation (>100 MB by default)
|
|
38
|
+
- Browser screenshots (>30 days)
|
|
39
|
+
- Subagent output streams (>30 days)
|
|
40
|
+
- `/tmp/alvin-bot/` media (>7 days)
|
|
41
|
+
- WhatsApp media cache (>30 days)
|
|
42
|
+
- CDP log file
|
|
43
|
+
|
|
44
|
+
**NEVER touched:** memory, assets, workspaces, cron-jobs, .env, session-store, delivery-queue. Memory is protected.
|
|
45
|
+
|
|
46
|
+
**Configuration via env:** `CLEANUP_LOG_MAX_MB`, `CLEANUP_SCREENSHOTS_DAYS`, `CLEANUP_SUBAGENTS_DAYS`, `CLEANUP_TMP_DAYS`, `CLEANUP_WA_MEDIA_DAYS`. Set any to `0` to disable that category.
|
|
47
|
+
|
|
48
|
+
**Telegram command:**
|
|
49
|
+
- `/cleanup` — show current policy + protected paths
|
|
50
|
+
- `/cleanup run` — trigger manual pass, get stats back
|
|
51
|
+
|
|
5
52
|
## [4.16.1] — 2026-04-20
|
|
6
53
|
|
|
7
54
|
### 🆕 Feature: /update shows release highlights
|
|
@@ -28,6 +28,7 @@ import { getWebPort } from "../web/server.js";
|
|
|
28
28
|
import { getUsageSummary, getAllRateLimits, formatTokens } from "../services/usage-tracker.js";
|
|
29
29
|
import { runUpdate, getAutoUpdate, setAutoUpdate, startAutoUpdateLoop } from "../services/updater.js";
|
|
30
30
|
import { getReleaseHighlights } from "../services/release-highlights.js";
|
|
31
|
+
import { runCleanup, getCleanupPolicy } from "../services/disk-cleanup.js";
|
|
31
32
|
import { getHealthStatus, isFailedOver } from "../services/heartbeat.js";
|
|
32
33
|
import { t, LOCALE_NAMES, LOCALE_FLAGS } from "../i18n.js";
|
|
33
34
|
// Kick off auto-update loop on module load if the persistent flag is set.
|
|
@@ -1919,6 +1920,36 @@ export function registerCommands(bot) {
|
|
|
1919
1920
|
await ctx.reply(`${t("bot.autoupdate.statusLabel", lang)} *${status ? "ON" : "OFF"}*\n\n${t("bot.autoupdate.commandsLabel", lang)}\n\`/autoupdate on\`\n\`/autoupdate off\``, { parse_mode: "Markdown" });
|
|
1920
1921
|
}
|
|
1921
1922
|
});
|
|
1923
|
+
// /cleanup — trigger disk cleanup manually, or show current policy.
|
|
1924
|
+
// /cleanup → show policy
|
|
1925
|
+
// /cleanup run → run a cleanup pass and report what was deleted
|
|
1926
|
+
bot.command("cleanup", async (ctx) => {
|
|
1927
|
+
const arg = (ctx.match || "").trim().toLowerCase();
|
|
1928
|
+
if (arg === "run" || arg === "now") {
|
|
1929
|
+
await ctx.reply("🧹 Running disk cleanup...");
|
|
1930
|
+
const r = await runCleanup();
|
|
1931
|
+
const bytes = r.bytesReclaimed;
|
|
1932
|
+
const human = bytes < 1024 * 1024
|
|
1933
|
+
? `${(bytes / 1024).toFixed(1)} KB`
|
|
1934
|
+
: bytes < 1024 * 1024 * 1024
|
|
1935
|
+
? `${(bytes / 1024 / 1024).toFixed(1)} MB`
|
|
1936
|
+
: `${(bytes / 1024 / 1024 / 1024).toFixed(2)} GB`;
|
|
1937
|
+
const errLine = r.errors.length > 0 ? `\n⚠️ ${r.errors.length} error(s)` : "";
|
|
1938
|
+
await ctx.reply(`✅ Cleanup done\n• Files deleted: ${r.filesDeleted}\n• Logs rotated: ${r.logsRotated}\n• Reclaimed: ${human}${errLine}`);
|
|
1939
|
+
}
|
|
1940
|
+
else {
|
|
1941
|
+
const p = getCleanupPolicy();
|
|
1942
|
+
await ctx.reply(`🧹 *Cleanup policy*\n` +
|
|
1943
|
+
`• Log rotation: >${p.logMaxSizeMb} MB\n` +
|
|
1944
|
+
`• Screenshots: >${p.screenshotsMaxAgeDays} days\n` +
|
|
1945
|
+
`• Subagent outputs: >${p.subagentsMaxAgeDays} days\n` +
|
|
1946
|
+
`• /tmp/alvin-bot: >${p.tmpMaxAgeDays} days\n` +
|
|
1947
|
+
`• WhatsApp media: >${p.waMediaMaxAgeDays} days\n\n` +
|
|
1948
|
+
`Memory, assets, workspaces, cron jobs are NEVER touched.\n\n` +
|
|
1949
|
+
`Configure via env: \`CLEANUP_LOG_MAX_MB\`, \`CLEANUP_SCREENSHOTS_DAYS\`, \`CLEANUP_SUBAGENTS_DAYS\`, \`CLEANUP_TMP_DAYS\`, \`CLEANUP_WA_MEDIA_DAYS\`\n\n` +
|
|
1950
|
+
`Run manually: \`/cleanup run\``, { parse_mode: "Markdown" });
|
|
1951
|
+
}
|
|
1952
|
+
});
|
|
1922
1953
|
// ── /sub-agents — manage background subagents (cron jobs + manual spawns) ──
|
|
1923
1954
|
//
|
|
1924
1955
|
// /sub-agents → show current config + running agents
|
package/dist/index.js
CHANGED
|
@@ -155,7 +155,10 @@ import { startSessionCleanup, stopSessionCleanup, attachPersistHook } from "./se
|
|
|
155
155
|
import { loadPersistedSessions, flushSessions, schedulePersist, } from "./services/session-persistence.js";
|
|
156
156
|
import { processQueue, cleanupQueue, setSenders, enqueue } from "./services/delivery-queue.js";
|
|
157
157
|
import { discoverTools } from "./services/tool-discovery.js";
|
|
158
|
-
import { startHeartbeat } from "./services/heartbeat.js";
|
|
158
|
+
import { startHeartbeat, stopHeartbeat } from "./services/heartbeat.js";
|
|
159
|
+
import { stopAutoUpdateLoop } from "./services/updater.js";
|
|
160
|
+
import { startCleanupLoop, stopCleanupLoop } from "./services/disk-cleanup.js";
|
|
161
|
+
import { flushProfiles } from "./services/users.js";
|
|
159
162
|
import { initEmbeddings } from "./services/embeddings.js";
|
|
160
163
|
import { loadSkills } from "./services/skills.js";
|
|
161
164
|
import { loadHooks } from "./services/hooks.js";
|
|
@@ -335,10 +338,19 @@ const shutdown = async () => {
|
|
|
335
338
|
stopAsyncAgentWatcher();
|
|
336
339
|
stopSessionCleanup();
|
|
337
340
|
stopWorkspaceWatcher();
|
|
341
|
+
stopHeartbeat();
|
|
342
|
+
stopAutoUpdateLoop();
|
|
343
|
+
stopCleanupLoop();
|
|
338
344
|
// v4.11.0 — Final immediate flush of in-memory sessions to disk before exit.
|
|
339
345
|
// The debounced timer might be pending; flushSessions() cancels it and writes
|
|
340
346
|
// synchronously so the next boot can rehydrate the latest state.
|
|
341
347
|
await flushSessions().catch((err) => console.warn("[shutdown] flushSessions failed:", err));
|
|
348
|
+
try {
|
|
349
|
+
flushProfiles();
|
|
350
|
+
}
|
|
351
|
+
catch (err) {
|
|
352
|
+
console.warn("[shutdown] flushProfiles failed:", err);
|
|
353
|
+
}
|
|
342
354
|
if (queueInterval)
|
|
343
355
|
clearInterval(queueInterval);
|
|
344
356
|
if (queueCleanupInterval)
|
|
@@ -612,5 +624,6 @@ else {
|
|
|
612
624
|
// Start heartbeat monitor even without Telegram
|
|
613
625
|
startHeartbeat();
|
|
614
626
|
startWatchdog();
|
|
627
|
+
startCleanupLoop();
|
|
615
628
|
initEmbeddings().catch(() => { });
|
|
616
629
|
}
|
|
@@ -252,6 +252,19 @@ export class WhatsAppAdapter {
|
|
|
252
252
|
fs.mkdirSync(authDir, { recursive: true });
|
|
253
253
|
const { state, saveCreds } = await useMultiFileAuthState(authDir);
|
|
254
254
|
const { version } = await fetchLatestBaileysVersion();
|
|
255
|
+
// Cleanup previous socket (reconnect path) — without this, every reconnect
|
|
256
|
+
// stacks a new set of listeners on baileys' EventEmitter, so messages get
|
|
257
|
+
// processed N times after N reconnects and closures leak.
|
|
258
|
+
if (this.sock) {
|
|
259
|
+
try {
|
|
260
|
+
this.sock.ev?.removeAllListeners?.();
|
|
261
|
+
this.sock.end?.(new Error("reconnect"));
|
|
262
|
+
}
|
|
263
|
+
catch {
|
|
264
|
+
// best-effort cleanup — ignore failures from already-dead socket
|
|
265
|
+
}
|
|
266
|
+
this.sock = null;
|
|
267
|
+
}
|
|
255
268
|
const sock = makeWASocket({
|
|
256
269
|
version,
|
|
257
270
|
auth: {
|
|
@@ -62,6 +62,28 @@ function getMissingFileFailureMs() {
|
|
|
62
62
|
const pending = new Map();
|
|
63
63
|
let pollTimer = null;
|
|
64
64
|
let started = false;
|
|
65
|
+
/**
|
|
66
|
+
* Hard cap on the pending-agents map. Without this, a bot that runs many
|
|
67
|
+
* async agents but sees some fail to write their outputFile would see
|
|
68
|
+
* entries linger up to `giveUpAt` (12h default). If the rate of
|
|
69
|
+
* registerPending() outpaces resolutions for days, memory and the disk
|
|
70
|
+
* state file grow unbounded. We evict oldest-first when over the cap.
|
|
71
|
+
*/
|
|
72
|
+
const MAX_PENDING_AGENTS = 500;
|
|
73
|
+
function enforcePendingCap() {
|
|
74
|
+
if (pending.size < MAX_PENDING_AGENTS)
|
|
75
|
+
return;
|
|
76
|
+
const entries = [...pending.entries()].sort((a, b) => a[1].startedAt - b[1].startedAt);
|
|
77
|
+
const target = Math.floor(MAX_PENDING_AGENTS * 0.9);
|
|
78
|
+
let toEvict = pending.size - target;
|
|
79
|
+
for (const [id] of entries) {
|
|
80
|
+
if (toEvict <= 0)
|
|
81
|
+
break;
|
|
82
|
+
pending.delete(id);
|
|
83
|
+
toEvict--;
|
|
84
|
+
}
|
|
85
|
+
console.warn(`[async-agent-watcher] pending map hit cap ${MAX_PENDING_AGENTS}, evicted to ${pending.size}`);
|
|
86
|
+
}
|
|
65
87
|
// ── Persistence ───────────────────────────────────────────────────
|
|
66
88
|
function loadFromDisk() {
|
|
67
89
|
try {
|
|
@@ -110,6 +132,7 @@ export function registerPendingAgent(input) {
|
|
|
110
132
|
sessionKey: input.sessionKey,
|
|
111
133
|
platform: input.platform,
|
|
112
134
|
};
|
|
135
|
+
enforcePendingCap();
|
|
113
136
|
pending.set(input.agentId, entry);
|
|
114
137
|
saveToDisk();
|
|
115
138
|
}
|
|
@@ -233,6 +233,17 @@ async function ensureGateway() {
|
|
|
233
233
|
gatewayProcess.on("exit", () => {
|
|
234
234
|
gatewayProcess = null;
|
|
235
235
|
});
|
|
236
|
+
// Surface spawn failures so we don't silently think the gateway is running.
|
|
237
|
+
gatewayProcess.on("error", (err) => {
|
|
238
|
+
log(`gateway spawn error: ${err.message}`);
|
|
239
|
+
gatewayProcess = null;
|
|
240
|
+
});
|
|
241
|
+
// Drain stdio pipes — otherwise stdout/stderr buffer fills and the child
|
|
242
|
+
// blocks on write. We don't care about the content (just that they drain).
|
|
243
|
+
gatewayProcess.stdout?.on("error", () => { });
|
|
244
|
+
gatewayProcess.stderr?.on("error", () => { });
|
|
245
|
+
gatewayProcess.stdout?.resume();
|
|
246
|
+
gatewayProcess.stderr?.resume();
|
|
236
247
|
// Wait for startup (max 10s)
|
|
237
248
|
for (let i = 0; i < 20; i++) {
|
|
238
249
|
await new Promise((r) => setTimeout(r, 500));
|
|
@@ -196,6 +196,12 @@ export async function ensureRunning(opts = {}) {
|
|
|
196
196
|
detached: true,
|
|
197
197
|
});
|
|
198
198
|
child.unref();
|
|
199
|
+
// The child inherits its own copy of the fd. Close our copy so the parent
|
|
200
|
+
// process doesn't leak a file descriptor per Chromium bootstrap.
|
|
201
|
+
try {
|
|
202
|
+
fs.closeSync(logStream);
|
|
203
|
+
}
|
|
204
|
+
catch { /* already closed — fine */ }
|
|
199
205
|
if (!child.pid) {
|
|
200
206
|
throw new Error("Failed to spawn Chromium (no PID)");
|
|
201
207
|
}
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Disk Cleanup Service — periodic cleanup of transient bot files.
|
|
3
|
+
*
|
|
4
|
+
* Targets files that are SAFE to delete (logs, temp screenshots, browser
|
|
5
|
+
* artifacts, old subagent streams) and leaves critical data alone
|
|
6
|
+
* (memory, assets, workspaces, cron-jobs, .env, session-store).
|
|
7
|
+
*
|
|
8
|
+
* Strategy:
|
|
9
|
+
* - Each path has a max age (days) OR a max size (MB, with rotation)
|
|
10
|
+
* - Defaults are conservative: keep 30 days of artifacts, rotate logs >100MB
|
|
11
|
+
* - All knobs overridable via env (CLEANUP_* vars) and via /cleanup set <key>
|
|
12
|
+
* - Runs once at boot + every 24h thereafter, unref'd so it doesn't
|
|
13
|
+
* prevent shutdown
|
|
14
|
+
*
|
|
15
|
+
* NEVER cleaned:
|
|
16
|
+
* ~/.alvin-bot/memory/ (daily logs, long-term memory)
|
|
17
|
+
* ~/.alvin-bot/assets/ (user-supplied files)
|
|
18
|
+
* ~/.alvin-bot/workspaces/ (user configuration)
|
|
19
|
+
* ~/.alvin-bot/cron-jobs.json (scheduled tasks)
|
|
20
|
+
* ~/.alvin-bot/.env (secrets)
|
|
21
|
+
* ~/.alvin-bot/session-store.json (resume tokens)
|
|
22
|
+
* ~/.alvin-bot/delivery-queue.json
|
|
23
|
+
* ~/.alvin-bot/standing-orders
|
|
24
|
+
* ~/.alvin-bot/auto-update.flag
|
|
25
|
+
*/
|
|
26
|
+
import fs from "fs";
|
|
27
|
+
import path from "path";
|
|
28
|
+
import os from "os";
|
|
29
|
+
import { DATA_DIR } from "../paths.js";
|
|
30
|
+
const DEFAULT_POLICY = {
|
|
31
|
+
logMaxSizeMb: parseInt(process.env.CLEANUP_LOG_MAX_MB || "100", 10),
|
|
32
|
+
screenshotsMaxAgeDays: parseInt(process.env.CLEANUP_SCREENSHOTS_DAYS || "30", 10),
|
|
33
|
+
subagentsMaxAgeDays: parseInt(process.env.CLEANUP_SUBAGENTS_DAYS || "30", 10),
|
|
34
|
+
tmpMaxAgeDays: parseInt(process.env.CLEANUP_TMP_DAYS || "7", 10),
|
|
35
|
+
waMediaMaxAgeDays: parseInt(process.env.CLEANUP_WA_MEDIA_DAYS || "30", 10),
|
|
36
|
+
};
|
|
37
|
+
const CLEANUP_INTERVAL_MS = 24 * 60 * 60 * 1000; // once a day
|
|
38
|
+
let cleanupTimer = null;
|
|
39
|
+
/**
|
|
40
|
+
* Return the current effective policy (env-overridden defaults).
|
|
41
|
+
*/
|
|
42
|
+
export function getCleanupPolicy() {
|
|
43
|
+
return { ...DEFAULT_POLICY };
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Run a cleanup pass once. Safe to call manually (e.g. /cleanup command).
|
|
47
|
+
*/
|
|
48
|
+
export async function runCleanup(policyOverride) {
|
|
49
|
+
const policy = { ...DEFAULT_POLICY, ...policyOverride };
|
|
50
|
+
const result = {
|
|
51
|
+
filesDeleted: 0,
|
|
52
|
+
bytesReclaimed: 0,
|
|
53
|
+
logsRotated: 0,
|
|
54
|
+
errors: [],
|
|
55
|
+
details: [],
|
|
56
|
+
};
|
|
57
|
+
// 1. Rotate large log files (launchd stdout/stderr)
|
|
58
|
+
if (policy.logMaxSizeMb > 0) {
|
|
59
|
+
const logsDir = path.join(DATA_DIR, "logs");
|
|
60
|
+
try {
|
|
61
|
+
if (fs.existsSync(logsDir)) {
|
|
62
|
+
for (const name of fs.readdirSync(logsDir)) {
|
|
63
|
+
if (!name.endsWith(".log"))
|
|
64
|
+
continue;
|
|
65
|
+
const full = path.join(logsDir, name);
|
|
66
|
+
try {
|
|
67
|
+
const st = fs.statSync(full);
|
|
68
|
+
if (st.size > policy.logMaxSizeMb * 1024 * 1024) {
|
|
69
|
+
// Rotate: keep a .old, overwrite current. Launchd will reopen on next write.
|
|
70
|
+
const oldPath = full + ".old";
|
|
71
|
+
try {
|
|
72
|
+
fs.rmSync(oldPath, { force: true });
|
|
73
|
+
}
|
|
74
|
+
catch { }
|
|
75
|
+
fs.renameSync(full, oldPath);
|
|
76
|
+
fs.writeFileSync(full, "");
|
|
77
|
+
result.logsRotated++;
|
|
78
|
+
result.bytesReclaimed += st.size;
|
|
79
|
+
result.details.push({ path: full, action: "rotated", size: st.size });
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
catch (err) {
|
|
83
|
+
result.errors.push(`log-rotate ${full}: ${err.message}`);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
catch (err) {
|
|
89
|
+
result.errors.push(`logs scan: ${err.message}`);
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
// 2. Browser screenshots (bot-owned CDP)
|
|
93
|
+
if (policy.screenshotsMaxAgeDays > 0) {
|
|
94
|
+
const dir = path.join(DATA_DIR, "browser", "screenshots");
|
|
95
|
+
cleanupOldFiles(dir, policy.screenshotsMaxAgeDays, result);
|
|
96
|
+
}
|
|
97
|
+
// 3. Subagent streaming outputs — only delete FINISHED ones (older than N days).
|
|
98
|
+
// We trust that the async-agent-watcher has already marked them done — files
|
|
99
|
+
// older than a few days are either delivered or definitively abandoned.
|
|
100
|
+
if (policy.subagentsMaxAgeDays > 0) {
|
|
101
|
+
const dir = path.join(DATA_DIR, "subagents");
|
|
102
|
+
cleanupOldFiles(dir, policy.subagentsMaxAgeDays, result, [".jsonl", ".err"]);
|
|
103
|
+
}
|
|
104
|
+
// 4. /tmp/alvin-bot/* (media, temp scrapes)
|
|
105
|
+
if (policy.tmpMaxAgeDays > 0) {
|
|
106
|
+
cleanupOldFiles("/tmp/alvin-bot", policy.tmpMaxAgeDays, result);
|
|
107
|
+
}
|
|
108
|
+
// 5. WhatsApp media cache
|
|
109
|
+
if (policy.waMediaMaxAgeDays > 0) {
|
|
110
|
+
const dir = path.join(DATA_DIR, "data", "wa-media");
|
|
111
|
+
cleanupOldFiles(dir, policy.waMediaMaxAgeDays, result);
|
|
112
|
+
}
|
|
113
|
+
// 6. CDP log (/tmp/chrome-cdp.log) — always keep just the latest boot
|
|
114
|
+
const cdpLog = path.join(os.tmpdir(), "chrome-cdp.log");
|
|
115
|
+
try {
|
|
116
|
+
if (fs.existsSync(cdpLog)) {
|
|
117
|
+
const st = fs.statSync(cdpLog);
|
|
118
|
+
const ageDays = (Date.now() - st.mtimeMs) / (24 * 60 * 60 * 1000);
|
|
119
|
+
if (ageDays > 7) {
|
|
120
|
+
fs.unlinkSync(cdpLog);
|
|
121
|
+
result.filesDeleted++;
|
|
122
|
+
result.bytesReclaimed += st.size;
|
|
123
|
+
result.details.push({ path: cdpLog, action: "deleted", size: st.size });
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
catch {
|
|
128
|
+
// Not critical
|
|
129
|
+
}
|
|
130
|
+
return result;
|
|
131
|
+
}
|
|
132
|
+
/**
|
|
133
|
+
* Delete files in `dir` older than `maxAgeDays`. Safe if `dir` doesn't exist.
|
|
134
|
+
* Optional extension filter — e.g. [".jsonl", ".err"] restricts to those types.
|
|
135
|
+
*/
|
|
136
|
+
function cleanupOldFiles(dir, maxAgeDays, result, extensions) {
|
|
137
|
+
if (!fs.existsSync(dir))
|
|
138
|
+
return;
|
|
139
|
+
const cutoffMs = Date.now() - maxAgeDays * 24 * 60 * 60 * 1000;
|
|
140
|
+
try {
|
|
141
|
+
for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
|
|
142
|
+
const full = path.join(dir, entry.name);
|
|
143
|
+
if (!entry.isFile())
|
|
144
|
+
continue;
|
|
145
|
+
if (extensions && !extensions.some((ext) => entry.name.endsWith(ext)))
|
|
146
|
+
continue;
|
|
147
|
+
try {
|
|
148
|
+
const st = fs.statSync(full);
|
|
149
|
+
if (st.mtimeMs < cutoffMs) {
|
|
150
|
+
fs.unlinkSync(full);
|
|
151
|
+
result.filesDeleted++;
|
|
152
|
+
result.bytesReclaimed += st.size;
|
|
153
|
+
result.details.push({ path: full, action: "deleted", size: st.size });
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
catch (err) {
|
|
157
|
+
result.errors.push(`${full}: ${err.message}`);
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
catch (err) {
|
|
162
|
+
result.errors.push(`scan ${dir}: ${err.message}`);
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
/**
|
|
166
|
+
* Start the periodic cleanup loop. Runs first pass after 5 minutes (let the
|
|
167
|
+
* bot fully boot and avoid competing with startup I/O), then every 24h.
|
|
168
|
+
*/
|
|
169
|
+
export function startCleanupLoop() {
|
|
170
|
+
if (cleanupTimer)
|
|
171
|
+
return;
|
|
172
|
+
// First run delayed so we don't step on a restart that's still writing logs
|
|
173
|
+
setTimeout(() => {
|
|
174
|
+
void runCleanup().then((r) => {
|
|
175
|
+
if (r.filesDeleted > 0 || r.logsRotated > 0) {
|
|
176
|
+
console.log(`[cleanup] ${r.filesDeleted} files deleted, ${r.logsRotated} logs rotated, ${formatBytes(r.bytesReclaimed)} reclaimed`);
|
|
177
|
+
}
|
|
178
|
+
});
|
|
179
|
+
}, 5 * 60 * 1000);
|
|
180
|
+
cleanupTimer = setInterval(() => {
|
|
181
|
+
void runCleanup().then((r) => {
|
|
182
|
+
if (r.filesDeleted > 0 || r.logsRotated > 0) {
|
|
183
|
+
console.log(`[cleanup] ${r.filesDeleted} files deleted, ${r.logsRotated} logs rotated, ${formatBytes(r.bytesReclaimed)} reclaimed`);
|
|
184
|
+
}
|
|
185
|
+
});
|
|
186
|
+
}, CLEANUP_INTERVAL_MS);
|
|
187
|
+
cleanupTimer.unref?.();
|
|
188
|
+
}
|
|
189
|
+
export function stopCleanupLoop() {
|
|
190
|
+
if (cleanupTimer) {
|
|
191
|
+
clearInterval(cleanupTimer);
|
|
192
|
+
cleanupTimer = null;
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
function formatBytes(n) {
|
|
196
|
+
if (n < 1024)
|
|
197
|
+
return `${n} B`;
|
|
198
|
+
if (n < 1024 * 1024)
|
|
199
|
+
return `${(n / 1024).toFixed(1)} KB`;
|
|
200
|
+
if (n < 1024 * 1024 * 1024)
|
|
201
|
+
return `${(n / 1024 / 1024).toFixed(1)} MB`;
|
|
202
|
+
return `${(n / 1024 / 1024 / 1024).toFixed(2)} GB`;
|
|
203
|
+
}
|
|
@@ -143,12 +143,26 @@ function chunkMarkdown(content, source) {
|
|
|
143
143
|
return chunks;
|
|
144
144
|
}
|
|
145
145
|
// ── Index Management ────────────────────────────────────
|
|
146
|
+
// In-memory cache for the embedding index. Without this, every query would
|
|
147
|
+
// re-read and re-parse the on-disk index (can be 100+ MB, making searchMemory
|
|
148
|
+
// the slowest step in a message turn). We keep the parsed object and invalidate
|
|
149
|
+
// via mtime check — so external reindexers are still picked up.
|
|
150
|
+
let indexCache = null;
|
|
151
|
+
let indexCacheMtime = 0;
|
|
146
152
|
function loadIndex() {
|
|
147
153
|
try {
|
|
154
|
+
const st = fs.statSync(INDEX_FILE);
|
|
155
|
+
if (indexCache && st.mtimeMs === indexCacheMtime) {
|
|
156
|
+
return indexCache;
|
|
157
|
+
}
|
|
148
158
|
const raw = fs.readFileSync(INDEX_FILE, "utf-8");
|
|
149
|
-
|
|
159
|
+
indexCache = JSON.parse(raw);
|
|
160
|
+
indexCacheMtime = st.mtimeMs;
|
|
161
|
+
return indexCache;
|
|
150
162
|
}
|
|
151
163
|
catch {
|
|
164
|
+
// File missing or unparseable — return an empty index and don't cache it
|
|
165
|
+
// (next call will retry, so a freshly-written index gets picked up).
|
|
152
166
|
return {
|
|
153
167
|
model: EMBEDDING_MODEL,
|
|
154
168
|
lastReindex: 0,
|
|
@@ -159,6 +173,15 @@ function loadIndex() {
|
|
|
159
173
|
}
|
|
160
174
|
function saveIndex(index) {
|
|
161
175
|
fs.writeFileSync(INDEX_FILE, JSON.stringify(index));
|
|
176
|
+
// Refresh cache immediately so the next loadIndex() sees the new state
|
|
177
|
+
// without a disk round-trip.
|
|
178
|
+
indexCache = index;
|
|
179
|
+
try {
|
|
180
|
+
indexCacheMtime = fs.statSync(INDEX_FILE).mtimeMs;
|
|
181
|
+
}
|
|
182
|
+
catch {
|
|
183
|
+
indexCacheMtime = Date.now();
|
|
184
|
+
}
|
|
162
185
|
}
|
|
163
186
|
/**
|
|
164
187
|
* Recursively walk a directory, returning file paths.
|
|
@@ -72,6 +72,10 @@ export function startHeartbeat() {
|
|
|
72
72
|
setTimeout(() => {
|
|
73
73
|
runHeartbeat();
|
|
74
74
|
state.intervalId = setInterval(runHeartbeat, HEARTBEAT_INTERVAL_MS);
|
|
75
|
+
// .unref() so this interval alone doesn't keep the process alive during
|
|
76
|
+
// graceful shutdown — the bot's main loop (grammy, platforms) keeps it
|
|
77
|
+
// running, and once those stop we want the process to exit cleanly.
|
|
78
|
+
state.intervalId?.unref?.();
|
|
75
79
|
}, 30_000);
|
|
76
80
|
}
|
|
77
81
|
/**
|
package/dist/services/mcp.js
CHANGED
|
@@ -116,6 +116,17 @@ async function connectStdio(name, config) {
|
|
|
116
116
|
proc.stderr.on("data", (data) => {
|
|
117
117
|
console.error(`MCP ${name} stderr:`, data.toString().trim());
|
|
118
118
|
});
|
|
119
|
+
// Surface stderr stream errors so we don't silently lose the channel
|
|
120
|
+
// (EPIPE, ECONNRESET etc). Without this, unhandled 'error' on the
|
|
121
|
+
// stream would crash the whole Node process.
|
|
122
|
+
proc.stderr.on("error", (err) => {
|
|
123
|
+
console.error(`MCP ${name} stderr stream error:`, err.message);
|
|
124
|
+
server.connected = false;
|
|
125
|
+
});
|
|
126
|
+
proc.stdout?.on("error", (err) => {
|
|
127
|
+
console.error(`MCP ${name} stdout stream error:`, err.message);
|
|
128
|
+
server.connected = false;
|
|
129
|
+
});
|
|
119
130
|
proc.on("error", (err) => {
|
|
120
131
|
console.error(`MCP ${name} process error:`, err);
|
|
121
132
|
server.connected = false;
|
package/dist/services/skills.js
CHANGED
|
@@ -167,10 +167,12 @@ export function loadSkills() {
|
|
|
167
167
|
return cachedSkills;
|
|
168
168
|
}
|
|
169
169
|
/**
|
|
170
|
-
* Get all loaded skills.
|
|
170
|
+
* Get all loaded skills. Cached after the first loadSkills() call; hot-reload
|
|
171
|
+
* happens via fs.watch when files change on disk. We only force a scan here if
|
|
172
|
+
* the cache is empty (init-order edge case).
|
|
171
173
|
*/
|
|
172
174
|
export function getSkills() {
|
|
173
|
-
if (cachedSkills.length === 0
|
|
175
|
+
if (cachedSkills.length === 0) {
|
|
174
176
|
reloadAllSkills();
|
|
175
177
|
}
|
|
176
178
|
return cachedSkills;
|
|
@@ -128,6 +128,43 @@ export function setDefaultTimeoutMs(ms) {
|
|
|
128
128
|
}
|
|
129
129
|
// ── State ───────────────────────────────────────────────
|
|
130
130
|
const activeAgents = new Map();
|
|
131
|
+
/**
|
|
132
|
+
* Hard cap on the activeAgents map. Without this, a long-running bot that
|
|
133
|
+
* spawns many agents (e.g. a chatty cron + manual triggers over months) would
|
|
134
|
+
* accumulate delivered entries indefinitely. The 30-min auto-cleanup inside
|
|
135
|
+
* runSubAgent only fires on graceful completion, so crashed/orphaned entries
|
|
136
|
+
* would linger until the 12h giveUpAt ceiling.
|
|
137
|
+
*
|
|
138
|
+
* Enforcement: whenever we insert a new entry and the map is at-or-over the
|
|
139
|
+
* cap, evict the oldest finished-and-delivered entries first. Running agents
|
|
140
|
+
* are never evicted.
|
|
141
|
+
*/
|
|
142
|
+
const MAX_ACTIVE_AGENTS = 1000;
|
|
143
|
+
function enforceAgentCap() {
|
|
144
|
+
if (activeAgents.size < MAX_ACTIVE_AGENTS)
|
|
145
|
+
return;
|
|
146
|
+
// Collect evictable entries (delivered OR terminal status), sort by startedAt
|
|
147
|
+
const evictable = [];
|
|
148
|
+
for (const [id, entry] of activeAgents) {
|
|
149
|
+
const status = entry.info.status;
|
|
150
|
+
const done = entry.delivered || status === "error" || status === "timeout" || status === "cancelled";
|
|
151
|
+
if (done)
|
|
152
|
+
evictable.push([id, entry.info.startedAt]);
|
|
153
|
+
}
|
|
154
|
+
evictable.sort((a, b) => a[1] - b[1]);
|
|
155
|
+
// Evict enough to land 10% below the cap, so we don't oscillate.
|
|
156
|
+
const target = Math.floor(MAX_ACTIVE_AGENTS * 0.9);
|
|
157
|
+
let toEvict = activeAgents.size - target;
|
|
158
|
+
for (const [id] of evictable) {
|
|
159
|
+
if (toEvict <= 0)
|
|
160
|
+
break;
|
|
161
|
+
activeAgents.delete(id);
|
|
162
|
+
toEvict--;
|
|
163
|
+
}
|
|
164
|
+
if (toEvict > 0) {
|
|
165
|
+
console.warn(`[subagents] map at ${activeAgents.size}/${MAX_ACTIVE_AGENTS} — could not evict enough finished entries (too many still running)`);
|
|
166
|
+
}
|
|
167
|
+
}
|
|
131
168
|
// ── Name resolver (B2) ──────────────────────────────────
|
|
132
169
|
/**
|
|
133
170
|
* Return all currently-tracked agents whose *base* name matches `base`.
|
|
@@ -563,6 +600,7 @@ export function spawnSubAgent(agentConfig) {
|
|
|
563
600
|
nameIndex: resolved.index,
|
|
564
601
|
queuePosition: willRunImmediately ? undefined : queuedLen + 1,
|
|
565
602
|
};
|
|
603
|
+
enforceAgentCap();
|
|
566
604
|
activeAgents.set(id, { info, abort, delivered: false });
|
|
567
605
|
const queuedSpawn = { id, resolvedName, agentConfig, depth, timeoutId };
|
|
568
606
|
if (willRunImmediately) {
|
package/dist/services/updater.js
CHANGED
|
@@ -272,6 +272,7 @@ export function startAutoUpdateLoop() {
|
|
|
272
272
|
console.log(`[auto-update] check failed: ${result.message}`);
|
|
273
273
|
}
|
|
274
274
|
}, AUTO_CHECK_INTERVAL_MS);
|
|
275
|
+
autoTimer.unref?.();
|
|
275
276
|
console.log(`[auto-update] loop started (interval: 6h)`);
|
|
276
277
|
}
|
|
277
278
|
export function stopAutoUpdateLoop() {
|
package/dist/services/users.js
CHANGED
|
@@ -8,6 +8,12 @@
|
|
|
8
8
|
*
|
|
9
9
|
* The admin/owner user uses the global docs/memory/ and docs/MEMORY.md.
|
|
10
10
|
* Additional users get isolated memory spaces.
|
|
11
|
+
*
|
|
12
|
+
* Performance:
|
|
13
|
+
* Profiles are cached in memory after first read. `touchProfile` — called
|
|
14
|
+
* on every inbound message — writes to cache and schedules a debounced
|
|
15
|
+
* disk flush (2s). This avoids two sync fs operations per message on the
|
|
16
|
+
* hot path. A final flush happens on graceful shutdown so nothing is lost.
|
|
11
17
|
*/
|
|
12
18
|
import fs from "fs";
|
|
13
19
|
import { resolve } from "path";
|
|
@@ -18,6 +24,42 @@ import { USERS_DIR, MEMORY_DIR } from "../paths.js";
|
|
|
18
24
|
// Ensure users dir exists
|
|
19
25
|
if (!fs.existsSync(USERS_DIR))
|
|
20
26
|
fs.mkdirSync(USERS_DIR, { recursive: true });
|
|
27
|
+
// ── In-memory cache + debounced persistence ─────────────
|
|
28
|
+
const cache = new Map();
|
|
29
|
+
const dirty = new Set();
|
|
30
|
+
let flushTimer = null;
|
|
31
|
+
const FLUSH_DELAY_MS = 2000;
|
|
32
|
+
/**
 * Arm the debounced flush timer, unless one is already pending.
 *
 * A pending timer will pick up any profiles marked dirty after it was
 * armed, so repeated calls inside the batch window are no-ops.
 */
function schedule_flush() {
  if (flushTimer) {
    return; // flush already scheduled — it will cover new dirty entries
  }
  const timer = setTimeout(() => {
    flushTimer = null;
    flushProfiles();
  }, FLUSH_DELAY_MS);
  // Don't let a pending profile flush keep the process alive on its own.
  timer.unref?.();
  flushTimer = timer;
}
|
|
41
|
+
/**
 * Write every dirty profile to disk synchronously. Called by the debounce
 * timer AND by the graceful-shutdown handler so no in-flight updates are
 * lost even if the bot exits between debounce ticks.
 *
 * A userId is removed from the dirty set only after its write succeeds;
 * a failed write stays dirty and is retried on the next flush (debounce
 * tick or shutdown). Previously `dirty.clear()` ran unconditionally, so
 * one failed write silently discarded that profile's pending changes.
 */
export function flushProfiles() {
  if (dirty.size === 0)
    return;
  // Deleting from a Set while iterating it is well-defined in JS.
  for (const userId of dirty) {
    const profile = cache.get(userId);
    if (!profile) {
      // Nothing to persist (e.g. the user was deleted after being marked).
      dirty.delete(userId);
      continue;
    }
    try {
      fs.writeFileSync(profilePath(userId), JSON.stringify(profile, null, 2));
      dirty.delete(userId); // drop only after a successful write
    }
    catch (err) {
      // Keep the entry dirty so the next flush retries it. Don't throw —
      // a persistent error would block flushing the remaining profiles.
      console.warn(`[users] flush ${userId} failed: ${err.message}`);
    }
  }
}
|
|
21
63
|
// ── Profile Management ──────────────────────────────────
|
|
22
64
|
function profilePath(userId) {
|
|
23
65
|
return resolve(USERS_DIR, `${userId}.json`);
|
|
@@ -26,22 +68,32 @@ function userMemoryDir(userId) {
|
|
|
26
68
|
return resolve(USERS_DIR, `${userId}`);
|
|
27
69
|
}
|
|
28
70
|
/**
 * Load a user profile. Returns null if not found. Reads from cache first,
 * falls back to disk on cache miss.
 *
 * @param {string|number} userId - Key of the profile to load.
 * @returns {object|null} The profile object (cached by reference), or
 *   null when the file is missing, unreadable, or not valid JSON.
 */
export function loadProfile(userId) {
  const hit = cache.get(userId);
  if (hit) {
    return hit;
  }
  try {
    // Cache miss: read once from disk and populate the cache.
    const parsed = JSON.parse(fs.readFileSync(profilePath(userId), "utf-8"));
    cache.set(userId, parsed);
    return parsed;
  }
  catch {
    // Missing file or corrupt JSON — treat both as "no profile".
    return null;
  }
}
|
|
40
88
|
/**
 * Save a user profile — updates cache and schedules a debounced disk flush.
 * For immediate durability (e.g. during shutdown), call flushProfiles()
 * after this.
 *
 * @param {object} profile - Profile object; `profile.userId` is the key.
 */
export function saveProfile(profile) {
  const { userId } = profile;
  cache.set(userId, profile);
  dirty.add(userId);
  schedule_flush();
}
|
|
46
98
|
/**
|
|
47
99
|
* Get or create a user profile.
|
|
@@ -76,6 +128,9 @@ export function getOrCreateProfile(userId, name, username) {
|
|
|
76
128
|
}
|
|
77
129
|
/**
|
|
78
130
|
* Update a user's activity (call on each message).
|
|
131
|
+
*
|
|
132
|
+
* Previously this did a sync read + write per message. Now it works purely
|
|
133
|
+
* in memory and lets the debounce timer batch writes to disk.
|
|
79
134
|
*/
|
|
80
135
|
export function touchProfile(userId, name, username, platform, messageText) {
|
|
81
136
|
const profile = getOrCreateProfile(userId, name, username);
|
|
@@ -95,20 +150,33 @@ export function touchProfile(userId, name, username, platform, messageText) {
|
|
|
95
150
|
return profile;
|
|
96
151
|
}
|
|
97
152
|
/**
|
|
98
|
-
* List all known user profiles.
|
|
153
|
+
* List all known user profiles. Reads from disk; populates cache for
|
|
154
|
+
* subsequent fast access.
|
|
99
155
|
*/
|
|
100
156
|
export function listProfiles() {
|
|
101
157
|
const profiles = [];
|
|
102
158
|
try {
|
|
103
159
|
const files = fs.readdirSync(USERS_DIR);
|
|
104
160
|
for (const file of files) {
|
|
105
|
-
if (file.endsWith(".json"))
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
161
|
+
if (!file.endsWith(".json"))
|
|
162
|
+
continue;
|
|
163
|
+
// Parse user id from filename — skip non-numeric (e.g. stray files)
|
|
164
|
+
const userId = parseInt(file.slice(0, -5), 10);
|
|
165
|
+
if (!Number.isFinite(userId))
|
|
166
|
+
continue;
|
|
167
|
+
// If cached, use that; otherwise read once and cache
|
|
168
|
+
const cached = cache.get(userId);
|
|
169
|
+
if (cached) {
|
|
170
|
+
profiles.push(cached);
|
|
171
|
+
continue;
|
|
172
|
+
}
|
|
173
|
+
try {
|
|
174
|
+
const raw = fs.readFileSync(resolve(USERS_DIR, file), "utf-8");
|
|
175
|
+
const p = JSON.parse(raw);
|
|
176
|
+
cache.set(userId, p);
|
|
177
|
+
profiles.push(p);
|
|
111
178
|
}
|
|
179
|
+
catch { /* skip corrupt */ }
|
|
112
180
|
}
|
|
113
181
|
}
|
|
114
182
|
catch { /* dir doesn't exist */ }
|
|
@@ -145,6 +213,9 @@ export function addUserNote(userId, note) {
|
|
|
145
213
|
export function deleteUser(userId) {
|
|
146
214
|
const deleted = [];
|
|
147
215
|
const errors = [];
|
|
216
|
+
// 0. Drop from cache + dirty set so the debounce doesn't re-create the file
|
|
217
|
+
cache.delete(userId);
|
|
218
|
+
dirty.delete(userId);
|
|
148
219
|
// 1. Delete profile JSON
|
|
149
220
|
const pPath = profilePath(userId);
|
|
150
221
|
try {
|