npm - alvin-bot - Versions diffs - 5.3.0 → 5.4.0 - Mend

alvin-bot 5.3.0 → 5.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/.env.example +100 -0
package/CHANGELOG.md +43 -3
package/README.md +2 -0
package/alvin-bot.config.example.json +1 -1
package/dist/config.js +7 -4
package/dist/handlers/document.js +8 -1
package/dist/handlers/message.js +102 -17
package/dist/i18n.js +15 -0
package/dist/index.js +12 -0
package/dist/init-data-dir.js +17 -0
package/dist/middleware/auth.js +19 -1
package/dist/providers/tool-executor.js +29 -4
package/dist/services/async-agent-watcher.js +52 -8
package/dist/services/browser-manager.js +11 -9
package/dist/services/browser-webfetch.js +47 -13
package/dist/services/cron-scheduling.js +79 -19
package/dist/services/cron.js +205 -16
package/dist/services/delivery-queue.js +19 -0
package/dist/services/embeddings/index.js +2 -5
package/dist/services/env-file.js +4 -0
package/dist/services/personality.js +40 -37
package/dist/services/session-persistence.js +21 -3
package/dist/services/session.js +3 -0
package/dist/services/ssrf-guard.js +162 -0
package/dist/services/steer-channel.js +7 -2
package/dist/services/voice.js +0 -3
package/dist/web/server.js +155 -5
package/package.json +8 -7

package/dist/services/embeddings/index.js CHANGED Viewed

@@ -40,11 +40,8 @@ function loadSqlite() {
     }
     catch (err) {
         sqliteLoadError = err instanceof Error ? err : new Error(String(err));
-        console.warn("⚠️ better-sqlite3 native binary unavailable — embeddings disabled. " +
-            "Bot continues without semantic memory search. Fix: rebuild deps with " +
-            "`cd $(npm root -g)/alvin-bot && npm rebuild better-sqlite3` or reinstall " +
-            "alvin-bot. Underlying error: " +
-            sqliteLoadError.message);
+        console.log("ℹ️ Semantic memory (better-sqlite3) unavailable — using keyword search (FTS5). " +
+            "Reinstall or run `npm rebuild better-sqlite3` to enable.");
         return null;
     }
 }

package/dist/services/env-file.js CHANGED Viewed

@@ -26,6 +26,10 @@ export function readEnv() {
 }
 /** Upsert a key=value pair in the env file, preserving all other lines. */
 export function writeEnvVar(key, value) {
+    // M6 (centralized): reject values containing newline characters — they allow
+    // injecting extra .env lines (e.g. value="good\nEVIL=injected").
+    if (/[\n\r]/.test(value))
+        throw new Error("env value must not contain newline characters");
     let content = fs.existsSync(ENV_FILE) ? fs.readFileSync(ENV_FILE, "utf-8") : "";
     const regex = new RegExp(`^${key}=.*$`, "m");
     if (regex.test(content)) {

package/dist/services/personality.js CHANGED Viewed

@@ -35,7 +35,7 @@ try {
     soulContent = readFileSync(SOUL_FILE, "utf-8");
 }
 catch {
-    console.warn("SOUL.md not found — using default personality");
+    console.log("ℹ️ soul.md not found — using built-in default personality. Create ~/.alvin-bot/soul.md to customize.");
 }
 loadStandingOrders();
 /** Base system prompt — adapts to user language */
@@ -49,9 +49,14 @@ const SDK_ADDON = `When you run commands or edit files, briefly explain what you
 /**
  * Stage 1 of Fix #17 — async sub-agents.
  *
- * Tells Claude to use the SDK's `run_in_background` flag for long-running
- * Agent tool calls so the main Telegram session doesn't stay locked for
- * 10+ minutes while sub-agents crawl the web, run audits, or build reports.
+ * Tells Claude that `mcp__alvin__dispatch_agent` is the ONLY sanctioned
+ * path for long-running work on chat platforms.  The built-in Task/Agent
+ * tool is explicitly disallowed for anything that takes more than ~30 s
+ * because it blocks the session: isProcessing stays true, the typing
+ * indicator keeps firing, and the user cannot send a new message until
+ * the sub-agent finishes.  Path A (mcp__alvin__dispatch_agent) spawns a
+ * truly detached subprocess and returns in milliseconds so the session
+ * is freed immediately.
  *
  * Only injected into the prompt when isSDK === true (non-SDK providers
  * have no Agent tool). The bot's async-agent-watcher (Stage 2) picks up
@@ -63,34 +68,29 @@ const SDK_ADDON = `When you run commands or edit files, briefly explain what you
  */
 const BACKGROUND_SUBAGENT_HINT = `## ⚠️ CRITICAL: Background Sub-Agents on Telegram/WhatsApp/Slack
-**THE RULE (v4.13 update)**: For ANY long-running work (research,
-audits, multi-step analysis, >30 seconds), you MUST prefer the
-\`mcp__alvin__dispatch_agent\` tool over the built-in \`Task\` tool.
+**THE RULE**: For ANY long-running work (research, audits, multi-step
+analysis, >30 seconds), you MUST use \`mcp__alvin__dispatch_agent\`.
+The built-in \`Task\`/\`Agent\` tool is BANNED for long work on chat
+platforms (Telegram/WhatsApp/Slack/Discord). Do not use it for anything
+that takes more than ~30 seconds.
-**Why alvin_dispatch over Task(run_in_background: true)?**
+**Why mcp__alvin__dispatch_agent is the ONLY background path:**
-The built-in Task tool (even with \`run_in_background: true\`) keeps
-the sub-agent tied to THIS session's CLI subprocess. If the user sends
-me a new message mid-work, aborting this session would kill the
-sub-agent half-way through and leave garbage partial output. Alvin
-learned this the hard way (v4.12.1–v4.12.3 all got this wrong).
+The built-in Task/Agent tool — even with \`run_in_background: true\` —
+keeps the sub-agent tied to THIS session's CLI subprocess. The session
+stays blocked: \`isProcessing\` remains true, the typing indicator
+keeps firing, and the user's next message is queued until the
+synchronous Task finishes. There is no way to free the session while a
+built-in Task is running — that is a hard platform constraint.
 \`mcp__alvin__dispatch_agent\` spawns a COMPLETELY INDEPENDENT
 \`claude -p\` subprocess with its own PID, own process group. It
-survives ANY abort of this session. The user can interrupt me freely
-and keep chatting while the sub-agent finishes cleanly. The bot
-delivers the result as a separate message when ready.
+returns in milliseconds, the session is freed immediately, and the user
+can keep chatting. The bot delivers the result as a separate message
+when ready.
 **THIS IS THE SINGLE MOST IMPORTANT RULE FOR USER RESPONSIVENESS.**
-**Why it matters**: During a synchronous Agent tool call the parent
-session has no way to know the sub-agent is still working. It appears
-hung. After a long silence the session can be aborted, destroying the
-work. Using \`run_in_background: true\` solves this: the tool returns
-an \`agentId\` + \`outputFile\` path IMMEDIATELY, your turn ends in
-seconds, the user can keep chatting with me, and the bot automatically
-delivers the sub-agent's final result as a separate message when ready.
 **Decision tree** (apply every time you consider any sub-agent tool):
   Does the task involve ANY of the following?
@@ -103,16 +103,11 @@ delivers the sub-agent's final result as a separate message when ready.
     • Crawling, scraping, or fetching multiple resources
     • Research across multiple sources or domains
-  YES → use \`mcp__alvin__dispatch_agent\` (truly detached, preferred)
+  YES → use \`mcp__alvin__dispatch_agent\` (the ONLY sanctioned path)
   NO  → foreground is fine (single quick sub-query under 30s, answer
         yourself if possible)
-NOTE: The built-in Task tool with run_in_background: true still works
-but is now deprecated on Telegram/Slack/Discord/WhatsApp because it
-ties sub-agent lifetime to this session. Only use Task directly when
-you explicitly need the sub-agent's result IN THIS SAME TURN (rare).
-**Examples where you MUST use \`run_in_background: true\`:**
+**Examples where you MUST use \`mcp__alvin__dispatch_agent\`:**
 - ANY audit (SEO, security, code quality, performance, accessibility, GEO)
 - Research visiting more than 1-2 web pages
 - Code reviews on more than a single file
@@ -122,12 +117,12 @@ you explicitly need the sub-agent's result IN THIS SAME TURN (rare).
 - Long data-processing jobs
 - Anything involving the word "analyze", "audit", "review", "scan", "research"
-**Examples where foreground is fine:**
+**Examples where foreground is fine (no sub-agent needed):**
 - "Read this file and summarize it" (single file, <10s)
 - "What's 2+2?" (no sub-agent needed — answer yourself)
 - "Check if package.json has foo" (one quick tool call)
-**After launching a background agent (either tool), you MUST:**
+**After calling \`mcp__alvin__dispatch_agent\` you MUST:**
 1. Tell the user in ONE short sentence what you kicked off.
    Example: "Starting SEO audit for example.com in the background —
    I'll send the report when it's done."
@@ -135,6 +130,14 @@ you explicitly need the sub-agent's result IN THIS SAME TURN (rare).
 3. The bot will deliver the result as a separate message when ready.
    You don't need to poll the outputFile proactively.
+Only say "running in the background — you can keep chatting" if you
+actually called \`mcp__alvin__dispatch_agent\` in this turn. If the
+task ran inline (foreground tool calls, no dispatch), it blocked the
+session and the user could NOT chat during it — do NOT claim otherwise.
+If a task was too long for foreground but you didn't dispatch it, tell
+the user truthfully: "That ran inline and took a while. Your messages
+were queued until it finished."
 **For PARALLEL dispatch** (e.g. user says "research X and Y in parallel"):
 Call \`mcp__alvin__dispatch_agent\` multiple times in the SAME assistant
 turn, once per sub-task. Each returns its own agentId immediately. Your
@@ -145,10 +148,10 @@ If the user asks "is it done yet?" before the bot delivers the result,
 you MAY read the agent's \`outputFile\` (from the original tool result)
 using the Read tool to peek at progress — but don't block on it.
-**Never** call the Agent/Task tool without \`run_in_background: true\`
-for anything you're not 100% sure completes in under 30 seconds. The
-cost of unnecessary background mode is zero. The cost of blocking the
-Telegram user for 20 minutes on a synchronous call is very high.`;
+**Never** call the built-in Agent/Task tool for anything you're not
+100% sure completes in under 30 seconds. The cost of dispatch is zero.
+The cost of blocking the chat user for 20 minutes on a synchronous
+inline Task is very high.`;
 /**
  * Self-Awareness Core — Dynamic introspection block.
  *

package/dist/services/session-persistence.js CHANGED Viewed

@@ -142,6 +142,12 @@ export function loadPersistedSessions() {
     if (!raw_parsed || typeof raw_parsed !== "object")
         return 0;
     // v4.12.0 — Detect envelope format vs legacy v4.11.0 flat format
+    // M4: Validate the top-level shape before trusting any field.
+    if (Array.isArray(raw_parsed)) {
+        // An array at root is not a valid sessions file
+        console.warn("⚠️ session-persistence: sessions file contains an array at root, starting fresh");
+        return 0;
+    }
     let parsed;
     let tgWorkspaces = {};
     if (raw_parsed &&
@@ -149,11 +155,22 @@ export function loadPersistedSessions() {
         "version" in raw_parsed &&
         "sessions" in raw_parsed) {
         const env = raw_parsed;
-        parsed = env.sessions ?? {};
-        tgWorkspaces = env.telegramWorkspaces ?? {};
+        // M4: sessions field must be a non-null object; degrade gracefully if tampered
+        if (!env.sessions || typeof env.sessions !== "object" || Array.isArray(env.sessions)) {
+            console.warn("⚠️ session-persistence: 'sessions' field is not an object, starting fresh");
+            return 0;
+        }
+        parsed = env.sessions;
+        tgWorkspaces = (env.telegramWorkspaces && typeof env.telegramWorkspaces === "object" && !Array.isArray(env.telegramWorkspaces))
+            ? env.telegramWorkspaces
+            : {};
     }
     else {
-        // Legacy flat format (v4.11.0)
+        // Legacy flat format (v4.11.0) — must also be an object
+        if (!raw_parsed || typeof raw_parsed !== "object") {
+            console.warn("⚠️ session-persistence: sessions file is not a valid object, starting fresh");
+            return 0;
+        }
         parsed = raw_parsed;
     }
     // Rehydrate Telegram workspace map
@@ -184,6 +201,7 @@ export function loadPersistedSessions() {
             _qHandle: null,
             _steerChannel: null,
             _steerAckSentThisTurn: false,
+            _turnId: null,
             lastActivity: persisted.lastActivity ?? Date.now(),
             startedAt: persisted.startedAt ?? Date.now(),
             totalCost: persisted.totalCost ?? 0,

package/dist/services/session.js CHANGED Viewed

@@ -84,6 +84,7 @@ export function getSession(key) {
             _qHandle: null,
             _steerChannel: null,
             _steerAckSentThisTurn: false,
+            _turnId: null,
             lastActivity: Date.now(),
             startedAt: Date.now(),
             totalCost: 0,
@@ -120,6 +121,8 @@ export function getSession(key) {
             session._steerChannel = null;
         if (session._steerAckSentThisTurn === undefined)
             session._steerAckSentThisTurn = false;
+        if (session._turnId === undefined)
+            session._turnId = null;
     }
     return session;
 }

package/dist/services/ssrf-guard.js ADDED Viewed

@@ -0,0 +1,162 @@
+/**
+ * SSRF Guard — rejects requests to private/internal network destinations.
+ *
+ * Blocks:
+ *   - Non-http(s) schemes (file://, ftp://, etc.)
+ *   - IPv4 loopback (127.0.0.0/8), link-local (169.254.0.0/16 — incl. cloud
+ *     metadata endpoint 169.254.169.254), RFC-1918 private ranges
+ *     (10/8, 172.16/12, 192.168/16), and the catch-all 0.0.0.0
+ *   - IPv6 loopback (::1), ULA (fc00::/7), link-local (fe80::/10)
+ *
+ * Opt-out: set ALLOW_PRIVATE_FETCH=1 to disable blocking (for local dev /
+ * self-hosted setups). Default = blocked.
+ *
+ * No new runtime dependencies — uses Node's built-in `dns` and `net` modules.
+ */
+import dns from "dns";
+import net from "net";
+/**
+ * Error raised when a URL is refused by the SSRF guard.
+ *
+ * The message has the form `SSRF blocked: <reason> (url: <url>)`; the
+ * offending URL is also exposed on the `url` property so callers can log
+ * or report it without parsing the message.
+ */
+export class SsrfBlockedError extends Error {
+    url;
+    /**
+     * @param {string} url - The URL that was rejected.
+     * @param {string} reason - Human-readable explanation of the rejection.
+     */
+    constructor(url, reason) {
+        const message = `SSRF blocked: ${reason} (url: ${url})`;
+        super(message);
+        Object.assign(this, { name: "SsrfBlockedError", url });
+    }
+}
+/**
+ * Classify a dotted-quad IPv4 string as private/reserved or not.
+ *
+ * Returns true for addresses inside any of these networks:
+ *   0.0.0.0/8        — "this" network
+ *   10.0.0.0/8       — RFC-1918
+ *   127.0.0.0/8      — loopback
+ *   169.254.0.0/16   — link-local (incl. IMDS 169.254.169.254)
+ *   172.16.0.0/12    — RFC-1918 (172.16–172.31)
+ *   192.168.0.0/16   — RFC-1918
+ *
+ * Anything that does not split into four numeric octets in 0–255 yields
+ * false, leaving the decision to the DNS-based check.
+ *
+ * @param {string} ip - Candidate IPv4 address.
+ * @returns {boolean} true when the address is private/reserved.
+ */
+function isPrivateIPv4(ip) {
+    const octets = ip.split(".").map((segment) => Number(segment));
+    const isOctet = (n) => !Number.isNaN(n) && n >= 0 && n <= 255;
+    if (octets.length !== 4 || !octets.every(isOctet)) {
+        return false; // not a valid IPv4 — let DNS resolve decide
+    }
+    const [first, second] = octets;
+    // Only the first two octets are needed to identify every blocked network.
+    switch (first) {
+        case 0: // 0.0.0.0/8
+        case 10: // 10.0.0.0/8
+        case 127: // 127.0.0.0/8 loopback
+            return true;
+        case 169: // 169.254.0.0/16 link-local
+            return second === 254;
+        case 172: // 172.16.0.0/12
+            return second >= 16 && second <= 31;
+        case 192: // 192.168.0.0/16
+            return second === 168;
+        default:
+            return false;
+    }
+}
+/**
+ * Expand a lower-case, bracket-free, zone-free IPv6 string into its eight
+ * 16-bit groups. A trailing dotted quad (e.g. `::ffff:127.0.0.1`) is folded
+ * into the last two groups.
+ *
+ * @param {string} text - IPv6 address text.
+ * @returns {number[] | null} eight integers in 0–0xffff, or null when the
+ *   text cannot be expanded.
+ */
+function expandIPv6Groups(text) {
+    let address = text;
+    const lastColon = address.lastIndexOf(":");
+    const tail = address.slice(lastColon + 1);
+    if (tail.includes(".")) {
+        const octets = tail.split(".").map(Number);
+        const valid = octets.length === 4 &&
+            octets.every((o) => Number.isInteger(o) && o >= 0 && o <= 255);
+        if (!valid)
+            return null;
+        const hi = ((octets[0] << 8) | octets[1]).toString(16);
+        const lo = ((octets[2] << 8) | octets[3]).toString(16);
+        address = `${address.slice(0, lastColon + 1)}${hi}:${lo}`;
+    }
+    const halves = address.split("::");
+    if (halves.length > 2)
+        return null; // more than one "::" is not a valid address
+    const head = halves[0] === "" ? [] : halves[0].split(":");
+    const rest = halves.length === 1 || halves[1] === "" ? [] : halves[1].split(":");
+    const gap = 8 - head.length - rest.length;
+    // "::" must stand for at least one group; without it all 8 must be present.
+    if (halves.length === 2 ? gap < 1 : gap !== 0)
+        return null;
+    const hex = [...head, ...new Array(gap).fill("0"), ...rest];
+    if (!hex.every((g) => /^[0-9a-f]{1,4}$/.test(g)))
+        return null;
+    return hex.map((g) => Number.parseInt(g, 16));
+}
+/**
+ * Return true if the given IPv6 address string falls into a blocked range.
+ *
+ * Ranges blocked:
+ *   ::               — unspecified address
+ *   ::1              — loopback, in any textual form (e.g. 0:0:0:0:0:0:0:1)
+ *   ::ffff:0:0/96    — IPv4-mapped, when the embedded IPv4 address is private
+ *   fc00::/7         — ULA (fc00:: – fdff::)
+ *   fe80::/10        — link-local
+ *
+ * IPv4-mapped addresses matter because the URL parser serializes
+ * `[::ffff:127.0.0.1]` as `::ffff:7f00:1`; looking only at the first group
+ * would classify that loopback destination as public.
+ *
+ * @param {string} ip - IPv6 address, optionally wrapped in brackets and/or
+ *   carrying a `%zone` suffix.
+ * @returns {boolean} true when the address is private/internal.
+ */
+function isPrivateIPv6(ip) {
+    // Lower-case, drop URL-style brackets and any zone id before inspecting.
+    const normalized = ip
+        .toLowerCase()
+        .replace(/^\[|\]$/g, "")
+        .replace(/%.*$/, "");
+    const groups = expandIPv6Groups(normalized);
+    if (groups) {
+        const [g0, g1, g2, g3, g4, g5, g6, g7] = groups;
+        const upper80Zero = g0 === 0 && g1 === 0 && g2 === 0 && g3 === 0 && g4 === 0;
+        // :: (unspecified) and ::1 (loopback)
+        if (upper80Zero && g5 === 0 && g6 === 0 && g7 <= 1)
+            return true;
+        // ::ffff:a.b.c.d — judge by the embedded IPv4 address
+        if (upper80Zero && g5 === 0xffff) {
+            const embedded = [g6 >> 8, g6 & 0xff, g7 >> 8, g7 & 0xff].join(".");
+            return isPrivateIPv4(embedded);
+        }
+    }
+    // Prefix checks only need the first 16-bit group. If expansion failed,
+    // fall back to parsing the text before the first colon.
+    const value = groups
+        ? groups[0]
+        : Number.parseInt(normalized.split(":")[0] || "0", 16);
+    // fc00::/7 — binary prefix 1111110x, i.e. 0xfc00 to 0xfdff
+    if (value >= 0xfc00 && value <= 0xfdff)
+        return true;
+    // fe80::/10 — fe80 to febf
+    if (value >= 0xfe80 && value <= 0xfebf)
+        return true;
+    return false;
+}
+/**
+ * Decide whether a literal IP address (IPv4 or IPv6) is private/internal.
+ *
+ * Surrounding square brackets are removed first. Strings that are not IP
+ * literals return false.
+ *
+ * @param {string} ip - Candidate address, optionally in `[...]` form.
+ * @returns {boolean} true when the address is private/internal.
+ */
+function isPrivateIP(ip) {
+    const bare = ip.replace(/^\[|\]$/g, ""); // strip IPv6 brackets
+    // net.isIP yields 4, 6, or 0 (not an IP literal).
+    switch (net.isIP(bare)) {
+        case 4:
+            return isPrivateIPv4(bare);
+        case 6:
+            return isPrivateIPv6(bare);
+        default:
+            return false;
+    }
+}
+/**
+ * Check the hostname from a URL — if it's a literal IP, classify immediately.
+ * If it's a hostname, resolve it and check every returned address.
+ *
+ * Resolution uses `dns.promises.lookup(..., { all: true })`, i.e. the
+ * operating system resolver. Unlike `dns.resolve` (which queries A records
+ * only), it returns both IPv4 and IPv6 addresses and honours the hosts
+ * file, so the guard inspects the same candidates a later connection is
+ * likely to use.
+ *
+ * Fail-open on resolver errors: a temporary DNS failure must not take the
+ * bot down, and the subsequent fetch will fail on its own.
+ *
+ * NOTE(review): this is a check-then-use guard; a hostname that changes its
+ * answer between this lookup and the real request (DNS rebinding) is not
+ * covered here.
+ *
+ * @param {string} hostname - Host part of the URL (IPv6 may be bracketed).
+ * @param {string} url - Full URL, used only for the error message.
+ * @returns {Promise<void>} resolves when no private destination was found.
+ * @throws {SsrfBlockedError} when the host is, or resolves to, a
+ *   private/loopback/link-local address.
+ */
+async function checkHost(hostname, url) {
+    const bare = hostname.replace(/^\[|\]$/g, ""); // strip IPv6 brackets for net.isIP
+    // Literal IP — no DNS needed
+    if (net.isIP(bare)) {
+        if (isPrivateIP(bare)) {
+            throw new SsrfBlockedError(url, `destination ${bare} is a private/loopback/link-local address`);
+        }
+        return;
+    }
+    // Named hostname — catch obvious loopback names without DNS. A single
+    // trailing dot (fully-qualified form, e.g. "localhost.") is ignored so
+    // it cannot be used to dodge this check.
+    const lower = bare.toLowerCase().replace(/\.$/, "");
+    if (lower === "localhost" || lower.endsWith(".localhost")) {
+        throw new SsrfBlockedError(url, `destination hostname '${bare}' resolves to loopback`);
+    }
+    let addresses;
+    try {
+        addresses = await dns.promises.lookup(bare, { all: true });
+    }
+    catch {
+        // Resolver error (ENOTFOUND, etc.) — let the actual fetch fail naturally
+        return;
+    }
+    for (const { address } of addresses) {
+        if (isPrivateIP(address)) {
+            throw new SsrfBlockedError(url, `destination hostname '${bare}' resolves to private address ${address}`);
+        }
+    }
+}
+/**
+ * Gatekeeper for outbound fetches: resolves when `url` may be requested and
+ * rejects with SsrfBlockedError when it may not.
+ *
+ * Order of checks:
+ *   1. ALLOW_PRIVATE_FETCH=1 — operator opt-out, everything is allowed.
+ *   2. The string must parse as a URL.
+ *   3. Only the http: and https: schemes are accepted.
+ *   4. The host is handed to checkHost for address classification.
+ *
+ * @param {string} url - URL about to be fetched.
+ * @returns {Promise<void>}
+ * @throws {SsrfBlockedError} when the URL is invalid, uses another scheme,
+ *   or is rejected by checkHost.
+ */
+export async function assertSsrfSafe(url) {
+    // Opt-out for trusted local / self-hosted environments
+    const optedOut = process.env.ALLOW_PRIVATE_FETCH === "1";
+    if (optedOut) {
+        return;
+    }
+    let target;
+    try {
+        target = new URL(url);
+    }
+    catch {
+        throw new SsrfBlockedError(url, "invalid URL");
+    }
+    // `protocol` includes the trailing ':'
+    const { protocol, hostname } = target;
+    const isWebScheme = ["http:", "https:"].includes(protocol);
+    if (!isWebScheme) {
+        throw new SsrfBlockedError(url, `scheme '${protocol}' is not allowed (only http/https)`);
+    }
+    await checkHost(hostname, url);
+}

package/dist/services/steer-channel.js CHANGED Viewed

@@ -7,17 +7,22 @@ export class SteerChannel {
     constructor(cap = DEFAULT_CAP) {
         this.cap = cap;
     }
+    /** Push a message into the channel.
+     *  Returns true if the message was accepted, false if it was dropped
+     *  (channel closed or buffer cap reached). Callers must check the
+     *  return value to decide whether to send a 📨 ack or a bufferFull notice. */
     push(text) {
         if (this.closed)
-            return;
+            return false;
         if (this.buf.length >= this.cap) {
             console.warn(`[steer-channel] cap ${this.cap} reached — dropping steer message`);
-            return;
+            return false;
         }
         this.buf.push({ type: "user", message: { role: "user", content: text }, parent_tool_use_id: null });
         const r = this.resolveNext;
         this.resolveNext = null;
         r?.();
+        return true;
     }
     close() {
         if (this.closed)

package/dist/services/voice.js CHANGED Viewed

@@ -21,9 +21,6 @@ export async function transcribeAudio(audioPath) {
     const bodyEnd = Buffer.from(`\r\n--${boundary}\r\n` +
         `Content-Disposition: form-data; name="model"\r\n\r\n` +
         `whisper-large-v3-turbo\r\n` +
-        `--${boundary}\r\n` +
-        `Content-Disposition: form-data; name="language"\r\n\r\n` +
-        `de\r\n` +
         `--${boundary}--\r\n`, "utf-8");
     const fullBody = Buffer.concat([bodyStart, fileBuffer, bodyEnd]);
     return new Promise((resolve, reject) => {