npm - alvin-bot - Versions diffs - 4.9.3 → 4.10.0 - Mend

alvin-bot 4.9.3 → 4.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/CHANGELOG.md +108 -0
package/README.md +12 -1
package/dist/handlers/async-agent-chunk-handler.js +33 -0
package/dist/handlers/message.js +34 -6
package/dist/index.js +15 -3
package/dist/paths.js +4 -0
package/dist/providers/claude-sdk-provider.js +43 -0
package/dist/services/async-agent-parser.js +152 -0
package/dist/services/async-agent-watcher.js +206 -0
package/dist/services/personality.js +55 -0
package/dist/web/bind-strategy.js +42 -0
package/dist/web/server.js +231 -101
package/package.json +1 -1
package/test/async-agent-chunk-flow.test.ts +131 -0
package/test/async-agent-parser.test.ts +322 -0
package/test/async-agent-watcher.test.ts +229 -0
package/test/stress-scenarios.test.ts +1 -1
package/test/system-prompt-background-hint.test.ts +48 -0
package/test/web-server-integration.test.ts +189 -0
package/test/web-server-resilience.test.ts +118 -0
package/test/web-server-shutdown.test.ts +7 -1

package/dist/services/async-agent-watcher.js ADDED Viewed

@@ -0,0 +1,206 @@
+/**
+ * Async Sub-Agent Watcher (Fix #17 Stage 2)
+ *
+ * Tracks pending background sub-agents that Claude launched with
+ * `run_in_background: true`. Polls each agent's outputFile every
+ * POLL_INTERVAL_MS, detects completion (success/failure/timeout),
+ * and delivers the final result as a separate Telegram message via
+ * the existing subagent-delivery.ts pipeline.
+ *
+ * Persistence: pending agents survive bot restarts via
+ * ~/.alvin-bot/state/async-agents.json. On boot, startWatcher() loads
+ * the file and resumes polling — same catchup pattern as the v4.9.0
+ * cron scheduler.
+ *
+ * Why this exists: Claude's Agent tool defaults to synchronous, which
+ * blocks the main Telegram session for 10+ minutes during long audits.
+ * Stage 1 of the fix tells Claude to use run_in_background; Stage 2
+ * (this file) catches the resulting outputFile and delivers the result
+ * when ready, so the user can keep chatting while the agent works.
+ *
+ * See docs/superpowers/plans/2026-04-13-async-subagents.md for the
+ * full plan and docs/superpowers/specs/sdk-async-agent-outputfile-format.md
+ * for the JSONL format details.
+ */
+import fs from "fs";
+import { dirname } from "path";
+import { parseOutputFileStatus } from "./async-agent-parser.js";
+import { ASYNC_AGENTS_STATE_FILE } from "../paths.js";
+/** How often the polling loop runs against each pending agent. */
+const POLL_INTERVAL_MS = 15_000;
+/** Hard ceiling per agent — 12h. After this, give up and deliver
+ *  a timeout banner. SEO audits historically take ~13 min, so 12h
+ *  is absurdly generous and protects against state-file growth. */
+const MAX_AGENT_AGE_MS = 12 * 60 * 60 * 1000;
+// ── Module state ──────────────────────────────────────────────────
+const pending = new Map();
+let pollTimer = null;
+let started = false;
+// ── Persistence ───────────────────────────────────────────────────
+function loadFromDisk() {
+    try {
+        const raw = fs.readFileSync(ASYNC_AGENTS_STATE_FILE, "utf-8");
+        const arr = JSON.parse(raw);
+        if (!Array.isArray(arr))
+            return;
+        for (const entry of arr) {
+            if (typeof entry?.agentId === "string" && typeof entry?.outputFile === "string") {
+                pending.set(entry.agentId, entry);
+            }
+        }
+    }
+    catch {
+        // No state file yet — fresh start. Not an error.
+    }
+}
+function saveToDisk() {
+    try {
+        fs.mkdirSync(dirname(ASYNC_AGENTS_STATE_FILE), { recursive: true });
+        fs.writeFileSync(ASYNC_AGENTS_STATE_FILE, JSON.stringify([...pending.values()], null, 2), "utf-8");
+    }
+    catch (err) {
+        console.error("[async-watcher] failed to persist state:", err);
+    }
+}
+// ── Public API ────────────────────────────────────────────────────
+/**
+ * Register a new async agent that Claude just launched. Persists
+ * immediately so a crash right after registration still delivers
+ * the result on the next boot.
+ */
+export function registerPendingAgent(input) {
+    const now = Date.now();
+    const entry = {
+        agentId: input.agentId,
+        outputFile: input.outputFile,
+        description: input.description,
+        prompt: input.prompt,
+        chatId: input.chatId,
+        userId: input.userId,
+        startedAt: now,
+        lastCheckedAt: 0,
+        giveUpAt: input.giveUpAt ?? now + MAX_AGENT_AGE_MS,
+        toolUseId: input.toolUseId,
+    };
+    pending.set(input.agentId, entry);
+    saveToDisk();
+}
+/** Returns a snapshot of in-memory pending agents (for /subagents + diagnostics). */
+export function listPendingAgents() {
+    return [...pending.values()];
+}
+/** Start the polling loop. Idempotent. Loads any persisted state from disk. */
+export function startWatcher() {
+    if (started)
+        return;
+    started = true;
+    loadFromDisk();
+    pollTimer = setInterval(() => {
+        pollOnce().catch((err) => console.error("[async-watcher] poll cycle failed:", err));
+    }, POLL_INTERVAL_MS);
+    console.log(`⏳ Async-agent watcher started (${pending.size} pending, ${POLL_INTERVAL_MS / 1000}s interval)`);
+}
+/** Stop the polling loop. Idempotent. */
+export function stopWatcher() {
+    if (pollTimer)
+        clearInterval(pollTimer);
+    pollTimer = null;
+    started = false;
+}
+/**
+ * Run one poll cycle: check every pending agent, deliver the completed
+ * ones, drop them from the in-memory + on-disk state. Exported for
+ * tests; production uses the setInterval from startWatcher().
+ */
+export async function pollOnce() {
+    const now = Date.now();
+    const toRemove = [];
+    for (const entry of pending.values()) {
+        entry.lastCheckedAt = now;
+        // Timeout check first — if the agent is past its giveUpAt, give up
+        // regardless of whether the file shows progress.
+        if (now >= entry.giveUpAt) {
+            await deliverAsFailure(entry, "timeout", "Agent ran longer than 12h — giving up");
+            toRemove.push(entry.agentId);
+            continue;
+        }
+        const status = await parseOutputFileStatus(entry.outputFile);
+        if (status.state === "completed") {
+            await deliverAsCompleted(entry, status.output, status.tokensUsed);
+            toRemove.push(entry.agentId);
+        }
+        else if (status.state === "failed") {
+            await deliverAsFailure(entry, "error", status.error);
+            toRemove.push(entry.agentId);
+        }
+        // running / missing → keep polling next cycle
+    }
+    if (toRemove.length > 0) {
+        for (const id of toRemove)
+            pending.delete(id);
+        saveToDisk();
+    }
+}
+// ── Delivery helpers ──────────────────────────────────────────────
+async function deliverAsCompleted(entry, output, tokensUsed) {
+    const { deliverSubAgentResult } = await import("./subagent-delivery.js");
+    const info = {
+        id: entry.agentId,
+        name: entry.description,
+        status: "completed",
+        startedAt: entry.startedAt,
+        source: "cron", // Reuse cron banner format — fits async background agents.
+        depth: 0,
+        parentChatId: entry.chatId,
+    };
+    const result = {
+        id: entry.agentId,
+        name: entry.description,
+        status: "completed",
+        output,
+        tokensUsed: tokensUsed ?? { input: 0, output: 0 },
+        duration: Date.now() - entry.startedAt,
+    };
+    try {
+        await deliverSubAgentResult(info, result);
+    }
+    catch (err) {
+        console.error(`[async-watcher] delivery failed for ${entry.agentId}:`, err);
+    }
+}
+async function deliverAsFailure(entry, status, error) {
+    const { deliverSubAgentResult } = await import("./subagent-delivery.js");
+    const info = {
+        id: entry.agentId,
+        name: entry.description,
+        status,
+        startedAt: entry.startedAt,
+        source: "cron",
+        depth: 0,
+        parentChatId: entry.chatId,
+    };
+    const result = {
+        id: entry.agentId,
+        name: entry.description,
+        status,
+        output: "",
+        tokensUsed: { input: 0, output: 0 },
+        duration: Date.now() - entry.startedAt,
+        error,
+    };
+    try {
+        await deliverSubAgentResult(info, result);
+    }
+    catch (err) {
+        console.error(`[async-watcher] failure delivery failed for ${entry.agentId}:`, err);
+    }
+}
+// ── Test helpers ──────────────────────────────────────────────────
+/** Test-only: drop in-memory state. Doesn't touch disk. */
+export function __resetForTest() {
+    pending.clear();
+    if (pollTimer)
+        clearInterval(pollTimer);
+    pollTimer = null;
+    started = false;
+}

package/dist/services/personality.js CHANGED Viewed

@@ -46,6 +46,58 @@ function getBasePrompt(lang) {
 }
 /**
  * Additional system-prompt instructions for SDK providers (tool use).
  * buildSystemPrompt() appends this only when isSDK is true.
  */
 const SDK_ADDON = `When you run commands or edit files, briefly explain what you did.`;
+/**
+ * Stage 1 of Fix #17 — async sub-agents.
+ *
+ * Tells Claude to use the SDK's `run_in_background` flag for long-running
+ * Agent tool calls so the main Telegram session doesn't stay locked for
+ * 10+ minutes while sub-agents crawl the web, run audits, or build reports.
+ *
+ * Only injected into the prompt when isSDK === true (non-SDK providers
+ * have no Agent tool); buildSystemPrompt() pushes it right after SDK_ADDON.
+ * The bot's async-agent-watcher (Stage 2) picks up the resulting
+ * outputFile, polls for completion, and delivers the result as a separate
+ * Telegram message via subagent-delivery.ts.
+ *
+ * NOTE(review): the prompt addresses Claude as "you" but says the user can
+ * "keep chatting with me" — probably meant "with you". The first DON'T
+ * bullet also reads as if a word is missing ("questions WHERE the user
+ * is ..."). The wording is left untouched here because the test file
+ * below may pin it — confirm before editing the string.
+ *
+ * See test/system-prompt-background-hint.test.ts and
+ * docs/superpowers/plans/2026-04-13-async-subagents.md
+ */
+const BACKGROUND_SUBAGENT_HINT = `## Background Sub-Agents (IMPORTANT)
+For long-running work, ALWAYS set \`run_in_background: true\` on your Agent
+tool call. This makes the tool return IMMEDIATELY with an \`agentId\` and
+\`outputFile\` path instead of blocking for the full duration. Your main
+turn ends quickly, the user's session unlocks, and they can keep chatting
+with me while the sub-agent works in parallel.
+After launching a background agent, you MUST:
+1. Tell the user in one short sentence what you kicked off ("Starting SEO
+   audit for gethomes.io in the background — I'll send the report when
+   it's done").
+2. End your turn promptly. Do NOT wait around. Do NOT keep working on
+   that task. Wrap up within 1-2 short messages.
+3. When the agent finishes, its final report will arrive as a SEPARATE
+   message in this chat — the bot handles that delivery automatically.
+   You don't need to poll the outputFile proactively.
+If the user asks "is it done yet?" before the bot delivers the result,
+you MAY read the agent's \`outputFile\` (from the tool result) using the
+Read tool to check progress.
+**DO use \`run_in_background: true\` for:**
+- Audits (SEO, security, code quality, performance)
+- Research tasks that visit more than 3 web pages
+- Multi-file codebase analyses, full-repo scans
+- Report generation with multiple sub-steps
+- Anything you estimate will take longer than 2 minutes
+**DON'T use run_in_background for:**
+- Simple questions the user is actively waiting on a quick answer
+- Single file reads
+- Quick web fetches for a specific fact
+- Short tool chains under ~30 seconds
+When in doubt: prefer background for audits/research, foreground for
+conversational answers.`;
 /**
  * Self-Awareness Core — Dynamic introspection block.
  *
@@ -164,6 +216,9 @@ export function buildSystemPrompt(isSDK, language = "en", chatId) {
     }
     if (isSDK) {
         parts.push(SDK_ADDON);
+        // Stage 1 — teach Claude to use run_in_background for long-running
+        // Agent tool calls so the main session unlocks fast.
+        parts.push(BACKGROUND_SUBAGENT_HINT);
         // SDK providers have bash access — inject discovered tools so they know what's available
         parts.push(getToolSummary());
     }

package/dist/web/bind-strategy.js ADDED Viewed

@@ -0,0 +1,42 @@
+/**
+ * Pure decision helper for the web-server bind loop.
+ *
+ * Decouples the "what should happen next" logic from the side-effect
+ * spaghetti of real http.Server binding so it can be unit-tested in
+ * isolation. See test/web-server-resilience.test.ts for the contract.
+ *
+ * Why this exists: the v4.8.x and earlier implementations crashed the
+ * entire bot when port 3100 was held by a foreign process. A colleague
+ * running an OpenClaw fork hit the same bug years ago and ended up
+ * decoupling the web server completely — the main bot should never be
+ * gated on a web-UI bind. This helper encodes the decision logic so
+ * the new startWebServer() can just act on the returned action.
+ */
+/**
+ * Decide what the bind loop should do next after a failed listen().
+ *
+ * Rule of thumb:
+ *   - EADDRINUSE AND attempts remaining  → climb the port ladder.
+ *   - EADDRINUSE AND ladder exhausted    → background retry at original port.
+ *   - any other error (EACCES, listen-called-twice, etc.) → background retry.
+ *
+ * PURE: no timers, no I/O, no mutation of inputs. Safe to call from tests.
+ */
+export function decideNextBindAction(err, attempt, opts) {
+    const code = err?.code;
+    if (code === "EADDRINUSE" && attempt < opts.maxPortTries - 1) {
+        return {
+            type: "retry-port",
+            port: opts.originalPort + attempt + 1,
+            attempt: attempt + 1,
+        };
+    }
+    // EADDRINUSE with no attempts left, OR any non-EADDRINUSE error:
+    // don't walk the port ladder further, just back off and retry the
+    // original port in the background.
+    return {
+        type: "retry-background",
+        delayMs: opts.backgroundRetryMs,
+        port: opts.originalPort,
+    };
+}