npm - alvin-bot - Versions diffs - 4.9.4 → 4.10.0 - Mend

alvin-bot 4.9.4 → 4.10.0

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registry.

Files changed (14) hide show

package/CHANGELOG.md +65 -0
package/dist/handlers/async-agent-chunk-handler.js +33 -0
package/dist/handlers/message.js +34 -6
package/dist/index.js +7 -0
package/dist/paths.js +4 -0
package/dist/providers/claude-sdk-provider.js +43 -0
package/dist/services/async-agent-parser.js +152 -0
package/dist/services/async-agent-watcher.js +206 -0
package/dist/services/personality.js +55 -0
package/package.json +1 -1
package/test/async-agent-chunk-flow.test.ts +131 -0
package/test/async-agent-parser.test.ts +322 -0
package/test/async-agent-watcher.test.ts +229 -0
package/test/system-prompt-background-hint.test.ts +48 -0

package/dist/services/personality.js CHANGED Viewed

@@ -46,6 +46,58 @@ function getBasePrompt(lang) {
 }
 /** Additional instructions for SDK providers (tool use) */
 const SDK_ADDON = `When you run commands or edit files, briefly explain what you did.`;
+/**
+ * Stage 1 of Fix #17 — async sub-agents.
+ *
+ * Tells Claude to use the SDK's `run_in_background` flag for long-running
+ * Agent tool calls so the main Telegram session doesn't stay locked for
+ * 10+ minutes while sub-agents crawl the web, run audits, or build reports.
+ *
+ * Only injected into the prompt when isSDK === true (non-SDK providers
+ * have no Agent tool). The bot's async-agent-watcher (Stage 2) picks up
+ * the resulting outputFile, polls for completion, and delivers the
+ * result as a separate Telegram message via subagent-delivery.ts.
+ *
+ * See test/system-prompt-background-hint.test.ts and
+ * docs/superpowers/plans/2026-04-13-async-subagents.md
+ */
+const BACKGROUND_SUBAGENT_HINT = `## Background Sub-Agents (IMPORTANT)
+For long-running work, ALWAYS set \`run_in_background: true\` on your Agent
+tool call. This makes the tool return IMMEDIATELY with an \`agentId\` and
+\`outputFile\` path instead of blocking for the full duration. Your main
+turn ends quickly, the user's session unlocks, and they can keep chatting
+with me while the sub-agent works in parallel.
+After launching a background agent, you MUST:
+1. Tell the user in one short sentence what you kicked off ("Starting SEO
+   audit for gethomes.io in the background — I'll send the report when
+   it's done").
+2. End your turn promptly. Do NOT wait around. Do NOT keep working on
+   that task. Wrap up within 1-2 short messages.
+3. When the agent finishes, its final report will arrive as a SEPARATE
+   message in this chat — the bot handles that delivery automatically.
+   You don't need to poll the outputFile proactively.
+If the user asks "is it done yet?" before the bot delivers the result,
+you MAY read the agent's \`outputFile\` (from the tool result) using the
+Read tool to check progress.
+**DO use \`run_in_background: true\` for:**
+- Audits (SEO, security, code quality, performance)
+- Research tasks that visit more than 3 web pages
+- Multi-file codebase analyses, full-repo scans
+- Report generation with multiple sub-steps
+- Anything you estimate will take longer than 2 minutes
+**DON'T use run_in_background for:**
+- Simple questions the user is actively waiting on a quick answer
+- Single file reads
+- Quick web fetches for a specific fact
+- Short tool chains under ~30 seconds
+When in doubt: prefer background for audits/research, foreground for
+conversational answers.`;
 /**
  * Self-Awareness Core — Dynamic introspection block.
  *
@@ -164,6 +216,9 @@ export function buildSystemPrompt(isSDK, language = "en", chatId) {
     }
     if (isSDK) {
         parts.push(SDK_ADDON);
+        // Stage 1 — teach Claude to use run_in_background for long-running
+        // Agent tool calls so the main session unlocks fast.
+        parts.push(BACKGROUND_SUBAGENT_HINT);
         // SDK providers have bash access — inject discovered tools so they know what's available
         parts.push(getToolSummary());
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "alvin-bot",
-  "version": "4.9.4",
+  "version": "4.10.0",
   "description": "Alvin Bot — Your personal AI agent on Telegram, WhatsApp, Discord, Signal, and Web.",
   "type": "module",
   "main": "dist/index.js",

package/test/async-agent-chunk-flow.test.ts ADDED Viewed

@@ -0,0 +1,131 @@
+/**
+ * Fix #17 (Stage 2) — when the SDK yields a tool_result chunk with an
+ * "Async agent launched successfully" payload, the message handler
+ * must register the pending agent with the watcher.
+ *
+ * This tests the helper `handleToolResultChunk` in isolation —
+ * the integration with message.ts is covered by the live e2e test.
+ */
+import { describe, it, expect, beforeEach, vi } from "vitest";
+describe("async agent chunk flow (Stage 2)", () => {
+  beforeEach(() => vi.resetModules());
+  it("tool_result with async_launched gets registered with the watcher", async () => {
+    const registered: unknown[] = [];
+    vi.doMock("../src/services/async-agent-watcher.js", () => ({
+      registerPendingAgent: (input: unknown) => registered.push(input),
+      startWatcher: () => {},
+      stopWatcher: () => {},
+      pollOnce: async () => {},
+      listPendingAgents: () => [],
+    }));
+    const { handleToolResultChunk } = await import(
+      "../src/handlers/async-agent-chunk-handler.js"
+    );
+    const chunk = {
+      type: "tool_result" as const,
+      toolUseId: "toolu_1",
+      toolResultContent:
+        "Async agent launched successfully.\n" +
+        "agentId: abc-1 (something)\n" +
+        "output_file: /tmp/out-abc-1.jsonl\n" +
+        "If asked, you can check progress.",
+    };
+    handleToolResultChunk(chunk, {
+      chatId: 42,
+      userId: 99,
+      lastToolUseInput: {
+        description: "SEO audit",
+        prompt: "audit gethomes.io",
+      },
+    });
+    expect(registered).toHaveLength(1);
+    const r = registered[0] as { agentId: string; description: string; outputFile: string };
+    expect(r.agentId).toBe("abc-1");
+    expect(r.description).toBe("SEO audit");
+    expect(r.outputFile).toBe("/tmp/out-abc-1.jsonl");
+  });
+  it("falls back to a generic description when no toolUseInput is provided", async () => {
+    const registered: unknown[] = [];
+    vi.doMock("../src/services/async-agent-watcher.js", () => ({
+      registerPendingAgent: (input: unknown) => registered.push(input),
+      startWatcher: () => {},
+      stopWatcher: () => {},
+      pollOnce: async () => {},
+      listPendingAgents: () => [],
+    }));
+    const { handleToolResultChunk } = await import(
+      "../src/handlers/async-agent-chunk-handler.js"
+    );
+    handleToolResultChunk(
+      {
+        type: "tool_result",
+        toolUseId: "toolu_2",
+        toolResultContent:
+          "Async agent launched successfully.\n" +
+          "agentId: x\n" +
+          "output_file: /tmp/o\n",
+      },
+      { chatId: 42, userId: 99 },
+    );
+    expect(registered).toHaveLength(1);
+    const r = registered[0] as { description: string };
+    expect(r.description.length).toBeGreaterThan(0);
+  });
+  it("non-async tool_result (e.g. Read) is ignored", async () => {
+    const registered: unknown[] = [];
+    vi.doMock("../src/services/async-agent-watcher.js", () => ({
+      registerPendingAgent: (input: unknown) => registered.push(input),
+      startWatcher: () => {},
+      stopWatcher: () => {},
+      pollOnce: async () => {},
+      listPendingAgents: () => [],
+    }));
+    const { handleToolResultChunk } = await import(
+      "../src/handlers/async-agent-chunk-handler.js"
+    );
+    handleToolResultChunk(
+      {
+        type: "tool_result",
+        toolUseId: "toolu_3",
+        toolResultContent: "file contents here (plain Read result)",
+      },
+      { chatId: 42, userId: 99 },
+    );
+    expect(registered).toHaveLength(0);
+  });
+  it("non-tool_result chunks are ignored without throwing", async () => {
+    vi.doMock("../src/services/async-agent-watcher.js", () => ({
+      registerPendingAgent: () => {
+        throw new Error("should not be called");
+      },
+      startWatcher: () => {},
+      stopWatcher: () => {},
+      pollOnce: async () => {},
+      listPendingAgents: () => [],
+    }));
+    const { handleToolResultChunk } = await import(
+      "../src/handlers/async-agent-chunk-handler.js"
+    );
+    expect(() =>
+      handleToolResultChunk(
+        { type: "text", text: "hi" },
+        { chatId: 42, userId: 99 },
+      ),
+    ).not.toThrow();
+  });
+});

package/test/async-agent-parser.test.ts ADDED Viewed

@@ -0,0 +1,322 @@
+/**
+ * Fix #17 (Stage 2) — async-agent-parser unit tests.
+ *
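+ * Two helpers (the first works on its input only; the second reads the file at `path`):
+ *   parseAsyncLaunchedToolResult(textOrContentBlocks) → { agentId, outputFile } | null
+ *   await parseOutputFileStatus(path, { maxTailBytes }?) → { state: "running"|"completed"|"failed"|"missing" } (+ output, tokensUsed when completed)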
+ *
+ * Format details captured from the live SDK probe in
+ * docs/superpowers/specs/sdk-async-agent-outputfile-format.md
+ */
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import fs from "fs";
+import os from "os";
+import { resolve } from "path";
+import {
+  parseAsyncLaunchedToolResult,
+  parseOutputFileStatus,
+} from "../src/services/async-agent-parser.js";
+describe("parseAsyncLaunchedToolResult — plain text format (Stage 2)", () => {
+  it("extracts agentId and output_file from the real SDK tool-result text", () => {
+    const text = `Async agent launched successfully.
+agentId: a9e9c5913b2faec71 (internal ID - do not mention to user. Use SendMessage with to: 'a9e9c5913b2faec71' to continue this agent.)
+The agent is working in the background. You will be notified automatically when it completes.
+Do not duplicate this agent's work — avoid working with the same files or topics it is using.
+output_file: /private/tmp/claude-502/-Users-alvin-de-Projects-alvin-bot/abc/tasks/a9e9c5913b2faec71.output
+If asked, you can check progress before completion by using Read or Bash tail on the output file.`;
+    const info = parseAsyncLaunchedToolResult(text);
+    expect(info).not.toBeNull();
+    expect(info?.agentId).toBe("a9e9c5913b2faec71");
+    expect(info?.outputFile).toBe(
+      "/private/tmp/claude-502/-Users-alvin-de-Projects-alvin-bot/abc/tasks/a9e9c5913b2faec71.output",
+    );
+  });
+  it("returns null for ordinary tool result text (e.g. Read output)", () => {
+    expect(parseAsyncLaunchedToolResult("file contents here")).toBeNull();
+  });
+  it("returns null for an empty string", () => {
+    expect(parseAsyncLaunchedToolResult("")).toBeNull();
+  });
+  it("returns null when the marker line is missing", () => {
+    expect(
+      parseAsyncLaunchedToolResult("agentId: x\noutput_file: /tmp/a"),
+    ).toBeNull();
+  });
+  it("returns null when output_file line is missing", () => {
+    const text =
+      "Async agent launched successfully.\nagentId: abc123\nMore prose";
+    expect(parseAsyncLaunchedToolResult(text)).toBeNull();
+  });
+  it("returns null when agentId line is missing", () => {
+    const text =
+      "Async agent launched successfully.\noutput_file: /tmp/a\nMore prose";
+    expect(parseAsyncLaunchedToolResult(text)).toBeNull();
+  });
+  it("trims whitespace around extracted values", () => {
+    const text = `Async agent launched successfully.
+agentId:    abc-with-spaces   (something)
+output_file:    /tmp/path with spaces.output   `;
+    const info = parseAsyncLaunchedToolResult(text);
+    expect(info?.agentId).toBe("abc-with-spaces");
+    // Path can contain spaces — we just trim leading/trailing
+    expect(info?.outputFile).toBe("/tmp/path with spaces.output");
+  });
+  it("handles input that is an array of content blocks (Anthropic SDK shape)", () => {
+    const blocks = [
+      { type: "text", text: "Async agent launched successfully.\nagentId: id1\noutput_file: /tmp/o1\n" },
+    ];
+    const info = parseAsyncLaunchedToolResult(blocks);
+    expect(info?.agentId).toBe("id1");
+    expect(info?.outputFile).toBe("/tmp/o1");
+  });
+  it("handles non-string input gracefully", () => {
+    expect(parseAsyncLaunchedToolResult(null)).toBeNull();
+    expect(parseAsyncLaunchedToolResult(undefined)).toBeNull();
+    expect(parseAsyncLaunchedToolResult(42 as unknown as string)).toBeNull();
+  });
+});
+const TMP_BASE = resolve(os.tmpdir(), `alvin-parser-${process.pid}`);
+beforeEach(() => {
+  fs.mkdirSync(TMP_BASE, { recursive: true });
+});
+afterEach(() => {
+  try {
+    fs.rmSync(TMP_BASE, { recursive: true, force: true });
+  } catch { /* ignore */ }
+});
+async function writeJsonl(name: string, lines: object[]): Promise<string> {
+  const path = resolve(TMP_BASE, name);
+  fs.writeFileSync(
+    path,
+    lines.map((l) => JSON.stringify(l)).join("\n") + "\n",
+    "utf-8",
+  );
+  return path;
+}
+describe("parseOutputFileStatus — JSONL completion detection (Stage 2)", () => {
+  it("returns 'missing' when the file doesn't exist", async () => {
+    const status = await parseOutputFileStatus(`${TMP_BASE}/nonexistent.jsonl`);
+    expect(status.state).toBe("missing");
+  });
+  it("returns 'missing' for an empty file", async () => {
+    const path = resolve(TMP_BASE, "empty.jsonl");
+    fs.writeFileSync(path, "", "utf-8");
+    const status = await parseOutputFileStatus(path);
+    expect(status.state).toBe("missing");
+  });
+  it("returns 'running' when the file has events but no end_turn", async () => {
+    const path = await writeJsonl("running.jsonl", [
+      {
+        type: "user",
+        isSidechain: true,
+        agentId: "x",
+        message: { role: "user", content: "do the thing" },
+      },
+      {
+        type: "assistant",
+        isSidechain: true,
+        agentId: "x",
+        message: {
+          role: "assistant",
+          content: [{ type: "tool_use", name: "Bash", input: { command: "ls" } }],
+          stop_reason: "tool_use",
+        },
+      },
+    ]);
+    const status = await parseOutputFileStatus(path);
+    expect(status.state).toBe("running");
+  });
+  it("returns 'completed' with the final text when stop_reason is end_turn", async () => {
+    const path = await writeJsonl("completed.jsonl", [
+      {
+        type: "user",
+        isSidechain: true,
+        agentId: "x",
+        message: { role: "user", content: "p" },
+      },
+      {
+        type: "assistant",
+        isSidechain: true,
+        agentId: "x",
+        message: {
+          role: "assistant",
+          content: [{ type: "text", text: "Final report: it works!" }],
+          stop_reason: "end_turn",
+          usage: { input_tokens: 100, output_tokens: 50 },
+        },
+      },
+    ]);
+    const status = await parseOutputFileStatus(path);
+    expect(status.state).toBe("completed");
+    if (status.state === "completed") {
+      expect(status.output).toContain("Final report: it works!");
+      expect(status.tokensUsed).toEqual({ input: 100, output: 50 });
+    }
+  });
+  it("concatenates multiple text blocks in the final assistant message", async () => {
+    const path = await writeJsonl("multi-block.jsonl", [
+      {
+        type: "assistant",
+        isSidechain: true,
+        agentId: "x",
+        message: {
+          role: "assistant",
+          content: [
+            { type: "thinking", text: "let me think" },
+            { type: "text", text: "Part one." },
+            { type: "text", text: "Part two." },
+          ],
+          stop_reason: "end_turn",
+        },
+      },
+    ]);
+    const status = await parseOutputFileStatus(path);
+    expect(status.state).toBe("completed");
+    if (status.state === "completed") {
+      expect(status.output).toBe("Part one.\n\nPart two.");
+      // thinking blocks are NOT included
+      expect(status.output).not.toContain("let me think");
+    }
+  });
+  it("ignores assistant messages with stop_reason !== end_turn (still running)", async () => {
+    const path = await writeJsonl("intermediate.jsonl", [
+      {
+        type: "assistant",
+        isSidechain: true,
+        agentId: "x",
+        message: {
+          role: "assistant",
+          content: [{ type: "text", text: "checking..." }],
+          stop_reason: "tool_use",
+        },
+      },
+    ]);
+    const status = await parseOutputFileStatus(path);
+    expect(status.state).toBe("running");
+  });
+  it("uses the LAST end_turn assistant message when there are multiple turns", async () => {
+    const path = await writeJsonl("multi-turn.jsonl", [
+      {
+        type: "assistant",
+        agentId: "x",
+        message: {
+          content: [{ type: "text", text: "first answer" }],
+          stop_reason: "end_turn",
+        },
+      },
+      {
+        type: "user",
+        agentId: "x",
+        message: { content: [{ type: "tool_result", content: "..." }] },
+      },
+      {
+        type: "assistant",
+        agentId: "x",
+        message: {
+          content: [{ type: "text", text: "second and final answer" }],
+          stop_reason: "end_turn",
+        },
+      },
+    ]);
+    const status = await parseOutputFileStatus(path);
+    expect(status.state).toBe("completed");
+    if (status.state === "completed") {
+      expect(status.output).toBe("second and final answer");
+    }
+  });
+  it("survives partial final lines (mid-write)", async () => {
+    const path = resolve(TMP_BASE, "partial.jsonl");
+    fs.writeFileSync(
+      path,
+      JSON.stringify({
+        type: "assistant",
+        agentId: "x",
+        message: {
+          content: [{ type: "text", text: "checking" }],
+          stop_reason: "tool_use",
+        },
+      }) +
+        "\n" +
+        '{"type":"assistant","agentId":"x","mes',
+      "utf-8",
+    );
+    const status = await parseOutputFileStatus(path);
+    // Partial line is ignored; only the complete event counts
+    expect(status.state).toBe("running");
+  });
+  it("survives unparseable lines (skip them, keep checking)", async () => {
+    const path = resolve(TMP_BASE, "garbage.jsonl");
+    fs.writeFileSync(
+      path,
+      "garbage line\n" +
+        JSON.stringify({
+          type: "assistant",
+          agentId: "x",
+          message: {
+            content: [{ type: "text", text: "the answer" }],
+            stop_reason: "end_turn",
+          },
+        }) +
+        "\n",
+      "utf-8",
+    );
+    const status = await parseOutputFileStatus(path);
+    expect(status.state).toBe("completed");
+    if (status.state === "completed") {
+      expect(status.output).toBe("the answer");
+    }
+  });
+  it("only tail-reads large files (does not load entire content into memory)", async () => {
+    const path = resolve(TMP_BASE, "huge.jsonl");
+    // Write ~120KB of padding (200 'running' events of ~600 bytes each), then an end_turn
+    const padding = JSON.stringify({
+      type: "assistant",
+      agentId: "x",
+      message: { content: [{ type: "text", text: "x".repeat(500) }], stop_reason: "tool_use" },
+    });
+    let buf = "";
+    for (let i = 0; i < 200; i++) buf += padding + "\n";
+    buf +=
+      JSON.stringify({
+        type: "assistant",
+        agentId: "x",
+        message: {
+          content: [{ type: "text", text: "FINAL" }],
+          stop_reason: "end_turn",
+        },
+      }) + "\n";
+    fs.writeFileSync(path, buf, "utf-8");
+    expect(fs.statSync(path).size).toBeGreaterThan(100_000);
+    const status = await parseOutputFileStatus(path, { maxTailBytes: 8192 });
+    // Tail should still find the last end_turn
+    expect(status.state).toBe("completed");
+    if (status.state === "completed") {
+      expect(status.output).toBe("FINAL");
+    }
+  });
+});