npm - alvin-bot - Versions diffs - 4.12.4 → 4.13.1 - Mend

alvin-bot 4.12.4 → 4.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/CHANGELOG.md +121 -0
package/dist/handlers/message.js +9 -0
package/dist/paths.js +8 -0
package/dist/providers/claude-sdk-provider.js +25 -5
package/dist/services/alvin-dispatch.js +125 -0
package/dist/services/alvin-mcp-tools.js +103 -0
package/dist/services/async-agent-parser.js +50 -0
package/dist/services/personality.js +36 -10
package/dist/services/process-manager.js +291 -0
package/dist/web/doctor-api.js +59 -67
package/dist/web/setup-api.js +52 -0
package/package.json +1 -1
package/test/alvin-dispatch.test.ts +220 -0
package/test/async-agent-parser-streamjson.test.ts +273 -0
package/test/process-manager.test.ts +186 -0
package/test/slack-test-connection.test.ts +176 -0

package/test/alvin-dispatch.test.ts ADDED Viewed

@@ -0,0 +1,220 @@
+/**
+ * v4.13 — alvin_dispatch custom-tool service.
+ *
+ * `dispatchDetachedAgent(input)` spawns a truly independent `claude -p`
+ * subprocess that survives the parent handler's abort. This is the
+ * architectural replacement for SDK's built-in Task(run_in_background)
+ * tool, which was tied to the parent SDK subprocess lifecycle.
+ *
+ * Contract:
+ *   - Input: { prompt, description, chatId, userId, sessionKey }
+ *   - Output (synchronous): { agentId, outputFile, spawned: true }
+ *   - Side effect: spawns detached subprocess writing stream-json
+ *     output to outputFile, registers with async-agent-watcher.
+ *
+ * These tests stub child_process.spawn so they run fast and deterministically.
+ * The "real subprocess survives parent" property was verified empirically
+ * in Phase A (see plan doc).
+ */
+import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
+import os from "os";
+import fs from "fs";
+import { resolve } from "path";
+const TEST_DATA_DIR = resolve(
+  os.tmpdir(),
+  `alvin-dispatch-${process.pid}-${Date.now()}`,
+);
+/** One captured invocation of the stubbed `spawn`. */
+interface SpawnRecord {
+  /** First argument passed to `spawn`. */
+  cmd: string;
+  /** Second argument passed to `spawn`. */
+  args: string[];
+  /** Third argument passed to `spawn`, stored as received. */
+  opts: {
+    cwd?: string;
+    detached?: boolean;
+    env?: Record<string, string | undefined>;
+    stdio?: unknown;
+  };
+  /** Set to true once the fake child's `unref()` has been called. */
+  unreffed: boolean;
+}
+let spawned: SpawnRecord[] = [];
+// Per-test setup: empty data dir, reset module graph, stubbed spawn and
+// stubbed sub-agent delivery.
+beforeEach(async () => {
+  // `force: true` already makes the removal a no-op when the dir is missing.
+  fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
+  fs.mkdirSync(TEST_DATA_DIR, { recursive: true });
+  process.env.ALVIN_DATA_DIR = TEST_DATA_DIR;
+  spawned = [];
+  vi.resetModules();
+  // Records each call in `spawned` and returns a minimal fake child process.
+  const fakeSpawn = (
+    cmd: string,
+    args: string[],
+    opts: SpawnRecord["opts"],
+  ) => {
+    const entry: SpawnRecord = { cmd, args, opts, unreffed: false };
+    spawned.push(entry);
+    return {
+      pid: 12345,
+      unref: () => {
+        entry.unreffed = true;
+      },
+      on: () => {},
+      kill: () => {},
+    };
+  };
+  // Keep every real child_process export except `spawn`.
+  vi.doMock("node:child_process", async () => {
+    const real = await vi.importActual<typeof import("node:child_process")>(
+      "node:child_process",
+    );
+    return { ...real, spawn: fakeSpawn };
+  });
+  vi.doMock("../src/services/subagent-delivery.js", () => {
+    return {
+      deliverSubAgentResult: async () => {},
+      attachBotApi: () => {},
+      __setBotApiForTest: () => {},
+    };
+  });
+});
+// Per-test teardown: stop and reset the watcher, then remove the temp data
+// dir so nothing is left behind in os.tmpdir() after the last test.
+afterEach(async () => {
+  try {
+    const mod = await import("../src/services/async-agent-watcher.js");
+    mod.stopWatcher();
+    mod.__resetForTest();
+  } catch {
+    /* ignore — teardown is best-effort */
+  }
+  try {
+    // beforeEach recreates the directory, so removing it here is safe.
+    fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
+  } catch {
+    /* ignore — cleanup is best-effort */
+  }
+});
+describe("dispatchDetachedAgent (v4.13)", () => {
+  it("spawns claude -p with detached: true and unrefs", async () => {
+    const dispatch = await import("../src/services/alvin-dispatch.js");
+    const outcome = dispatch.dispatchDetachedAgent({
+      prompt: "research X",
+      description: "X research",
+      chatId: 42,
+      userId: 42,
+      sessionKey: "s1",
+    });
+    // Synchronous return value: id, output path and the spawned flag.
+    expect(outcome.agentId).toMatch(/^alvin-[a-f0-9]{16,}$/);
+    expect(outcome.outputFile).toContain(TEST_DATA_DIR);
+    expect(outcome.spawned).toBe(true);
+    // Exactly one child was requested through the stubbed spawn.
+    expect(spawned).toHaveLength(1);
+    const call = spawned[0];
+    expect(call.cmd).toMatch(/claude/);
+    for (const arg of ["-p", "research X", "--output-format", "stream-json"]) {
+      expect(call.args).toContain(arg);
+    }
+    expect(call.opts.detached).toBe(true);
+    expect(call.unreffed).toBe(true);
+  });
+  it("returns unique agentIds for concurrent dispatches", async () => {
+    const dispatch = await import("../src/services/alvin-dispatch.js");
+    // Both dispatches share everything except prompt and description.
+    const common = { chatId: 1, userId: 1, sessionKey: "s1" };
+    const first = dispatch.dispatchDetachedAgent({
+      prompt: "a",
+      description: "a",
+      ...common,
+    });
+    const second = dispatch.dispatchDetachedAgent({
+      prompt: "b",
+      description: "b",
+      ...common,
+    });
+    expect(first.agentId).not.toBe(second.agentId);
+    expect(first.outputFile).not.toBe(second.outputFile);
+  });
+  it("registers the pending agent with the watcher", async () => {
+    const dispatch = await import("../src/services/alvin-dispatch.js");
+    const watcher = await import("../src/services/async-agent-watcher.js");
+    dispatch.dispatchDetachedAgent({
+      prompt: "x",
+      description: "X audit",
+      chatId: 42,
+      userId: 42,
+      sessionKey: "s1",
+    });
+    // The watcher should now know about exactly this one agent.
+    const queue = watcher.listPendingAgents();
+    expect(queue).toHaveLength(1);
+    const [entry] = queue;
+    expect(entry.description).toBe("X audit");
+    expect(entry.sessionKey).toBe("s1");
+  });
+  it("increments session.pendingBackgroundCount on dispatch", async () => {
+    const dispatch = await import("../src/services/alvin-dispatch.js");
+    const { getSession } = await import("../src/services/session.js");
+    const session = getSession("s-count");
+    session.pendingBackgroundCount = 0;
+    // After the n-th dispatch on this session the counter must read n.
+    const jobs = [
+      { prompt: "p", description: "d" },
+      { prompt: "p2", description: "d2" },
+    ];
+    jobs.forEach((job, index) => {
+      dispatch.dispatchDetachedAgent({
+        ...job,
+        chatId: 1,
+        userId: 1,
+        sessionKey: "s-count",
+      });
+      expect(session.pendingBackgroundCount).toBe(index + 1);
+    });
+  });
+  it("uses stdio redirect so child's stdout goes to outputFile", async () => {
+    const dispatch = await import("../src/services/alvin-dispatch.js");
+    dispatch.dispatchDetachedAgent({
+      prompt: "p",
+      description: "d",
+      chatId: 1,
+      userId: 1,
+      sessionKey: "s1",
+    });
+    const call = spawned[0];
+    // The exact redirect layout is left to the implementation; this only
+    // rules out "inherit" (which would attach to the parent) and an unset value.
+    expect(call.opts.stdio).not.toBe("inherit");
+    expect(call.opts.stdio).not.toBe(undefined);
+  });
+  it("cleans env of CLAUDECODE/CLAUDE_CODE_ENTRYPOINT to prevent nested session errors", async () => {
+    const mod = await import("../src/services/alvin-dispatch.js");
+    // Remember whatever the surrounding environment had set so the test
+    // restores it afterwards instead of unconditionally deleting it.
+    const saved = {
+      CLAUDECODE: process.env.CLAUDECODE,
+      CLAUDE_CODE_ENTRYPOINT: process.env.CLAUDE_CODE_ENTRYPOINT,
+    };
+    process.env.CLAUDECODE = "1";
+    process.env.CLAUDE_CODE_ENTRYPOINT = "cli";
+    try {
+      mod.dispatchDetachedAgent({
+        prompt: "p",
+        description: "d",
+        chatId: 1,
+        userId: 1,
+        sessionKey: "s1",
+      });
+      const [s] = spawned;
+      // The child must get an explicit env with both variables stripped.
+      expect(s.opts.env).toBeDefined();
+      expect(s.opts.env?.CLAUDECODE).toBeUndefined();
+      expect(s.opts.env?.CLAUDE_CODE_ENTRYPOINT).toBeUndefined();
+    } finally {
+      for (const [key, value] of Object.entries(saved)) {
+        if (value === undefined) delete process.env[key];
+        else process.env[key] = value;
+      }
+    }
+  });
+});

package/test/async-agent-parser-streamjson.test.ts ADDED Viewed

@@ -0,0 +1,273 @@
+/**
+ * v4.13 — parseOutputFileStatus support for `claude -p --output-format stream-json`.
+ *
+ * The SDK's built-in Task tool writes its sub-agent output in one JSONL
+ * format (events with `message.stop_reason: "end_turn"`). The new v4.13
+ * dispatch mechanism spawns `claude -p --output-format stream-json`
+ * which writes a DIFFERENT format:
+ *
+ *   - Assistant messages have `message.stop_reason: null` (streaming shape)
+ *   - A final `{"type":"result","subtype":"success","stop_reason":"end_turn",...}`
+ *     event marks completion explicitly
+ *   - `result.duration_ms`, `total_cost_usd`, `num_turns`, `usage`
+ *     are the authoritative completion signals
+ *
+ * The parser must recognize BOTH formats. v4.13 adds detection for the
+ * result-event format while preserving backward compat with the existing
+ * SDK-internal format (tested in the sibling test files).
+ */
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import fs from "fs";
+import os from "os";
+import { resolve } from "path";
+import { parseOutputFileStatus } from "../src/services/async-agent-parser.js";
+const TMP_BASE = resolve(
+  os.tmpdir(),
+  `alvin-parser-streamjson-${process.pid}`,
+);
+beforeEach(() => { // make sure the scratch dir exists before a test writes its fixture
+  fs.mkdirSync(TMP_BASE, { recursive: true });
+});
+afterEach(() => { // remove the scratch dir and every fixture written into it
+  try {
+    fs.rmSync(TMP_BASE, { recursive: true, force: true });
+  } catch {
+    /* ignore — cleanup is best-effort */
+  }
+});
+describe("parseOutputFileStatus — stream-json format (v4.13)", () => {
+  it("returns 'completed' when final event is type:result + subtype:success", async () => {
+    const file = resolve(TMP_BASE, "stream-success.jsonl");
+    const events = [
+      { type: "system", subtype: "init", session_id: "s1" },
+      {
+        type: "assistant",
+        message: {
+          role: "assistant",
+          content: [{ type: "text", text: "The answer is 42." }],
+          stop_reason: null, // streaming shape — NOT end_turn yet
+        },
+        session_id: "s1",
+      },
+      {
+        type: "result",
+        subtype: "success",
+        stop_reason: "end_turn",
+        session_id: "s1",
+        total_cost_usd: 0.01,
+        duration_ms: 500,
+        usage: { input_tokens: 10, output_tokens: 5 },
+        result: "The answer is 42.",
+      },
+    ];
+    // One JSON document per line, newline-terminated.
+    const jsonl = events.map((event) => JSON.stringify(event)).join("\n") + "\n";
+    fs.writeFileSync(file, jsonl, "utf-8");
+    const status = await parseOutputFileStatus(file);
+    expect(status.state).toBe("completed");
+    if (status.state !== "completed") return;
+    expect(status.output).toContain("The answer is 42.");
+    expect(status.output).not.toMatch(/interrupted|partial/i);
+  });
+  it("extracts tokens from result.usage when using stream-json format", async () => {
+    const file = resolve(TMP_BASE, "stream-tokens.jsonl");
+    const events = [
+      {
+        type: "assistant",
+        message: {
+          content: [{ type: "text", text: "x" }],
+          stop_reason: null,
+        },
+      },
+      {
+        type: "result",
+        subtype: "success",
+        stop_reason: "end_turn",
+        usage: { input_tokens: 1234, output_tokens: 567 },
+      },
+    ];
+    const jsonl = events.map((event) => JSON.stringify(event)).join("\n") + "\n";
+    fs.writeFileSync(file, jsonl, "utf-8");
+    const status = await parseOutputFileStatus(file);
+    expect(status.state).toBe("completed");
+    if (status.state !== "completed") return;
+    // usage.input_tokens / usage.output_tokens map to input / output.
+    expect(status.tokensUsed).toEqual({ input: 1234, output: 567 });
+  });
+  it("returns 'completed' with the collected text when result.is_error is true", async () => {
+    const path = resolve(TMP_BASE, "stream-failed.jsonl");
+    const lines = [
+      {
+        type: "assistant",
+        message: {
+          content: [{ type: "text", text: "I tried..." }],
+          stop_reason: null,
+        },
+      },
+      {
+        type: "result",
+        subtype: "error_max_turns",
+        is_error: true,
+        stop_reason: "max_turns",
+      },
+    ];
+    fs.writeFileSync(
+      path,
+      lines.map((l) => JSON.stringify(l)).join("\n") + "\n",
+      "utf-8",
+    );
+    const status = await parseOutputFileStatus(path);
+    // The title used to say "recognises 'failed' state", which contradicted
+    // the assertion below. With an is_error result + text content, the text
+    // is still delivered as completed (better to give the user SOMETHING
+    // than nothing). The delivery layer can annotate differently if it chooses.
+    expect(status.state).toBe("completed");
+    if (status.state === "completed") {
+      expect(status.output).toContain("I tried...");
+    }
+  });
+  it("returns 'running' when stream-json events are present but no result yet", async () => {
+    const file = resolve(TMP_BASE, "stream-running.jsonl");
+    // Init + two assistant events, deliberately without a closing result event.
+    const events = [
+      { type: "system", subtype: "init", session_id: "s1" },
+      {
+        type: "assistant",
+        message: {
+          content: [{ type: "text", text: "Thinking..." }],
+          stop_reason: null,
+        },
+      },
+      {
+        type: "assistant",
+        message: {
+          content: [{ type: "tool_use", name: "Bash", input: {} }],
+          stop_reason: null,
+        },
+      },
+    ];
+    const jsonl = events.map((event) => JSON.stringify(event)).join("\n") + "\n";
+    fs.writeFileSync(file, jsonl, "utf-8");
+    const status = await parseOutputFileStatus(file);
+    expect(status.state).toBe("running");
+  });
+  it("aggregates text from ALL assistant messages when result arrives", async () => {
+    const file = resolve(TMP_BASE, "stream-multi-text.jsonl");
+    const events = [
+      {
+        type: "assistant",
+        message: {
+          content: [{ type: "text", text: "First thought." }],
+          stop_reason: null,
+        },
+      },
+      {
+        type: "user",
+        message: { content: [{ type: "tool_result", content: "ok" }] },
+      },
+      {
+        type: "assistant",
+        message: {
+          content: [{ type: "text", text: "Continuing..." }],
+          stop_reason: null,
+        },
+      },
+      {
+        type: "user",
+        message: { content: [{ type: "tool_result", content: "ok" }] },
+      },
+      {
+        type: "assistant",
+        message: {
+          content: [{ type: "text", text: "Final answer." }],
+          stop_reason: null,
+        },
+      },
+      { type: "result", subtype: "success", stop_reason: "end_turn" },
+    ];
+    const jsonl = events.map((event) => JSON.stringify(event)).join("\n") + "\n";
+    fs.writeFileSync(file, jsonl, "utf-8");
+    const status = await parseOutputFileStatus(file);
+    expect(status.state).toBe("completed");
+    if (status.state !== "completed") return;
+    // Every one of the three assistant text blocks has to show up.
+    for (const fragment of ["First thought", "Continuing", "Final answer"]) {
+      expect(status.output).toContain(fragment);
+    }
+  });
+  it("prefers result.result field as authoritative output when available", async () => {
+    // The result event carries a `result` field holding the final answer;
+    // when it is present the parser output must contain it.
+    const file = resolve(TMP_BASE, "stream-result-field.jsonl");
+    const events = [
+      {
+        type: "assistant",
+        message: {
+          content: [{ type: "text", text: "Intermediate chunk" }],
+          stop_reason: null,
+        },
+      },
+      {
+        type: "result",
+        subtype: "success",
+        stop_reason: "end_turn",
+        result: "FINAL AUTHORITATIVE ANSWER",
+      },
+    ];
+    const jsonl = events.map((event) => JSON.stringify(event)).join("\n") + "\n";
+    fs.writeFileSync(file, jsonl, "utf-8");
+    const status = await parseOutputFileStatus(file);
+    expect(status.state).toBe("completed");
+    if (status.state !== "completed") return;
+    expect(status.output).toContain("FINAL AUTHORITATIVE ANSWER");
+  });
+  it("handles result event with only partial fields (defensive)", async () => {
+    const file = resolve(TMP_BASE, "stream-result-minimal.jsonl");
+    const events = [
+      {
+        type: "assistant",
+        message: {
+          content: [{ type: "text", text: "Some output" }],
+          stop_reason: null,
+        },
+      },
+      { type: "result" }, // no subtype, no result field, no usage
+    ];
+    const jsonl = events.map((event) => JSON.stringify(event)).join("\n") + "\n";
+    fs.writeFileSync(file, jsonl, "utf-8");
+    const status = await parseOutputFileStatus(file);
+    expect(status.state).toBe("completed");
+    if (status.state !== "completed") return;
+    // The assistant text is the only output source in this fixture.
+    expect(status.output).toContain("Some output");
+  });
+});

package/test/process-manager.test.ts ADDED Viewed

@@ -0,0 +1,186 @@
+/**
+ * v4.13.1 — process-manager abstraction tests.
+ *
+ * The maintenance section in the Web UI used to hard-wire PM2 commands
+ * (`pm2 jlist`, `pm2 restart`, `pm2 stop`, `pm2 logs ...`). Since v4.8
+ * the Mac install uses launchd (`com.alvinbot.app.plist`) — PM2 isn't
+ * running, so those calls returned "PM2 not available" and the buttons
+ * did nothing.
+ *
+ * This module abstracts the process manager and auto-detects which one
+ * is actually managing the bot. Detection order:
+ *
+ *   1. launchd (macOS) — if `launchctl print gui/$UID/com.alvinbot.app`
+ *      succeeds AND the bot's actual running pid matches
+ *   2. PM2 — if `pm2 jlist` returns our process
+ *   3. standalone — neither detected; only the in-process graceful
+ *      restart works (scheduleGracefulRestart — since there's no
+ *      supervisor to bring it back, "stop" is effectively "kill")
+ *
+ * Each manager implements: getStatus(), stop(), start(), getLogs().
+ * Restart is intentionally NOT on the manager — it always routes through
+ * scheduleGracefulRestart() (Grammy-safe) and relies on the supervisor
+ * bringing the process back up automatically.
+ */
+import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
+interface ExecCall { // one recorded call to the stubbed execSync
+  cmd: string; // command string passed to execSync
+  opts?: unknown; // options argument, stored exactly as received
+}
+let execLog: ExecCall[] = [];
+let execReturn: Record<string, string | Error> = {};
+function stubExec() { // replaces node:child_process with a mock exposing only execSync
+  vi.doMock("node:child_process", () => ({
+    execSync: (cmd: string, opts?: unknown) => {
+      execLog.push({ cmd, opts });
+      // Find match by substring — the longest execReturn key contained in cmd wins
+      const matches = Object.keys(execReturn).filter((k) => cmd.includes(k));
+      matches.sort((a, b) => b.length - a.length);
+      const key = matches[0];
+      if (key) {
+        const v = execReturn[key];
+        if (v instanceof Error) throw v; // an Error value simulates a failing command
+        return v;
+      }
+      throw new Error(`execSync: no stub for ${cmd}`);
+    },
+  }));
+}
+beforeEach(() => { // reset the call log and stub table, then re-register the mock
+  execLog = [];
+  execReturn = {};
+  vi.resetModules(); // each test's dynamic import then gets a fresh module instance
+  stubExec();
+});
+afterEach(() => { // drop the child_process mock registered by stubExec
+  vi.doUnmock("node:child_process");
+});
+describe("detectProcessManager (v4.13.1)", () => {
+  it("detects 'launchd' when launchctl print succeeds on darwin", async () => {
+    // Canned `launchctl print` output describing a running service.
+    const launchctlOutput = `gui/502/com.alvinbot.app = {
+      state = running
+      program = /opt/homebrew/bin/node
+    }`;
+    execReturn["launchctl print"] = launchctlOutput;
+    const processManager = await import("../src/services/process-manager.js");
+    const detected = processManager.detectProcessManager({
+      platform: "darwin",
+    });
+    expect(detected.kind).toBe("launchd");
+  });
+  it("falls through to 'pm2' when launchd is not detected", async () => {
+    execReturn["launchctl print"] = new Error("Could not find service"); // NOTE(review): probably never consulted with platform "linux" — consider "darwin" to exercise the fall-through; confirm against the implementation
+    execReturn["pm2 jlist"] = JSON.stringify([
+      { name: "alvin-bot", pid: 1234, pm2_env: { status: "online" } },
+    ]);
+    const mod = await import("../src/services/process-manager.js");
+    const pm = mod.detectProcessManager({ platform: "linux" });
+    expect(pm.kind).toBe("pm2");
+  });
+  it("falls through to 'standalone' when neither is detected", async () => {
+    execReturn["launchctl print"] = new Error("not found"); // NOTE(review): probably never consulted with platform "linux" — confirm whether "darwin" was intended
+    execReturn["pm2 jlist"] = new Error("command not found");
+    const mod = await import("../src/services/process-manager.js");
+    const pm = mod.detectProcessManager({ platform: "linux" });
+    expect(pm.kind).toBe("standalone");
+  });
+  it("skips launchd detection on non-darwin platforms", async () => {
+    // Only pm2 is stubbed: any launchctl call would hit the "no stub" error.
+    const pm2List = [
+      { name: "alvin-bot", pid: 1234, pm2_env: { status: "online" } },
+    ];
+    execReturn["pm2 jlist"] = JSON.stringify(pm2List);
+    const processManager = await import("../src/services/process-manager.js");
+    const detected = processManager.detectProcessManager({ platform: "linux" });
+    expect(detected.kind).toBe("pm2");
+    // No recorded command may mention launchctl.
+    const launchctlCalled = execLog.some((call) =>
+      call.cmd.includes("launchctl"),
+    );
+    expect(launchctlCalled).toBe(false);
+  });
+});
+describe("launchd process manager (v4.13.1)", () => {
+  it("getStatus parses launchctl print output for state + PID", async () => {
+    // Canned `launchctl print` output containing both a state and a pid line.
+    const launchctlOutput = `gui/502/com.alvinbot.app = {
+      active count = 1
+      state = running
+      program = /opt/homebrew/Cellar/node/25.9.0_1/bin/node
+      pid = 65432
+      program path = /usr/bin/node
+      working directory = /Users/alvin_de/Projects/alvin-bot
+      stdout path = /Users/alvin_de/.alvin-bot/logs/alvin-bot.out.log
+    }`;
+    execReturn["launchctl print"] = launchctlOutput;
+    const processManager = await import("../src/services/process-manager.js");
+    const launchd = processManager.createLaunchdManager(502);
+    const report = await launchd.getStatus();
+    expect(report.status).toBe("running");
+    expect(report.pid).toBe(65432);
+    expect(report.kind).toBe("launchd");
+  });
+  it("getStatus returns 'not-loaded' when service is not registered", async () => {
+    // A failing `launchctl print` stands in for an unregistered service.
+    const lookupFailure = new Error("Could not find service");
+    execReturn["launchctl print"] = lookupFailure;
+    const processManager = await import("../src/services/process-manager.js");
+    const launchd = processManager.createLaunchdManager(502);
+    const report = await launchd.getStatus();
+    expect(report.status).toBe("not-loaded");
+  });
+  it("stop uses launchctl bootout", async () => {
+    execReturn["launchctl bootout"] = "";
+    const processManager = await import("../src/services/process-manager.js");
+    const launchd = processManager.createLaunchdManager(502);
+    await launchd.stop();
+    // The recorded command must target this user's service.
+    const bootoutCall = execLog.find((call) => call.cmd.includes("bootout"));
+    expect(bootoutCall).toBeDefined();
+    expect(bootoutCall?.cmd).toContain("gui/502/com.alvinbot.app");
+  });
+  it("start uses launchctl bootstrap", async () => {
+    execReturn["launchctl bootstrap"] = "";
+    const processManager = await import("../src/services/process-manager.js");
+    const launchd = processManager.createLaunchdManager(502);
+    await launchd.start();
+    // The recorded command must reference the service's plist file.
+    const bootstrapCall = execLog.find((call) =>
+      call.cmd.includes("bootstrap"),
+    );
+    expect(bootstrapCall).toBeDefined();
+    expect(bootstrapCall?.cmd).toMatch(/com\.alvinbot\.app\.plist/);
+  });
+});
+describe("pm2 process manager (v4.13.1)", () => {
+  it("getStatus parses pm2 jlist for our process", async () => {
+    // A single pm2 entry named "alvin-bot", started a minute ago.
+    const pm2Entry = {
+      name: "alvin-bot",
+      pid: 9999,
+      pm2_env: {
+        status: "online",
+        pm_uptime: Date.now() - 60_000,
+        restart_time: 2,
+      },
+      monit: { memory: 123456, cpu: 1.5 },
+    };
+    execReturn["pm2 jlist"] = JSON.stringify([pm2Entry]);
+    const processManager = await import("../src/services/process-manager.js");
+    const pm2 = processManager.createPm2Manager();
+    const report = await pm2.getStatus();
+    expect(report.status).toBe("online");
+    expect(report.pid).toBe(9999);
+    expect(report.kind).toBe("pm2");
+    expect(report.restarts).toBe(2);
+  });
+  it("getStatus returns 'unknown' if pm2 jlist does not include our process", async () => {
+    // pm2 answers, but only lists a different service.
+    const unrelated = [
+      { name: "other-service", pid: 1111, pm2_env: { status: "online" } },
+    ];
+    execReturn["pm2 jlist"] = JSON.stringify(unrelated);
+    const processManager = await import("../src/services/process-manager.js");
+    const pm2 = processManager.createPm2Manager();
+    const report = await pm2.getStatus();
+    expect(report.status).toBe("unknown");
+  });
+});