alvin-bot 4.9.4 → 4.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -46,6 +46,58 @@ function getBasePrompt(lang) {
46
46
  }
47
47
  /** Additional instructions for SDK providers (tool use) */
48
48
  const SDK_ADDON = `When you run commands or edit files, briefly explain what you did.`;
49
+ /**
50
+ * Stage 1 of Fix #17 — async sub-agents.
51
+ *
52
+ * Tells Claude to use the SDK's `run_in_background` flag for long-running
53
+ * Agent tool calls so the main Telegram session doesn't stay locked for
54
+ * 10+ minutes while sub-agents crawl the web, run audits, or build reports.
55
+ *
56
+ * Only injected into the prompt when isSDK === true (non-SDK providers
57
+ * have no Agent tool). The bot's async-agent-watcher (Stage 2) picks up
58
+ * the resulting outputFile, polls for completion, and delivers the
59
+ * result as a separate Telegram message via subagent-delivery.ts.
60
+ *
61
+ * See test/system-prompt-background-hint.test.ts and
62
+ * docs/superpowers/plans/2026-04-13-async-subagents.md
63
+ */
64
+ const BACKGROUND_SUBAGENT_HINT = `## Background Sub-Agents (IMPORTANT)
65
+
66
+ For long-running work, ALWAYS set \`run_in_background: true\` on your Agent
67
+ tool call. This makes the tool return IMMEDIATELY with an \`agentId\` and
68
+ \`outputFile\` path instead of blocking for the full duration. Your main
69
+ turn ends quickly, the user's session unlocks, and they can keep chatting
70
+ with me while the sub-agent works in parallel.
71
+
72
+ After launching a background agent, you MUST:
73
+ 1. Tell the user in one short sentence what you kicked off ("Starting SEO
74
+ audit for gethomes.io in the background — I'll send the report when
75
+ it's done").
76
+ 2. End your turn promptly. Do NOT wait around. Do NOT keep working on
77
+ that task. Wrap up within 1-2 short messages.
78
+ 3. When the agent finishes, its final report will arrive as a SEPARATE
79
+ message in this chat — the bot handles that delivery automatically.
80
+ You don't need to poll the outputFile proactively.
81
+
82
+ If the user asks "is it done yet?" before the bot delivers the result,
83
+ you MAY read the agent's \`outputFile\` (from the tool result) using the
84
+ Read tool to check progress.
85
+
86
+ **DO use \`run_in_background: true\` for:**
87
+ - Audits (SEO, security, code quality, performance)
88
+ - Research tasks that visit more than 3 web pages
89
+ - Multi-file codebase analyses, full-repo scans
90
+ - Report generation with multiple sub-steps
91
+ - Anything you estimate will take longer than 2 minutes
92
+
93
+ **DON'T use run_in_background for:**
94
+ - Simple questions the user is actively waiting on a quick answer
95
+ - Single file reads
96
+ - Quick web fetches for a specific fact
97
+ - Short tool chains under ~30 seconds
98
+
99
+ When in doubt: prefer background for audits/research, foreground for
100
+ conversational answers.`;
49
101
  /**
50
102
  * Self-Awareness Core — Dynamic introspection block.
51
103
  *
@@ -164,6 +216,9 @@ export function buildSystemPrompt(isSDK, language = "en", chatId) {
164
216
  }
165
217
  if (isSDK) {
166
218
  parts.push(SDK_ADDON);
219
+ // Stage 1 — teach Claude to use run_in_background for long-running
220
+ // Agent tool calls so the main session unlocks fast.
221
+ parts.push(BACKGROUND_SUBAGENT_HINT);
167
222
  // SDK providers have bash access — inject discovered tools so they know what's available
168
223
  parts.push(getToolSummary());
169
224
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "alvin-bot",
3
- "version": "4.9.4",
3
+ "version": "4.10.0",
4
4
  "description": "Alvin Bot — Your personal AI agent on Telegram, WhatsApp, Discord, Signal, and Web.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -0,0 +1,131 @@
1
+ /**
2
+ * Fix #17 (Stage 2) — when the SDK yields a tool_result chunk with an
3
+ * "Async agent launched successfully" payload, the message handler
4
+ * must register the pending agent with the watcher.
5
+ *
6
+ * This tests the helper `handleToolResultChunk` in isolation —
7
+ * the integration with message.ts is covered by the live e2e test.
8
+ */
9
+ import { describe, it, expect, beforeEach, vi } from "vitest";
10
+
11
+ describe("async agent chunk flow (Stage 2)", () => {
12
+ beforeEach(() => vi.resetModules());
13
+
14
+ it("tool_result with async_launched gets registered with the watcher", async () => {
15
+ const registered: unknown[] = [];
16
+ vi.doMock("../src/services/async-agent-watcher.js", () => ({
17
+ registerPendingAgent: (input: unknown) => registered.push(input),
18
+ startWatcher: () => {},
19
+ stopWatcher: () => {},
20
+ pollOnce: async () => {},
21
+ listPendingAgents: () => [],
22
+ }));
23
+
24
+ const { handleToolResultChunk } = await import(
25
+ "../src/handlers/async-agent-chunk-handler.js"
26
+ );
27
+
28
+ const chunk = {
29
+ type: "tool_result" as const,
30
+ toolUseId: "toolu_1",
31
+ toolResultContent:
32
+ "Async agent launched successfully.\n" +
33
+ "agentId: abc-1 (something)\n" +
34
+ "output_file: /tmp/out-abc-1.jsonl\n" +
35
+ "If asked, you can check progress.",
36
+ };
37
+ handleToolResultChunk(chunk, {
38
+ chatId: 42,
39
+ userId: 99,
40
+ lastToolUseInput: {
41
+ description: "SEO audit",
42
+ prompt: "audit gethomes.io",
43
+ },
44
+ });
45
+
46
+ expect(registered).toHaveLength(1);
47
+ const r = registered[0] as { agentId: string; description: string; outputFile: string };
48
+ expect(r.agentId).toBe("abc-1");
49
+ expect(r.description).toBe("SEO audit");
50
+ expect(r.outputFile).toBe("/tmp/out-abc-1.jsonl");
51
+ });
52
+
53
+ it("falls back to a generic description when no toolUseInput is provided", async () => {
54
+ const registered: unknown[] = [];
55
+ vi.doMock("../src/services/async-agent-watcher.js", () => ({
56
+ registerPendingAgent: (input: unknown) => registered.push(input),
57
+ startWatcher: () => {},
58
+ stopWatcher: () => {},
59
+ pollOnce: async () => {},
60
+ listPendingAgents: () => [],
61
+ }));
62
+
63
+ const { handleToolResultChunk } = await import(
64
+ "../src/handlers/async-agent-chunk-handler.js"
65
+ );
66
+
67
+ handleToolResultChunk(
68
+ {
69
+ type: "tool_result",
70
+ toolUseId: "toolu_2",
71
+ toolResultContent:
72
+ "Async agent launched successfully.\n" +
73
+ "agentId: x\n" +
74
+ "output_file: /tmp/o\n",
75
+ },
76
+ { chatId: 42, userId: 99 },
77
+ );
78
+
79
+ expect(registered).toHaveLength(1);
80
+ const r = registered[0] as { description: string };
81
+ expect(r.description.length).toBeGreaterThan(0);
82
+ });
83
+
84
+ it("non-async tool_result (e.g. Read) is ignored", async () => {
85
+ const registered: unknown[] = [];
86
+ vi.doMock("../src/services/async-agent-watcher.js", () => ({
87
+ registerPendingAgent: (input: unknown) => registered.push(input),
88
+ startWatcher: () => {},
89
+ stopWatcher: () => {},
90
+ pollOnce: async () => {},
91
+ listPendingAgents: () => [],
92
+ }));
93
+
94
+ const { handleToolResultChunk } = await import(
95
+ "../src/handlers/async-agent-chunk-handler.js"
96
+ );
97
+
98
+ handleToolResultChunk(
99
+ {
100
+ type: "tool_result",
101
+ toolUseId: "toolu_3",
102
+ toolResultContent: "file contents here (plain Read result)",
103
+ },
104
+ { chatId: 42, userId: 99 },
105
+ );
106
+ expect(registered).toHaveLength(0);
107
+ });
108
+
109
+ it("non-tool_result chunks are ignored without throwing", async () => {
110
+ vi.doMock("../src/services/async-agent-watcher.js", () => ({
111
+ registerPendingAgent: () => {
112
+ throw new Error("should not be called");
113
+ },
114
+ startWatcher: () => {},
115
+ stopWatcher: () => {},
116
+ pollOnce: async () => {},
117
+ listPendingAgents: () => [],
118
+ }));
119
+
120
+ const { handleToolResultChunk } = await import(
121
+ "../src/handlers/async-agent-chunk-handler.js"
122
+ );
123
+
124
+ expect(() =>
125
+ handleToolResultChunk(
126
+ { type: "text", text: "hi" },
127
+ { chatId: 42, userId: 99 },
128
+ ),
129
+ ).not.toThrow();
130
+ });
131
+ });
@@ -0,0 +1,322 @@
1
+ /**
2
+ * Fix #17 (Stage 2) — async-agent-parser unit tests.
3
+ *
4
+ * Two pure helpers:
5
+ * parseAsyncLaunchedToolResult(text) → { agentId, outputFile } | null
6
+ * parseOutputFileStatus(path, { maxTailBytes }?) → { state: "running"|"completed"|"failed"|"missing" } ("completed" also carries output / tokensUsed)
7
+ *
8
+ * Format details captured from the live SDK probe in
9
+ * docs/superpowers/specs/sdk-async-agent-outputfile-format.md
10
+ */
11
+ import { describe, it, expect, beforeEach, afterEach } from "vitest";
12
+ import fs from "fs";
13
+ import os from "os";
14
+ import { resolve } from "path";
15
+ import {
16
+ parseAsyncLaunchedToolResult,
17
+ parseOutputFileStatus,
18
+ } from "../src/services/async-agent-parser.js";
19
+
20
+ describe("parseAsyncLaunchedToolResult — plain text format (Stage 2)", () => {
21
+ it("extracts agentId and output_file from the real SDK tool-result text", () => {
22
+ const text = `Async agent launched successfully.
23
+ agentId: a9e9c5913b2faec71 (internal ID - do not mention to user. Use SendMessage with to: 'a9e9c5913b2faec71' to continue this agent.)
24
+ The agent is working in the background. You will be notified automatically when it completes.
25
+ Do not duplicate this agent's work — avoid working with the same files or topics it is using.
26
+ output_file: /private/tmp/claude-502/-Users-alvin-de-Projects-alvin-bot/abc/tasks/a9e9c5913b2faec71.output
27
+ If asked, you can check progress before completion by using Read or Bash tail on the output file.`;
28
+
29
+ const info = parseAsyncLaunchedToolResult(text);
30
+ expect(info).not.toBeNull();
31
+ expect(info?.agentId).toBe("a9e9c5913b2faec71");
32
+ expect(info?.outputFile).toBe(
33
+ "/private/tmp/claude-502/-Users-alvin-de-Projects-alvin-bot/abc/tasks/a9e9c5913b2faec71.output",
34
+ );
35
+ });
36
+
37
+ it("returns null for ordinary tool result text (e.g. Read output)", () => {
38
+ expect(parseAsyncLaunchedToolResult("file contents here")).toBeNull();
39
+ });
40
+
41
+ it("returns null for an empty string", () => {
42
+ expect(parseAsyncLaunchedToolResult("")).toBeNull();
43
+ });
44
+
45
+ it("returns null when the marker line is missing", () => {
46
+ expect(
47
+ parseAsyncLaunchedToolResult("agentId: x\noutput_file: /tmp/a"),
48
+ ).toBeNull();
49
+ });
50
+
51
+ it("returns null when output_file line is missing", () => {
52
+ const text =
53
+ "Async agent launched successfully.\nagentId: abc123\nMore prose";
54
+ expect(parseAsyncLaunchedToolResult(text)).toBeNull();
55
+ });
56
+
57
+ it("returns null when agentId line is missing", () => {
58
+ const text =
59
+ "Async agent launched successfully.\noutput_file: /tmp/a\nMore prose";
60
+ expect(parseAsyncLaunchedToolResult(text)).toBeNull();
61
+ });
62
+
63
+ it("trims whitespace around extracted values", () => {
64
+ const text = `Async agent launched successfully.
65
+ agentId: abc-with-spaces (something)
66
+ output_file: /tmp/path with spaces.output `;
67
+ const info = parseAsyncLaunchedToolResult(text);
68
+ expect(info?.agentId).toBe("abc-with-spaces");
69
+ // Path can contain spaces — we only trim leading/trailing whitespace
70
+ expect(info?.outputFile).toBe("/tmp/path with spaces.output");
71
+ });
72
+
73
+ it("handles input that is an array of content blocks (Anthropic SDK shape)", () => {
74
+ const blocks = [
75
+ { type: "text", text: "Async agent launched successfully.\nagentId: id1\noutput_file: /tmp/o1\n" },
76
+ ];
77
+ const info = parseAsyncLaunchedToolResult(blocks);
78
+ expect(info?.agentId).toBe("id1");
79
+ expect(info?.outputFile).toBe("/tmp/o1");
80
+ });
81
+
82
+ it("handles non-string input gracefully", () => {
83
+ expect(parseAsyncLaunchedToolResult(null)).toBeNull();
84
+ expect(parseAsyncLaunchedToolResult(undefined)).toBeNull();
85
+ expect(parseAsyncLaunchedToolResult(42 as unknown as string)).toBeNull();
86
+ });
87
+ });
88
+
89
+ const TMP_BASE = resolve(os.tmpdir(), `alvin-parser-${process.pid}`);
90
+
91
+ beforeEach(() => {
92
+ fs.mkdirSync(TMP_BASE, { recursive: true });
93
+ });
94
+
95
+ afterEach(() => {
96
+ try {
97
+ fs.rmSync(TMP_BASE, { recursive: true, force: true });
98
+ } catch { /* ignore */ }
99
+ });
100
+
101
+ async function writeJsonl(name: string, lines: object[]): Promise<string> {
102
+ const path = resolve(TMP_BASE, name);
103
+ fs.writeFileSync(
104
+ path,
105
+ lines.map((l) => JSON.stringify(l)).join("\n") + "\n",
106
+ "utf-8",
107
+ );
108
+ return path;
109
+ }
110
+
111
+ describe("parseOutputFileStatus — JSONL completion detection (Stage 2)", () => {
112
+ it("returns 'missing' when the file doesn't exist", async () => {
113
+ const status = await parseOutputFileStatus(`${TMP_BASE}/nonexistent.jsonl`);
114
+ expect(status.state).toBe("missing");
115
+ });
116
+
117
+ it("returns 'missing' for an empty file", async () => {
118
+ const path = resolve(TMP_BASE, "empty.jsonl");
119
+ fs.writeFileSync(path, "", "utf-8");
120
+ const status = await parseOutputFileStatus(path);
121
+ expect(status.state).toBe("missing");
122
+ });
123
+
124
+ it("returns 'running' when the file has events but no end_turn", async () => {
125
+ const path = await writeJsonl("running.jsonl", [
126
+ {
127
+ type: "user",
128
+ isSidechain: true,
129
+ agentId: "x",
130
+ message: { role: "user", content: "do the thing" },
131
+ },
132
+ {
133
+ type: "assistant",
134
+ isSidechain: true,
135
+ agentId: "x",
136
+ message: {
137
+ role: "assistant",
138
+ content: [{ type: "tool_use", name: "Bash", input: { command: "ls" } }],
139
+ stop_reason: "tool_use",
140
+ },
141
+ },
142
+ ]);
143
+ const status = await parseOutputFileStatus(path);
144
+ expect(status.state).toBe("running");
145
+ });
146
+
147
+ it("returns 'completed' with the final text when stop_reason is end_turn", async () => {
148
+ const path = await writeJsonl("completed.jsonl", [
149
+ {
150
+ type: "user",
151
+ isSidechain: true,
152
+ agentId: "x",
153
+ message: { role: "user", content: "p" },
154
+ },
155
+ {
156
+ type: "assistant",
157
+ isSidechain: true,
158
+ agentId: "x",
159
+ message: {
160
+ role: "assistant",
161
+ content: [{ type: "text", text: "Final report: it works!" }],
162
+ stop_reason: "end_turn",
163
+ usage: { input_tokens: 100, output_tokens: 50 },
164
+ },
165
+ },
166
+ ]);
167
+ const status = await parseOutputFileStatus(path);
168
+ expect(status.state).toBe("completed");
169
+ if (status.state === "completed") {
170
+ expect(status.output).toContain("Final report: it works!");
171
+ expect(status.tokensUsed).toEqual({ input: 100, output: 50 });
172
+ }
173
+ });
174
+
175
+ it("concatenates multiple text blocks in the final assistant message", async () => {
176
+ const path = await writeJsonl("multi-block.jsonl", [
177
+ {
178
+ type: "assistant",
179
+ isSidechain: true,
180
+ agentId: "x",
181
+ message: {
182
+ role: "assistant",
183
+ content: [
184
+ { type: "thinking", text: "let me think" },
185
+ { type: "text", text: "Part one." },
186
+ { type: "text", text: "Part two." },
187
+ ],
188
+ stop_reason: "end_turn",
189
+ },
190
+ },
191
+ ]);
192
+ const status = await parseOutputFileStatus(path);
193
+ expect(status.state).toBe("completed");
194
+ if (status.state === "completed") {
195
+ expect(status.output).toBe("Part one.\n\nPart two.");
196
+ // thinking blocks are NOT included
197
+ expect(status.output).not.toContain("let me think");
198
+ }
199
+ });
200
+
201
+ it("ignores assistant messages with stop_reason !== end_turn (still running)", async () => {
202
+ const path = await writeJsonl("intermediate.jsonl", [
203
+ {
204
+ type: "assistant",
205
+ isSidechain: true,
206
+ agentId: "x",
207
+ message: {
208
+ role: "assistant",
209
+ content: [{ type: "text", text: "checking..." }],
210
+ stop_reason: "tool_use",
211
+ },
212
+ },
213
+ ]);
214
+ const status = await parseOutputFileStatus(path);
215
+ expect(status.state).toBe("running");
216
+ });
217
+
218
+ it("uses the LAST end_turn assistant message when there are multiple turns", async () => {
219
+ const path = await writeJsonl("multi-turn.jsonl", [
220
+ {
221
+ type: "assistant",
222
+ agentId: "x",
223
+ message: {
224
+ content: [{ type: "text", text: "first answer" }],
225
+ stop_reason: "end_turn",
226
+ },
227
+ },
228
+ {
229
+ type: "user",
230
+ agentId: "x",
231
+ message: { content: [{ type: "tool_result", content: "..." }] },
232
+ },
233
+ {
234
+ type: "assistant",
235
+ agentId: "x",
236
+ message: {
237
+ content: [{ type: "text", text: "second and final answer" }],
238
+ stop_reason: "end_turn",
239
+ },
240
+ },
241
+ ]);
242
+ const status = await parseOutputFileStatus(path);
243
+ expect(status.state).toBe("completed");
244
+ if (status.state === "completed") {
245
+ expect(status.output).toBe("second and final answer");
246
+ }
247
+ });
248
+
249
+ it("survives partial final lines (mid-write)", async () => {
250
+ const path = resolve(TMP_BASE, "partial.jsonl");
251
+ fs.writeFileSync(
252
+ path,
253
+ JSON.stringify({
254
+ type: "assistant",
255
+ agentId: "x",
256
+ message: {
257
+ content: [{ type: "text", text: "checking" }],
258
+ stop_reason: "tool_use",
259
+ },
260
+ }) +
261
+ "\n" +
262
+ '{"type":"assistant","agentId":"x","mes',
263
+ "utf-8",
264
+ );
265
+ const status = await parseOutputFileStatus(path);
266
+ // Partial line is ignored; only the complete event counts
267
+ expect(status.state).toBe("running");
268
+ });
269
+
270
+ it("survives unparseable lines (skip them, keep checking)", async () => {
271
+ const path = resolve(TMP_BASE, "garbage.jsonl");
272
+ fs.writeFileSync(
273
+ path,
274
+ "garbage line\n" +
275
+ JSON.stringify({
276
+ type: "assistant",
277
+ agentId: "x",
278
+ message: {
279
+ content: [{ type: "text", text: "the answer" }],
280
+ stop_reason: "end_turn",
281
+ },
282
+ }) +
283
+ "\n",
284
+ "utf-8",
285
+ );
286
+ const status = await parseOutputFileStatus(path);
287
+ expect(status.state).toBe("completed");
288
+ if (status.state === "completed") {
289
+ expect(status.output).toBe("the answer");
290
+ }
291
+ });
292
+
293
+ it("only tail-reads large files (does not load entire content into memory)", async () => {
294
+ const path = resolve(TMP_BASE, "huge.jsonl");
295
+ // Write ~120KB of padding 'running' events (200 lines of ~600 bytes each), then an end_turn
296
+ const padding = JSON.stringify({
297
+ type: "assistant",
298
+ agentId: "x",
299
+ message: { content: [{ type: "text", text: "x".repeat(500) }], stop_reason: "tool_use" },
300
+ });
301
+ let buf = "";
302
+ for (let i = 0; i < 200; i++) buf += padding + "\n";
303
+ buf +=
304
+ JSON.stringify({
305
+ type: "assistant",
306
+ agentId: "x",
307
+ message: {
308
+ content: [{ type: "text", text: "FINAL" }],
309
+ stop_reason: "end_turn",
310
+ },
311
+ }) + "\n";
312
+ fs.writeFileSync(path, buf, "utf-8");
313
+ expect(fs.statSync(path).size).toBeGreaterThan(100_000);
314
+
315
+ const status = await parseOutputFileStatus(path, { maxTailBytes: 8192 });
316
+ // Tail should still find the last end_turn
317
+ expect(status.state).toBe("completed");
318
+ if (status.state === "completed") {
319
+ expect(status.output).toBe("FINAL");
320
+ }
321
+ });
322
+ });