alvin-bot 4.12.3 → 4.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/dist/handlers/message.js +9 -0
- package/dist/paths.js +8 -0
- package/dist/providers/claude-sdk-provider.js +25 -5
- package/dist/services/alvin-dispatch.js +125 -0
- package/dist/services/alvin-mcp-tools.js +103 -0
- package/dist/services/async-agent-parser.js +126 -1
- package/dist/services/personality.js +36 -10
- package/package.json +1 -1
- package/test/alvin-dispatch.test.ts +220 -0
- package/test/async-agent-parser-staleness.test.ts +412 -0
- package/test/async-agent-parser-streamjson.test.ts +273 -0
|
@@ -61,13 +61,27 @@ const SDK_ADDON = `When you run commands or edit files, briefly explain what you
|
|
|
61
61
|
* See test/system-prompt-background-hint.test.ts and
|
|
62
62
|
* docs/superpowers/plans/2026-04-13-async-subagents.md
|
|
63
63
|
*/
|
|
64
|
-
const BACKGROUND_SUBAGENT_HINT = `## ⚠️ CRITICAL: Background Sub-Agents
|
|
64
|
+
const BACKGROUND_SUBAGENT_HINT = `## ⚠️ CRITICAL: Background Sub-Agents on Telegram/WhatsApp/Slack
|
|
65
65
|
|
|
66
|
-
**
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
66
|
+
**THE RULE (v4.13 update)**: For ANY long-running work (research,
|
|
67
|
+
audits, multi-step analysis, >30 seconds), you MUST prefer the
|
|
68
|
+
\`mcp__alvin__dispatch_agent\` tool over the built-in \`Task\` tool.
|
|
69
|
+
|
|
70
|
+
**Why \`mcp__alvin__dispatch_agent\` over Task(run_in_background: true)?**
|
|
71
|
+
|
|
72
|
+
The built-in Task tool (even with \`run_in_background: true\`) keeps
|
|
73
|
+
the sub-agent tied to THIS session's CLI subprocess. If the user sends
|
|
74
|
+
me a new message mid-work, aborting this session would kill the
|
|
75
|
+
sub-agent half-way through and leave garbage partial output. Alvin
|
|
76
|
+
learned this the hard way (v4.12.1–v4.12.3 all got this wrong).
|
|
77
|
+
|
|
78
|
+
\`mcp__alvin__dispatch_agent\` spawns a COMPLETELY INDEPENDENT
|
|
79
|
+
\`claude -p\` subprocess with its own PID, own process group. It
|
|
80
|
+
survives ANY abort of this session. The user can interrupt me freely
|
|
81
|
+
and keep chatting while the sub-agent finishes cleanly. The bot
|
|
82
|
+
delivers the result as a separate message when ready.
|
|
83
|
+
|
|
84
|
+
**THIS IS THE SINGLE MOST IMPORTANT RULE FOR USER RESPONSIVENESS.**
|
|
71
85
|
|
|
72
86
|
**Why it matters**: During a synchronous Agent tool call the parent
|
|
73
87
|
session has no way to know the sub-agent is still working. It appears
|
|
@@ -77,7 +91,7 @@ an \`agentId\` + \`outputFile\` path IMMEDIATELY, your turn ends in
|
|
|
77
91
|
seconds, the user can keep chatting with me, and the bot automatically
|
|
78
92
|
delivers the sub-agent's final result as a separate message when ready.
|
|
79
93
|
|
|
80
|
-
**Decision tree** (apply every time you consider
|
|
94
|
+
**Decision tree** (apply every time you consider any sub-agent tool):
|
|
81
95
|
|
|
82
96
|
Does the task involve ANY of the following?
|
|
83
97
|
• Visiting more than 2 URLs
|
|
@@ -89,8 +103,14 @@ delivers the sub-agent's final result as a separate message when ready.
|
|
|
89
103
|
• Crawling, scraping, or fetching multiple resources
|
|
90
104
|
• Research across multiple sources or domains
|
|
91
105
|
|
|
92
|
-
YES →
|
|
93
|
-
NO → foreground is fine (single quick sub-query under 30s
|
|
106
|
+
YES → use \`mcp__alvin__dispatch_agent\` (truly detached, preferred)
|
|
107
|
+
NO → foreground is fine (single quick sub-query under 30s, answer
|
|
108
|
+
yourself if possible)
|
|
109
|
+
|
|
110
|
+
NOTE: The built-in Task tool with run_in_background: true still works
|
|
111
|
+
but is now deprecated on Telegram/Slack/Discord/WhatsApp because it
|
|
112
|
+
ties sub-agent lifetime to this session. Only use Task directly when
|
|
113
|
+
you explicitly need the sub-agent's result IN THIS SAME TURN (rare).
|
|
94
114
|
|
|
95
115
|
**Examples where you MUST dispatch in the background (prefer \`mcp__alvin__dispatch_agent\`; \`run_in_background: true\` only as a fallback):**
|
|
96
116
|
- ANY audit (SEO, security, code quality, performance, accessibility, GEO)
|
|
@@ -107,7 +127,7 @@ delivers the sub-agent's final result as a separate message when ready.
|
|
|
107
127
|
- "What's 2+2?" (no sub-agent needed — answer yourself)
|
|
108
128
|
- "Check if package.json has foo" (one quick tool call)
|
|
109
129
|
|
|
110
|
-
**After launching a background agent, you MUST:**
|
|
130
|
+
**After launching a background agent (either tool), you MUST:**
|
|
111
131
|
1. Tell the user in ONE short sentence what you kicked off.
|
|
112
132
|
Example: "Starting SEO audit for gethomes.io in the background —
|
|
113
133
|
I'll send the report when it's done."
|
|
@@ -115,6 +135,12 @@ delivers the sub-agent's final result as a separate message when ready.
|
|
|
115
135
|
3. The bot will deliver the result as a separate message when ready.
|
|
116
136
|
You don't need to poll the outputFile proactively.
|
|
117
137
|
|
|
138
|
+
**For PARALLEL dispatch** (e.g. user says "research X and Y in parallel"):
|
|
139
|
+
Call \`mcp__alvin__dispatch_agent\` multiple times in the SAME assistant
|
|
140
|
+
turn, once per sub-task. Each returns its own agentId immediately. Your
|
|
141
|
+
turn ends as soon as all dispatches have returned — no sequential
|
|
142
|
+
waiting. The bot delivers each sub-agent's result separately when ready.
|
|
143
|
+
|
|
118
144
|
If the user asks "is it done yet?" before the bot delivers the result,
|
|
119
145
|
you MAY read the agent's \`outputFile\` (from the original tool result)
|
|
120
146
|
using the Read tool to peek at progress — but don't block on it.
|
package/package.json
CHANGED
package/test/alvin-dispatch.test.ts
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v4.13 — alvin_dispatch custom-tool service.
|
|
3
|
+
*
|
|
4
|
+
* `dispatchDetachedAgent(input)` spawns a truly independent `claude -p`
|
|
5
|
+
* subprocess that survives the parent handler's abort. This is the
|
|
6
|
+
* architectural replacement for SDK's built-in Task(run_in_background)
|
|
7
|
+
* tool, which was tied to the parent SDK subprocess lifecycle.
|
|
8
|
+
*
|
|
9
|
+
* Contract:
|
|
10
|
+
* - Input: { prompt, description, chatId, userId, sessionKey }
|
|
11
|
+
* - Output (synchronous): { agentId, outputFile, spawned: true }
|
|
12
|
+
* - Side effect: spawns detached subprocess writing stream-json
|
|
13
|
+
* output to outputFile, registers with async-agent-watcher.
|
|
14
|
+
*
|
|
15
|
+
* These tests stub child_process.spawn so they run fast and deterministically.
|
|
16
|
+
* The "real subprocess survives parent" property was verified empirically
|
|
17
|
+
* in Phase A (see plan doc).
|
|
18
|
+
*/
|
|
19
|
+
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
|
|
20
|
+
import os from "os";
|
|
21
|
+
import fs from "fs";
|
|
22
|
+
import { resolve } from "path";
|
|
23
|
+
|
|
24
|
+
const TEST_DATA_DIR = resolve(
|
|
25
|
+
os.tmpdir(),
|
|
26
|
+
`alvin-dispatch-${process.pid}-${Date.now()}`,
|
|
27
|
+
);
|
|
28
|
+
|
|
29
|
+
interface SpawnRecord {
|
|
30
|
+
cmd: string;
|
|
31
|
+
args: string[];
|
|
32
|
+
opts: {
|
|
33
|
+
detached?: boolean;
|
|
34
|
+
stdio?: unknown;
|
|
35
|
+
cwd?: string;
|
|
36
|
+
env?: Record<string, string | undefined>;
|
|
37
|
+
};
|
|
38
|
+
unreffed: boolean;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
let spawned: SpawnRecord[] = [];
|
|
42
|
+
|
|
43
|
+
beforeEach(async () => {
|
|
44
|
+
if (fs.existsSync(TEST_DATA_DIR))
|
|
45
|
+
fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
|
|
46
|
+
fs.mkdirSync(TEST_DATA_DIR, { recursive: true });
|
|
47
|
+
process.env.ALVIN_DATA_DIR = TEST_DATA_DIR;
|
|
48
|
+
spawned = [];
|
|
49
|
+
vi.resetModules();
|
|
50
|
+
|
|
51
|
+
vi.doMock("node:child_process", async () => {
|
|
52
|
+
const actual = await vi.importActual<typeof import("node:child_process")>(
|
|
53
|
+
"node:child_process",
|
|
54
|
+
);
|
|
55
|
+
return {
|
|
56
|
+
...actual,
|
|
57
|
+
spawn: (cmd: string, args: string[], opts: SpawnRecord["opts"]) => {
|
|
58
|
+
const record: SpawnRecord = {
|
|
59
|
+
cmd,
|
|
60
|
+
args,
|
|
61
|
+
opts,
|
|
62
|
+
unreffed: false,
|
|
63
|
+
};
|
|
64
|
+
spawned.push(record);
|
|
65
|
+
return {
|
|
66
|
+
pid: 12345,
|
|
67
|
+
unref() {
|
|
68
|
+
record.unreffed = true;
|
|
69
|
+
},
|
|
70
|
+
on() {},
|
|
71
|
+
kill() {},
|
|
72
|
+
};
|
|
73
|
+
},
|
|
74
|
+
};
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
vi.doMock("../src/services/subagent-delivery.js", () => ({
|
|
78
|
+
deliverSubAgentResult: async () => {},
|
|
79
|
+
attachBotApi: () => {},
|
|
80
|
+
__setBotApiForTest: () => {},
|
|
81
|
+
}));
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
afterEach(async () => {
|
|
85
|
+
try {
|
|
86
|
+
const mod = await import("../src/services/async-agent-watcher.js");
|
|
87
|
+
mod.stopWatcher();
|
|
88
|
+
mod.__resetForTest();
|
|
89
|
+
} catch {
|
|
90
|
+
/* ignore */
|
|
91
|
+
}
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
describe("dispatchDetachedAgent (v4.13)", () => {
|
|
95
|
+
it("spawns claude -p with detached: true and unrefs", async () => {
|
|
96
|
+
const mod = await import("../src/services/alvin-dispatch.js");
|
|
97
|
+
const result = mod.dispatchDetachedAgent({
|
|
98
|
+
prompt: "research X",
|
|
99
|
+
description: "X research",
|
|
100
|
+
chatId: 42,
|
|
101
|
+
userId: 42,
|
|
102
|
+
sessionKey: "s1",
|
|
103
|
+
});
|
|
104
|
+
expect(result.agentId).toMatch(/^alvin-[a-f0-9]{16,}$/);
|
|
105
|
+
expect(result.outputFile).toContain(TEST_DATA_DIR);
|
|
106
|
+
expect(result.spawned).toBe(true);
|
|
107
|
+
|
|
108
|
+
expect(spawned).toHaveLength(1);
|
|
109
|
+
const [s] = spawned;
|
|
110
|
+
expect(s.cmd).toMatch(/claude/);
|
|
111
|
+
expect(s.args).toContain("-p");
|
|
112
|
+
expect(s.args).toContain("research X");
|
|
113
|
+
expect(s.args).toContain("--output-format");
|
|
114
|
+
expect(s.args).toContain("stream-json");
|
|
115
|
+
expect(s.opts.detached).toBe(true);
|
|
116
|
+
expect(s.unreffed).toBe(true);
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
it("returns unique agentIds for concurrent dispatches", async () => {
|
|
120
|
+
const mod = await import("../src/services/alvin-dispatch.js");
|
|
121
|
+
const r1 = mod.dispatchDetachedAgent({
|
|
122
|
+
prompt: "a",
|
|
123
|
+
description: "a",
|
|
124
|
+
chatId: 1,
|
|
125
|
+
userId: 1,
|
|
126
|
+
sessionKey: "s1",
|
|
127
|
+
});
|
|
128
|
+
const r2 = mod.dispatchDetachedAgent({
|
|
129
|
+
prompt: "b",
|
|
130
|
+
description: "b",
|
|
131
|
+
chatId: 1,
|
|
132
|
+
userId: 1,
|
|
133
|
+
sessionKey: "s1",
|
|
134
|
+
});
|
|
135
|
+
expect(r1.agentId).not.toBe(r2.agentId);
|
|
136
|
+
expect(r1.outputFile).not.toBe(r2.outputFile);
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
it("registers the pending agent with the watcher", async () => {
|
|
140
|
+
const mod = await import("../src/services/alvin-dispatch.js");
|
|
141
|
+
const watcher = await import("../src/services/async-agent-watcher.js");
|
|
142
|
+
|
|
143
|
+
mod.dispatchDetachedAgent({
|
|
144
|
+
prompt: "x",
|
|
145
|
+
description: "X audit",
|
|
146
|
+
chatId: 42,
|
|
147
|
+
userId: 42,
|
|
148
|
+
sessionKey: "s1",
|
|
149
|
+
});
|
|
150
|
+
|
|
151
|
+
const pending = watcher.listPendingAgents();
|
|
152
|
+
expect(pending).toHaveLength(1);
|
|
153
|
+
expect(pending[0].description).toBe("X audit");
|
|
154
|
+
expect(pending[0].sessionKey).toBe("s1");
|
|
155
|
+
});
|
|
156
|
+
|
|
157
|
+
it("increments session.pendingBackgroundCount on dispatch", async () => {
|
|
158
|
+
const mod = await import("../src/services/alvin-dispatch.js");
|
|
159
|
+
const { getSession } = await import("../src/services/session.js");
|
|
160
|
+
|
|
161
|
+
const session = getSession("s-count");
|
|
162
|
+
session.pendingBackgroundCount = 0;
|
|
163
|
+
|
|
164
|
+
mod.dispatchDetachedAgent({
|
|
165
|
+
prompt: "p",
|
|
166
|
+
description: "d",
|
|
167
|
+
chatId: 1,
|
|
168
|
+
userId: 1,
|
|
169
|
+
sessionKey: "s-count",
|
|
170
|
+
});
|
|
171
|
+
expect(session.pendingBackgroundCount).toBe(1);
|
|
172
|
+
|
|
173
|
+
mod.dispatchDetachedAgent({
|
|
174
|
+
prompt: "p2",
|
|
175
|
+
description: "d2",
|
|
176
|
+
chatId: 1,
|
|
177
|
+
userId: 1,
|
|
178
|
+
sessionKey: "s-count",
|
|
179
|
+
});
|
|
180
|
+
expect(session.pendingBackgroundCount).toBe(2);
|
|
181
|
+
});
|
|
182
|
+
|
|
183
|
+
it("uses stdio redirect so child's stdout goes to outputFile", async () => {
|
|
184
|
+
const mod = await import("../src/services/alvin-dispatch.js");
|
|
185
|
+
mod.dispatchDetachedAgent({
|
|
186
|
+
prompt: "p",
|
|
187
|
+
description: "d",
|
|
188
|
+
chatId: 1,
|
|
189
|
+
userId: 1,
|
|
190
|
+
sessionKey: "s1",
|
|
191
|
+
});
|
|
192
|
+
const [s] = spawned;
|
|
193
|
+
// stdio should be an array with FD redirects (ignore, pipe-to-file, ignore)
|
|
194
|
+
// or similar. We verify it's NOT "inherit" (which would attach to parent).
|
|
195
|
+
expect(s.opts.stdio).not.toBe("inherit");
|
|
196
|
+
expect(s.opts.stdio).not.toBe(undefined);
|
|
197
|
+
});
|
|
198
|
+
|
|
199
|
+
it("cleans env of CLAUDECODE/CLAUDE_CODE_ENTRYPOINT to prevent nested session errors", async () => {
|
|
200
|
+
const mod = await import("../src/services/alvin-dispatch.js");
|
|
201
|
+
process.env.CLAUDECODE = "1";
|
|
202
|
+
process.env.CLAUDE_CODE_ENTRYPOINT = "cli";
|
|
203
|
+
try {
|
|
204
|
+
mod.dispatchDetachedAgent({
|
|
205
|
+
prompt: "p",
|
|
206
|
+
description: "d",
|
|
207
|
+
chatId: 1,
|
|
208
|
+
userId: 1,
|
|
209
|
+
sessionKey: "s1",
|
|
210
|
+
});
|
|
211
|
+
const [s] = spawned;
|
|
212
|
+
expect(s.opts.env).toBeDefined();
|
|
213
|
+
expect(s.opts.env?.CLAUDECODE).toBeUndefined();
|
|
214
|
+
expect(s.opts.env?.CLAUDE_CODE_ENTRYPOINT).toBeUndefined();
|
|
215
|
+
} finally {
|
|
216
|
+
delete process.env.CLAUDECODE;
|
|
217
|
+
delete process.env.CLAUDE_CODE_ENTRYPOINT;
|
|
218
|
+
}
|
|
219
|
+
});
|
|
220
|
+
});
|