alvin-bot 4.9.3 → 4.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,206 @@
1
+ /**
2
+ * Async Sub-Agent Watcher (Fix #17 Stage 2)
3
+ *
4
+ * Tracks pending background sub-agents that Claude launched with
5
+ * `run_in_background: true`. Polls each agent's outputFile every
6
+ * POLL_INTERVAL_MS, detects completion (success/failure/timeout),
7
+ * and delivers the final result as a separate Telegram message via
8
+ * the existing subagent-delivery.ts pipeline.
9
+ *
10
+ * Persistence: pending agents survive bot restarts via
11
+ * ~/.alvin-bot/state/async-agents.json. On boot, startWatcher() loads
12
+ * the file and resumes polling — same catchup pattern as the v4.9.0
13
+ * cron scheduler.
14
+ *
15
+ * Why this exists: Claude's Agent tool defaults to synchronous, which
16
+ * blocks the main Telegram session for 10+ minutes during long audits.
17
+ * Stage 1 of the fix tells Claude to use run_in_background; Stage 2
18
+ * (this file) catches the resulting outputFile and delivers the result
19
+ * when ready, so the user can keep chatting while the agent works.
20
+ *
21
+ * See docs/superpowers/plans/2026-04-13-async-subagents.md for the
22
+ * full plan and docs/superpowers/specs/sdk-async-agent-outputfile-format.md
23
+ * for the JSONL format details.
24
+ */
25
+ import fs from "fs";
26
+ import { dirname } from "path";
27
+ import { parseOutputFileStatus } from "./async-agent-parser.js";
28
+ import { ASYNC_AGENTS_STATE_FILE } from "../paths.js";
29
/** Delay between two polling passes over the pending agents. */
const POLL_INTERVAL_MS = 15 * 1000;
/**
 * Per-agent hard ceiling (12 hours). Once an agent is older than this
 * the watcher stops waiting and reports a timeout, which also keeps the
 * persisted state file from growing without bound.
 */
const MAX_AGENT_AGE_MS = 12 * 3_600_000;
// ── Module state ──────────────────────────────────────────────────
/** Pending agents keyed by agentId. */
const pending = new Map();
/** Handle of the active setInterval, or null while the watcher is stopped. */
let pollTimer = null;
/** True once startWatcher() has run and until stopWatcher() is called. */
let started = false;
39
+ // ── Persistence ───────────────────────────────────────────────────
40
/**
 * Load persisted pending agents from ASYNC_AGENTS_STATE_FILE into `pending`.
 *
 * A missing file is the normal fresh-start case and is ignored silently.
 * Any other read error, or a file that is not valid JSON, is logged so a
 * lost state file does not go unnoticed. Entries without a string
 * `agentId` and `outputFile` are skipped.
 */
function loadFromDisk() {
    let raw;
    try {
        raw = fs.readFileSync(ASYNC_AGENTS_STATE_FILE, "utf-8");
    }
    catch (err) {
        // No state file yet — fresh start. Not an error.
        if (err?.code !== "ENOENT") {
            console.error("[async-watcher] failed to read state file:", err);
        }
        return;
    }
    let arr;
    try {
        arr = JSON.parse(raw);
    }
    catch (err) {
        console.error("[async-watcher] state file is not valid JSON, ignoring it:", err);
        return;
    }
    if (!Array.isArray(arr)) {
        return;
    }
    const now = Date.now();
    for (const entry of arr) {
        if (typeof entry?.agentId !== "string" || typeof entry?.outputFile !== "string") {
            continue;
        }
        // pollOnce() compares `now >= entry.giveUpAt`; a missing or
        // non-numeric deadline would make that comparison always false and
        // the agent would be polled forever. Normalise both timestamps.
        if (!Number.isFinite(entry.startedAt)) {
            entry.startedAt = now;
        }
        if (!Number.isFinite(entry.giveUpAt)) {
            entry.giveUpAt = entry.startedAt + MAX_AGENT_AGE_MS;
        }
        pending.set(entry.agentId, entry);
    }
}
56
/**
 * Persist the current `pending` map to ASYNC_AGENTS_STATE_FILE as a JSON
 * array, creating the parent directory if needed.
 *
 * The JSON is written to a sibling temp file first and then renamed over
 * the real file, so a crash in the middle of the write cannot leave a
 * truncated state file behind. Failures are logged, never thrown —
 * persistence is best-effort and must not break the caller.
 */
function saveToDisk() {
    const tmpFile = `${ASYNC_AGENTS_STATE_FILE}.tmp`;
    try {
        fs.mkdirSync(dirname(ASYNC_AGENTS_STATE_FILE), { recursive: true });
        fs.writeFileSync(tmpFile, JSON.stringify([...pending.values()], null, 2), "utf-8");
        fs.renameSync(tmpFile, ASYNC_AGENTS_STATE_FILE);
    }
    catch (err) {
        console.error("[async-watcher] failed to persist state:", err);
    }
}
65
+ // ── Public API ────────────────────────────────────────────────────
66
/**
 * Track a background agent that was just launched.
 *
 * The entry is stored under its agentId and written to disk right away,
 * so the result can still be delivered after a crash and restart.
 *
 * @param {object} input - agentId, outputFile, description, prompt,
 *   chatId, userId, toolUseId and an optional giveUpAt deadline (ms epoch).
 *   When giveUpAt is null/undefined it defaults to now + MAX_AGENT_AGE_MS.
 */
export function registerPendingAgent(input) {
    const registeredAt = Date.now();
    const { agentId, outputFile, description, prompt, chatId, userId, toolUseId } = input;
    const deadline = input.giveUpAt ?? registeredAt + MAX_AGENT_AGE_MS;
    pending.set(agentId, {
        agentId,
        outputFile,
        description,
        prompt,
        chatId,
        userId,
        startedAt: registeredAt,
        lastCheckedAt: 0,
        giveUpAt: deadline,
        toolUseId,
    });
    saveToDisk();
}
88
/**
 * Snapshot of the in-memory pending agents (used by /subagents and
 * diagnostics). The array is a fresh copy; the entries themselves are
 * the live objects.
 */
export function listPendingAgents() {
    return Array.from(pending.values());
}
92
/**
 * Begin polling. Safe to call more than once: only the first call loads
 * the persisted state and installs the interval timer.
 */
export function startWatcher() {
    if (!started) {
        started = true;
        loadFromDisk();
        const runCycle = () => {
            pollOnce().catch((err) => console.error("[async-watcher] poll cycle failed:", err));
        };
        pollTimer = setInterval(runCycle, POLL_INTERVAL_MS);
        console.log(`⏳ Async-agent watcher started (${pending.size} pending, ${POLL_INTERVAL_MS / 1000}s interval)`);
    }
}
103
/**
 * Halt polling. Safe to call when the watcher is not running; pending
 * entries stay in memory and on disk.
 */
export function stopWatcher() {
    started = false;
    if (pollTimer) {
        clearInterval(pollTimer);
    }
    pollTimer = null;
}
110
/** Promise of the poll cycle that is currently running, or null when idle. */
let activePollCycle = null;
/**
 * Run one poll cycle: check every pending agent, deliver the completed
 * ones, drop them from the in-memory + on-disk state. Exported for
 * tests; production uses the setInterval from startWatcher().
 *
 * Only one cycle runs at a time. If a cycle is still in flight (for
 * example because a delivery is slow), the call returns that cycle's
 * promise instead of starting a second pass, which would deliver the
 * same agents twice.
 *
 * @returns {Promise<void>} settles when the (possibly shared) cycle ends.
 */
export function pollOnce() {
    if (activePollCycle) {
        return activePollCycle;
    }
    activePollCycle = runPollCycle().finally(() => {
        activePollCycle = null;
    });
    return activePollCycle;
}
/**
 * One pass over all pending agents. A failure while checking one agent is
 * logged and does not stop the pass, so agents that were already
 * delivered in this pass are always removed and never delivered again.
 */
async function runPollCycle() {
    const now = Date.now();
    const toRemove = [];
    for (const entry of pending.values()) {
        try {
            if (await checkPendingEntry(entry, now)) {
                toRemove.push(entry.agentId);
            }
        }
        catch (err) {
            // Keep the entry pending; it is retried on the next cycle.
            console.error(`[async-watcher] check failed for ${entry.agentId}:`, err);
        }
    }
    if (toRemove.length > 0) {
        for (const id of toRemove) {
            pending.delete(id);
        }
        saveToDisk();
    }
}
/**
 * Check a single agent and deliver its outcome if it has one.
 *
 * @returns {Promise<boolean>} true when the agent was delivered (as
 *   completed, failed or timed out) and should be dropped; false when it
 *   is still running or its output file is missing.
 */
async function checkPendingEntry(entry, now) {
    entry.lastCheckedAt = now;
    // Timeout check first — if the agent is past its giveUpAt, give up
    // regardless of whether the file shows progress.
    if (now >= entry.giveUpAt) {
        await deliverAsFailure(entry, "timeout", "Agent ran longer than 12h — giving up");
        return true;
    }
    const status = await parseOutputFileStatus(entry.outputFile);
    if (status.state === "completed") {
        await deliverAsCompleted(entry, status.output, status.tokensUsed);
        return true;
    }
    if (status.state === "failed") {
        await deliverAsFailure(entry, "error", status.error);
        return true;
    }
    // running / missing → keep polling next cycle
    return false;
}
144
+ // ── Delivery helpers ──────────────────────────────────────────────
145
/**
 * Hand a successfully finished agent to the sub-agent delivery pipeline.
 *
 * @param {object} entry - the pending-agent record.
 * @param {*} output - final output reported for the agent.
 * @param {{input: number, output: number}} [tokensUsed] - token usage;
 *   zeros are substituted when it is null/undefined.
 *
 * Errors thrown by the delivery call are logged and swallowed.
 */
async function deliverAsCompleted(entry, output, tokensUsed) {
    const { deliverSubAgentResult: deliver } = await import("./subagent-delivery.js");
    const { agentId, description, startedAt, chatId } = entry;
    const info = {
        id: agentId,
        name: description,
        status: "completed",
        startedAt,
        // Reuse cron banner format — fits async background agents.
        source: "cron",
        depth: 0,
        parentChatId: chatId,
    };
    const usage = tokensUsed ?? { input: 0, output: 0 };
    const result = {
        id: agentId,
        name: description,
        status: "completed",
        output,
        tokensUsed: usage,
        duration: Date.now() - startedAt,
    };
    try {
        await deliver(info, result);
    }
    catch (err) {
        console.error(`[async-watcher] delivery failed for ${entry.agentId}:`, err);
    }
}
171
/**
 * Hand a failed or timed-out agent to the sub-agent delivery pipeline.
 *
 * @param {object} entry - the pending-agent record.
 * @param {string} status - status to report (pollOnce passes "timeout" or "error").
 * @param {*} error - error detail forwarded in the result.
 *
 * The result carries an empty output and zero token usage. Errors thrown
 * by the delivery call are logged and swallowed.
 */
async function deliverAsFailure(entry, status, error) {
    const { deliverSubAgentResult: deliver } = await import("./subagent-delivery.js");
    const { agentId, description, startedAt, chatId } = entry;
    const info = {
        id: agentId,
        name: description,
        status,
        startedAt,
        source: "cron",
        depth: 0,
        parentChatId: chatId,
    };
    const noUsage = { input: 0, output: 0 };
    const result = {
        id: agentId,
        name: description,
        status,
        output: "",
        tokensUsed: noUsage,
        duration: Date.now() - startedAt,
        error,
    };
    try {
        await deliver(info, result);
    }
    catch (err) {
        console.error(`[async-watcher] failure delivery failed for ${entry.agentId}:`, err);
    }
}
198
+ // ── Test helpers ──────────────────────────────────────────────────
199
/**
 * Test-only: forget all in-memory pending agents and stop the timer.
 * The state file on disk is left untouched.
 */
export function __resetForTest() {
    pending.clear();
    started = false;
    if (pollTimer) {
        clearInterval(pollTimer);
    }
    pollTimer = null;
}
@@ -46,6 +46,58 @@ function getBasePrompt(lang) {
46
46
  }
47
47
  /** Additional instructions for SDK providers (tool use) */
48
48
  const SDK_ADDON = `When you run commands or edit files, briefly explain what you did.`;
49
+ /**
50
+ * Stage 1 of Fix #17 — async sub-agents.
51
+ *
52
+ * Tells Claude to use the SDK's `run_in_background` flag for long-running
53
+ * Agent tool calls so the main Telegram session doesn't stay locked for
54
+ * 10+ minutes while sub-agents crawl the web, run audits, or build reports.
55
+ *
56
+ * Only injected into the prompt when isSDK === true: buildSystemPrompt()
57
+ * pushes it right after SDK_ADDON (non-SDK providers have no Agent tool).
58
+ * The bot's async-agent-watcher (Stage 2) polls the resulting outputFile
59
+ * and delivers the final result as a separate Telegram message via subagent-delivery.ts.
60
+ *
61
+ * See test/system-prompt-background-hint.test.ts and
62
+ * docs/superpowers/plans/2026-04-13-async-subagents.md
63
+ */
64
+ const BACKGROUND_SUBAGENT_HINT = `## Background Sub-Agents (IMPORTANT)
65
+
66
+ For long-running work, ALWAYS set \`run_in_background: true\` on your Agent
67
+ tool call. This makes the tool return IMMEDIATELY with an \`agentId\` and
68
+ \`outputFile\` path instead of blocking for the full duration. Your main
69
+ turn ends quickly, the user's session unlocks, and they can keep chatting
70
+ with me while the sub-agent works in parallel.
71
+
72
+ After launching a background agent, you MUST:
73
+ 1. Tell the user in one short sentence what you kicked off ("Starting SEO
74
+ audit for gethomes.io in the background — I'll send the report when
75
+ it's done").
76
+ 2. End your turn promptly. Do NOT wait around. Do NOT keep working on
77
+ that task. Wrap up within 1-2 short messages.
78
+ 3. When the agent finishes, its final report will arrive as a SEPARATE
79
+ message in this chat — the bot handles that delivery automatically.
80
+ You don't need to poll the outputFile proactively.
81
+
82
+ If the user asks "is it done yet?" before the bot delivers the result,
83
+ you MAY read the agent's \`outputFile\` (from the tool result) using the
84
+ Read tool to check progress.
85
+
86
+ **DO use \`run_in_background: true\` for:**
87
+ - Audits (SEO, security, code quality, performance)
88
+ - Research tasks that visit more than 3 web pages
89
+ - Multi-file codebase analyses, full-repo scans
90
+ - Report generation with multiple sub-steps
91
+ - Anything you estimate will take longer than 2 minutes
92
+
93
+ **DON'T use run_in_background for:**
94
+ - Simple questions the user is actively waiting on a quick answer
95
+ - Single file reads
96
+ - Quick web fetches for a specific fact
97
+ - Short tool chains under ~30 seconds
98
+
99
+ When in doubt: prefer background for audits/research, foreground for
100
+ conversational answers.`;
49
101
  /**
50
102
  * Self-Awareness Core — Dynamic introspection block.
51
103
  *
@@ -164,6 +216,9 @@ export function buildSystemPrompt(isSDK, language = "en", chatId) {
164
216
  }
165
217
  if (isSDK) {
166
218
  parts.push(SDK_ADDON);
219
+ // Stage 1 — teach Claude to use run_in_background for long-running
220
+ // Agent tool calls so the main session unlocks fast.
221
+ parts.push(BACKGROUND_SUBAGENT_HINT);
167
222
  // SDK providers have bash access — inject discovered tools so they know what's available
168
223
  parts.push(getToolSummary());
169
224
  }
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Pure decision helper for the web-server bind loop.
3
+ *
4
+ * Decouples the "what should happen next" logic from the side-effect
5
+ * spaghetti of real http.Server binding so it can be unit-tested in
6
+ * isolation. See test/web-server-resilience.test.ts for the contract.
7
+ *
8
+ * Why this exists: the v4.8.x and earlier implementations crashed the
9
+ * entire bot when port 3100 was held by a foreign process. A colleague
10
+ * running an OpenClaw fork hit the same bug years ago and ended up
11
+ * decoupling the web server completely — the main bot should never be
12
+ * gated on a web-UI bind. This helper encodes the decision logic so
13
+ * the new startWebServer() can just act on the returned action.
14
+ */
15
/** Highest valid TCP port; listen() rejects anything above it. */
const MAX_TCP_PORT = 65535;
/**
 * Decide what the bind loop should do next after a failed listen().
 *
 * Rule of thumb:
 *  - EADDRINUSE AND attempts remaining AND the next port is still a valid
 *    TCP port → climb the port ladder.
 *  - EADDRINUSE AND ladder exhausted (or the next rung would exceed
 *    65535) → background retry at original port.
 *  - any other error (EACCES, listen-called-twice, etc.) → background retry.
 *
 * PURE: no timers, no I/O, no mutation of inputs. Safe to call from tests.
 *
 * @param {{code?: string}|null|undefined} err - error from the failed listen().
 * @param {number} attempt - zero-based index of the attempt that just failed.
 * @param {{maxPortTries: number, originalPort: number, backgroundRetryMs: number}} opts
 * @returns {{type: "retry-port", port: number, attempt: number}
 *   | {type: "retry-background", delayMs: number, port: number}}
 */
export function decideNextBindAction(err, attempt, opts) {
    const nextPort = opts.originalPort + attempt + 1;
    const canClimb = err?.code === "EADDRINUSE"
        && attempt < opts.maxPortTries - 1
        // Never hand the caller a port number listen() would reject.
        && nextPort <= MAX_TCP_PORT;
    if (canClimb) {
        return {
            type: "retry-port",
            port: nextPort,
            attempt: attempt + 1,
        };
    }
    // EADDRINUSE with no usable rung left, OR any non-EADDRINUSE error:
    // don't walk the port ladder further, just back off and retry the
    // original port in the background.
    return {
        type: "retry-background",
        delayMs: opts.backgroundRetryMs,
        port: opts.originalPort,
    };
}