alvin-bot 4.6.0 → 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,123 @@
1
+ /**
2
+ * Sub-Agent Stats (H3) — rolling 24h aggregation of per-agent run data.
3
+ *
4
+ * Append-only JSON ring buffer persisted to ~/.alvin-bot/subagent-stats.json.
5
+ * On load, entries older than 24h are pruned. On each append, entries older
6
+ * than 24h are pruned.
7
+ *
8
+ * Used by /subagents stats to show run totals per source (user, cron, implicit)
9
+ * over the last 24 hours. No SQLite dependency — when a real SQLite migration
10
+ * lands we can swap the backend without touching the consumer API.
11
+ */
12
+ import os from "os";
13
+ import fs from "fs";
14
+ import { resolve, dirname } from "path";
15
+ const DATA_DIR = process.env.ALVIN_DATA_DIR || resolve(os.homedir(), ".alvin-bot");
16
+ const STATS_FILE = resolve(DATA_DIR, "subagent-stats.json");
17
+ const WINDOW_MS = 24 * 60 * 60 * 1000; // 24 hours
18
+ const MAX_ENTRIES = 5000; // hard cap to prevent unbounded growth on high-frequency bots
19
+ let cache = null;
20
/**
 * Return the in-memory list of stats entries, reading STATS_FILE on first use.
 *
 * The file must contain a JSON array; any other JSON value yields an empty
 * list. Only entries that are non-null objects with a numeric `completedAt`
 * inside the WINDOW_MS window are kept. A read or parse failure also yields
 * an empty list. The result is stored in `cache` and reused by later calls.
 */
function load() {
    if (cache) {
        return cache;
    }
    let kept = [];
    try {
        const parsed = JSON.parse(fs.readFileSync(STATS_FILE, "utf-8"));
        if (Array.isArray(parsed)) {
            // Prune stale entries (> 24h old) on load
            const oldestAllowed = Date.now() - WINDOW_MS;
            const isFresh = (item) => item !== null &&
                typeof item === "object" &&
                typeof item.completedAt === "number" &&
                item.completedAt >= oldestAllowed;
            kept = parsed.filter(isFresh);
        }
    }
    catch {
        // Missing or unreadable file: start with an empty window.
        kept = [];
    }
    cache = kept;
    return cache;
}
43
/**
 * Persist the given entries to STATS_FILE as compact JSON.
 *
 * The payload is written to a sibling temp file and then renamed over the
 * target, so an interrupted write cannot leave a truncated stats file behind
 * (load() falls back to an empty list when the file cannot be parsed, which
 * would silently discard the whole window).
 *
 * Best-effort: failures are logged and never thrown.
 *
 * @param {Array<object>} entries - the entries to serialize.
 */
function save(entries) {
    const tmpFile = `${STATS_FILE}.${process.pid}.tmp`;
    try {
        fs.mkdirSync(dirname(STATS_FILE), { recursive: true });
        fs.writeFileSync(tmpFile, JSON.stringify(entries), "utf-8");
        fs.renameSync(tmpFile, STATS_FILE);
    }
    catch (err) {
        console.error("[subagent-stats] failed to write:", err);
        try {
            // Don't leave a half-written temp file next to the stats file.
            fs.rmSync(tmpFile, { force: true });
        }
        catch {
            // Cleanup is best-effort as well.
        }
    }
}
52
/**
 * Record a completed sub-agent run in the rolling stats window.
 *
 * Builds a new entry from `info` and `result`, drops cached entries older
 * than WINDOW_MS, appends the new entry, trims the list to the newest
 * MAX_ENTRIES, then updates the in-memory cache and persists via save().
 *
 * Duration and token counts that are missing or not finite numbers are
 * stored as 0, so a result without `tokensUsed` no longer throws and a bad
 * value cannot turn the aggregated totals into NaN.
 *
 * @param {object} info - run metadata; `name` and `source` are read.
 * @param {object} result - run outcome; `status`, `duration` and
 *   `tokensUsed.input` / `tokensUsed.output` are read.
 */
export function recordSubAgentRun(info, result) {
    // One timestamp for both the cutoff and the entry keeps them consistent.
    const now = Date.now();
    const cutoff = now - WINDOW_MS;
    const toCount = (value) => (typeof value === "number" && Number.isFinite(value) ? value : 0);
    const newEntry = {
        completedAt: now,
        name: info.name,
        source: info.source ?? "implicit",
        status: result.status,
        durationMs: toCount(result.duration),
        inputTokens: toCount(result.tokensUsed?.input),
        outputTokens: toCount(result.tokensUsed?.output),
    };
    // filter() returns a new array; the previous cache array is left untouched.
    const kept = load().filter((e) => e.completedAt >= cutoff);
    kept.push(newEntry);
    // Enforce hard cap — oldest entries drop first
    const final = kept.length > MAX_ENTRIES ? kept.slice(-MAX_ENTRIES) : kept;
    cache = final;
    save(final);
}
77
/**
 * Compute a summary of the sub-agent runs recorded within the last WINDOW_MS.
 *
 * Reads the entries via load() and aggregates them synchronously; nothing is
 * written back.
 *
 * Entries come from a JSON file, so their fields are treated as untrusted:
 *   - `source` / `status` are only matched against the objects' own keys, so
 *     values such as "constructor" cannot reach Object.prototype;
 *   - numeric fields that are missing or not finite count as 0.
 * An unrecognised `source` is counted under `implicit`; an unrecognised
 * `status` gets its own counter in `byStatus`.
 *
 * @returns {{windowHours: number, total: object, bySource: object, byStatus: object}}
 *   `total` and each `bySource` bucket hold `runs`, `inputTokens`,
 *   `outputTokens` and `totalDurationMs`; `byStatus` maps status to run count.
 */
export function getSubAgentStats() {
    const cutoff = Date.now() - WINDOW_MS;
    const recent = load().filter((e) => e.completedAt >= cutoff);
    const empty = () => ({
        runs: 0,
        inputTokens: 0,
        outputTokens: 0,
        totalDurationMs: 0,
    });
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    const toCount = (value) => (typeof value === "number" && Number.isFinite(value) ? value : 0);
    const addRun = (bucket, e) => {
        bucket.runs += 1;
        bucket.inputTokens += toCount(e.inputTokens);
        bucket.outputTokens += toCount(e.outputTokens);
        bucket.totalDurationMs += toCount(e.durationMs);
    };
    const bySource = {
        user: empty(),
        cron: empty(),
        implicit: empty(),
    };
    const byStatus = {
        completed: 0,
        timeout: 0,
        error: 0,
        cancelled: 0,
    };
    const total = empty();
    for (const e of recent) {
        const bucket = hasOwn(bySource, e.source) ? bySource[e.source] : bySource.implicit;
        addRun(bucket, e);
        addRun(total, e);
        const status = String(e.status);
        byStatus[status] = (hasOwn(byStatus, status) ? byStatus[status] : 0) + 1;
    }
    // Derived from WINDOW_MS so the reported window cannot drift from the filter.
    return { windowHours: WINDOW_MS / (60 * 60 * 1000), total, bySource, byStatus };
}
117
/**
 * Test helper: forget the in-memory entries so the next load() re-reads
 * STATS_FILE from disk.
 *
 * The stats file itself is left alone; point ALVIN_DATA_DIR at a fresh temp
 * directory when a test needs an empty file as well.
 */
export function __resetStatsCacheForTest() {
    cache = null;
}
@@ -15,9 +15,11 @@ const DATA_DIR = process.env.ALVIN_DATA_DIR || resolve(os.homedir(), ".alvin-bot
15
15
  const CONFIG_FILE = resolve(DATA_DIR, "sub-agents.json");
16
16
  const ABSOLUTE_MAX_AGENTS = 16; // Hard cap no matter what
17
17
  const MAX_SUBAGENT_DEPTH = 2; // F2: hard cap on nested spawning
18
+ const DEFAULT_QUEUE_CAP = 20; // D3: default bounded-queue size
19
+ const ABSOLUTE_MAX_QUEUE = 200; // D3: absolute ceiling on queue length
18
20
  let configCache = null;
19
21
  function isValidVisibility(v) {
20
- return v === "auto" || v === "banner" || v === "silent";
22
+ return v === "auto" || v === "banner" || v === "silent" || v === "live";
21
23
  }
22
24
  function loadSubAgentsConfig() {
23
25
  if (configCache)
@@ -28,6 +30,9 @@ function loadSubAgentsConfig() {
28
30
  configCache = {
29
31
  maxParallel: typeof parsed.maxParallel === "number" ? parsed.maxParallel : 0,
30
32
  visibility: isValidVisibility(parsed.visibility) ? parsed.visibility : "auto",
33
+ queueCap: typeof parsed.queueCap === "number"
34
+ ? Math.max(0, Math.min(Math.floor(parsed.queueCap), ABSOLUTE_MAX_QUEUE))
35
+ : DEFAULT_QUEUE_CAP,
31
36
  };
32
37
  }
33
38
  catch {
@@ -35,6 +40,7 @@ function loadSubAgentsConfig() {
35
40
  configCache = {
36
41
  maxParallel: Number(process.env.MAX_SUBAGENTS) || 0,
37
42
  visibility: "auto",
43
+ queueCap: DEFAULT_QUEUE_CAP,
38
44
  };
39
45
  }
40
46
  return configCache;
@@ -79,11 +85,23 @@ export function getVisibility() {
79
85
  */
80
86
  export function setVisibility(mode) {
81
87
  if (!isValidVisibility(mode)) {
82
- throw new Error(`Invalid visibility mode "${mode}". Expected: auto | banner | silent.`);
88
+ throw new Error(`Invalid visibility mode "${mode}". Expected: auto | banner | silent | live.`);
83
89
  }
84
90
  const cfg = loadSubAgentsConfig();
85
91
  saveSubAgentsConfig({ ...cfg, visibility: mode });
86
92
  }
93
/**
 * D3: Read the bounded-queue cap from the sub-agents config.
 * A value of 0 means the queue is disabled (reject on full pool).
 */
export function getQueueCap() {
    const { queueCap } = loadSubAgentsConfig();
    return queueCap;
}
97
/**
 * D3: Set the queue cap. The value is floored and clamped to
 * [0, ABSOLUTE_MAX_QUEUE] before it is saved.
 *
 * Input that does not floor to a number (e.g. `undefined`, "abc", NaN) is
 * rejected: NaN would otherwise pass through the min/max clamp and be
 * persisted as the cap. In that case the stored config is left unchanged.
 *
 * @param {number} n - requested queue length.
 * @returns {number} the effective cap: the clamped value, or the currently
 *   configured cap when `n` is not a usable number.
 */
export function setQueueCap(n) {
    const cfg = loadSubAgentsConfig();
    const requested = Math.floor(n);
    if (Number.isNaN(requested)) {
        return cfg.queueCap;
    }
    const clamped = Math.max(0, Math.min(requested, ABSOLUTE_MAX_QUEUE));
    saveSubAgentsConfig({ ...cfg, queueCap: clamped });
    return clamped;
}
87
105
  // ── State ───────────────────────────────────────────────
88
106
  const activeAgents = new Map();
89
107
  // ── Name resolver (B2) ──────────────────────────────────
@@ -167,6 +185,28 @@ export function findSubAgentByName(name, opts = {}) {
167
185
  async function runSubAgent(id, agentConfig, abort, resolvedName) {
168
186
  const startTime = Date.now();
169
187
  const entry = activeAgents.get(id);
188
+ // A4 live-stream state — set up if the effective visibility is "live"
189
+ // AND this is a user spawn with a parent chat. Cron and implicit spawns
190
+ // don't get live-streaming (cron because there's no interactive watcher,
191
+ // implicit because the parent Claude stream already shows everything).
192
+ let liveStream = null;
193
+ const effectiveVisibility = agentConfig.visibility ?? loadSubAgentsConfig().visibility;
194
+ if (effectiveVisibility === "live" &&
195
+ agentConfig.source === "user" &&
196
+ typeof agentConfig.parentChatId === "number") {
197
+ try {
198
+ const { createLiveStream } = await import("./subagent-delivery.js");
199
+ const stream = createLiveStream(agentConfig.parentChatId, resolvedName);
200
+ if (stream) {
201
+ await stream.start();
202
+ if (!stream.failed)
203
+ liveStream = stream;
204
+ }
205
+ }
206
+ catch (err) {
207
+ console.error(`[subagent ${id}] live-stream init failed:`, err);
208
+ }
209
+ }
170
210
  try {
171
211
  const { getRegistry } = await import("../engine.js");
172
212
  const registry = getRegistry();
@@ -189,8 +229,13 @@ async function runSubAgent(id, agentConfig, abort, resolvedName) {
189
229
  effort: "high",
190
230
  abortSignal: abort.signal,
191
231
  })) {
192
- if (chunk.type === "text")
232
+ if (chunk.type === "text") {
193
233
  finalText = chunk.text || "";
234
+ // A4: push text updates into the throttled live-stream
235
+ if (liveStream && !liveStream.failed) {
236
+ liveStream.update(finalText);
237
+ }
238
+ }
194
239
  if (chunk.type === "done") {
195
240
  inputTokens = chunk.inputTokens || 0;
196
241
  outputTokens = chunk.outputTokens || 0;
@@ -227,6 +272,20 @@ async function runSubAgent(id, agentConfig, abort, resolvedName) {
227
272
  };
228
273
  entry.info.status = "completed";
229
274
  }
275
+ // A4: finalize the live-stream if we had one. On success, mark the
276
+ // entry as delivered so spawnSubAgent.finally() skips the normal
277
+ // deliverSubAgentResult path — the live stream already posted the
278
+ // body, and finalize() already posted the banner.
279
+ if (liveStream && !liveStream.failed && entry.result) {
280
+ try {
281
+ await liveStream.finalize(entry.info, entry.result);
282
+ entry.delivered = true;
283
+ }
284
+ catch (err) {
285
+ console.error(`[subagent ${id}] live-stream finalize failed:`, err);
286
+ // Let the normal delivery path fire as a fallback.
287
+ }
288
+ }
230
289
  }
231
290
  catch (err) {
232
291
  const isAbort = err instanceof Error && err.message.includes("abort");
@@ -248,109 +307,179 @@ async function runSubAgent(id, agentConfig, abort, resolvedName) {
248
307
  entry.info.status = status;
249
308
  }
250
309
  }
251
- // ── Public API ──────────────────────────────────────────
310
/** Spawns waiting for a free slot, in arrival order. */
const pendingQueue = [];
/** Priority order used when draining the queue — higher index = lower priority. */
const SOURCE_PRIORITY = ["user", "cron", "implicit"];
/** Source of a spawn request; "implicit" when the config does not set one. */
function sourceOf(cfg) {
    return cfg.source ?? "implicit";
}
/** Count how many agents are currently running. */
function runningCount() {
    return [...activeAgents.values()].filter((a) => a.info.status === "running").length;
}
/**
 * Pop the next queued spawn according to priority (user > cron > implicit)
 * and within each priority in FIFO order. Returns null if the queue is empty.
 *
 * A source that is not listed in SOURCE_PRIORITY (possible for callers that
 * bypass the type checks) is ranked with the lowest priority class instead
 * of being ignored; otherwise such an entry would never be popped and would
 * hold its queue slot forever.
 */
function popHighestPriorityQueued() {
    const lowestRank = SOURCE_PRIORITY.length - 1;
    let bestIdx = -1;
    let bestRank = Infinity;
    for (let i = 0; i < pendingQueue.length; i++) {
        const known = SOURCE_PRIORITY.indexOf(sourceOf(pendingQueue[i].agentConfig));
        const rank = known === -1 ? lowestRank : known;
        // Strict "<" keeps the earliest entry of each priority class (FIFO).
        if (rank < bestRank) {
            bestIdx = i;
            bestRank = rank;
            if (rank === 0)
                break; // nothing can outrank the top priority
        }
    }
    if (bestIdx < 0) {
        return null;
    }
    const [entry] = pendingQueue.splice(bestIdx, 1);
    return entry;
}
252
334
  /**
253
- * Spawn an isolated sub-agent that runs in the background.
254
- * Returns the agent ID immediately (does NOT await completion).
335
+ * Recalculate queuePosition for every entry still in the queue. Called
336
+ * after a pop or a cancel so /subagents list reflects the current state.
255
337
  */
338
function reindexQueue() {
    // Positions are 1-based and follow the current order of pendingQueue;
    // queued spawns that have no activeAgents entry are left out.
    pendingQueue.forEach((queued, index) => {
        const tracked = activeAgents.get(queued.id);
        if (tracked) {
            tracked.info.queuePosition = index + 1;
        }
    });
}
346
/**
 * Drain as many queued spawns as fit into the current free slots.
 *
 * Each popped spawn is moved to "running" and handed to startRun(). A popped
 * spawn whose activeAgents entry is gone, or is no longer in the "queued"
 * state, is dropped instead of started, and its abort timer is cleared so it
 * does not stay armed for an agent that will never run.
 *
 * NOTE(review): the abort timer is armed in spawnSubAgent when the spawn is
 * enqueued, so time spent waiting in the queue counts against the agent's
 * timeout — confirm this is intended.
 */
function drainQueue() {
    const maxParallel = getMaxParallelAgents();
    while (pendingQueue.length > 0 && runningCount() < maxParallel) {
        const next = popHighestPriorityQueued();
        if (!next)
            break;
        // The queue just shrank: refresh positions for everything still waiting.
        reindexQueue();
        const entry = activeAgents.get(next.id);
        if (!entry || entry.info.status !== "queued") {
            // Cancelled or removed while queued — never start it.
            if (next.timeoutId)
                clearTimeout(next.timeoutId);
            continue;
        }
        // Transition to running
        entry.info.status = "running";
        entry.info.startedAt = Date.now();
        entry.info.queuePosition = undefined;
        startRun(next);
    }
}
364
+ // ── Spawn pipeline ──────────────────────────────────────────
365
/**
 * Start a spawn in the background and wire up its completion handling.
 *
 * Once runSubAgent() settles, in this order: the abort timer is cleared, the
 * caller's onComplete callback runs (if a result exists), the result is
 * delivered unless it was already marked delivered, the run is recorded in
 * the 24h stats, the queue is drained, and a 30-minute cleanup timer for the
 * activeAgents entry is scheduled.
 *
 * @param {object} q - queued spawn: `id`, `resolvedName`, `agentConfig`, `timeoutId`.
 */
function startRun(q) {
    const { id, resolvedName, agentConfig, timeoutId } = q;
    const entry = activeAgents.get(id);
    if (!entry) {
        // Nothing to run, so the abort timer armed at spawn time is not needed.
        if (timeoutId)
            clearTimeout(timeoutId);
        return;
    }
    // Run in background — don't await
    runSubAgent(id, agentConfig, entry.abort, resolvedName)
        .finally(() => {
        if (timeoutId)
            clearTimeout(timeoutId);
        const currentEntry = activeAgents.get(id);
        if (agentConfig.onComplete && currentEntry?.result) {
            try {
                agentConfig.onComplete(currentEntry.result);
            }
            catch (err) {
                console.error(`[subagent ${id}] onComplete callback threw:`, err);
            }
        }
        // I3: fire delivery router (non-blocking, errors logged). Guarded
        // by the `delivered` flag.
        if (currentEntry?.result && !currentEntry.delivered) {
            currentEntry.delivered = true;
            const resultSnapshot = currentEntry.result;
            const infoSnapshot = currentEntry.info;
            import("./subagent-delivery.js")
                .then(({ deliverSubAgentResult }) => deliverSubAgentResult(infoSnapshot, resultSnapshot, {
                visibility: agentConfig.visibility,
            }))
                .catch((err) => console.error(`[subagent ${id}] delivery failed:`, err));
        }
        // H3: record this run in the rolling 24h stats (non-blocking).
        if (currentEntry?.result) {
            const resultSnapshot = currentEntry.result;
            const infoSnapshot = currentEntry.info;
            import("./subagent-stats.js")
                .then(({ recordSubAgentRun }) => recordSubAgentRun(infoSnapshot, resultSnapshot))
                .catch((err) => console.error(`[subagent ${id}] stats recording failed:`, err));
        }
        // D3: drain the queue now that a slot has freed up
        drainQueue();
        // Auto-cleanup: remove completed agents after 30 minutes
        setTimeout(() => {
            const e = activeAgents.get(id);
            if (e && e.info.status !== "running" && e.info.status !== "queued") {
                activeAgents.delete(id);
            }
        }, 30 * 60 * 1000);
    })
        // The chain is not awaited by anyone: without a handler, a rejection
        // from runSubAgent or from the cleanup above would surface as an
        // unhandled promise rejection.
        .catch((err) => {
        console.error(`[subagent ${id}] run failed unexpectedly:`, err);
    });
}
256
415
  export function spawnSubAgent(agentConfig) {
257
416
  // F2: enforce depth cap before touching any state.
258
417
  const depth = agentConfig.depth ?? 0;
259
418
  if (depth > MAX_SUBAGENT_DEPTH) {
260
419
  return Promise.reject(new Error(`Sub-agent depth limit reached (${MAX_SUBAGENT_DEPTH}). Agents can only spawn ${MAX_SUBAGENT_DEPTH} level(s) of nested agents.`));
261
420
  }
262
- // G1: toolset preset. Only "full" is supported in Stufe 1. The literal
263
- // type blocks wrong values at compile time; the runtime check catches
264
- // callers that bypass TypeScript (e.g. plugin code loaded at runtime).
421
+ // G1: toolset preset. Only "full" is supported. The literal type blocks
422
+ // wrong values at compile time; the runtime check catches callers that
423
+ // bypass TypeScript (e.g. plugin code loaded at runtime).
265
424
  const toolset = agentConfig.toolset ?? "full";
266
425
  if (toolset !== "full") {
267
426
  return Promise.reject(new Error(`Invalid toolset "${toolset}". Only "full" is supported in this version.`));
268
427
  }
269
- // Check concurrency limit — now reads from the file-backed config so
270
- // /sub-agents max <n> edits take effect immediately without a restart.
271
- const running = [...activeAgents.values()].filter((a) => a.info.status === "running");
272
428
  const maxParallel = getMaxParallelAgents();
273
- if (running.length >= maxParallel) {
274
- // D4: priority-aware reject messages — give callers context about
275
- // WHO is holding the slots so they know whether to wait, cancel,
276
- // or give up.
277
- const source = agentConfig.source ?? "implicit";
278
- const userSlots = running.filter((a) => a.info.source === "user").length;
279
- const bgSlots = running.length - userSlots;
429
+ const queueCap = getQueueCap();
430
+ const running = runningCount();
431
+ const queuedLen = pendingQueue.length;
432
+ // B2: resolve the requested name to a unique variant.
433
+ const resolved = resolveAgentName(agentConfig.name);
434
+ const resolvedName = resolved.name;
435
+ const id = crypto.randomUUID();
436
+ const timeout = agentConfig.timeout ?? config.subAgentTimeout;
437
+ const abort = new AbortController();
438
+ const timeoutId = setTimeout(() => abort.abort(), timeout);
439
+ const willRunImmediately = running < maxParallel;
440
+ const canQueue = !willRunImmediately && queueCap > 0 && queuedLen < queueCap;
441
+ if (!willRunImmediately && !canQueue) {
442
+ // No slot, no queue room → priority-aware reject
443
+ clearTimeout(timeoutId);
444
+ const source = sourceOf(agentConfig);
445
+ const runningAgents = [...activeAgents.values()].filter((a) => a.info.status === "running");
446
+ const userSlots = runningAgents.filter((a) => a.info.source === "user").length;
447
+ const bgSlots = runningAgents.length - userSlots;
280
448
  let message;
281
449
  if (source === "user") {
282
450
  if (bgSlots > 0) {
283
- message = `Alle Slots belegt (${running.length}/${maxParallel}), davon ${bgSlots} cron/implicit im Hintergrund. /sub-agents list für Details oder /sub-agents cancel <name>.`;
451
+ message = `Alle Slots belegt (${running}/${maxParallel}), davon ${bgSlots} cron/implicit im Hintergrund. Queue voll (${queuedLen}/${queueCap}). /subagents list für Details oder /subagents cancel <name>.`;
284
452
  }
285
453
  else {
286
- message = `Alle Slots belegt (${running.length}/${maxParallel}) mit eigenen user-Spawns. /sub-agents cancel <name> oder warten: /sub-agents list`;
454
+ message = `Alle Slots belegt (${running}/${maxParallel}) mit eigenen user-Spawns. Queue voll (${queuedLen}/${queueCap}). /subagents cancel <name> oder warten.`;
287
455
  }
288
456
  }
289
457
  else {
290
- message = `Sub-agent limit reached (${maxParallel}). Wait for a running agent to finish or cancel one.`;
458
+ message = `Sub-agent limit reached (${maxParallel} running, ${queuedLen}/${queueCap} queued). Wait for a running agent to finish or cancel one.`;
291
459
  }
292
460
  return Promise.reject(new Error(message));
293
461
  }
294
- // B2: resolve the requested name to a unique variant. On collision,
295
- // append #N where N is the smallest free index ≥ 2.
296
- const resolved = resolveAgentName(agentConfig.name);
297
- const resolvedName = resolved.name;
298
- const id = crypto.randomUUID();
299
- const timeout = agentConfig.timeout ?? config.subAgentTimeout;
300
- const abort = new AbortController();
301
- // Set up timeout
302
- const timeoutId = setTimeout(() => abort.abort(), timeout);
303
462
  const info = {
304
463
  id,
305
464
  name: resolvedName,
306
- status: "running",
465
+ status: willRunImmediately ? "running" : "queued",
307
466
  startedAt: Date.now(),
308
467
  model: agentConfig.model,
309
468
  source: agentConfig.source,
310
469
  depth,
311
470
  parentChatId: agentConfig.parentChatId,
312
471
  nameIndex: resolved.index,
472
+ queuePosition: willRunImmediately ? undefined : queuedLen + 1,
313
473
  };
314
474
  activeAgents.set(id, { info, abort, delivered: false });
315
- // Run in background don't await
316
- runSubAgent(id, agentConfig, abort, resolvedName)
317
- .finally(() => {
318
- clearTimeout(timeoutId);
319
- // Call the onComplete callback if the caller provided one. This is
320
- // how cron.ts turns the fire-and-forget spawnSubAgent() into a
321
- // Promise that resolves when the work finishes. The callback runs
322
- // inside a try/catch so a throwing callback can't break cleanup.
323
- const entry = activeAgents.get(id);
324
- if (agentConfig.onComplete && entry?.result) {
325
- try {
326
- agentConfig.onComplete(entry.result);
327
- }
328
- catch (err) {
329
- console.error(`[subagent ${id}] onComplete callback threw:`, err);
330
- }
331
- }
332
- // I3: fire delivery router (non-blocking, errors logged). Dynamic
333
- // import keeps the module graph free of circular edges. Guarded by
334
- // the `delivered` flag so cancelAllSubAgents (shutdown path) and
335
- // this finally() can't both post the result.
336
- if (entry?.result && !entry.delivered) {
337
- entry.delivered = true;
338
- const resultSnapshot = entry.result;
339
- const infoSnapshot = entry.info;
340
- import("./subagent-delivery.js")
341
- .then(({ deliverSubAgentResult }) => deliverSubAgentResult(infoSnapshot, resultSnapshot, {
342
- visibility: agentConfig.visibility,
343
- }))
344
- .catch((err) => console.error(`[subagent ${id}] delivery failed:`, err));
345
- }
346
- // Auto-cleanup: remove completed agents after 30 minutes
347
- setTimeout(() => {
348
- const e = activeAgents.get(id);
349
- if (e && e.info.status !== "running") {
350
- activeAgents.delete(id);
351
- }
352
- }, 30 * 60 * 1000);
353
- });
475
+ const queuedSpawn = { id, resolvedName, agentConfig, depth, timeoutId };
476
+ if (willRunImmediately) {
477
+ startRun(queuedSpawn);
478
+ }
479
+ else {
480
+ pendingQueue.push(queuedSpawn);
481
+ reindexQueue();
482
+ }
354
483
  return Promise.resolve(id);
355
484
  }
356
485
  /**
@@ -365,7 +494,21 @@ export function listSubAgents() {
365
494
  */
366
495
  export function cancelSubAgent(id) {
367
496
  const entry = activeAgents.get(id);
368
- if (!entry || entry.info.status !== "running")
497
+ if (!entry)
498
+ return false;
499
+ if (entry.info.status === "queued") {
500
+ // D3: remove from the pending queue, reindex, mark cancelled.
501
+ const idx = pendingQueue.findIndex((q) => q.id === id);
502
+ if (idx >= 0) {
503
+ const [removed] = pendingQueue.splice(idx, 1);
504
+ if (removed.timeoutId)
505
+ clearTimeout(removed.timeoutId);
506
+ reindexQueue();
507
+ }
508
+ entry.info.status = "cancelled";
509
+ return true;
510
+ }
511
+ if (entry.info.status !== "running")
369
512
  return false;
370
513
  entry.abort.abort();
371
514
  entry.info.status = "cancelled";
@@ -418,6 +561,16 @@ export async function cancelAllSubAgents(notify = true) {
418
561
  // delivered=true so runSubAgent.finally() can't fire a second
419
562
  // delivery on the next microtask, (4) queue the I3 delivery.
420
563
  const runningEntries = [];
564
+ // D3: clear the pending queue first so no entry starts during shutdown.
565
+ for (const q of pendingQueue.splice(0)) {
566
+ if (q.timeoutId)
567
+ clearTimeout(q.timeoutId);
568
+ const entry = activeAgents.get(q.id);
569
+ if (entry) {
570
+ entry.info.status = "cancelled";
571
+ entry.delivered = true; // no delivery for queued-never-ran agents
572
+ }
573
+ }
421
574
  for (const [id, entry] of activeAgents) {
422
575
  if (entry.info.status !== "running")
423
576
  continue;
package/docs/HANDBOOK.md CHANGED
@@ -310,6 +310,7 @@ This allows the "scatter-gather" pattern (main → orchestrator → 10 workers)
310
310
  - `auto` (default) — source-based routing: implicit stays in the parent stream, user and cron get a banner+final delivery.
311
311
  - `banner` — always send a banner+final, even for implicit spawns.
312
312
  - `silent` — never send. The result is still stored in the activeAgents map for 30 minutes and pullable via `/subagents result <name>`.
313
+ - **`live`** — stream incremental updates into a single Telegram message as the agent thinks. Only applies to `source: "user"` spawns with a `parentChatId`. The live message is plain text (so half-formed markdown during streaming can't break the edit), updates are throttled to 800 ms between edits, and a separate banner message is posted at the end so you get a completion notification. If the bot API doesn't support `editMessageText` or the live setup fails, delivery falls back to `banner` mode automatically.
313
314
 
314
315
  ### 7.6 Inheritance
315
316
 
@@ -320,7 +321,29 @@ Sub-agents inherit from the spawning context:
320
321
  - **Model and tools** — inherited via the provider registry.
321
322
  - **Conversation history** — **not inherited.** Sub-agents receive only their own prompt. This forces clean, self-describing spawn requests.
322
323
 
323
- ### 7.7 Shutdown notifications
324
+ ### 7.7 Bounded priority queue
325
+
326
+ When the running pool hits `maxParallel`, new spawn requests land in a bounded queue instead of being rejected immediately.
327
+
328
+ - **Default cap:** 20 slots. Configure via `/subagents queue <n>` (clamped to 0–200).
329
+ - **Disable:** `/subagents queue 0` — restores the old reject-on-full behavior.
330
+ - **Priority order on drain:** `user > cron > implicit`. Within each priority class, FIFO.
331
+ - **`/subagents list`** shows queued entries with a `#N` suffix indicating their position.
332
+ - **Cancel a queued entry** with `/subagents cancel <name>` — it's removed from the queue without ever starting.
333
+
334
+ Reject is only triggered when the pool **and** the queue are both full. The reject message is priority-aware and names who's holding the slots.
335
+
336
+ ### 7.8 Stats
337
+
338
+ `/subagents stats` shows a summary of the last 24 hours of sub-agent runs:
339
+
340
+ - Total runs + total tokens + total wall time
341
+ - Runs per source (user / cron / implicit)
342
+ - Runs per status (completed / cancelled / timeout / error)
343
+
344
+ The backing data is an append-only JSON ring buffer at `~/.alvin-bot/subagent-stats.json`. Entries older than 24 hours are pruned automatically. A hard cap of 5000 entries protects against runaway growth on very busy bots.
345
+
346
+ ### 7.9 Shutdown notifications
324
347
 
325
348
  When you restart the bot (SIGTERM), any still-running sub-agents get a cancellation delivery before the process exits:
326
349
 
@@ -433,7 +456,14 @@ All commands are triggered from any platform that supports commands (Telegram, D
433
456
  | Command | Purpose |
434
457
  |---|---|
435
458
  | `/cron` | Manage scheduled jobs |
436
- | `/subagents` | Manage sub-agents |
459
+ | `/subagents` | Show sub-agent status |
460
+ | `/subagents max <n>` | Set max parallel (0 = auto) |
461
+ | `/subagents queue <n>` | Set bounded-queue cap (0 = disabled) |
462
+ | `/subagents visibility <auto\|banner\|silent\|live>` | Delivery mode |
463
+ | `/subagents list` | List all (queued + running + recent) |
464
+ | `/subagents cancel <name\|id>` | Cancel one |
465
+ | `/subagents result <name\|id>` | Show a completed result |
466
+ | `/subagents stats` | Last 24h run stats (by source + status) |
437
467
  | `/webui` | Open web UI URL |
438
468
  | `/setup` | Re-run the setup wizard flow from chat |
439
469
  | `/restart` | Restart the bot process |
@@ -776,6 +806,13 @@ Alvin Bot follows semver for the **data directory format**. Minor version bumps
776
806
  - Memory: no schema change.
777
807
  - `.env`: no new required variables. `MAX_SUBAGENTS` and `SUBAGENT_TIMEOUT` are optional.
778
808
 
809
+ ### 16.3 From 4.6.x to 4.7.0
810
+
811
+ - Sub-agents: new field `queueCap` in `sub-agents.json` (defaults to 20). Old files without it are read with the default; no manual edit is needed.
812
+ - New file `~/.alvin-bot/subagent-stats.json` — auto-created when the first sub-agent finishes.
813
+ - `start`/`stop` now auto-detect the LaunchAgent on macOS. No migration needed; if you previously installed the LaunchAgent in 4.6.0, `alvin-bot start` now correctly reloads it instead of spawning a parallel pm2 process.
814
+ - No new required `.env` variables.
815
+
779
816
  ### 16.4 From git
780
817
 
781
818
  ```bash
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "alvin-bot",
3
- "version": "4.6.0",
3
+ "version": "4.7.0",
4
4
  "description": "Alvin Bot — Your personal AI agent on Telegram, WhatsApp, Discord, Signal, and Web.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",