alvin-bot 4.9.2 → 4.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,10 +30,24 @@ import { addCanvasClient } from "./canvas.js";
30
30
  import { BOT_ROOT, ENV_FILE, PUBLIC_DIR, MEMORY_DIR, MEMORY_FILE, SOUL_FILE, DATA_DIR, MCP_CONFIG, SKILLS_DIR } from "../paths.js";
31
31
  import { broadcast } from "../services/broadcast.js";
32
32
  import { BOT_VERSION } from "../version.js";
33
+ import { decideNextBindAction } from "./bind-strategy.js";
33
34
  const WEB_PORT = parseInt(process.env.WEB_PORT || "3100");
34
- /** Module-scope reference to the WebSocket server so stopWebServer() can
35
- * tear it down together with the HTTP server. Set inside startWebServer(). */
35
+ /** Tuning for the bind loop. Walk the port ladder `MAX_PORT_TRIES` times
36
+ * then fall back to a `BACKGROUND_RETRY_MS` idle loop — the bot keeps
37
+ * running on Telegram either way; see bind-strategy.ts for the pure
38
+ * decision logic. */
39
+ const MAX_PORT_TRIES = 20;
40
+ const BACKGROUND_RETRY_MS = 30_000;
41
+ /** Current live http.Server, if one has successfully bound. */
42
+ let currentServer = null;
43
+ /** Current live WebSocketServer attached to currentServer. */
36
44
  let wsServerRef = null;
45
+ /** Background-retry timer handle — set when the bind loop is in its
46
+ * idle wait between cycles, cleared when stopWebServer() cancels. */
47
+ let bindRetryTimer = null;
48
+ /** Flag flipped by stopWebServer(). The bind-scheduling and bind-failure
49
+ * paths check this and exit silently if set, so no new attempt starts. */
50
+ let stopRequested = false;
37
51
  const WEB_PASSWORD = process.env.WEB_PASSWORD || "";
38
52
  /** The actual port the Web UI is running on (may differ from WEB_PORT if busy). */
39
53
  let actualWebPort = WEB_PORT;
@@ -1371,126 +1385,207 @@ function handleWebSocket(wss) {
1371
1385
  });
1372
1386
  }
1373
1387
  // ── Start Server ────────────────────────────────────────
1374
- export function startWebServer() {
1375
- const server = http.createServer((req, res) => {
1376
- let body = "";
1377
- req.on("data", (chunk) => { body += chunk; });
1378
- req.on("end", () => {
1379
- const urlPath = (req.url || "/").split("?")[0];
1380
- // OpenAI-compatible API (/v1/chat/completions, /v1/models)
1381
- if (urlPath.startsWith("/v1/")) {
1382
- handleOpenAICompat(req, res, urlPath, body);
1383
- return;
1384
- }
1385
- // API routes
1386
- if (urlPath.startsWith("/api/")) {
1387
- handleAPI(req, res, urlPath, body);
1388
- return;
1389
- }
1390
- // Auth page (if password set and not authenticated)
1391
- if (WEB_PASSWORD && !checkAuth(req) && urlPath !== "/login.html") {
1392
- res.writeHead(302, { Location: "/login.html" });
1393
- res.end();
1394
- return;
1395
- }
1396
- // Canvas UI
1397
- if (urlPath === "/canvas") {
1398
- const canvasFile = resolve(PUBLIC_DIR, "canvas.html");
1399
- try {
1400
- const content = fs.readFileSync(canvasFile);
1401
- res.setHeader("Content-Type", "text/html");
1402
- res.end(content);
1403
- }
1404
- catch {
1405
- res.statusCode = 404;
1406
- res.end("Not found");
1407
- }
1408
- return;
1409
- }
1410
- // Static files
1411
- let filePath = urlPath === "/" ? "/index.html" : urlPath;
1412
- filePath = resolve(PUBLIC_DIR, filePath.slice(1));
1413
- // Security: prevent path traversal
1414
- if (!filePath.startsWith(PUBLIC_DIR)) {
1415
- res.statusCode = 403;
1416
- res.end("Forbidden");
1417
- return;
1418
- }
1388
+ /**
1389
+ * HTTP request handler for the web UI. Hoisted to a top-level function
1390
+ * so every bind attempt can create a fresh http.Server without
1391
+ * rebuilding the handler closure.
1392
+ */
1393
+ function handleWebRequest(req, res) {
1394
+ let body = "";
1395
+ req.on("data", (chunk) => { body += chunk; });
1396
+ req.on("end", () => {
1397
+ const urlPath = (req.url || "/").split("?")[0];
1398
+ // OpenAI-compatible API (/v1/chat/completions, /v1/models)
1399
+ if (urlPath.startsWith("/v1/")) {
1400
+ handleOpenAICompat(req, res, urlPath, body);
1401
+ return;
1402
+ }
1403
+ // API routes
1404
+ if (urlPath.startsWith("/api/")) {
1405
+ handleAPI(req, res, urlPath, body);
1406
+ return;
1407
+ }
1408
+ // Auth page (if password set and not authenticated)
1409
+ if (WEB_PASSWORD && !checkAuth(req) && urlPath !== "/login.html") {
1410
+ res.writeHead(302, { Location: "/login.html" });
1411
+ res.end();
1412
+ return;
1413
+ }
1414
+ // Canvas UI
1415
+ if (urlPath === "/canvas") {
1416
+ const canvasFile = resolve(PUBLIC_DIR, "canvas.html");
1419
1417
  try {
1420
- const content = fs.readFileSync(filePath);
1421
- const ext = path.extname(filePath);
1422
- res.setHeader("Content-Type", MIME[ext] || "application/octet-stream");
1418
+ const content = fs.readFileSync(canvasFile);
1419
+ res.setHeader("Content-Type", "text/html");
1423
1420
  res.end(content);
1424
1421
  }
1425
1422
  catch {
1426
1423
  res.statusCode = 404;
1427
1424
  res.end("Not found");
1428
1425
  }
1429
- });
1426
+ return;
1427
+ }
1428
+ // Static files
1429
+ let filePath = urlPath === "/" ? "/index.html" : urlPath;
1430
+ filePath = resolve(PUBLIC_DIR, filePath.slice(1));
1431
+ // Security: prevent path traversal
1432
+ if (!filePath.startsWith(PUBLIC_DIR)) {
1433
+ res.statusCode = 403;
1434
+ res.end("Forbidden");
1435
+ return;
1436
+ }
1437
+ try {
1438
+ const content = fs.readFileSync(filePath);
1439
+ const ext = path.extname(filePath);
1440
+ res.setHeader("Content-Type", MIME[ext] || "application/octet-stream");
1441
+ res.end(content);
1442
+ }
1443
+ catch {
1444
+ res.statusCode = 404;
1445
+ res.end("Not found");
1446
+ }
1430
1447
  });
1431
- const wss = new WebSocketServer({ server });
1432
- wsServerRef = wss;
1433
- handleWebSocket(wss);
1434
- // Smart port: try WEB_PORT, increment if busy (up to +20)
1435
- const MAX_TRIES = 20;
1436
- function tryListen(port, attempt = 0) {
1437
- server.once("error", (err) => {
1438
- if (err.code === "EADDRINUSE" && attempt < MAX_TRIES) {
1439
- tryListen(port + 1, attempt + 1);
1440
- }
1441
- else {
1442
- console.error(`❌ Web UI failed to start: ${err.message}`);
1443
- }
1448
+ }
1449
+ /**
1450
+ * Kick off the web-UI bind loop. NEVER throws, NEVER blocks.
1451
+ *
1452
+ * History: earlier versions returned an http.Server synchronously and
1453
+ * let listen() errors bubble up as uncaught exceptions — a colleague
1454
+ * flagged this on 2026-04-13 after spending months fighting the exact
1455
+ * same bug on a parallel OpenClaw fork. Their resolution: "the gateway
1456
+ * is a feature, not core. Decouple it."
1457
+ *
1458
+ * New contract:
1459
+ * - Returns `void` immediately. The actual bind happens asynchronously.
1460
+ * - If port 3100 is busy, tries 3101…3119 in sequence (same as before).
1461
+ * - If ALL 20 ports are busy, schedules a background retry at 3100
1462
+ * in `BACKGROUND_RETRY_MS` — keeps trying forever until success
1463
+ * or stopWebServer() is called.
1464
+ * - Any non-EADDRINUSE error also falls through to background retry.
1465
+ * - Each attempt uses a FRESH http.Server to avoid Node's fragile
1466
+ * "listen-called-twice" state-recycling behaviour.
1467
+ * - The main Telegram bot is completely independent of this — if the
1468
+ * web UI never binds, the bot still answers messages.
1469
+ */
1470
+ export function startWebServer() {
1471
+ stopRequested = false;
1472
+ scheduleBindAttempt(WEB_PORT, 0);
1473
+ }
1474
+ function scheduleBindAttempt(port, attempt) {
1475
+ if (stopRequested)
1476
+ return;
1477
+ // Read WEB_PORT live every time rather than closing over the
1478
+ // module-load value, so tests that change process.env.WEB_PORT
1479
+ // between runs see the new port.
1480
+ const originalPort = parseInt(process.env.WEB_PORT || "3100");
1481
+ // Fresh server for each attempt. Recycling a server that has already
1482
+ // emitted an EADDRINUSE error has produced "Listen method has been
1483
+ // called more than once" crashes in the wild.
1484
+ //
1485
+ // IMPORTANT: do NOT attach the WebSocketServer yet. The `ws` library
1486
+ // installs its own event plumbing on the http.Server in its
1487
+ // constructor, which causes bind errors to escape as uncaught
1488
+ // exceptions. We only attach it AFTER listen() has succeeded.
1489
+ const server = http.createServer(handleWebRequest);
1490
+ // Double-invocation guard: on some Node versions `server.listen`
1491
+ // both throws synchronously AND emits an `error` event for the same
1492
+ // bind failure. Without the guard we'd climb the ladder twice in
1493
+ // parallel and end up with two retry cascades racing each other.
1494
+ let handled = false;
1495
+ const cleanupDeadAttempt = () => {
1496
+ try {
1497
+ server.removeAllListeners("error");
1498
+ }
1499
+ catch { /* ignore */ }
1500
+ try {
1501
+ server.close(() => { });
1502
+ }
1503
+ catch { /* ignore */ }
1504
+ };
1505
+ const handleBindFailure = (err) => {
1506
+ if (handled)
1507
+ return;
1508
+ handled = true;
1509
+ cleanupDeadAttempt();
1510
+ if (stopRequested)
1511
+ return;
1512
+ const action = decideNextBindAction(err, attempt, {
1513
+ originalPort,
1514
+ maxPortTries: MAX_PORT_TRIES,
1515
+ backgroundRetryMs: BACKGROUND_RETRY_MS,
1444
1516
  });
1517
+ if (action.type === "retry-port") {
1518
+ console.warn(`[web] port ${port} busy (${err.code || err.message}) — trying ${action.port}`);
1519
+ scheduleBindAttempt(action.port, action.attempt);
1520
+ return;
1521
+ }
1522
+ // action.type === "retry-background"
1523
+ console.warn(`[web] bind failed (${err.code || err.message}) — ` +
1524
+ `backing off ${action.delayMs / 1000}s then retrying port ${action.port}. ` +
1525
+ `Bot is unaffected; Telegram remains live.`);
1526
+ bindRetryTimer = setTimeout(() => {
1527
+ bindRetryTimer = null;
1528
+ scheduleBindAttempt(action.port, 0);
1529
+ }, action.delayMs);
1530
+ };
1531
+ // Use `on` (not `once`) so a pathological server that emits two
1532
+ // error events for a single failure doesn't leave the second one
1533
+ // uncaught. The `handled` guard makes the handler idempotent.
1534
+ server.on("error", handleBindFailure);
1535
+ // Defensive try/catch — `server.listen()` usually emits async errors,
1536
+ // but certain Node versions + edge cases (already-listening server,
1537
+ // invalid backlog, kernel hiccup) can throw synchronously. Catch here
1538
+ // so the main routine never crashes during web-UI bind.
1539
+ try {
1445
1540
  server.listen(port, () => {
1541
+ if (handled)
1542
+ return; // Should be impossible; paranoia.
1543
+ handled = true;
1544
+ // Now — and only now — attach the WebSocketServer. Before the
1545
+ // bind succeeded, the ws library's constructor would hijack the
1546
+ // http.Server's error event chain and let EADDRINUSE escape as
1547
+ // uncaught. Post-bind is safe.
1548
+ const wss = new WebSocketServer({ server });
1549
+ handleWebSocket(wss);
1550
+ currentServer = server;
1551
+ wsServerRef = wss;
1446
1552
  actualWebPort = port;
1553
+ // Remove the bind error handler — post-listen errors (socket
1554
+ // errors, close events) should not kick off a spurious retry
1555
+ // cycle. Install a quiet logger for any stray error events so
1556
+ // they can't escape as uncaught.
1557
+ server.removeListener("error", handleBindFailure);
1558
+ server.on("error", (err) => {
1559
+ console.warn(`[web] post-bind server error (ignored): ${err.message}`);
1560
+ });
1447
1561
  console.log(`🌐 Web UI: http://localhost:${actualWebPort}`);
1448
- if (actualWebPort !== WEB_PORT) {
1449
- console.log(` (Port ${WEB_PORT} was busy, using ${actualWebPort} instead)`);
1562
+ if (actualWebPort !== originalPort) {
1563
+ console.log(` (Port ${originalPort} was busy, using ${actualWebPort} instead)`);
1450
1564
  }
1451
1565
  });
1452
1566
  }
1453
- tryListen(WEB_PORT);
1454
- return server;
1567
+ catch (err) {
1568
+ handleBindFailure(err);
1569
+ }
1455
1570
  }
1456
1571
  /**
1457
- * Gracefully stop the web server so the port is released.
1458
- *
1459
- * Why this exists: `shutdown()` in src/index.ts used to stop grammy and the
1460
- * scheduler but leave the HTTP server listening. macOS then held the
1461
- * listening socket in the socket table, so launchd's next boot of the bot
1462
- * hit `EADDRINUSE :::3100`, threw an Uncaught exception and crash-looped.
1572
+ * Gracefully close a specific http.Server — the low-level building
1573
+ * block. Exported for tests and for any future callers that manage
1574
+ * their own servers. Production bot code uses `stopWebServer()` below
1575
+ * which operates on the module-global current server instead.
1463
1576
  *
1464
1577
  * What this does:
1465
- * 1. Force-close idle keep-alive sockets (otherwise close() hangs on them).
1466
- * 2. Force-close active open requests (long-poll clients, WebSocket
1467
- * upgrades that never completed).
1468
- * 3. Tear down the WebSocket server so its own sockets don't linger.
1469
- * 4. Await `server.close()` so the listening socket is truly released
1470
- * before the caller's shutdown continues.
1578
+ * 1. Force-close idle keep-alive sockets (Node 18.2+).
1579
+ * 2. Force-close active open requests (long-poll clients).
1580
+ * 3. Await `server.close()` so the listening socket is truly freed.
1471
1581
  *
1472
- * Safe to call multiple times; no-op when the server is already closed or
1473
- * never listened. Never throws.
1582
+ * Safe to call on already-closed, never-listened, or mid-listen servers.
1583
+ * Never throws.
1474
1584
  */
1475
- export async function stopWebServer(server) {
1476
- try {
1477
- if (wsServerRef) {
1478
- for (const client of wsServerRef.clients) {
1479
- try {
1480
- client.terminate();
1481
- }
1482
- catch { /* ignore */ }
1483
- }
1484
- await new Promise((resolve) => wsServerRef.close(() => resolve()));
1485
- wsServerRef = null;
1486
- }
1487
- }
1488
- catch { /* ignore */ }
1585
+ export async function closeHttpServerGracefully(server) {
1489
1586
  if (!server.listening)
1490
1587
  return;
1491
1588
  try {
1492
- // Node 18.2+ APIs — break any keep-alive / long-poll stalls so
1493
- // server.close() can actually resolve.
1494
1589
  const s = server;
1495
1590
  if (typeof s.closeIdleConnections === "function")
1496
1591
  s.closeIdleConnections();
@@ -1499,12 +1594,47 @@ export async function stopWebServer(server) {
1499
1594
  }
1500
1595
  catch { /* ignore */ }
1501
1596
  await new Promise((resolve) => {
1502
- // close() callback fires with an Error arg when the server wasn't
1503
- // listening — we just resolve in either case. The caller only cares
1504
- // that the port is free when this awaits.
1505
1597
  server.close(() => resolve());
1506
1598
  });
1507
1599
  }
1600
+ /**
1601
+ * Stop the web server: cancel any pending background-retry, close
1602
+ * WebSocket clients, then gracefully close the HTTP server.
1603
+ *
1604
+ * Idempotent — safe to call multiple times, and safe to call before
1605
+ * startWebServer() ever successfully bound. Never throws.
1606
+ */
1607
+ export async function stopWebServer() {
1608
+ stopRequested = true;
1609
+ // Cancel any pending background-retry timer so a late retry doesn't
1610
+ // grab the port AFTER we thought we'd shut everything down.
1611
+ if (bindRetryTimer) {
1612
+ clearTimeout(bindRetryTimer);
1613
+ bindRetryTimer = null;
1614
+ }
1615
+ // Tear down the WebSocket server first so its sockets can't keep
1616
+ // the underlying http.Server alive.
1617
+ if (wsServerRef) {
1618
+ try {
1619
+ for (const client of wsServerRef.clients) {
1620
+ try {
1621
+ client.terminate();
1622
+ }
1623
+ catch { /* ignore */ }
1624
+ }
1625
+ await new Promise((resolve) => wsServerRef.close(() => resolve()));
1626
+ }
1627
+ catch { /* ignore */ }
1628
+ wsServerRef = null;
1629
+ }
1630
+ if (currentServer) {
1631
+ try {
1632
+ await closeHttpServerGracefully(currentServer);
1633
+ }
1634
+ catch { /* ignore */ }
1635
+ currentServer = null;
1636
+ }
1637
+ }
1508
1638
  /** Get the actual port the Web UI is running on. */
1509
1639
  export function getWebPort() {
1510
1640
  return actualWebPort;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "alvin-bot",
3
- "version": "4.9.2",
3
+ "version": "4.9.4",
4
4
  "description": "Alvin Bot — Your personal AI agent on Telegram, WhatsApp, Discord, Signal, and Web.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Fix #15 (B) — /cron run must give visible feedback during long runs.
3
+ *
4
+ * Regression from production: a 13-minute Daily Job Alert run showed
5
+ * the user ZERO feedback between trigger time and completion. The
6
+ * sub-agent was actually working (and eventually succeeded), but the
7
+ * Telegram chat was silent for the whole duration.
8
+ *
9
+ * This test doesn't exercise grammy directly — it tests the pure
10
+ * helper that drives the live progress message so we can verify the
11
+ * formatting, cadence math, and safety edges in isolation.
12
+ */
13
+ import { describe, it, expect } from "vitest";
14
+ import { formatElapsed, buildTickerText, buildDoneText } from "../src/handlers/cron-progress.js";
15
+
16
+ describe("formatElapsed (Fix #15B)", () => {
17
+ it("formats seconds under a minute", () => {
18
+ expect(formatElapsed(0)).toBe("0s");
19
+ expect(formatElapsed(45)).toBe("45s");
20
+ expect(formatElapsed(59)).toBe("59s");
21
+ });
22
+
23
+ it("formats minutes+seconds above a minute", () => {
24
+ expect(formatElapsed(60)).toBe("1m 0s");
25
+ expect(formatElapsed(61)).toBe("1m 1s");
26
+ expect(formatElapsed(125)).toBe("2m 5s");
27
+ expect(formatElapsed(797)).toBe("13m 17s"); // real prod duration
28
+ });
29
+
30
+ it("formats hours+minutes above 60m", () => {
31
+ expect(formatElapsed(3600)).toBe("1h 0m");
32
+ expect(formatElapsed(3660)).toBe("1h 1m");
33
+ });
34
+ });
35
+
36
+ describe("buildTickerText (Fix #15B)", () => {
37
+ it("shows job name and elapsed time in the running state", () => {
38
+ const text = buildTickerText("Daily Job Alert", 125);
39
+ expect(text).toContain("Daily Job Alert");
40
+ expect(text).toContain("2m 5s");
41
+ expect(text).toMatch(/🔄|running/i);
42
+ });
43
+
44
+ it("escapes markdown-breaking characters in the job name", () => {
45
+ // Underscores and asterisks in job names would otherwise break
46
+ // the Markdown edit and trigger "can't parse entities".
47
+ const text = buildTickerText("weird_job*name", 10);
48
+ expect(text).not.toContain("_job*"); // no raw unescaped asterisk
49
+ // We expect some form of escaping — back-slashes are fine
50
+ expect(text).toMatch(/weird/);
51
+ });
52
+ });
53
+
54
+ describe("buildDoneText (Fix #15B)", () => {
55
+ it("shows green check for a clean completion", () => {
56
+ const text = buildDoneText("Daily Job Alert", 797, { ok: true });
57
+ expect(text).toContain("✅");
58
+ expect(text).toContain("Daily Job Alert");
59
+ expect(text).toContain("13m 17s");
60
+ });
61
+
62
+ it("shows red cross and error excerpt for a failure", () => {
63
+ const text = buildDoneText("Daily Job Alert", 10, {
64
+ ok: false,
65
+ error: "Sub-agent cancelled: timeout",
66
+ });
67
+ expect(text).toContain("❌");
68
+ expect(text).toContain("timeout");
69
+ });
70
+
71
+ it("shows warning for an already-running skip", () => {
72
+ const text = buildDoneText("Daily Job Alert", 0, { ok: true, skipped: true });
73
+ expect(text).toContain("⏳");
74
+ expect(text).toMatch(/already running|in progress/i);
75
+ });
76
+ });
@@ -19,7 +19,7 @@
19
19
  */
20
20
  import { describe, it, expect, beforeEach, vi } from "vitest";
21
21
  import http from "http";
22
- import { stopWebServer } from "../src/web/server.js";
22
+ import { closeHttpServerGracefully as stopWebServer } from "../src/web/server.js";
23
23
  import {
24
24
  handleStartupCatchup,
25
25
  prepareForExecution,
@@ -0,0 +1,147 @@
1
+ /**
2
+ * Fix #15 (A) — subagent-delivery must retry without parse_mode when
3
+ * Telegram rejects the Markdown entities.
4
+ *
5
+ * Real regression: Daily Job Alert banners have been silently failing
6
+ * with "Bad Request: can't parse entities: Can't find end of the entity"
7
+ * every single day since the subagent-delivery module shipped. The
8
+ * result text contains mixed `|`, `**`, `\|`, emoji, and asterisks that
9
+ * Telegram's Markdown parser chokes on. The code currently logs the
10
+ * error and drops the delivery, so the user never sees the banner.
11
+ *
12
+ * Contract: when `sendMessage(..., parse_mode: Markdown)` throws with
13
+ * the "can't parse entities" pattern, retry the SAME text WITHOUT
14
+ * `parse_mode`. Any other error still logs + bails.
15
+ *
16
+ * This file uses a minimal bot-api stub so we can drive both the happy
17
+ * path and the parse-error path deterministically.
18
+ */
19
+ import { describe, it, expect, vi, beforeEach } from "vitest";
20
+ import { deliverSubAgentResult, __setBotApiForTest } from "../src/services/subagent-delivery.js";
21
+ import type { SubAgentInfo, SubAgentResult } from "../src/services/subagents.js";
22
+
23
+ interface Sent {
24
+ chatId: number;
25
+ text: string;
26
+ parseMode?: string;
27
+ }
28
+
29
+ function makeInfo(overrides: Partial<SubAgentInfo> = {}): SubAgentInfo {
30
+ return {
31
+ id: "id-1",
32
+ name: "Daily Job Alert",
33
+ status: "completed",
34
+ startedAt: 0,
35
+ depth: 0,
36
+ source: "cron",
37
+ parentChatId: 42,
38
+ ...overrides,
39
+ };
40
+ }
41
+
42
+ function makeResult(output: string): SubAgentResult {
43
+ return {
44
+ id: "id-1",
45
+ name: "Daily Job Alert",
46
+ status: "completed",
47
+ output,
48
+ tokensUsed: { input: 1000, output: 200 },
49
+ duration: 60_000,
50
+ };
51
+ }
52
+
53
+ beforeEach(() => {
54
+ __setBotApiForTest(null);
55
+ });
56
+
57
+ describe("deliverSubAgentResult Markdown fallback (Fix #15)", () => {
58
+ it("retries without parse_mode when Telegram rejects entity parsing", async () => {
59
+ const sent: Sent[] = [];
60
+ let callCount = 0;
61
+
62
+ __setBotApiForTest({
63
+ sendMessage: async (chatId: number, text: string, opts?: Record<string, unknown>) => {
64
+ callCount++;
65
+ const parseMode = opts?.parse_mode as string | undefined;
66
+ // First call (Markdown) throws the real production error
67
+ if (callCount === 1 && parseMode === "Markdown") {
68
+ const err = Object.assign(
69
+ new Error("Call to 'sendMessage' failed! (400: Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 2636)"),
70
+ {
71
+ description: "Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 2636",
72
+ error_code: 400,
73
+ },
74
+ );
75
+ throw err;
76
+ }
77
+ sent.push({ chatId, text, parseMode });
78
+ return { message_id: 1 };
79
+ },
80
+ sendDocument: async () => ({}),
81
+ });
82
+
83
+ const info = makeInfo();
84
+ const result = makeResult("This **has** | broken markdown \\| entities that fail Markdown parsing");
85
+
86
+ await deliverSubAgentResult(info, result);
87
+
88
+ // Must have retried at least once WITHOUT parse_mode
89
+ const plainAttempt = sent.find((s) => s.parseMode === undefined);
90
+ expect(plainAttempt).toBeDefined();
91
+ expect(plainAttempt?.text).toContain("Daily Job Alert");
92
+ expect(plainAttempt?.text).toContain("broken markdown");
93
+ });
94
+
95
+ it("does NOT retry for non-parse errors (e.g. chat not found)", async () => {
96
+ let callCount = 0;
97
+ __setBotApiForTest({
98
+ sendMessage: async () => {
99
+ callCount++;
100
+ const err = Object.assign(new Error("Forbidden: bot was blocked by the user"), {
101
+ description: "Forbidden: bot was blocked by the user",
102
+ error_code: 403,
103
+ });
104
+ throw err;
105
+ },
106
+ sendDocument: async () => ({}),
107
+ });
108
+
109
+ await deliverSubAgentResult(makeInfo(), makeResult("some text"));
110
+
111
+ // Should have tried once and given up — no retry
112
+ expect(callCount).toBe(1);
113
+ });
114
+
115
+ it("chunked delivery also retries without parse_mode on parse errors", async () => {
116
+ const sent: Sent[] = [];
117
+ let callCount = 0;
118
+
119
+ __setBotApiForTest({
120
+ sendMessage: async (chatId: number, text: string, opts?: Record<string, unknown>) => {
121
+ callCount++;
122
+ const parseMode = opts?.parse_mode as string | undefined;
123
+ // First banner attempt fails — should retry without parse_mode
124
+ if (callCount === 1 && parseMode === "Markdown") {
125
+ const err = Object.assign(
126
+ new Error("400: Bad Request: can't parse entities"),
127
+ { description: "can't parse entities", error_code: 400 },
128
+ );
129
+ throw err;
130
+ }
131
+ sent.push({ chatId, text, parseMode });
132
+ return { message_id: callCount };
133
+ },
134
+ sendDocument: async () => ({}),
135
+ });
136
+
137
+ const info = makeInfo();
138
+ // Large body forces the chunked path
139
+ const result = makeResult("x".repeat(5000));
140
+
141
+ await deliverSubAgentResult(info, result);
142
+
143
+ // At least one plain-text delivery must have landed
144
+ expect(sent.length).toBeGreaterThan(0);
145
+ expect(sent.some((s) => s.parseMode === undefined)).toBe(true);
146
+ });
147
+ });