npm - switchroom - Versions diffs - 0.14.22 → 0.14.23 - Mend

switchroom 0.14.22 → 0.14.23

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/cli/switchroom.js +2 -2
package/package.json +1 -1
package/telegram-plugin/dist/gateway/gateway.js +152 -8
package/telegram-plugin/subagent-watcher.ts +79 -5
package/telegram-plugin/tests/subagent-registry-bugs.test.ts +7 -3
package/telegram-plugin/tests/subagent-watcher-handback-gaps.test.ts +293 -0
package/telegram-plugin/tests/subagent-watcher.test.ts +23 -15

package/dist/cli/switchroom.js CHANGED Viewed

@@ -49420,8 +49420,8 @@ var {
 } = import__.default;
 // src/build-info.ts
-var VERSION = "0.14.22";
-var COMMIT_SHA = "ab2692b9";
+var VERSION = "0.14.23";
+var COMMIT_SHA = "8ac2987a";
 // src/cli/agent.ts
 init_source();

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "switchroom",
-  "version": "0.14.22",
+  "version": "0.14.23",
   "description": "Run Claude Code 24/7 on your Claude Pro/Max subscription over Telegram. Open-source alternative to OpenClaw and NanoClaw — no API keys.",
   "type": "module",
   "bin": {

package/telegram-plugin/dist/gateway/gateway.js CHANGED Viewed

@@ -48827,6 +48827,67 @@ import {
 import { join as join21 } from "path";
 // operator-events.ts
+function classifyClaudeError(raw) {
+  try {
+    return classifyInner(raw);
+  } catch {
+    return "unknown-4xx";
+  }
+}
+function classifyInner(raw) {
+  if (raw == null)
+    return "unknown-4xx";
+  const obj = typeof raw === "object" ? raw : {};
+  const errorType = extractString(obj, "error_type") ?? extractString(obj, "type") ?? extractString(getNestedObj(obj, "error"), "type") ?? "";
+  const errorCode = extractString(obj, "code") ?? extractString(getNestedObj(obj, "error"), "code") ?? "";
+  const message = extractString(obj, "message") ?? extractString(getNestedObj(obj, "error"), "message") ?? (typeof raw === "string" ? raw : "") ?? "";
+  const status = extractNumber(obj, "status") ?? extractNumber(obj, "statusCode") ?? extractNumber(obj, "status_code") ?? null;
+  const sdkCode = extractString(obj, "error_code") ?? "";
+  if (errorType === "authentication_error" || errorCode === "authentication_error" || sdkCode === "authentication_error" || message.toLowerCase().includes("authentication_error")) {
+    const msg = message.toLowerCase();
+    if (msg.includes("expired") || msg.includes("refresh")) {
+      return "credentials-expired";
+    }
+    return "credentials-invalid";
+  }
+  if (errorType === "invalid_api_key" || errorCode === "invalid_api_key" || sdkCode === "invalid_api_key" || message.toLowerCase().includes("invalid_api_key") || message.toLowerCase().includes("invalid api key")) {
+    return "credentials-invalid";
+  }
+  if (errorType === "credit_balance_too_low" || errorCode === "credit_balance_too_low" || sdkCode === "credit_balance_too_low" || message.toLowerCase().includes("credit_balance_too_low") || message.toLowerCase().includes("credit balance")) {
+    return "credit-exhausted";
+  }
+  if (errorType === "rate_limit_error" || errorCode === "rate_limit_error" || sdkCode === "rate_limit_error" || message.toLowerCase().includes("rate_limit_error") || message.toLowerCase().includes("rate limit")) {
+    return "rate-limited";
+  }
+  if (errorType === "overloaded_error" || errorCode === "overloaded_error" || sdkCode === "overloaded_error" || message.toLowerCase().includes("overloaded_error") || message.toLowerCase().includes("overloaded")) {
+    return "rate-limited";
+  }
+  if (errorType === "agent-crashed" || errorCode === "agent-crashed") {
+    return "agent-crashed";
+  }
+  if (errorType === "agent-restarted-unexpectedly" || errorCode === "agent-restarted-unexpectedly") {
+    return "agent-restarted-unexpectedly";
+  }
+  if (status != null) {
+    if (status >= 400 && status < 500)
+      return "unknown-4xx";
+    if (status >= 500 && status < 600)
+      return "unknown-5xx";
+  }
+  return "unknown-4xx";
+}
+function extractString(obj, key) {
+  const v = obj[key];
+  return typeof v === "string" && v.length > 0 ? v : null;
+}
+function extractNumber(obj, key) {
+  const v = obj[key];
+  return typeof v === "number" ? v : null;
+}
+function getNestedObj(obj, key) {
+  const v = obj[key];
+  return typeof v === "object" && v != null ? v : {};
+}
 var DEFAULT_OPERATOR_EVENT_COOLDOWN_MS2 = 5 * 60000;
 var cooldownMap2 = new Map;
@@ -48936,6 +48997,72 @@ function projectSubagentLine(line, agentId, state4) {
   }
   return [];
 }
+function extractRetryState(obj) {
+  return {
+    retryAttempt: typeof obj.retryAttempt === "number" ? obj.retryAttempt : null,
+    maxRetries: typeof obj.maxRetries === "number" ? obj.maxRetries : null
+  };
+}
+function detectErrorInTranscriptLine(line) {
+  if (!line || line.length > 2 * 1024 * 1024)
+    return null;
+  let obj;
+  try {
+    obj = JSON.parse(line);
+  } catch {
+    return null;
+  }
+  if (typeof obj !== "object" || obj == null)
+    return null;
+  const type = obj.type;
+  if (obj.isApiErrorMessage === true) {
+    const status = typeof obj.apiErrorStatus === "number" ? obj.apiErrorStatus : null;
+    const errStr = typeof obj.error === "string" ? obj.error : "";
+    const text = extractAssistantText(obj);
+    const kind2 = status === 429 ? "quota-exhausted" : classifyClaudeError({ type: errStr, status, message: text });
+    return {
+      kind: kind2,
+      raw: obj,
+      detail: text || errStr || "api error",
+      transient: kind2 === "rate-limited",
+      terminal: true
+    };
+  }
+  const isErrorLine = type === "api_error" || type === "error";
+  const embeddedError = typeof obj.error === "object" && obj.error != null ? obj.error : null;
+  if (!isErrorLine && !embeddedError)
+    return null;
+  const raw = embeddedError ?? obj;
+  const kind = classifyClaudeError(embeddedError ?? obj);
+  const detail = extractDetailMessage(embeddedError) ?? extractDetailMessage(obj) ?? String(type ?? "");
+  const transient = kind === "rate-limited";
+  const retry = extractRetryState(obj);
+  const terminal = !transient ? true : retry.retryAttempt != null && retry.maxRetries != null ? retry.retryAttempt >= retry.maxRetries : isErrorLine;
+  return { kind, raw, detail, transient, terminal };
+}
+function extractDetailMessage(obj) {
+  if (!obj)
+    return null;
+  const msg = obj.message;
+  return typeof msg === "string" && msg.length > 0 ? msg : null;
+}
+function extractAssistantText(obj) {
+  const message = obj.message;
+  if (typeof message !== "object" || message == null)
+    return "";
+  const content = message.content;
+  if (!Array.isArray(content))
+    return "";
+  const parts = [];
+  for (const block of content) {
+    if (typeof block === "object" && block != null && block.type === "text") {
+      const t = block.text;
+      if (typeof t === "string")
+        parts.push(t);
+    }
+  }
+  return parts.join(" ").trim();
+}
 // fleet-state.ts
 var SANITISE_MAX_LEN = 120;
@@ -49189,6 +49316,12 @@ function readSubTail(entry, tail, now, onDescriptionUpdate, fs2, log, db2, paren
     for (const line of lines) {
       if (!line)
         continue;
+      const errInfo = detectErrorInTranscriptLine(line);
+      if (errInfo?.terminal) {
+        entry.errored = true;
+        if (errInfo.detail)
+          entry.errorDetail = errInfo.detail.slice(0, SUBAGENT_RESULT_TEXT_MAX);
+      }
       const events = projectSubagentLine(line, entry.agentId, startState);
       for (const ev of events) {
         const idleSecBeforeBump = Math.round((now - entry.lastActivityAt) / 1000);
@@ -49253,7 +49386,7 @@ function readSubTail(entry, tail, now, onDescriptionUpdate, fs2, log, db2, paren
                   recordSubagentEnd(db2, {
                     id: rowRef.id,
                     endedAt: now,
-                    status: "completed"
+                    status: entry.errored ? "failed" : "completed"
                   });
                 }
               } catch (dbErr) {
@@ -49363,6 +49496,17 @@ function startSubagentWatcher(config) {
     readSubTail(entry, tail, n, (desc) => {
       log?.(`subagent-watcher: description updated for ${agentId}: ${desc}`);
     }, fs2, log, db2, parentStateDir, config.onUnstall, undefined, config.onProgress);
+    if (isHistorical && entry.state === "running") {
+      entry.historical = false;
+      log?.(`subagent-watcher: ${agentId} was in-flight at boot \u2014 promoting to live (predates watcher; user still awaiting handback)`);
+      if (db2 != null) {
+        try {
+          backfillJsonlAgentId(db2, filePath, agentId, log);
+        } catch (err) {
+          log?.(`subagent-watcher: backfill error for ${agentId}: ${err.message}`);
+        }
+      }
+    }
     if (isHistorical && entry.state === "done") {
       entry.completionNotified = true;
       scheduleTerminalCleanup(agentId);
@@ -49397,11 +49541,11 @@ function startSubagentWatcher(config) {
           config.onFinish({
             agentId,
             state: entry.state,
-            outcome: entry.historical ? "orphan" : "completed",
+            outcome: entry.errored ? "failed" : entry.historical ? "orphan" : "completed",
             toolCount: entry.toolCount,
             durationMs: nowFn() - entry.dispatchedAt,
             description: entry.description,
-            resultText: entry.lastResultText
+            resultText: entry.errored ? entry.lastResultText || entry.errorDetail || "" : entry.lastResultText
           });
         } catch (cbErr) {
           log?.(`subagent-watcher: onFinish callback error ${agentId}: ${cbErr.message}`);
@@ -49518,7 +49662,7 @@ function startSubagentWatcher(config) {
             recordSubagentEnd(db2, {
               id: rowRef.id,
               endedAt: n,
-              status: "completed"
+              status: entry.errored ? "failed" : "completed"
             });
           }
         } catch (dbErr) {
@@ -51298,10 +51442,10 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
 }
 // ../src/build-info.ts
-var VERSION = "0.14.22";
-var COMMIT_SHA = "ab2692b9";
-var COMMIT_DATE = "2026-05-31T06:26:06Z";
-var LATEST_PR = 2028;
+var VERSION = "0.14.23";
+var COMMIT_SHA = "8ac2987a";
+var COMMIT_DATE = "2026-05-31T22:03:26Z";
+var LATEST_PR = 2031;
 var COMMITS_AHEAD_OF_TAG = 0;
 // gateway/boot-version.ts

package/telegram-plugin/subagent-watcher.ts CHANGED Viewed

@@ -40,7 +40,7 @@ import {
 } from 'fs'
 import { basename, join } from 'path'
 import { homedir } from 'os'
-import { projectSubagentLine, sanitizeCwdToProjectName } from './session-tail.js'
+import { projectSubagentLine, sanitizeCwdToProjectName, detectErrorInTranscriptLine } from './session-tail.js'
 import { sanitiseToolArg } from './fleet-state.js'
 import { escapeHtml, truncate } from './card-format.js'
 import { bumpSubagentActivity, recordSubagentStall, recordSubagentResume, recordSubagentEnd, reapStuckRunningRows } from './registry/subagents-schema.js'
@@ -142,6 +142,21 @@ export interface WorkerEntry {
    * dead, the file is just left over from a prior session.
    */
   historical: boolean
+  /**
+   * True once a TERMINAL error line — a model API failure / quota
+   * exhaustion / crash, NOT an in-flight retry or a routine tool-level
+   * `is_error` result — has been observed in this worker's own
+   * transcript. Drives the `failed` terminal outcome so the handback
+   * tells the user the delegated work did NOT complete, instead of
+   * dressing a dead worker up as `completed`. Classified by
+   * `detectErrorInTranscriptLine` (the same gate the operator-event
+   * path uses), so transient mid-retry errors are excluded.
+   */
+  errored?: boolean
+  /** Human-readable detail from the terminal error line, surfaced in the
+   *  failed handback's "what it reported before failing" slot when the
+   *  worker left no narrative result of its own. */
+  errorDetail?: string
 }
 export interface SubagentWatcherConfig {
@@ -611,6 +626,20 @@ export function readSubTail(
     const startState = { hasEmittedStart: tail.hasEmittedStart }
     for (const line of lines) {
       if (!line) continue
+      // Gap 2 (failure honesty): a terminal error line in the worker's
+      // OWN transcript — a model API failure, quota exhaustion, or crash —
+      // means the worker FAILED, not finished. Reuse the operator-event
+      // classifier: `terminal:true` excludes in-flight retries (a 529 mid-
+      // backoff is `terminal:false`), and tool-level `is_error` results
+      // never reach here (they parse as `sub_agent_tool_result`, which is
+      // routine mid-run noise, not a worker death). The flag persists on
+      // the entry; the terminal transition (real turn_end OR stall
+      // synthesis) reads it to emit `failed` instead of `completed`.
+      const errInfo = detectErrorInTranscriptLine(line)
+      if (errInfo?.terminal) {
+        entry.errored = true
+        if (errInfo.detail) entry.errorDetail = errInfo.detail.slice(0, SUBAGENT_RESULT_TEXT_MAX)
+      }
       const events = projectSubagentLine(line, entry.agentId, startState)
       for (const ev of events) {
         const idleSecBeforeBump = Math.round((now - entry.lastActivityAt) / 1000)
@@ -716,7 +745,10 @@ export function readSubTail(
                   recordSubagentEnd(db, {
                     id: rowRef.id,
                     endedAt: now,
-                    status: 'completed',
+                    // Gap 2: keep the audit row honest — a worker that hit a
+                    // terminal transcript error is `failed`, matching the
+                    // handback outcome computed in maybySendStateTransition.
+                    status: entry.errored ? 'failed' : 'completed',
                   })
                 }
               } catch (dbErr) {
@@ -917,6 +949,34 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
       log?.(`subagent-watcher: description updated for ${agentId}: ${desc}`)
     }, fs, log, db, parentStateDir, config.onUnstall, undefined, config.onProgress)
+    // Gap 1 (restart survival): a file still RUNNING at boot is a LIVE
+    // worker that predates this watcher — typically one dispatched in a
+    // prior gateway life and still in-flight across a restart / fleet
+    // rollout, NOT a stale already-finished file. `historical` must
+    // suppress replay only for done-at-boot files; an in-flight-at-boot
+    // worker the user is still waiting on must get full live treatment:
+    // progress nudges, the stall-synthesis safety net (checkStalls skips
+    // historical entries), and a real `completed`/`failed` handback rather
+    // than a dropped `orphan`. Promote it to a live entry here. (A file
+    // already `done` at boot stays historical and is short-circuited just
+    // below — it finished before this session.)
+    if (isHistorical && entry.state === 'running') {
+      entry.historical = false
+      log?.(`subagent-watcher: ${agentId} was in-flight at boot — promoting to live (predates watcher; user still awaiting handback)`)
+      // The prior gateway life's registration normally linked
+      // jsonl_agent_id already, but re-run the backfill idempotently in
+      // case that life crashed before the link persisted — the handback's
+      // isBackground lookup is keyed on jsonl_agent_id, and an unlinked row
+      // would mis-resolve the worker as foreground and drop the handback.
+      if (db != null) {
+        try {
+          backfillJsonlAgentId(db, filePath, agentId, log)
+        } catch (err) {
+          log?.(`subagent-watcher: backfill error for ${agentId}: ${(err as Error).message}`)
+        }
+      }
+    }
     // If the JSONL already contained a turn_end at registration time
     // (file written-then-watched), fire the state-transition + completion
     // notification now. Otherwise the FSWatcher callback handles it on
@@ -980,11 +1040,22 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
           config.onFinish({
             agentId,
             state: entry.state,
-            outcome: entry.historical ? 'orphan' : 'completed',
+            // Gap 2: a terminal error observed in the transcript wins over
+            // the completed/orphan classification — a worker that crashed
+            // is `failed`, even if it later wrote a turn_end or aged into
+            // stall synthesis. `orphan` remains for genuinely stale
+            // done-at-boot rows (which never reach this path; see
+            // registerAgent's short-circuit + Gap 1 promotion).
+            outcome: entry.errored ? 'failed' : entry.historical ? 'orphan' : 'completed',
             toolCount: entry.toolCount,
             durationMs: nowFn() - entry.dispatchedAt,
             description: entry.description,
-            resultText: entry.lastResultText,
+            // For a failure, fall back to the error detail when the worker
+            // left no narrative of its own — so the handback's "what it
+            // reported before failing" slot is never empty on a crash.
+            resultText: entry.errored
+              ? entry.lastResultText || entry.errorDetail || ''
+              : entry.lastResultText,
           })
         } catch (cbErr) {
           log?.(`subagent-watcher: onFinish callback error ${agentId}: ${(cbErr as Error).message}`)
@@ -1151,7 +1222,10 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
             recordSubagentEnd(db, {
               id: rowRef.id,
               endedAt: n,
-              status: 'completed',
+              // Gap 2: a worker that hit a terminal transcript error before
+              // going silent is `failed`, not `completed` — keep the audit
+              // row consistent with the handback outcome.
+              status: entry.errored ? 'failed' : 'completed',
             })
           }
         } catch (dbErr) {

package/telegram-plugin/tests/subagent-registry-bugs.test.ts CHANGED Viewed

@@ -624,13 +624,17 @@ describe('Bug 3 — stalled-row sweeper: watcher must call recordSubagentStall i
     h.watcher.stop()
   })
-  it('does not call stall for historical entries (pre-existing at boot)', () => {
+  it('does not call stall for historical (done-at-boot) entries', () => {
+    // A worker that already FINISHED before boot (turn_end present) stays
+    // historical and must not write stall rows. A still-RUNNING file at
+    // boot is a different case — Gap 1 promotes it to live so it DOES get
+    // the stall safety net (covered in subagent-watcher-handback-gaps).
     const agentDir = '/home/user/.switchroom/agents/myagent'
     const subagentsDir = `${agentDir}/.claude/projects/p1/session-abc/subagents`
     const jsonlStem = 'hist-agent'
     const toolUseId = 'toolu_hist001'
     const jsonlPath = `${subagentsDir}/agent-${jsonlStem}.jsonl`
-    const content = buildJSONL(subAgentUserMsg('Old task'))
+    const content = buildJSONL(subAgentUserMsg('Old task'), subAgentTurnDuration())
     const db = makeInMemoryDb({
       [toolUseId]: { id: toolUseId, jsonl_agent_id: jsonlStem, status: 'running' },
@@ -648,7 +652,7 @@ describe('Bug 3 — stalled-row sweeper: watcher must call recordSubagentStall i
       db,
     })
-    // Do NOT flip historical — entry is historical by default (file at boot)
+    // Done-at-boot → stays historical (not promoted); no stall write fires.
     h.advance(65_000)
     const stallDbCalls = db._calls.filter(

package/telegram-plugin/tests/subagent-watcher-handback-gaps.test.ts ADDED Viewed

@@ -0,0 +1,293 @@
+/**
+ * Tests for the two background-worker handback gaps closed in
+ * `fix/subagent-handback-restart-and-failure`:
+ *
+ *  Gap 1 — restart survival. A background worker that is in-flight when
+ *    the gateway restarts is discovered by the boot scan and tagged
+ *    `historical`. That flag is meant to suppress replay for workers that
+ *    ALREADY finished before boot — but it was also applied to workers
+ *    still running, which then completed with outcome `orphan`, and the
+ *    handback gate drops `orphan`. Net: dispatched worker + any gateway
+ *    bounce (incl. a fleet rollout) + worker finishes = user never told.
+ *    Fix: a file still `running` at boot is promoted to a LIVE entry, so
+ *    it gets the stall-synthesis safety net and a real `completed`/`failed`
+ *    handback. A file already `done` at boot stays suppressed.
+ *
+ *  Gap 2 — failure honesty. The `failed` outcome was dead code (no caller
+ *    set it), so every dead worker was reported `completed`. Fix: a
+ *    TERMINAL error line in the worker's own transcript (model API failure
+ *    / quota exhaustion / crash — not an in-flight retry, not a routine
+ *    tool-level is_error) flips the terminal outcome to `failed` and
+ *    carries the error detail into the handback result.
+ */
+import { describe, it, expect, vi } from 'vitest'
+import { startSubagentWatcher } from '../subagent-watcher.js'
+import * as fs from 'fs'
+function buildJSONL(...lines: object[]): string {
+  return lines.map((l) => JSON.stringify(l)).join('\n') + '\n'
+}
+function subAgentUserMsg(promptText: string) {
+  return { type: 'user', message: { content: [{ type: 'text', text: promptText }] } }
+}
+function subAgentText(text: string) {
+  return { type: 'assistant', message: { content: [{ type: 'text', text }] } }
+}
+function subAgentTurnEnd() {
+  return { type: 'system', subtype: 'turn_duration', duration_ms: 1234 }
+}
+// A terminal error line in the worker's OWN transcript — the model call
+// itself failed (here an invalid_request_error). `detectErrorInTranscriptLine`
+// classifies an explicit `type:"error"` line with a non-rate-limit kind as
+// terminal:true.
+function subAgentTerminalError(message: string) {
+  return { type: 'error', error: { type: 'invalid_request_error', message } }
+}
+// A routine mid-run tool failure (e.g. a grep that found nothing). This is a
+// `sub_agent_tool_result` with is_error — NOT a worker death. Must NOT trip
+// the failed classification.
+function subAgentToolResultError() {
+  return {
+    type: 'user',
+    message: {
+      content: [{ type: 'tool_result', tool_use_id: 'toolu_x', is_error: true, content: 'no matches found' }],
+    },
+  }
+}
+interface FinishCall {
+  agentId: string
+  outcome: string
+  resultText: string
+}
+interface Harness {
+  stallTerminalCalls: Array<{ agentId: string }>
+  finishCalls: FinishCall[]
+  logs: string[]
+  advance: (ms: number) => void
+  watcher: ReturnType<typeof startSubagentWatcher>
+  fileContents: Map<string, Buffer>
+  jsonlPath: string
+  append: (...lines: object[]) => void
+}
+function makeHarness(opts: {
+  agentId?: string
+  /** Lines present in the JSONL at boot (before the watcher starts). */
+  bootLines: object[]
+  stallThresholdMs?: number
+  silentStallTerminalMs?: number
+  rescanMs?: number
+}): Harness {
+  const {
+    agentId = 'gap-agent',
+    bootLines,
+    stallThresholdMs = 60_000,
+    silentStallTerminalMs = 300_000,
+    rescanMs = 500,
+  } = opts
+  let currentTime = 1000
+  const stallTerminalCalls: Array<{ agentId: string }> = []
+  const finishCalls: FinishCall[] = []
+  const logs: string[] = []
+  const agentDir = '/home/user/.switchroom/agents/myagent'
+  const sessionId = 'mock-session'
+  const projectsRoot = `${agentDir}/.claude/projects`
+  const projectDir = `${projectsRoot}/mock-cwd`
+  const sessionDir = `${projectDir}/${sessionId}`
+  const subagentsDir = `${sessionDir}/subagents`
+  const jsonlPath = `${subagentsDir}/agent-${agentId}.jsonl`
+  const fileContents = new Map<string, Buffer>()
+  fileContents.set(jsonlPath, Buffer.from(buildJSONL(...bootLines), 'utf-8'))
+  let lastOpenedPath: string | null = null
+  const mockFs = {
+    existsSync: ((p: fs.PathLike) => {
+      const ps = String(p)
+      if (ps === projectsRoot || ps === projectDir || ps === sessionDir || ps === subagentsDir) return true
+      if (fileContents.has(ps)) return true
+      return false
+    }) as typeof fs.existsSync,
+    readdirSync: ((p: fs.PathLike) => {
+      const ps = String(p)
+      if (ps === projectsRoot) return ['mock-cwd']
+      if (ps === projectDir) return [sessionId]
+      if (ps === sessionDir) return ['subagents']
+      if (ps === subagentsDir) return [`agent-${agentId}.jsonl`]
+      return []
+    }) as unknown as typeof fs.readdirSync,
+    statSync: ((p: fs.PathLike) => ({ size: fileContents.get(String(p))?.length ?? 0 }) as fs.Stats) as typeof fs.statSync,
+    openSync: ((p: fs.PathLike) => {
+      lastOpenedPath = String(p)
+      return 42
+    }) as unknown as typeof fs.openSync,
+    closeSync: (() => { lastOpenedPath = null }) as typeof fs.closeSync,
+    readSync: ((
+      _fd: number,
+      buf: NodeJS.ArrayBufferView,
+      offset: number,
+      length: number,
+      position: number | null,
+    ): number => {
+      const content = lastOpenedPath != null ? fileContents.get(lastOpenedPath) : undefined
+      if (!content) return 0
+      const pos = position ?? 0
+      const src = content.slice(pos, pos + length)
+      ;(src as Buffer).copy(buf as Buffer, offset)
+      return src.length
+    }) as unknown as typeof fs.readSync,
+    watch: (() => ({ close: vi.fn() }) as unknown as fs.FSWatcher) as unknown as typeof fs.watch,
+  }
+  const intervals: Array<{ fn: () => void; ms: number; ref: number; fireAt: number }> = []
+  let nextRef = 1
+  const watcher = startSubagentWatcher({
+    agentDir,
+    stallThresholdMs,
+    silentSynthesisStallThresholdMs: stallThresholdMs,
+    silentStallTerminalMs,
+    rescanMs,
+    onStallTerminal: (id) => stallTerminalCalls.push({ agentId: id }),
+    onFinish: ({ agentId: id, outcome, resultText }) =>
+      finishCalls.push({ agentId: id, outcome, resultText }),
+    now: () => currentTime,
+    setInterval: (fn, ms) => {
+      const ref = nextRef++
+      intervals.push({ fn, ms, ref, fireAt: currentTime + ms })
+      return { ref }
+    },
+    clearInterval: (handle) => {
+      const { ref } = handle as { ref: number }
+      const idx = intervals.findIndex((i) => i.ref === ref)
+      if (idx !== -1) intervals.splice(idx, 1)
+    },
+    fs: mockFs,
+    log: (msg) => logs.push(msg),
+  })
+  const advance = (ms: number): void => {
+    currentTime += ms
+    for (;;) {
+      intervals.sort((a, b) => a.fireAt - b.fireAt)
+      const next = intervals[0]
+      if (!next || next.fireAt > currentTime) break
+      next.fireAt += next.ms
+      next.fn()
+    }
+  }
+  const append = (...lines: object[]): void => {
+    const cur = fileContents.get(jsonlPath) ?? Buffer.alloc(0)
+    const more = buildJSONL(...lines)
+    fileContents.set(jsonlPath, Buffer.concat([cur, Buffer.from(more, 'utf-8')]))
+  }
+  return { stallTerminalCalls, finishCalls, logs, advance, watcher, fileContents, jsonlPath, append }
+}
+describe('Gap 1 — background worker in-flight across a gateway restart', () => {
+  it('an in-flight-at-boot worker that completes hands back as completed (not orphan)', () => {
+    // Boot scan finds a running worker (prompt, no turn_end yet) → tagged
+    // historical. The fix promotes it to live. When it finishes under our
+    // watch, the outcome must be `completed` so the handback delivers.
+    const h = makeHarness({ agentId: 'gap1-complete', bootLines: [subAgentUserMsg('bg task')] })
+    // The worker finishes after the restart.
+    h.append(subAgentText('Found the root cause in auth.ts'), subAgentTurnEnd())
+    h.advance(600) // one poll reads the new bytes
+    expect(h.finishCalls).toHaveLength(1)
+    expect(h.finishCalls[0].agentId).toBe('gap1-complete')
+    expect(h.finishCalls[0].outcome).toBe('completed') // pre-fix: 'orphan' → dropped
+    expect(h.finishCalls[0].resultText).toContain('root cause')
+    // The promotion is logged so the path is observable in prod.
+    expect(h.logs.some((l) => l.includes('in-flight at boot — promoting to live'))).toBe(true)
+  })
+  it('an in-flight-at-boot worker that dies silently is rescued by stall synthesis', () => {
+    // Pre-fix, historical entries were skipped by stall detection, so a
+    // worker that crossed a restart and then went silent sat running
+    // forever — no handback ever. After promotion it gets the safety net.
+    const h = makeHarness({
+      agentId: 'gap1-silent',
+      bootLines: [subAgentUserMsg('bg task')],
+      stallThresholdMs: 60_000,
+      silentStallTerminalMs: 120_000,
+    })
+    h.advance(62_000) // stall threshold crossed
+    expect(h.stallTerminalCalls).toHaveLength(0)
+    h.advance(121_000) // silent-stall terminal window elapses → synthesis
+    expect(h.stallTerminalCalls).toHaveLength(1)
+    expect(h.finishCalls).toHaveLength(1)
+    expect(h.finishCalls[0].outcome).toBe('completed')
+  })
+  it('a worker already DONE at boot stays suppressed (no spurious replay)', () => {
+    // The legitimate use of `historical`: a worker that finished in a prior
+    // session must NOT re-fire a handback on every restart. This is the
+    // regression guard for the fix.
+    const h = makeHarness({
+      agentId: 'gap1-stale',
+      bootLines: [subAgentUserMsg('bg task'), subAgentText('done long ago'), subAgentTurnEnd()],
+    })
+    h.advance(600)
+    h.advance(600_000) // well past any stall window
+    expect(h.finishCalls).toHaveLength(0)
+    expect(h.stallTerminalCalls).toHaveLength(0)
+  })
+})
+describe('Gap 2 — failure honesty', () => {
+  it('a terminal error line flips the outcome to failed and carries the detail', () => {
+    const h = makeHarness({ agentId: 'gap2-failed', bootLines: [subAgentUserMsg('bg task')] })
+    // The worker's model call errors out, then the transcript ends.
+    h.append(subAgentTerminalError('tool input rejected by the API'), subAgentTurnEnd())
+    h.advance(600)
+    expect(h.finishCalls).toHaveLength(1)
+    expect(h.finishCalls[0].outcome).toBe('failed')
+    // No narrative was emitted, so the detail backfills the result slot.
+    expect(h.finishCalls[0].resultText).toContain('tool input rejected')
+  })
+  it('a failed worker that went silent still synthesises terminal as failed', () => {
+    const h = makeHarness({
+      agentId: 'gap2-failed-silent',
+      bootLines: [subAgentUserMsg('bg task')],
+      stallThresholdMs: 60_000,
+      silentStallTerminalMs: 120_000,
+    })
+    // Error line, then the worker goes silent (no turn_end).
+    h.append(subAgentTerminalError('worker process crashed'))
+    h.advance(600) // read the error line
+    h.advance(62_000) // stall
+    h.advance(121_000) // synthesis
+    expect(h.stallTerminalCalls).toHaveLength(1)
+    expect(h.finishCalls).toHaveLength(1)
+    expect(h.finishCalls[0].outcome).toBe('failed')
+    expect(h.finishCalls[0].resultText).toContain('crashed')
+  })
+  it('a routine mid-run tool error does NOT cause a false failure', () => {
+    const h = makeHarness({ agentId: 'gap2-toolerr', bootLines: [subAgentUserMsg('bg task')] })
+    // A tool_result with is_error (e.g. grep found nothing) mid-run, then
+    // the worker recovers and completes normally.
+    h.append(subAgentToolResultError(), subAgentText('Completed after a retry'), subAgentTurnEnd())
+    h.advance(600)
+    expect(h.finishCalls).toHaveLength(1)
+    expect(h.finishCalls[0].outcome).toBe('completed') // NOT failed
+    expect(h.finishCalls[0].resultText).toContain('Completed after a retry')
+  })
+})

package/telegram-plugin/tests/subagent-watcher.test.ts CHANGED Viewed

@@ -693,18 +693,21 @@ describe('startSubagentWatcher', () => {
     h.watcher.stop()
   })
-  it('suppresses stall notifications for historical entries', () => {
-    // Historical entries (file existed at watcher boot) must NOT fire
-    // stall notifications. The sub-agent process is long dead; the file
-    // is just left over from a prior session. With many historicals
-    // present at restart, firing stalls for each would flood the chat.
+  it('suppresses stall notifications for historical (done-at-boot) entries', () => {
+    // A worker that already FINISHED before the watcher booted (turn_end
+    // present in the file) stays historical and must NOT fire stall
+    // notifications. With months of finished session history present at
+    // restart, firing stalls for each would flood the chat. NOTE: a worker
+    // still RUNNING at boot is a different case — Gap 1 promotes it to live
+    // so it DOES get the stall safety net (it's an in-flight worker the
+    // user is still awaiting), covered in subagent-watcher-handback-gaps.
     const agentDir = '/home/user/.switchroom/agents/myagent'
     const projectsRoot = `${agentDir}/.claude/projects`
     const projectDir = `${projectsRoot}/myproject`
     const sessionDir = `${projectDir}/session-abc123`
     const subagentsDir = `${sessionDir}/subagents`
     const jsonlPath = `${subagentsDir}/agent-deadbeef.jsonl`
-    const content = buildJSONL(subAgentUserMsg('Old task'))
+    const content = buildJSONL(subAgentUserMsg('Old task'), subAgentTurnDuration())
     const h = makeHarness({
       agentDir,
@@ -809,12 +812,15 @@ describe('startSubagentWatcher', () => {
   describe('historical-vs-active filter', () => {
     /**
-     * Pre-existing JSONL files at watcher boot are tagged historical=true.
-     * Stalls and completion notifications are gated on !historical so a
-     * restart with months of session history doesn't flood the chat.
+     * Pre-existing FINISHED (done-at-boot) JSONL files are tagged
+     * historical=true. Stalls and completion notifications are gated on
+     * !historical so a restart with months of session history doesn't
+     * flood the chat. (A still-RUNNING file at boot is promoted to live by
+     * Gap 1 — see subagent-watcher-handback-gaps — so it must carry a
+     * turn_end here to stay historical.)
      */
-    it('pre-existing JSONL files at startup are tagged historical', () => {
+    it('pre-existing done-at-boot JSONL files are tagged historical', () => {
       const agentDir = '/home/user/.switchroom/agents/myagent'
       const projectsRoot = `${agentDir}/.claude/projects`
       const projectDir = `${projectsRoot}/myproject`
@@ -823,7 +829,7 @@ describe('startSubagentWatcher', () => {
       const jsonlA = `${subagentsDir}/agent-hist-aaaa.jsonl`
       const jsonlB = `${subagentsDir}/agent-hist-bbbb.jsonl`
-      const content = buildJSONL(subAgentUserMsg('Old task'))
+      const content = buildJSONL(subAgentUserMsg('Old task'), subAgentTurnDuration())
       const h = makeHarness({
         agentDir,
@@ -895,10 +901,12 @@ describe('startSubagentWatcher', () => {
     })
     it('pre-existing in-flight agent that finishes after restart fires completion', () => {
-      // Historical at boot. Then writes turn_end. Completion notification
-      // still fires for the state transition (the file was in-flight at
-      // boot, so the transition is meaningful even if the entry is tagged
-      // historical for stall-suppression purposes).
+      // Running at boot → Gap 1 promotes it to live (historical=false),
+      // because it's an in-flight worker the user is still awaiting across
+      // the restart. When it then writes turn_end, the completion
+      // notification fires for the state transition. (The deeper handback
+      // outcome — completed, not the dropped `orphan` — is covered in
+      // subagent-watcher-handback-gaps.)
       const agentDir = '/home/user/.switchroom/agents/myagent'
       const projectsRoot = `${agentDir}/.claude/projects`
       const projectDir = `${projectsRoot}/myproject`