npm - switchroom - Versions diffs - 0.14.21 → 0.14.23 - Mend

switchroom 0.14.21 → 0.14.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43)

package/dist/agent-scheduler/index.js +0 -1
package/dist/auth-broker/index.js +0 -1
package/dist/cli/notion-write-pretool.mjs +0 -1
package/dist/cli/switchroom.js +14 -6
package/dist/host-control/main.js +0 -1
package/dist/vault/approvals/kernel-server.js +0 -1
package/dist/vault/broker/server.js +0 -1
package/package.json +3 -3
package/profiles/_base/start.sh.hbs +11 -24
package/profiles/_shared/telegram-style.md.hbs +2 -2
package/profiles/default/CLAUDE.md.hbs +4 -1
package/skills/switchroom-runtime/SKILL.md +6 -16
package/telegram-plugin/agent-dir.ts +15 -0
package/telegram-plugin/dist/gateway/gateway.js +788 -513
package/telegram-plugin/gateway/gateway.ts +216 -61
package/telegram-plugin/gateway/inbound-spool.ts +15 -0
package/telegram-plugin/gateway/resume-inbound-builder.ts +180 -0
package/telegram-plugin/registry/turns-schema.ts +138 -33
package/telegram-plugin/stream-reply-handler.ts +1 -11
package/telegram-plugin/subagent-watcher.ts +79 -5
package/telegram-plugin/tests/agent-dir.test.ts +25 -0
package/telegram-plugin/tests/e2e.test.ts +2 -77
package/telegram-plugin/tests/inbound-spool.test.ts +45 -0
package/telegram-plugin/tests/multi-turn-continuity.test.ts +0 -1
package/telegram-plugin/tests/outbound-ordering.test.ts +0 -1
package/telegram-plugin/tests/parse-mode-rotation.test.ts +0 -1
package/telegram-plugin/tests/races.test.ts +0 -26
package/telegram-plugin/tests/registry-turns.test.ts +106 -29
package/telegram-plugin/tests/resume-inbound-builder.test.ts +182 -0
package/telegram-plugin/tests/status-accent.test.ts +0 -1
package/telegram-plugin/tests/stream-reply-error-paths.test.ts +0 -1
package/telegram-plugin/tests/stream-reply-handler.test.ts +0 -24
package/telegram-plugin/tests/streaming-e2e.test.ts +0 -1
package/telegram-plugin/tests/streaming-orchestration.test.ts +0 -1
package/telegram-plugin/tests/subagent-registry-bugs.test.ts +7 -3
package/telegram-plugin/tests/subagent-watcher-handback-gaps.test.ts +293 -0
package/telegram-plugin/tests/subagent-watcher.test.ts +23 -15
package/telegram-plugin/tests/tool-activity-summary.test.ts +44 -0
package/telegram-plugin/tests/turns-writer.test.ts +16 -6
package/telegram-plugin/tool-activity-summary.ts +55 -0
package/telegram-plugin/uat/driver.ts +3 -1
package/telegram-plugin/handoff-continuity.ts +0 -206
package/telegram-plugin/tests/handoff-continuity.test.ts +0 -262

package/telegram-plugin/registry/turns-schema.ts CHANGED Viewed

@@ -28,11 +28,12 @@
  *     updated_at            INTEGER NOT NULL
  *
  * Boot-time usage:
- *   On every gateway boot, call `markOrphanedAsRestarted(db)` immediately
- *   after opening the DB. Any turn with `ended_at IS NULL` was killed
- *   mid-flight (SIGKILL, OOM, power loss) — it never got a chance to write
- *   a clean-shutdown marker. Stage 3 of simplify-restart will wire this up
- *   from the gateway entry point.
+ *   On every gateway boot, call `markOrphanedWithTimeoutClassification(db, …)`
+ *   immediately after opening the DB. Any turn with `ended_at IS NULL` was
+ *   killed mid-flight (SIGKILL, OOM, power loss, operator restart) — it never
+ *   got a chance to write a clean-shutdown marker. The classifier stamps the
+ *   in-flight turn `'timeout'` when its hang-marker is stale and `'restart'`
+ *   otherwise; the gateway then resumes or reports accordingly.
  */
 import { chmodSync, mkdirSync } from 'fs'
@@ -98,6 +99,15 @@ export interface Turn {
   user_prompt_preview: string | null
   assistant_reply_preview: string | null
   tool_call_count: number | null
+  /**
+   * Forensic snapshot persisted by the boot-time classifier when a turn is
+   * stamped `ended_via='timeout'` (the hang-watchdog window elapsed with no
+   * tool progress). Carries the idle duration so a *later* boot can rebuild
+   * the watchdog-report inbound after the on-disk turn-active marker — the
+   * only live source of the idle age — has already been swept. Null for
+   * cleanly-restarted (`'restart'`) orphans.
+   */
+  interrupt_reason: string | null
   created_at: number
   updated_at: number
 }
@@ -137,6 +147,7 @@ const SCHEMA_SQL = `
     user_prompt_preview     TEXT,
     assistant_reply_preview TEXT,
     tool_call_count         INTEGER,
+    interrupt_reason        TEXT,
     created_at              INTEGER NOT NULL,
     updated_at              INTEGER NOT NULL
   );
@@ -151,13 +162,21 @@ const PHASE1_MIGRATIONS = [
   `ALTER TABLE turns ADD COLUMN tool_call_count INTEGER`,
 ]
+// Column added for honest-restart-resume. Persists the idle snapshot the
+// boot classifier captures when stamping a turn 'timeout' (see
+// `markOrphanedWithTimeoutClassification`).
+const PHASE2_MIGRATIONS = [
+  `ALTER TABLE turns ADD COLUMN interrupt_reason TEXT`,
+]
 function applySchema(db: SqliteDatabase): void {
   db.exec('PRAGMA journal_mode = WAL')
   db.exec('PRAGMA synchronous = NORMAL')
   db.exec(SCHEMA_SQL)
-  // Run migrations for Phase 1 columns. SQLite doesn't support
-  // "ADD COLUMN IF NOT EXISTS", so we swallow the "duplicate column" error.
-  for (const sql of PHASE1_MIGRATIONS) {
+  // Run migrations. SQLite doesn't support "ADD COLUMN IF NOT EXISTS", so
+  // we swallow the "duplicate column" error to stay idempotent on
+  // pre-existing registry.db files.
+  for (const sql of [...PHASE1_MIGRATIONS, ...PHASE2_MIGRATIONS]) {
     try {
       db.exec(sql)
     } catch (err) {
@@ -225,6 +244,7 @@ interface RawTurnRow {
   user_prompt_preview: string | null
   assistant_reply_preview: string | null
   tool_call_count: number | null
+  interrupt_reason: string | null
   created_at: number
   updated_at: number
 }
@@ -244,6 +264,7 @@ function mapRow(row: RawTurnRow): Turn {
     user_prompt_preview: row.user_prompt_preview,
     assistant_reply_preview: row.assistant_reply_preview,
     tool_call_count: row.tool_call_count,
+    interrupt_reason: row.interrupt_reason,
     created_at: row.created_at,
     updated_at: row.updated_at,
   }
@@ -283,7 +304,7 @@ export function recordTurnStart(db: SqliteDatabase, args: RecordTurnStartArgs):
  * tool-call count.
  *
  * No-ops gracefully if `turnKey` is not found (turn may have already been
- * swept by `markOrphanedAsRestarted` on a prior boot).
+ * swept by `markOrphanedWithTimeoutClassification` on a prior boot).
  */
 export function recordTurnEnd(db: SqliteDatabase, args: RecordTurnEndArgs): void {
   const now = Date.now()
@@ -327,27 +348,96 @@ export function findOrphanedTurns(db: SqliteDatabase, chatId: string): Turn[] {
   return rows.map(mapRow)
 }
+export interface OrphanClassifyOpts {
+  /**
+   * `turnKey` from the on-disk `turn-active.json` marker — the single
+   * in-flight turn the hang-watchdog tracks. Null when no marker is
+   * present at boot (the previous process exited cleanly between turns).
+   */
+  markerTurnKey?: string | null
+  /**
+   * Age in ms of the `turn-active.json` marker's mtime at boot, or null
+   * when no marker is present. The marker's mtime is bumped on every
+   * tool_use, so this is "ms since the last observable progress" of the
+   * in-flight turn.
+   */
+  markerAgeMs?: number | null
+  /**
+   * Hang-watchdog threshold in ms (`TURN_HANG_SECS * 1000`, default
+   * 300_000). A marker older than this means the in-flight turn made no
+   * tool progress for at least the watchdog window — i.e. it was (or,
+   * under Docker where the watchdog is disabled, *would have been*)
+   * killed as a hang rather than cleanly restarted. That distinction is
+   * the whole point: a hung turn is reported, a live one is resumed.
+   */
+  hangThresholdMs: number
+  /**
+   * Opaque snapshot persisted to `interrupt_reason` for the
+   * timeout-classified turn so a later boot can rebuild the watchdog
+   * report after the marker has been swept.
+   */
+  reasonSnapshot?: string | null
+  /** Injectable clock for tests. */
+  now?: number
+}
+export interface OrphanClassifyResult {
+  /** Total rows stamped (timeout + restart). */
+  reaped: number
+  /** turn_key stamped 'timeout', or null if none qualified as a hang. */
+  timeoutTurnKey: string | null
+}
 /**
- * Boot-time reaper. Sweeps ALL turns (across all chats) that have
- * `ended_at IS NULL` and stamps them with `ended_via = 'restart'` and
- * `ended_at = now()`.
+ * Boot-time reaper + classifier. Sweeps ALL turns with `ended_at IS NULL`
+ * (killed mid-flight: SIGKILL / OOM / hard reboot / operator restart) and
+ * stamps an `ended_via`:
  *
- * Call this once, immediately after `openTurnsDb`, before any new turns
- * are recorded for the current boot. That way the current boot's turns
- * are cleanly separable from orphans inherited from the prior process.
+ *   - the in-flight turn (matched by `markerTurnKey`) is stamped
+ *     `'timeout'` IFF its marker is older than `hangThresholdMs` — it
+ *     stalled with no tool progress for the full watchdog window, so it's
+ *     reported-not-resumed; its `interrupt_reason` carries `reasonSnapshot`.
+ *   - every other open turn (and the in-flight one when it was making
+ *     progress) is stamped `'restart'` — a clean interrupt, eligible for
+ *     blanket resume.
  *
- * Returns the number of rows updated.
+ * Call this once immediately after `openTurnsDb`, BEFORE any new turns are
+ * recorded for the current boot, and BEFORE the turn-active marker is
+ * swept (the classifier needs the marker's mtime).
  */
-export function markOrphanedAsRestarted(db: SqliteDatabase): number {
-  const now = Date.now()
-  const result = db.prepare(`
+export function markOrphanedWithTimeoutClassification(
+  db: SqliteDatabase,
+  opts: OrphanClassifyOpts,
+): OrphanClassifyResult {
+  const now = opts.now ?? Date.now()
+  const isHang =
+    opts.markerAgeMs != null &&
+    opts.markerAgeMs >= opts.hangThresholdMs &&
+    opts.markerTurnKey != null &&
+    opts.markerTurnKey.length > 0
+  let timeoutTurnKey: string | null = null
+  if (isHang) {
+    const r = db.prepare(`
+      UPDATE turns
+      SET ended_at         = ?,
+          ended_via        = 'timeout',
+          interrupt_reason = ?,
+          updated_at       = ?
+      WHERE turn_key = ? AND ended_at IS NULL
+    `).run(now, opts.reasonSnapshot ?? null, now, opts.markerTurnKey) as { changes: number }
+    if (r.changes > 0) timeoutTurnKey = opts.markerTurnKey ?? null
+  }
+  const rest = db.prepare(`
     UPDATE turns
     SET ended_at   = ?,
         ended_via  = 'restart',
         updated_at = ?
     WHERE ended_at IS NULL
   `).run(now, now) as { changes: number }
-  return result.changes
+  return { reaped: (timeoutTurnKey ? 1 : 0) + rest.changes, timeoutTurnKey }
 }
 /**
@@ -392,26 +482,41 @@ export function listTurnsForAgent(
   return rows.map(mapRow)
 }
+/** ended_via values that mean "this turn did not finish on its own". */
+const INTERRUPTED_VIA: ReadonlySet<TurnEndedVia> = new Set<TurnEndedVia>([
+  'restart',
+  'sigterm',
+  'timeout',
+  'unknown',
+])
 /**
- * Find the single most-recently-started turn that ended via an interrupt
- * (`'restart'` | `'sigterm'` | `'timeout'`) OR is still open
- * (`ended_at IS NULL`). Used by Stage 4 to surface "you had pending work"
- * to the agent on cold start.
+ * Return the single most-recently-started turn IFF it was interrupted
+ * (`ended_at IS NULL`, or `ended_via` in {restart, sigterm, timeout,
+ * unknown}). Returns null when the latest turn ended cleanly (`'stop'`)
+ * or there are no turns at all.
  *
- * Returns null if no such turn exists (clean boot — last turn ended 'stop').
+ * This is the resume gate. Keying on the *latest* turn (not "latest
+ * interrupted turn anywhere in history") is deliberate: once the agent
+ * resumes and that follow-up turn ends `'stop'`, the latest turn is clean
+ * and this returns null — so a completed resume is never re-fired on the
+ * next restart. The older `findMostRecentInterruptedTurn` had the inverse
+ * bug: a clean latest turn didn't shadow a stale interrupted one, so it
+ * would resurface already-handled work indefinitely.
  *
- * Note on ordering: we use `started_at DESC` (not `updated_at`) so the
- * boot-time reaper (which mass-stamps orphans with the SAME `ended_at` /
- * `updated_at`) doesn't reorder them; the temporal "last turn" is what
- * the user remembers, and that's `started_at`.
+ * Ordering uses `started_at DESC` (not `updated_at`) so the boot reaper,
+ * which mass-stamps orphans with identical timestamps, can't reorder the
+ * temporal "last turn" the user actually remembers.
  */
-export function findMostRecentInterruptedTurn(db: SqliteDatabase): Turn | null {
+export function findLatestTurnIfInterrupted(db: SqliteDatabase): Turn | null {
   const row = db.prepare(`
     SELECT * FROM turns
-    WHERE ended_at IS NULL
-       OR ended_via IN ('restart', 'sigterm', 'timeout')
     ORDER BY started_at DESC
     LIMIT 1
   `).get() as RawTurnRow | undefined
-  return row ? mapRow(row) : null
+  if (!row) return null
+  const turn = mapRow(row)
+  if (turn.ended_at == null) return turn
+  if (turn.ended_via != null && INTERRUPTED_VIA.has(turn.ended_via)) return turn
+  return null
 }

package/telegram-plugin/stream-reply-handler.ts CHANGED Viewed

@@ -8,7 +8,7 @@
  *
  * Contract:
  *   - First call for a chat+thread: creates a stream via
- *     createStreamController, optionally prepending a handoff prefix.
+ *     createStreamController.
  *   - Subsequent calls: reuse the existing stream, push the new text.
  *   - `done=true`: finalize, delete the map entry, fire status-reaction
  *     completion, and (if history enabled) record the final message.
@@ -171,8 +171,6 @@ export interface StreamReplyDeps {
   escapeMarkdownV2: (text: string) => string
   /** Whitespace repair applied to the raw caller text. */
   repairEscapedWhitespace: (text: string) => string
-  /** Resolves the handoff prefix for a first-chunk stream. Empty string if none. */
-  takeHandoffPrefix: (format: 'html' | 'markdownv2' | 'text') => string
   /** Validates the chat id against the access list. Throws on deny. */
   assertAllowedChat: (chatId: string) => void
   /** Resolves the effective thread id (explicit, last-inbound, or undefined). */
@@ -445,14 +443,6 @@ export async function handleStreamReply(
     streamExisted,
   })
-  // First chunk of a session: consume any pending handoff prefix.
-  if (!stream) {
-    const prefix = deps.takeHandoffPrefix(
-      format === 'html' ? 'html' : format === 'markdownv2' ? 'markdownv2' : 'text',
-    )
-    if (prefix.length > 0) effectiveText = prefix + effectiveText
-  }
   if (!stream) {
     // Resolve the effective quote-reply target. Explicit `reply_to` wins;
     // otherwise (unless the caller opted out with `quote:false`) fall back

package/telegram-plugin/subagent-watcher.ts CHANGED Viewed

@@ -40,7 +40,7 @@ import {
 } from 'fs'
 import { basename, join } from 'path'
 import { homedir } from 'os'
-import { projectSubagentLine, sanitizeCwdToProjectName } from './session-tail.js'
+import { projectSubagentLine, sanitizeCwdToProjectName, detectErrorInTranscriptLine } from './session-tail.js'
 import { sanitiseToolArg } from './fleet-state.js'
 import { escapeHtml, truncate } from './card-format.js'
 import { bumpSubagentActivity, recordSubagentStall, recordSubagentResume, recordSubagentEnd, reapStuckRunningRows } from './registry/subagents-schema.js'
@@ -142,6 +142,21 @@ export interface WorkerEntry {
    * dead, the file is just left over from a prior session.
    */
   historical: boolean
+  /**
+   * True once a TERMINAL error line — a model API failure / quota
+   * exhaustion / crash, NOT an in-flight retry or a routine tool-level
+   * `is_error` result — has been observed in this worker's own
+   * transcript. Drives the `failed` terminal outcome so the handback
+   * tells the user the delegated work did NOT complete, instead of
+   * dressing a dead worker up as `completed`. Classified by
+   * `detectErrorInTranscriptLine` (the same gate the operator-event
+   * path uses), so transient mid-retry errors are excluded.
+   */
+  errored?: boolean
+  /** Human-readable detail from the terminal error line, surfaced in the
+   *  failed handback's "what it reported before failing" slot when the
+   *  worker left no narrative result of its own. */
+  errorDetail?: string
 }
 export interface SubagentWatcherConfig {
@@ -611,6 +626,20 @@ export function readSubTail(
     const startState = { hasEmittedStart: tail.hasEmittedStart }
     for (const line of lines) {
       if (!line) continue
+      // Gap 2 (failure honesty): a terminal error line in the worker's
+      // OWN transcript — a model API failure, quota exhaustion, or crash —
+      // means the worker FAILED, not finished. Reuse the operator-event
+      // classifier: `terminal:true` excludes in-flight retries (a 529 mid-
+      // backoff is `terminal:false`), and tool-level `is_error` results
+      // never reach here (they parse as `sub_agent_tool_result`, which is
+      // routine mid-run noise, not a worker death). The flag persists on
+      // the entry; the terminal transition (real turn_end OR stall
+      // synthesis) reads it to emit `failed` instead of `completed`.
+      const errInfo = detectErrorInTranscriptLine(line)
+      if (errInfo?.terminal) {
+        entry.errored = true
+        if (errInfo.detail) entry.errorDetail = errInfo.detail.slice(0, SUBAGENT_RESULT_TEXT_MAX)
+      }
       const events = projectSubagentLine(line, entry.agentId, startState)
       for (const ev of events) {
         const idleSecBeforeBump = Math.round((now - entry.lastActivityAt) / 1000)
@@ -716,7 +745,10 @@ export function readSubTail(
                   recordSubagentEnd(db, {
                     id: rowRef.id,
                     endedAt: now,
-                    status: 'completed',
+                    // Gap 2: keep the audit row honest — a worker that hit a
+                    // terminal transcript error is `failed`, matching the
+                    // handback outcome computed in maybySendStateTransition.
+                    status: entry.errored ? 'failed' : 'completed',
                   })
                 }
               } catch (dbErr) {
@@ -917,6 +949,34 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
       log?.(`subagent-watcher: description updated for ${agentId}: ${desc}`)
     }, fs, log, db, parentStateDir, config.onUnstall, undefined, config.onProgress)
+    // Gap 1 (restart survival): a file still RUNNING at boot is a LIVE
+    // worker that predates this watcher — typically one dispatched in a
+    // prior gateway life and still in-flight across a restart / fleet
+    // rollout, NOT a stale already-finished file. `historical` must
+    // suppress replay only for done-at-boot files; an in-flight-at-boot
+    // worker the user is still waiting on must get full live treatment:
+    // progress nudges, the stall-synthesis safety net (checkStalls skips
+    // historical entries), and a real `completed`/`failed` handback rather
+    // than a dropped `orphan`. Promote it to a live entry here. (A file
+    // already `done` at boot stays historical and is short-circuited just
+    // below — it finished before this session.)
+    if (isHistorical && entry.state === 'running') {
+      entry.historical = false
+      log?.(`subagent-watcher: ${agentId} was in-flight at boot — promoting to live (predates watcher; user still awaiting handback)`)
+      // The prior gateway life's registration normally linked
+      // jsonl_agent_id already, but re-run the backfill idempotently in
+      // case that life crashed before the link persisted — the handback's
+      // isBackground lookup is keyed on jsonl_agent_id, and an unlinked row
+      // would mis-resolve the worker as foreground and drop the handback.
+      if (db != null) {
+        try {
+          backfillJsonlAgentId(db, filePath, agentId, log)
+        } catch (err) {
+          log?.(`subagent-watcher: backfill error for ${agentId}: ${(err as Error).message}`)
+        }
+      }
+    }
     // If the JSONL already contained a turn_end at registration time
     // (file written-then-watched), fire the state-transition + completion
     // notification now. Otherwise the FSWatcher callback handles it on
@@ -980,11 +1040,22 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
           config.onFinish({
             agentId,
             state: entry.state,
-            outcome: entry.historical ? 'orphan' : 'completed',
+            // Gap 2: a terminal error observed in the transcript wins over
+            // the completed/orphan classification — a worker that crashed
+            // is `failed`, even if it later wrote a turn_end or aged into
+            // stall synthesis. `orphan` remains for genuinely stale
+            // done-at-boot rows (which never reach this path; see
+            // registerAgent's short-circuit + Gap 1 promotion).
+            outcome: entry.errored ? 'failed' : entry.historical ? 'orphan' : 'completed',
             toolCount: entry.toolCount,
             durationMs: nowFn() - entry.dispatchedAt,
             description: entry.description,
-            resultText: entry.lastResultText,
+            // For a failure, fall back to the error detail when the worker
+            // left no narrative of its own — so the handback's "what it
+            // reported before failing" slot is never empty on a crash.
+            resultText: entry.errored
+              ? entry.lastResultText || entry.errorDetail || ''
+              : entry.lastResultText,
           })
         } catch (cbErr) {
           log?.(`subagent-watcher: onFinish callback error ${agentId}: ${(cbErr as Error).message}`)
@@ -1151,7 +1222,10 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
             recordSubagentEnd(db, {
               id: rowRef.id,
               endedAt: n,
-              status: 'completed',
+              // Gap 2: a worker that hit a terminal transcript error before
+              // going silent is `failed`, not `completed` — keep the audit
+              // row consistent with the handback outcome.
+              status: entry.errored ? 'failed' : 'completed',
             })
           }
         } catch (dbErr) {

package/telegram-plugin/tests/agent-dir.test.ts ADDED Viewed

@@ -0,0 +1,25 @@
+import { describe, it, expect, afterEach } from "vitest";
+import { resolveAgentDirFromEnv } from "../agent-dir.js";
+describe("resolveAgentDirFromEnv", () => {
+  const prior = process.env.TELEGRAM_STATE_DIR;
+  afterEach(() => {
+    if (prior === undefined) delete process.env.TELEGRAM_STATE_DIR;
+    else process.env.TELEGRAM_STATE_DIR = prior;
+  });
+  it("returns dirname of TELEGRAM_STATE_DIR", () => {
+    process.env.TELEGRAM_STATE_DIR = "/foo/bar/agent/telegram";
+    expect(resolveAgentDirFromEnv()).toBe("/foo/bar/agent");
+  });
+  it("returns null when env unset", () => {
+    delete process.env.TELEGRAM_STATE_DIR;
+    expect(resolveAgentDirFromEnv()).toBeNull();
+  });
+  it("returns null when env is empty string", () => {
+    process.env.TELEGRAM_STATE_DIR = "   ";
+    expect(resolveAgentDirFromEnv()).toBeNull();
+  });
+});

package/telegram-plugin/tests/e2e.test.ts CHANGED Viewed

@@ -10,7 +10,7 @@
  * of this test file and brittle w.r.t. upstream churn.
  *
  * Instead, following the existing project convention
- * (see steering.test.ts, handoff-continuity.test.ts), we exercise each
+ * (see steering.test.ts), we exercise each
  * specified scenario through the same pure helper modules that server.ts
  * calls. Where a scenario lives inside server.ts's in-memory state
  * (activeTurnStartedAt, activeStatusReactions, suppressPtyPreview), we
@@ -18,23 +18,13 @@
  * server.ts uses. The helpers and the state shape are the contract —
  * if they don't regress, the integrated behaviour doesn't regress.
  */
-import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'
-import { mkdtempSync, rmSync, writeFileSync, existsSync } from 'node:fs'
-import { tmpdir } from 'node:os'
-import { join } from 'node:path'
+import { describe, it, expect, beforeEach, afterEach } from 'vitest'
 import {
   parseQueuePrefix,
   formatPriorAssistantPreview,
   buildChannelMetaAttributes,
 } from '../steering.js'
-import {
-  consumeHandoffTopic,
-  readHandoffTopic,
-  formatHandoffLine,
-  shouldShowHandoffLine,
-  HANDOFF_TOPIC_FILENAME,
-} from '../handoff-continuity.js'
 import {
   isContextExhaustionText,
   shouldArmOrphanedReplyTimeout,
@@ -64,7 +54,6 @@ interface PluginState {
   currentSessionChatId: string | null
   currentSessionThreadId: number | undefined
   currentTurnStartedAt: number
-  handoffTopicUsed: boolean
 }
 function freshState(): PluginState {
@@ -75,7 +64,6 @@ function freshState(): PluginState {
     currentSessionChatId: null,
     currentSessionThreadId: undefined,
     currentTurnStartedAt: 0,
-    handoffTopicUsed: false,
   }
 }
@@ -271,69 +259,6 @@ describe('E2E: turn lifecycle cleanup', () => {
   })
 })
-// ---------------------------------------------------------------------------
-// Handoff continuity
-// ---------------------------------------------------------------------------
-describe('E2E: handoff continuity', () => {
-  let tmp: string
-  const priorEnv = { ...process.env }
-  beforeEach(() => {
-    tmp = mkdtempSync(join(tmpdir(), 'handoff-e2e-'))
-  })
-  afterEach(() => {
-    rmSync(tmp, { recursive: true, force: true })
-    process.env = { ...priorEnv }
-  })
-  it('bootstrap with sidecar + show-line=true → first reply prepends the line', () => {
-    writeFileSync(join(tmp, HANDOFF_TOPIC_FILENAME), 'shipped the feature\n', 'utf8')
-    process.env.SWITCHROOM_HANDOFF_SHOW_LINE = 'true'
-    expect(shouldShowHandoffLine()).toBe(true)
-    const topic = consumeHandoffTopic(tmp)
-    expect(topic).toBe('shipped the feature')
-    const line = formatHandoffLine(topic!, 'html')
-    expect(line).toContain('shipped the feature')
-    expect(line).toMatch(/^<i>/)
-  })
-  it('bootstrap with sidecar + show-line=false → no prefix', () => {
-    writeFileSync(join(tmp, HANDOFF_TOPIC_FILENAME), 'x\n', 'utf8')
-    process.env.SWITCHROOM_HANDOFF_SHOW_LINE = 'false'
-    expect(shouldShowHandoffLine()).toBe(false)
-  })
-  it('bootstrap with no sidecar → no prefix', () => {
-    expect(readHandoffTopic(tmp)).toBeNull()
-    expect(consumeHandoffTopic(tmp)).toBeNull()
-  })
-  it('consuming topic is one-shot — second call returns null + sidecar deleted', () => {
-    writeFileSync(join(tmp, HANDOFF_TOPIC_FILENAME), 'topic\n', 'utf8')
-    expect(consumeHandoffTopic(tmp)).toBe('topic')
-    expect(existsSync(join(tmp, HANDOFF_TOPIC_FILENAME))).toBe(false)
-    expect(consumeHandoffTopic(tmp)).toBeNull()
-  })
-  it('stream_reply: once topic consumed, subsequent stream chunks do not re-prefix', () => {
-    // Model: the plugin tracks handoffTopicUsed after first reply/stream_reply
-    // use. The second and later stream edits on the same stream read the flag
-    // and skip prepending.
-    writeFileSync(join(tmp, HANDOFF_TOPIC_FILENAME), 't\n', 'utf8')
-    const s = freshState()
-    expect(s.handoffTopicUsed).toBe(false)
-    // first chunk
-    const topic = consumeHandoffTopic(tmp)
-    expect(topic).toBe('t')
-    s.handoffTopicUsed = true
-    // simulate next chunk arriving — should not consume
-    expect(consumeHandoffTopic(tmp)).toBeNull()
-    expect(s.handoffTopicUsed).toBe(true)
-  })
-})
 // ---------------------------------------------------------------------------
 // Context exhaustion
 // ---------------------------------------------------------------------------

package/telegram-plugin/tests/inbound-spool.test.ts CHANGED Viewed

@@ -106,6 +106,51 @@ describe('spoolId — stable dedup key', () => {
     // messageId > 0 → legacy m:<chat>:<msgId> still wins.
     expect(a).toBe('m:c1:555')
   })
+  // honest-restart-resume: a boot-resume inbound is minted with a fresh
+  // ts/messageId every boot, so without a turn-keyed id an operator who
+  // restarts twice before the agent drains the first resume would stack
+  // N resumes of the same turn. Keying on resume_turn_key collapses them.
+  it('resume_interrupted → s:resume:<turn_key>, stable across boots (fresh ts/messageId)', () => {
+    const a = spoolId(
+      msg({
+        messageId: 1700_000_000_000,
+        ts: 1700_000_000_000,
+        meta: { source: 'resume_interrupted', resume_turn_key: '12345:11' },
+      }),
+    )
+    const b = spoolId(
+      msg({
+        messageId: 1700_000_999_999,
+        ts: 1700_000_999_999,
+        meta: { source: 'resume_interrupted', resume_turn_key: '12345:11' },
+      }),
+    )
+    expect(a).toBe('s:resume:12345:11')
+    expect(b).toBe(a)
+  })
+  it('resume_watchdog_timeout shares the s:resume namespace (one turn is one or the other)', () => {
+    const interrupted = spoolId(
+      msg({ messageId: 0, meta: { source: 'resume_interrupted', resume_turn_key: 'k:1' } }),
+    )
+    const timeout = spoolId(
+      msg({ messageId: 0, meta: { source: 'resume_watchdog_timeout', resume_turn_key: 'k:1' } }),
+    )
+    expect(timeout).toBe('s:resume:k:1')
+    expect(timeout).toBe(interrupted)
+  })
+  it('resume inbounds for distinct turns stay distinct', () => {
+    const a = spoolId(
+      msg({ messageId: 0, meta: { source: 'resume_interrupted', resume_turn_key: 'k:1' } }),
+    )
+    const b = spoolId(
+      msg({ messageId: 0, meta: { source: 'resume_interrupted', resume_turn_key: 'k:2' } }),
+    )
+    expect(a).not.toBe(b)
+  })
+  it('resume source without a turn_key falls back to legacy id (no crash)', () => {
+    const a = spoolId(msg({ messageId: 777, meta: { source: 'resume_interrupted' }, ts: 100 }))
+    expect(a).toBe('m:c1:777')
+  })
 })
 describe('inbound-spool — subagent_handback dedup across restart re-build (#1719)', () => {

package/telegram-plugin/tests/multi-turn-continuity.test.ts CHANGED Viewed

@@ -31,7 +31,6 @@ function makeDeps(bot: FakeBot, overrides?: Partial<StreamReplyDeps>): StreamRep
     markdownToHtml: (t) => realMarkdownToHtml(t),
     escapeMarkdownV2: (t) => t,
     repairEscapedWhitespace: (t) => t,
-    takeHandoffPrefix: () => '',
     assertAllowedChat: () => {},
     resolveThreadId: (_, explicit) => (explicit != null ? Number(explicit) : undefined),
     disableLinkPreview: true,

package/telegram-plugin/tests/outbound-ordering.test.ts CHANGED Viewed

@@ -437,7 +437,6 @@ describe('wrapBot + handleStreamReply + reply ordering', () => {
       markdownToHtml: (t) => t,
       escapeMarkdownV2: (t) => t,
       repairEscapedWhitespace: (t) => t,
-      takeHandoffPrefix: () => '',
       assertAllowedChat: () => {},
       resolveThreadId: () => undefined,
       disableLinkPreview: true,

package/telegram-plugin/tests/parse-mode-rotation.test.ts CHANGED Viewed

@@ -31,7 +31,6 @@ function makeDeps(bot: FakeBot, overrides?: Partial<StreamReplyDeps>): StreamRep
     markdownToHtml: (t) => realMarkdownToHtml(t),
     escapeMarkdownV2: (t) => `ESC(${t})`,
     repairEscapedWhitespace: (t) => t,
-    takeHandoffPrefix: () => '',
     assertAllowedChat: () => {},
     resolveThreadId: (_, explicit) => (explicit != null ? Number(explicit) : undefined),
     disableLinkPreview: true,