npm - switchroom - Versions diffs - 0.10.0 → 0.11.0 - Mend

switchroom 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

package/README.md +5 -4
package/dist/cli/drive-write-pretool.mjs +5418 -0
package/dist/cli/switchroom.js +201 -24
package/package.json +1 -1
package/telegram-plugin/admin-commands/dispatch.test.ts +1 -1
package/telegram-plugin/admin-commands/index.ts +2 -0
package/telegram-plugin/auth-snapshot-format.ts +612 -0
package/telegram-plugin/auto-fallback-fleet.ts +215 -0
package/telegram-plugin/auto-fallback.ts +28 -301
package/telegram-plugin/dist/gateway/gateway.js +4407 -2252
package/telegram-plugin/fleet-fallback-gate.ts +105 -0
package/telegram-plugin/gateway/approval-callback.test.ts +104 -0
package/telegram-plugin/gateway/approval-callback.ts +31 -3
package/telegram-plugin/gateway/auth-command.ts +121 -10
package/telegram-plugin/gateway/auth-status-adapter.ts +101 -0
package/telegram-plugin/gateway/boot-card.ts +1 -1
package/telegram-plugin/gateway/boot-probes.ts +6 -9
package/telegram-plugin/gateway/diff-preview-card.test.ts +192 -0
package/telegram-plugin/gateway/diff-preview-card.ts +170 -0
package/telegram-plugin/gateway/drive-write-approval.test.ts +312 -0
package/telegram-plugin/gateway/drive-write-approval.ts +243 -0
package/telegram-plugin/gateway/folder-picker-handler.test.ts +314 -0
package/telegram-plugin/gateway/folder-picker-handler.ts +348 -0
package/telegram-plugin/gateway/gateway.ts +876 -173
package/telegram-plugin/gateway/hostd-dispatch.ts +127 -0
package/telegram-plugin/gateway/ipc-protocol.ts +83 -2
package/telegram-plugin/gateway/ipc-server.ts +69 -0
package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +103 -12
package/telegram-plugin/model-unavailable.ts +28 -12
package/telegram-plugin/silence-poke.ts +153 -1
package/telegram-plugin/tests/auth-command-format2.test.ts +156 -0
package/telegram-plugin/tests/auth-snapshot-format.test.ts +429 -0
package/telegram-plugin/tests/auth-status-adapter.test.ts +129 -0
package/telegram-plugin/tests/auto-fallback-fleet.test.ts +211 -0
package/telegram-plugin/tests/auto-fallback.test.ts +60 -358
package/telegram-plugin/tests/boot-probes.test.ts +16 -18
package/telegram-plugin/tests/fleet-fallback-gate.test.ts +197 -0
package/telegram-plugin/tests/model-unavailable.test.ts +30 -5
package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +212 -2
package/telegram-plugin/tests/silence-poke.test.ts +237 -0
package/telegram-plugin/tests/turn-flush-safety.test.ts +112 -0
package/telegram-plugin/turn-flush-safety.ts +55 -1
package/telegram-plugin/uat/SETUP.md +16 -12
package/telegram-plugin/auto-fallback-dispatcher.ts +0 -68
package/telegram-plugin/tests/auto-fallback-dispatcher.e2e.test.ts +0 -183
package/telegram-plugin/tests/hostd-dispatch.test.ts +0 -129

package/telegram-plugin/gateway/hostd-dispatch.ts CHANGED Viewed

@@ -115,3 +115,130 @@ export async function tryHostdDispatch(
 export function hostdRequestId(prefix: string): string {
   return `${prefix}-${Date.now()}-${randomBytes(4).toString("hex")}`;
 }
+/**
+ * Poll hostd's `get_status` verb until the target request reaches a
+ * terminal state (`completed` / `error` / `denied`) or the caller's
+ * timeout elapses.
+ *
+ * Motivation: the long-running mutating verbs (`update_apply`, `apply`)
+ * respond `result: "started"` immediately and run the work in a
+ * detached child on the daemon side. Without polling, callers that
+ * acked "started" to the operator have no way to surface a *fail
+ * before recreate* (image-pull error, scaffold regeneration crash,
+ * etc.) — the gateway dies if recreate succeeds, but stays alive and
+ * silent if it fails. Polling closes that observability hole.
+ *
+ * Behaviour:
+ *   - Returns immediately if hostd is unconfigured, i.e. hostd is
+ *     disabled or the agent's socket path does not exist (treats as
+ *     `not-configured`, same as {@link tryHostdDispatch}).
+ *   - Sleeps one full interval BEFORE the first poll, because the
+ *     caller has only just sent the kick-off request.
+ *   - Polls every {@link opts.intervalMs} ms (default 2000 per RFC C §5.3).
+ *   - Bails out after {@link opts.timeoutMs} with a synthesized
+ *     `result: "error"` response describing the timeout. Caller should
+ *     treat that as inconclusive — for `update_apply` specifically,
+ *     a timeout often means the recreate succeeded and killed the
+ *     gateway; the *new* gateway's post-restart greeting card is the
+ *     true success signal.
+ *   - Any terminal state from the daemon (`completed`/`error`/`denied`)
+ *     bails immediately and returns that response. Wire errors are
+ *     synthesized by {@link tryHostdDispatch} as `result: "error"`,
+ *     which also bails — there's no separate retry on transient wire
+ *     failures because (a) the daemon doesn't actually go down except
+ *     during a recreate that kills us anyway, and (b) waiting until
+ *     timeout to surface a clear error is worse UX than surfacing it
+ *     immediately.
+ *   - A `"not-configured"` answer from a poll (socket vanished
+ *     mid-poll) is returned as-is rather than folded into an error.
+ *
+ * Timing caveats, as the code is written:
+ *   - The deadline is only checked at the top of each loop iteration
+ *     and the sleeps are not clamped to the time remaining, so the
+ *     call can return later than `timeoutMs` by up to one interval
+ *     plus the duration of one dispatch.
+ *   - With the default clock and sleep, a `timeoutMs` that is not
+ *     larger than the interval yields the timeout response without a
+ *     single poll being issued (the initial sleep consumes the budget).
+ *   - `duration_ms` on the synthesized timeout response is the
+ *     configured `timeoutMs`, not a measured elapsed time.
+ *
+ * @param agentName - Agent whose hostd socket is polled.
+ * @param targetRequestId - `request_id` of the kick-off request whose
+ *   status is being watched; sent as `args.target_request_id`.
+ * @param opts - Timeout, poll interval, and clock/sleep test seams.
+ * @returns The daemon's terminal response, the synthesized timeout
+ *   error response, or `"not-configured"`.
+ */
+export async function pollHostdStatus(
+  agentName: string,
+  targetRequestId: string,
+  opts: {
+    /** Hard cap. update_apply: 60_000; apply: 30_000. */
+    timeoutMs: number;
+    /** Default 2000. */
+    intervalMs?: number;
+    /** Test seam — defaults to `Date.now`. */
+    now?: () => number;
+    /** Test seam — defaults to `setTimeout`. */
+    sleep?: (ms: number) => Promise<void>;
+  },
+): Promise<HostdResponse | "not-configured"> {
+  if (!isHostdEnabled()) return "not-configured";
+  const sockPath = hostdSocketPath(agentName);
+  if (!existsSync(sockPath)) return "not-configured";
+  const now = opts.now ?? Date.now;
+  const sleep =
+    opts.sleep ?? ((ms) => new Promise<void>((r) => setTimeout(r, ms)));
+  const intervalMs = opts.intervalMs ?? 2000;
+  const deadline = now() + opts.timeoutMs;
+  // Initial wait — the caller just sent the kick-off request. Give the
+  // daemon a tick to begin work before the first poll.
+  await sleep(intervalMs);
+  while (now() < deadline) {
+    const pollId = hostdRequestId("gw-poll");
+    const resp = await tryHostdDispatch(agentName, {
+      v: 1,
+      op: "get_status",
+      request_id: pollId,
+      args: { target_request_id: targetRequestId },
+    });
+    if (resp === "not-configured") {
+      // Socket disappeared mid-poll — daemon was stopped. Surface that
+      // distinctly from a target-request error so callers can decide
+      // whether to retry or bail.
+      return resp;
+    }
+    // get_status returns the StatusEntry's result, which IS the target
+    // request's result. Any terminal state (completed/error/denied) is
+    // the target's final answer — bail with it. The previous draft of
+    // this helper retried on `error`/`denied` in case the daemon was
+    // transiently busy; that policy masked real errors as
+    // "still polling" until the 60s cap, then synthesized a misleading
+    // "timeout" response. Bailing immediately surfaces the daemon's
+    // audit-log truth directly to the operator.
+    if (
+      resp.result === "completed" ||
+      resp.result === "error" ||
+      resp.result === "denied"
+    ) {
+      return resp;
+    }
+    // result: "started" — get_status reflects the latest StatusEntry,
+    // which is still `started` until the daemon's mutation finishes.
+    // Keep polling.
+    await sleep(intervalMs);
+  }
+  return {
+    v: 1,
+    request_id: hostdRequestId("gw-poll-timeout"),
+    result: "error",
+    exit_code: null,
+    duration_ms: opts.timeoutMs,
+    error:
+      `hostd poll timeout after ${opts.timeoutMs}ms waiting for ` +
+      `target_request_id=${targetRequestId}`,
+  };
+}
+/**
+ * Emit a one-line operator-visible deprecation warning when a verb that
+ * hostd supports is being dispatched via the legacy spawn path. Quiet
+ * by design — operators see it once per verb per process in journald,
+ * never in chat. RFC C §7 Phase 2 → Phase 3.
+ *
+ * Mechanics: `warnLegacySpawnIfHostdDisabled(verb)` is a no-op when
+ * hostd is enabled. Otherwise it writes a single line to
+ * `process.stderr` the first time it sees a given `verb`, and records
+ * the verb in the module-level `_deprecationSeen` set declared directly
+ * below so repeat calls for that verb stay silent for the life of the
+ * process.
+ *
+ * NOTE(review): the emitted text says the legacy path is "scheduled for
+ * removal in v0.10", yet this hunk appears in the 0.10.0 → 0.11.0 diff.
+ * Confirm the intended removal version with the RFC C Phase 3 plan.
+ */
+const _deprecationSeen = new Set<string>();
+export function warnLegacySpawnIfHostdDisabled(verb: string): void {
+  if (isHostdEnabled()) return;
+  if (_deprecationSeen.has(verb)) return;
+  _deprecationSeen.add(verb);
+  process.stderr.write(
+    `telegram gateway: spawnSwitchroomDetached(${verb}) — set ` +
+      `host_control.enabled: true and run \`switchroom hostd install\` ` +
+      `to route through audited hostd. Legacy path scheduled for ` +
+      `removal in v0.10 (RFC C Phase 3).\n`,
+  );
+}
+/**
+ * @internal Clear the per-verb deprecation-warning set so tests can
+ * re-assert that the warning is emitted. Only `_deprecationSeen` is
+ * reset here; no other cache is touched by this function.
+ */
+export function _resetDeprecationSeen(): void {
+  _deprecationSeen.clear();
+}

package/telegram-plugin/gateway/ipc-protocol.ts CHANGED Viewed

@@ -59,12 +59,47 @@ export interface ScheduleRestartResult {
   error?: string;
 }
+/**
+ * RFC E §4.2 Cut 2 — sent by the gateway to acknowledge that a
+ * Drive-write approval card has been posted (or that posting
+ * failed). The Drive-write PreToolUse hook (a separate process)
+ * uses the `requestId` to poll the kernel's `approval_lookup` for
+ * the verdict; if posting fails, the hook fails closed.
+ *
+ * Why response-shaped: the hook is synchronous from Claude Code's
+ * perspective (PreToolUse blocks the tool call). The hook can't
+ * return its `decision: "approve" | "block"` until either the
+ * card has been posted (so the user can decide) OR posting failed
+ * (so the hook can return block immediately). A response message
+ * is the cleanest way to surface that.
+ *
+ * Field summary: `ok` distinguishes the two outcomes. `requestId`
+ * and `expiresAtMs` are documented as present only when `ok` is
+ * true; `reason` carries diagnostic detail on failure. All three are
+ * optional at the type level, so consumers must check `ok` before
+ * relying on them.
+ */
+export interface DriveApprovalPostedEvent {
+  type: "drive_approval_posted";
+  /** Same `correlationId` the client sent on the request. */
+  correlationId: string;
+  ok: boolean;
+  /**
+   * Kernel request_id the hook will pass to `approval_lookup` once
+   * it starts polling. Only present when `ok: true`.
+   */
+  requestId?: string;
+  /**
+   * Unix-ms expiry of the kernel request, mirrors the ttl_ms the
+   * gateway used. Hook uses this as its polling deadline. Only
+   * present when `ok: true`.
+   */
+  expiresAtMs?: number;
+  /** Diagnostic detail on failure. */
+  reason?: string;
+}
 export type GatewayToClient =
   | InboundMessage
   | PermissionEvent
   | StatusEvent
   | ToolCallResult
-  | ScheduleRestartResult;
+  | ScheduleRestartResult
+  | DriveApprovalPostedEvent;
 // === Bridge (Client) -> Gateway messages ===
@@ -189,6 +224,51 @@ export interface InjectInboundMessage {
   inbound: InboundMessage;
 }
+/**
+ * RFC E §4.2 Cut 2 — sent by the Drive-write PreToolUse hook to
+ * the gateway to register a diff-preview approval card with the
+ * kernel + post it to Telegram. The hook waits on the
+ * corresponding `drive_approval_posted` reply (matching
+ * `correlationId`), then polls `approval_lookup` for the verdict.
+ *
+ * The `preview` payload is shaped like
+ * `src/drive/diff-preview.ts:DiffPreviewInput`. We don't restate
+ * the full shape on the wire — the IPC validator does a structural
+ * check (required fields present, types right) and the gateway-side
+ * consumer feeds it straight to `buildDiffPreview()` which is
+ * already defensive against malformed inputs.
+ *
+ * Trust model: same as `inject_inbound` — the gateway socket lives
+ * inside the agent container and only processes running as that UID
+ * can connect, so the hook is as trusted as anything else in the
+ * container.
+ */
+export interface RequestDriveApprovalMessage {
+  type: "request_drive_approval";
+  /**
+   * Hook-generated correlation id (any unique string ≤ 64 chars).
+   * Echoed back in `drive_approval_posted` so the hook can match
+   * the response if multiple Drive-write taps are in flight.
+   * The IPC validator rejects an empty id or one longer than 64
+   * characters.
+   */
+  correlationId: string;
+  /**
+   * Target agent the gateway serves. Defense in depth — the gateway
+   * verifies this matches its own SWITCHROOM_AGENT_NAME and refuses
+   * cross-agent requests.
+   */
+  agentName: string;
+  /**
+   * DiffPreviewInput payload — see `src/drive/diff-preview.ts`.
+   * Carried as an opaque object on the wire; the gateway
+   * deserialises it via `buildDiffPreview()`.
+   */
+  preview: Record<string, unknown>;
+  /**
+   * TTL for the kernel approval request, in ms. Hook typically
+   * passes 5 min; gateway clamps to a sensible range. Optional; when
+   * supplied, the IPC validator requires a finite, non-negative
+   * number.
+   */
+  ttlMs?: number;
+}
 export type ClientToGateway =
   | RegisterMessage
   | ToolCallMessage
@@ -199,4 +279,5 @@ export type ClientToGateway =
   | OperatorEventForward
   | PtyPartialForward
   | UpdatePlaceholderMessage
-  | InjectInboundMessage;
+  | InjectInboundMessage
+  | RequestDriveApprovalMessage;

package/telegram-plugin/gateway/ipc-server.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import type {
   PermissionRequestForward,
   PtyPartialForward,
   RegisterMessage,
+  RequestDriveApprovalMessage,
   ScheduleRestartMessage,
   SessionEventForward,
   ToolCallMessage,
@@ -40,6 +41,18 @@ export interface IpcServerOptions {
    * inline scheduler simply ignore inject_inbound messages.
    */
   onInjectInbound?: (client: IpcClient, msg: InjectInboundMessage) => void;
+  /**
+   * RFC E §4.2 Cut 2 — Drive-write PreToolUse hook asks the gateway
+   * to register a kernel approval request + post a diff-preview
+   * card to Telegram. Handler is expected to send a
+   * `drive_approval_posted` event back over the same connection
+   * (`client.send(...)`). Optional: gateways without the hook
+   * configured ignore these messages.
+   */
+  onRequestDriveApproval?: (
+    client: IpcClient,
+    msg: RequestDriveApprovalMessage,
+  ) => Promise<void>;
   log?: (msg: string) => void;
   /**
    * How long (in ms) to wait without a heartbeat before force-closing the
@@ -192,6 +205,23 @@ export function validateClientMessage(msg: unknown): msg is ClientToGateway {
         && typeof inb.meta === "object"
         && inb.meta !== null;
     }
+    case "request_drive_approval": {
+      // RFC E §4.2 Cut 2. Validate the wire-shaped fields the
+      // gateway will route on; the inner `preview` is treated as
+      // an opaque object and gets defensively re-validated by
+      // `buildDiffPreview()` downstream.
+      if (typeof m.correlationId !== "string"
+        || (m.correlationId as string).length === 0
+        || (m.correlationId as string).length > 64) return false;
+      if (typeof m.agentName !== "string"
+        || !AGENT_NAME_RE.test(m.agentName as string)) return false;
+      if (typeof m.preview !== "object" || m.preview === null) return false;
+      if (m.ttlMs !== undefined
+        && (typeof m.ttlMs !== "number"
+          || !Number.isFinite(m.ttlMs)
+          || (m.ttlMs as number) < 0)) return false;
+      return true;
+    }
     default:
       return false;
   }
@@ -210,6 +240,7 @@ export function createIpcServer(options: IpcServerOptions): IpcServer {
     onOperatorEvent,
     onPtyPartial,
     onInjectInbound,
+    onRequestDriveApproval,
     log = () => {},
     heartbeatTimeoutMs = 30_000,
   } = options;
@@ -298,6 +329,44 @@ export function createIpcServer(options: IpcServerOptions): IpcServer {
       case "inject_inbound":
         if (onInjectInbound) onInjectInbound(client, msg as InjectInboundMessage);
         break;
+      case "request_drive_approval":
+        if (onRequestDriveApproval) {
+          // Handler is async — fire-and-forget here; the handler
+          // is responsible for sending its `drive_approval_posted`
+          // response (success or failure) back to the client.
+          onRequestDriveApproval(client, msg as RequestDriveApprovalMessage).catch(
+            (err) => {
+              log(
+                `request_drive_approval handler threw (client=${client.id}): ${(err as Error).message}`,
+              );
+              try {
+                client.send({
+                  type: "drive_approval_posted",
+                  correlationId: (msg as RequestDriveApprovalMessage).correlationId,
+                  ok: false,
+                  reason: `gateway handler error: ${(err as Error).message}`,
+                });
+              } catch {
+                /* best effort */
+              }
+            },
+          );
+        } else {
+          // No handler wired — fail closed and tell the hook so it
+          // can fall back to blocking the tool. Better than leaving
+          // the hook timing out.
+          try {
+            client.send({
+              type: "drive_approval_posted",
+              correlationId: (msg as RequestDriveApprovalMessage).correlationId,
+              ok: false,
+              reason: "gateway not configured for Drive-write approval",
+            });
+          } catch {
+            /* best effort */
+          }
+        }
+        break;
       case "update_placeholder":
         // Legacy recall.py IPC — placeholder UX was removed in #553 PR 5.
         // Soft-accepted so recall.py keeps working without modifying

package/telegram-plugin/hooks/sandbox-hint-posttool.mjs CHANGED Viewed

@@ -90,6 +90,85 @@ function emitContext(text) {
   process.stdout.write(JSON.stringify(payload) + '\n')
 }
+/**
+ * #1303: classify a tool_response as a failure. Only failures can have
+ * hit a kernel sandbox boundary. Pre-fix the hook stringified the whole
+ * tool_response and pattern-matched against it — that meant a SUCCESSFUL
+ * Read/Edit/Bash whose payload merely MENTIONED "EROFS" or "Read-only
+ * file system" (e.g. file content, code comments, grep results, the hook
+ * source itself) tripped the advisory. Verified live during #1291/#1292
+ * PR work: every `Read` on a file talking about the sandbox model
+ * produced a false positive; every `Edit` adding a comment that
+ * mentioned read-only-fs did too.
+ *
+ * Failure is recognised from these tool_response shapes:
+ *   - Edit / Write / NotebookEdit / MCP: `{ is_error: true, ... }`
+ *   - Bash: `{ exit_code: <non-zero number>, stdout, stderr, ... }`
+ *   - Any object with `success: false`.
+ *   - Forward-compat: any object whose `error` field is non-null.
+ *   - Free-form string body: there is no structured marker to inspect,
+ *     so every string is returned as a candidate (no parsing is
+ *     attempted); the pattern match downstream still gates the
+ *     advisory text.
+ *
+ * If no failure signal is found we have no kernel error to advise on,
+ * and the hook stays silent.
+ *
+ * NOTE(review): failure detection requires `exit_code` to be a number,
+ * while the stdout-inclusion check below only requires it to be
+ * non-null and `!== 0`. The two agree for numeric exit codes; confirm
+ * no tool reports `exit_code` as a string.
+ *
+ * @param toolResponse - The event's `tool_response`; may be a string,
+ *   an object, or null/undefined.
+ * @returns `null` when the response is null/undefined, is neither a
+ *   string nor an object, or is an object with no failure marker.
+ *   Otherwise `{ kind, body }` where `kind` is `'bare-string'` or
+ *   `'structured-failure'` and `body` is the text to pattern-match.
+ */
+function classifyFailure(toolResponse) {
+  if (toolResponse == null) return null
+  if (typeof toolResponse === 'string') {
+    // Bare string body — no structured failure marker. Treat as a
+    // candidate; the pattern match decides.
+    return { kind: 'bare-string', body: toolResponse }
+  }
+  if (typeof toolResponse !== 'object') return null
+  const isError =
+    toolResponse.is_error === true
+    || toolResponse.success === false
+    || toolResponse.error != null
+    || (typeof toolResponse.exit_code === 'number'
+        && toolResponse.exit_code !== 0)
+  if (!isError) return null
+  // Extract error-bearing fields only — never the full response. For a
+  // failed Bash, stdout may carry the relevant kernel message alongside
+  // stderr (some commands write errors to stdout), so include stdout
+  // when there's a non-zero exit code.
+  const parts = []
+  if (typeof toolResponse.error === 'string') parts.push(toolResponse.error)
+  if (typeof toolResponse.stderr === 'string') parts.push(toolResponse.stderr)
+  if (toolResponse.exit_code != null && toolResponse.exit_code !== 0
+      && typeof toolResponse.stdout === 'string') {
+    parts.push(toolResponse.stdout)
+  }
+  // Fallback: failure was signalled but no error-bearing field
+  // surfaced — stringify the structured response so we don't miss an
+  // unusual tool that puts the kernel error in an unexpected key.
+  // Bounded by the 64 KiB cap downstream.
+  if (parts.length === 0) {
+    try { parts.push(JSON.stringify(toolResponse)) } catch { /* unprintable */ }
+  }
+  return { kind: 'structured-failure', body: parts.join('\n') }
+}
+/**
+ * #1303 secondary defence: only write-capable tools can hit a kernel
+ * sandbox boundary. Read/Grep/Glob/WebFetch/etc. cannot EROFS — even if
+ * settings.json wires this hook with matcher ".*", we gate at the
+ * script level so a future scaffold change can't re-introduce the
+ * false-positive class. Bash is included because it's the canonical
+ * write surface (mkdir, rm, install, apt, etc.). MCP tools that may
+ * proxy writes are included by an `mcp__` prefix check.
+ *
+ * `isWriteCapableTool(toolName)` returns true when `toolName` is an
+ * exact, case-sensitive member of `WRITE_CAPABLE_TOOLS` or starts with
+ * `mcp__` (every MCP tool is admitted, whether or not it writes). A
+ * non-string `toolName` returns false.
+ */
+const WRITE_CAPABLE_TOOLS = new Set([
+  'Edit', 'MultiEdit', 'Write', 'NotebookEdit', 'Bash',
+])
+function isWriteCapableTool(toolName) {
+  if (typeof toolName !== 'string') return false
+  if (WRITE_CAPABLE_TOOLS.has(toolName)) return true
+  if (toolName.startsWith('mcp__')) return true
+  return false
+}
 function main() {
   const raw = readStdin()
   if (!raw) return
@@ -101,18 +180,18 @@ function main() {
     return
   }
-  // tool_response shape varies by tool — string for Bash, object with
-  // file/oldString/newString for Edit/Write, etc. Stringify the whole
-  // thing so we match against every nested error field at once. Cap the
-  // scan window to keep memory bounded if the model just dumped a 10MB
-  // log into the tool_response.
-  let body
-  try {
-    body = JSON.stringify(evt.tool_response ?? '')
-  } catch {
-    return
-  }
-  if (!body) return
+  if (!isWriteCapableTool(evt.tool_name)) return
+  // #1303 primary fix: classify success vs failure FIRST. A successful
+  // tool can't have hit a kernel sandbox boundary by definition — its
+  // payload may mention EROFS / read-only-fs in benign content but
+  // that's not a kernel error.
+  const failure = classifyFailure(evt.tool_response)
+  if (failure == null) return
+  let body = failure.body
+  if (typeof body !== 'string') return
+  if (body.length === 0) return
   if (body.length > 64 * 1024) body = body.slice(0, 64 * 1024)
   for (const [pattern, key] of PATTERNS) {
@@ -123,6 +202,18 @@ function main() {
   }
 }
+// Test-only export. An ES module exposes nothing without a named
+// export, so tests import `__internals` and assert against
+// `classifyFailure` / `isWriteCapableTool` directly. Production runs
+// go through `main()` and never read this object.
+export const __internals = {
+  classifyFailure,
+  isWriteCapableTool,
+  WRITE_CAPABLE_TOOLS,
+  PATTERNS,
+  buildHint,
+}
 try {
   main()
 } catch {

package/telegram-plugin/model-unavailable.ts CHANGED Viewed

@@ -216,20 +216,21 @@ export interface FormatCardOptions {
   slot?: string | null
   /** Anchor for relative-time formatting. Tests pin this; prod omits it. */
   now?: Date
+  /**
+   * True when the gateway has concurrently fired
+   * `fireFleetAutoFallback` for this event. Switches the card body
+   * from "What to try" (manual commands) to "Auto-failover in
+   * progress" so the user doesn't manually `/auth use` while a
+   * fleet swap is mid-flight. Caller MUST pass this when invoking
+   * the dispatcher in parallel — otherwise the card lies.
+   */
+  autoFallbackInFlight?: boolean
 }
 /**
  * Render the actionable ⚠️ card for a detected model-unavailable event.
  * HTML-formatted for Telegram. Stable shape so snapshot tests remain
  * meaningful when the suggestion list shifts.
- *
- *   ⚠️ <b>Model unavailable</b> on agent <b>name</b>
- *   Reason: quota exhausted (resets in 5h)
- *
- *   <b>What to try</b>
- *   • <code>/authfallback</code> — switch to the next account slot
- *   • <code>/auth add</code> — attach another subscription
- *   • <code>/usage</code> — show quota breakdown
  */
 export function formatModelUnavailableCard(
   detection: ModelUnavailableDetection,
@@ -243,11 +244,26 @@ export function formatModelUnavailableCard(
     `⚠️ <b>Model unavailable</b> on agent <b>${escHtml(agent)}</b>${slotPart}`,
     `Reason: ${reason}`,
     '',
-    '<b>What to try</b>',
-    '• <code>/authfallback</code> — switch to the next account slot',
-    '• <code>/auth add</code> — attach another subscription',
-    '• <code>/usage</code> — show quota breakdown',
   ]
+  if (opts.autoFallbackInFlight) {
+    // Quiet variant — the gateway already kicked off a fleet-wide
+    // swap; a follow-up announcement (causal-shape) will land within
+    // ~1s. Mention it explicitly so the user knows not to react.
+    lines.push(
+      '<i>Auto-failover in progress — see the announcement below.</i>',
+    )
+  } else {
+    // Default — kinds where auto-fallback can't help (network)
+    // or pre-Format-2 callers. Also: `/authfallback` is no longer
+    // a verb (post-RFC-H); `/auth use <label>` is the canonical
+    // fleet-wide swap.
+    lines.push(
+      '<b>What to try</b>',
+      '• <code>/auth use &lt;label&gt;</code> — switch the fleet to a healthy account',
+      '• <code>/auth add</code> — attach another subscription',
+      '• <code>/usage</code> — show quota breakdown',
+    )
+  }
   return lines.join('\n')
 }