npm - switchroom - Versions diffs - 0.14.2 → 0.14.4 - Mend

switchroom 0.14.2 → 0.14.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/dist/cli/switchroom.js +81 -5
package/package.json +1 -1
package/telegram-plugin/dist/bridge/bridge.js +15 -2
package/telegram-plugin/dist/gateway/gateway.js +97 -132
package/telegram-plugin/dist/server.js +15 -2
package/telegram-plugin/gateway/gateway.ts +174 -29
package/telegram-plugin/gateway/inbound-delivery-machine-shadow.ts +33 -0
package/telegram-plugin/hooks/tool-label-pretool.mjs +13 -4
package/telegram-plugin/permission-rule.ts +22 -0
package/telegram-plugin/session-tail.ts +18 -0
package/telegram-plugin/tests/always-allow-grant.test.ts +147 -0
package/telegram-plugin/tests/always-allow-persist.test.ts +124 -0
package/telegram-plugin/tests/inbound-delivery-cutover-gate.test.ts +93 -0
package/telegram-plugin/tests/tool-activity-summary.test.ts +19 -0
package/telegram-plugin/tests/tool-label-sidecar.test.ts +36 -0
package/telegram-plugin/tool-activity-summary.ts +18 -0
package/telegram-plugin/tool-label-sidecar.ts +31 -5
package/telegram-plugin/uat/scenarios/fuzz-status-ask-dm.test.ts +39 -13

package/telegram-plugin/tests/always-allow-persist.test.ts ADDED Viewed

@@ -0,0 +1,124 @@
+/**
+ * Behavioral tests for the always-allow grant verification helper
+ * (`isRulePersisted` in `permission-rule.ts`).
+ *
+ * The `perm:always:*` handler in gateway.ts calls `isRulePersisted` after
+ * `switchroom agent grant` returns to confirm the rule actually landed in
+ * `tools.allow`. These tests drive the invariants that the structural test
+ * in `always-allow-grant.test.ts` can only pin by text-slicing:
+ *
+ *   1. exec "succeeds" but the reloaded allow-list does NOT contain the
+ *      rule  →  isRulePersisted returns false  (loud-failure path).
+ *   2. reloaded allow-list DOES contain the rule  →  returns true
+ *      (success path).
+ *   3. Realistic rule values (`Skill(garmin)`, `Bash`, `mcp__x__y`) round-
+ *      trip correctly — guards against normalization divergence where the
+ *      value written by `agent grant` and the value read back from yaml
+ *      diverge in shape.
+ *
+ * Because `isRulePersisted` is a pure function (takes the already-resolved
+ * allow-list directly), no mocking of `loadSwitchroomConfig` /
+ * `resolveAgentConfig` is required here.  The handler's interaction with
+ * those config loaders is covered by the structural test.
+ */
+import { describe, it, expect } from 'vitest'
+import { isRulePersisted, resolveAlwaysAllowRule } from '../permission-rule.js'
+// ---------------------------------------------------------------------------
+// Core behavioral invariants
+// ---------------------------------------------------------------------------
+describe('isRulePersisted — failure path', () => {
+  it('returns false when the allow-list is empty (exec succeeded but nothing was written)', () => {
+    expect(isRulePersisted([], 'Bash')).toBe(false)
+  })
+  it('returns false when the rule is absent from a non-empty allow-list', () => {
+    expect(isRulePersisted(['Read', 'Write', 'Edit'], 'Bash')).toBe(false)
+  })
+  it('returns false for a Skill rule when the list only contains the bare tool name', () => {
+    // `agent grant` for Skill(garmin) should write `Skill(garmin)`, not
+    // `Skill`. If the yaml ended up with the wrong shape, the verification
+    // must catch it.
+    expect(isRulePersisted(['Skill'], 'Skill(garmin)')).toBe(false)
+  })
+  it('returns false for a bare tool name when only the parameterized form is present', () => {
+    expect(isRulePersisted(['Skill(garmin)'], 'Bash')).toBe(false)
+  })
+  it('returns false when a similar-looking rule is present but not an exact match', () => {
+    expect(isRulePersisted(['mcp__garmin__read_activity'], 'mcp__garmin__list_activities')).toBe(false)
+  })
+})
+describe('isRulePersisted — success path', () => {
+  it('returns true when the exact rule is present', () => {
+    expect(isRulePersisted(['Read', 'Bash', 'Write'], 'Bash')).toBe(true)
+  })
+  it('returns true when the rule is the only entry', () => {
+    expect(isRulePersisted(['Skill(garmin)'], 'Skill(garmin)')).toBe(true)
+  })
+  it('returns true for a namespaced MCP tool rule', () => {
+    expect(isRulePersisted(['mcp__garmin__list_activities', 'Bash'], 'mcp__garmin__list_activities')).toBe(true)
+  })
+})
+// ---------------------------------------------------------------------------
+// Round-trip: resolveAlwaysAllowRule → isRulePersisted
+// Simulates the full handler flow: resolve the rule from a permission_request,
+// "grant" it (allow-list contains the resolved rule.rule), then verify.
+// Guards against normalization divergence between the value the handler
+// resolves and the value `agent grant` writes + the config reader returns.
+// ---------------------------------------------------------------------------
+describe('rule round-trip through isRulePersisted', () => {
+  it('Skill tool: resolved rule persists correctly', () => {
+    const rule = resolveAlwaysAllowRule('Skill', JSON.stringify({ skill: 'garmin' }))
+    expect(rule).not.toBeNull()
+    // Simulate: allow-list now contains the rule that `agent grant` wrote.
+    expect(isRulePersisted([rule!.rule], rule!.rule)).toBe(true)
+    // Confirm the written form is `Skill(garmin)` — not a bare `Skill`.
+    expect(rule!.rule).toBe('Skill(garmin)')
+  })
+  it('Skill tool: absent rule is detected', () => {
+    const rule = resolveAlwaysAllowRule('Skill', JSON.stringify({ skill: 'garmin' }))
+    expect(rule).not.toBeNull()
+    // allow-list was not updated (silent grant failure).
+    expect(isRulePersisted([], rule!.rule)).toBe(false)
+    expect(isRulePersisted(['Skill'], rule!.rule)).toBe(false)
+  })
+  it('Bash tool: round-trips correctly', () => {
+    const rule = resolveAlwaysAllowRule('Bash', undefined)
+    expect(rule).not.toBeNull()
+    expect(rule!.rule).toBe('Bash')
+    expect(isRulePersisted(['Bash', 'Read'], rule!.rule)).toBe(true)
+    expect(isRulePersisted(['Read'], rule!.rule)).toBe(false)
+  })
+  it('MCP tool: round-trips with exact namespaced form', () => {
+    const toolName = 'mcp__garmin__list_activities'
+    const rule = resolveAlwaysAllowRule(toolName, undefined)
+    expect(rule).not.toBeNull()
+    expect(rule!.rule).toBe(toolName)
+    expect(isRulePersisted([toolName], rule!.rule)).toBe(true)
+    expect(isRulePersisted(['mcp__garmin__read_activity'], rule!.rule)).toBe(false)
+  })
+  it('multiple Skill tools do not cross-contaminate', () => {
+    const garmin = resolveAlwaysAllowRule('Skill', JSON.stringify({ skill: 'garmin' }))
+    const mail = resolveAlwaysAllowRule('Skill', JSON.stringify({ skill: 'mail' }))
+    expect(garmin).not.toBeNull()
+    expect(mail).not.toBeNull()
+    // Allow-list only has garmin's rule.
+    const allowList = [garmin!.rule]
+    expect(isRulePersisted(allowList, garmin!.rule)).toBe(true)
+    expect(isRulePersisted(allowList, mail!.rule)).toBe(false)
+  })
+})

package/telegram-plugin/tests/inbound-delivery-cutover-gate.test.ts ADDED Viewed

@@ -0,0 +1,93 @@
+/**
+ * PR3b cutover — the turn-in-flight GATE now reads the delivery state
+ * machine (`isMachineInTurn`) instead of the PR3b `claudeBusyKeys` set.
+ *
+ * The bug this closes (gymbro/clerk, 2026-05-28): `claudeBusyKeys` is a
+ * per-delivery Set — every delivery `.add`s a key, but turn-end `.delete`s
+ * exactly one. When a turn-end is missed (or fires under a non-matching
+ * key) the set keeps an orphan, `size > 0` reads true forever, and EVERY
+ * subsequent inbound buffers as "held mid-turn" until the 5-min
+ * framework-fallback force-drains it.
+ *
+ * The machine cannot accumulate orphans: global state holds ONE
+ * `activeTurn`, so any matching turnEnd returns it to idle, and the TTL
+ * `tick` self-heals a missed turnEnd. These tests pin both the normal
+ * reopen and the dangle-recovery path on the accessors the gate reads.
+ */
+import { describe, expect, it, beforeEach } from 'vitest'
+import {
+  shadowEmit,
+  isMachineInTurn,
+  isDeliveryCutoverEnabled,
+  __shadowResetForTests,
+} from '../gateway/inbound-delivery-machine-shadow.js'
+import { TURN_TTL_MS, type ChatKey } from '../gateway/inbound-delivery-machine.js'
+const KEY_A = '111:_' as ChatKey
+const KEY_B = '222:_' as ChatKey
+function inbound(key: ChatKey, at: number, msgId = 1) {
+  shadowEmit({ kind: 'inbound', key, msg: { msgId, isSteering: false, payload: null }, at })
+}
+describe('PR3b cutover gate accessors', () => {
+  beforeEach(() => __shadowResetForTests())
+  it('enabled by default (shadow on, no kill-switch in test env)', () => {
+    expect(isDeliveryCutoverEnabled()).toBe(true)
+  })
+  it('reads idle before any turn (bridge alive)', () => {
+    shadowEmit({ kind: 'bridgeUp', at: 1000 })
+    expect(isMachineInTurn()).toBe(false)
+  })
+  it('flips in-turn on a fresh inbound and reopens on turnEnd (the gate reopen)', () => {
+    shadowEmit({ kind: 'bridgeUp', at: 1000 })
+    inbound(KEY_A, 2000)
+    expect(isMachineInTurn()).toBe(true)
+    shadowEmit({ kind: 'turnEnd', key: KEY_A, at: 3000, outboundEmitted: true })
+    // Gate reopens immediately — this is the path claudeBusyKeys dangled on.
+    expect(isMachineInTurn()).toBe(false)
+  })
+  it('self-heals a MISSED turnEnd via the TTL tick (the dangle the fix kills)', () => {
+    shadowEmit({ kind: 'bridgeUp', at: 1000 })
+    // Turn A starts via enqueue (turnStart), then turn B starts before A's
+    // turnEnd ever lands — the orphan scenario. The machine keeps
+    // activeTurn=A (turnStart is a no-op on global when already in_turn),
+    // so a later turnEnd(B) does NOT match and would leave A dangling.
+    shadowEmit({ kind: 'turnStart', key: KEY_A, at: 2000 })
+    shadowEmit({ kind: 'turnStart', key: KEY_B, at: 3000 })
+    shadowEmit({ kind: 'turnEnd', key: KEY_B, at: 4000, outboundEmitted: true })
+    // Without tick, the gate would still read in-turn (activeTurn=A stuck).
+    expect(isMachineInTurn()).toBe(true)
+    // TTL tick past A's start clears the orphan and reopens the gate —
+    // the structural guarantee claudeBusyKeys lacked.
+    shadowEmit({ kind: 'tick', now: 2000 + TURN_TTL_MS + 1 })
+    expect(isMachineInTurn()).toBe(false)
+  })
+  it('does NOT clear a long-but-ACTIVE turn (modelOutbound suppression)', () => {
+    shadowEmit({ kind: 'bridgeUp', at: 1000 })
+    shadowEmit({ kind: 'turnStart', key: KEY_A, at: 2000 })
+    // Model is still streaming just before the TTL boundary.
+    const justBeforeTtl = 2000 + TURN_TTL_MS - 5_000
+    shadowEmit({ kind: 'modelOutbound', key: KEY_A, at: justBeforeTtl })
+    // Tick past TTL — but recent outbound is within the suppression window,
+    // so the turn is NOT cleared (parity with the imperative silence-poke).
+    shadowEmit({ kind: 'tick', now: 2000 + TURN_TTL_MS + 1 })
+    expect(isMachineInTurn()).toBe(true)
+  })
+  it('a buffered sibling inbound does not change the active turn', () => {
+    shadowEmit({ kind: 'bridgeUp', at: 1000 })
+    inbound(KEY_A, 2000) // fresh turn A
+    inbound(KEY_B, 2500) // mid-turn — buffered, must NOT start a new turn
+    expect(isMachineInTurn()).toBe(true)
+    shadowEmit({ kind: 'turnEnd', key: KEY_A, at: 3000, outboundEmitted: true })
+    // A ended; nothing else active → gate reopens so B can drain.
+    expect(isMachineInTurn()).toBe(false)
+  })
+})

package/telegram-plugin/tests/tool-activity-summary.test.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import {
   verbForTool,
   describeToolUse,
   appendActivityLine,
+  appendActivityLabel,
   renderActivityFeed,
   MIRROR_MAX_LINES,
 } from "../tool-activity-summary.js";
@@ -328,3 +329,21 @@ describe("appendActivityLine + renderActivityFeed — accumulating draft feed",
     expect(renderActivityFeed([])).toBeNull();
   });
 });
+describe("appendActivityLabel — precomputed label feed (tool_label path)", () => {
+  it("accumulates precomputed labels, dedups consecutive, ignores empty", () => {
+    const lines: string[] = [];
+    expect(appendActivityLabel(lines, "Searching memory")).toBe("· Searching memory");
+    expect(appendActivityLabel(lines, "List workspace")).toBe(
+      "· Searching memory\n· List workspace",
+    );
+    // consecutive dup collapses
+    appendActivityLabel(lines, "List workspace");
+    expect(lines).toEqual(["Searching memory", "List workspace"]);
+    // empty / whitespace → null, no push
+    expect(appendActivityLabel(lines, "")).toBeNull();
+    expect(appendActivityLabel(lines, "   ")).toBeNull();
+    expect(appendActivityLabel(lines, undefined)).toBeNull();
+    expect(lines.length).toBe(2);
+  });
+});

package/telegram-plugin/tests/tool-label-sidecar.test.ts CHANGED Viewed

@@ -83,6 +83,42 @@ describe('tool-label-sidecar', () => {
     s.stop()
   })
+  it('replays pre-existing rows to a subscriber that attaches after construction', () => {
+    // Regression: the gateway's session-tail constructs the sidecar (which
+    // does an initial drain of the file) and only THEN wires `onLabel`. On a
+    // fast/clustered turn — or a resumed/flipped session — the hook has
+    // already written labels, so the initial drain consumed them with an
+    // empty subscriber set. Before the replay fix the late subscriber got
+    // nothing, so the real-time draft-mirror never fired (every label lost).
+    const sessionId = 'sess-replay'
+    const f = join(stateDir, `tool-labels-${sessionId}.jsonl`)
+    writeFileSync(
+      f,
+      JSON.stringify({ ts: 1, tool_use_id: 'A', agent_id: 'g', label: 'Reading foo.ts', tool_name: 'Read' }) + '\n' +
+      JSON.stringify({ ts: 2, tool_use_id: 'B', agent_id: 'g', label: 'List workspace', tool_name: 'Bash' }) + '\n',
+    )
+    const sched = makeManualScheduler()
+    const s = createToolLabelSidecar({ stateDir, sessionId, scheduler: sched })
+    // Subscribe AFTER construction (the real ensureSidecar ordering).
+    const seen: Array<[string, string, string]> = []
+    s.onLabel((id, label, toolName) => seen.push([id, label, toolName]))
+    expect(seen).toEqual([
+      ['A', 'Reading foo.ts', 'Read'],
+      ['B', 'List workspace', 'Bash'],
+    ])
+    // And a row appended afterwards still reaches the subscriber exactly once
+    // (no double-emit of the replayed rows).
+    appendFileSync(f, JSON.stringify({ ts: 3, tool_use_id: 'C', agent_id: 'g', label: 'Searching memory', tool_name: 'mcp__hindsight__recall' }) + '\n')
+    s.poll()
+    expect(seen).toEqual([
+      ['A', 'Reading foo.ts', 'Read'],
+      ['B', 'List workspace', 'Bash'],
+      ['C', 'Searching memory', 'mcp__hindsight__recall'],
+    ])
+    s.stop()
+  })
   it('ignores malformed JSON lines', () => {
     const sessionId = 'sess4'
     const sched = makeManualScheduler()

package/telegram-plugin/tool-activity-summary.ts CHANGED Viewed

@@ -382,3 +382,21 @@ export function renderActivityFeed(lines: string[]): string | null {
   const body = shown.map((l) => `· ${l}`).join("\n");
   return hidden > 0 ? `· +${hidden} earlier…\n${body}` : body;
 }
+/**
+ * Like appendActivityLine, but for a pre-computed label (from the
+ * real-time PreToolUse sidecar / `tool_label` event) — the hook already
+ * rendered the friendly text, so we skip describeToolUse. Returns the
+ * rendered feed, or null when the label is empty.
+ */
+export function appendActivityLabel(
+  lines: string[],
+  label: string | undefined,
+): string | null {
+  const l = (label ?? "").trim();
+  if (l.length === 0) return null;
+  if (lines.length === 0 || lines[lines.length - 1] !== l) {
+    lines.push(l);
+  }
+  return renderActivityFeed(lines);
+}

package/telegram-plugin/tool-label-sidecar.ts CHANGED Viewed

@@ -40,8 +40,11 @@ export interface ToolLabelRow {
 export interface ToolLabelSidecar {
   /** Synchronous label lookup. */
   getLabel(toolUseId: string): string | undefined
-  /** Subscribe to "label arrived" notifications. */
-  onLabel(cb: (toolUseId: string, label: string) => void): () => void
+  /** Subscribe to "label arrived" notifications. Rows ingested before
+   *  subscribing are replayed to `cb` first; after that it fires once per new
+   *  sidecar line, in real time (~pollMs after the hook's appendFileSync), independent
+   *  of when the claude transcript flushes. `toolName` lets subscribers filter surface tools (reply/react) from a live feed. */
+  onLabel(cb: (toolUseId: string, label: string, toolName: string) => void): () => void
   /** Force a re-poll (tests). */
   poll(): void
   /** Stop polling and release resources. */
@@ -63,7 +66,15 @@ export interface SidecarOptions {
 export function createToolLabelSidecar(opts: SidecarOptions): ToolLabelSidecar {
   const path = join(opts.stateDir, `tool-labels-${opts.sessionId}.jsonl`)
   const labels = new Map<string, string>()
-  const subscribers = new Set<(toolUseId: string, label: string) => void>()
+  // Ordered log of every row ingested so far (label + tool_name), used to
+  // replay history to a subscriber that attaches AFTER rows were already
+  // read. Without this, a sidecar whose file is already populated when
+  // `onLabel` is wired (fast/clustered turns, resumed/flipped sessions —
+  // the gateway's `ensureSidecar` subscribes *after* construction's initial
+  // drain) would silently lose every pre-existing label, breaking the
+  // real-time draft-mirror determinism the sidecar exists to provide.
+  const seen: Array<{ toolUseId: string; label: string; toolName: string }> = []
+  const subscribers = new Set<(toolUseId: string, label: string, toolName: string) => void>()
   let offset = 0
   let stopped = false
@@ -84,13 +95,19 @@ export function createToolLabelSidecar(opts: SidecarOptions): ToolLabelSidecar {
       } catch {
         continue
       }
-      if (!row || typeof row.tool_use_id !== 'string' || typeof row.label !== 'string') continue
+      if (
+        !row ||
+        typeof row.tool_use_id !== 'string' ||
+        typeof row.label !== 'string' ||
+        typeof row.tool_name !== 'string'
+      ) continue
       // First write wins — sidecar lines are append-only and we don't
       // expect duplicates, but if one lands we keep the earliest.
       if (labels.has(row.tool_use_id)) continue
       labels.set(row.tool_use_id, row.label)
+      seen.push({ toolUseId: row.tool_use_id, label: row.label, toolName: row.tool_name })
       for (const cb of subscribers) {
-        try { cb(row.tool_use_id, row.label) } catch { /* ignore */ }
+        try { cb(row.tool_use_id, row.label, row.tool_name) } catch { /* ignore */ }
       }
     }
   }
@@ -126,6 +143,15 @@ export function createToolLabelSidecar(opts: SidecarOptions): ToolLabelSidecar {
       return labels.get(toolUseId)
     },
     onLabel(cb) {
+      // Replay rows already ingested before this subscriber attached, then
+      // register for future rows. Single-threaded: no row can be ingested
+      // between the replay loop and the add, so each row reaches `cb`
+      // exactly once. This is what makes the draft-mirror deterministic
+      // regardless of when the gateway subscribes relative to the hook's
+      // writes (see the `seen` declaration above).
+      for (const r of seen) {
+        try { cb(r.toolUseId, r.label, r.toolName) } catch { /* ignore */ }
+      }
       subscribers.add(cb)
       return () => subscribers.delete(cb)
     },

package/telegram-plugin/uat/scenarios/fuzz-status-ask-dm.test.ts CHANGED Viewed

@@ -215,9 +215,13 @@ const CC2_CASES: readonly CC2Case[] = [
   },
   {
     name: "long-running with planned check-ins",
+    // Use python time.sleep, NOT the `sleep` command — Claude Code's bash
+    // sandbox blocks standalone `sleep` ("foreground sleep is sandboxed
+    // away"), which made this case un-runnable (agent replied instantly).
     prompt:
-      "Run `bash` with `sleep 5 && echo step1`, send a brief update, " +
-      "then `sleep 5 && echo step2`, send another brief update, then " +
+      "Run `bash` with `python3 -c 'import time; time.sleep(5)'` then echo " +
+      "step1, send a brief update, then `python3 -c 'import time; " +
+      "time.sleep(5)'` then echo step2, send another brief update, then " +
       "send a final 'done' as your answer.",
   },
 ];
@@ -262,12 +266,27 @@ async function assertMidTurnSilent(
     )
     .join("\n");
-  const last = collected[collected.length - 1];
-  expect(last.silent, `final answer was silent — won't ping. Trail:\n${trail}`).toBe(
-    false,
-  );
-  const midTurn = collected.slice(0, -1);
+  // The model habitually emits a trailing trivial confirmation ("Done.",
+  // "Sent.", "OK") as a separate SILENT message AFTER its real pinged
+  // answer. That's pacing noise (the turn-pacing directive discourages
+  // it), not the final answer — so don't treat it as the
+  // "final-answer-must-ping" target. Find the last SUBSTANTIVE message
+  // and assert that one pinged; trailing trivial confirmations are
+  // ignored for this invariant (they're correctly silent anyway).
+  const TRIVIAL_TAIL = /^(done|sent|ok|okay|ack|got it|hope (that|this) helps)\b[.! ]*$/i;
+  const isTrivial = (m: ObservedMessage) => TRIVIAL_TAIL.test(m.text.trim());
+  let finalIdx = collected.length - 1;
+  while (finalIdx > 0 && isTrivial(collected[finalIdx])) finalIdx--;
+  const finalAnswer = collected[finalIdx];
+  expect(
+    finalAnswer.silent,
+    `final substantive answer was silent — won't ping. Trail:\n${trail}`,
+  ).toBe(false);
+  // Everything BEFORE the final substantive answer must be silent
+  // (mid-turn updates ping-free). Trailing trivial confirmations after
+  // it are already silent and are not "mid-turn" — exclude them too.
+  const midTurn = collected.slice(0, finalIdx);
   const loudMidTurn = midTurn.filter((m) => !m.silent);
   expect(
     loudMidTurn.length,
@@ -334,12 +353,19 @@ async function assertSilencePokeFires(
   // Single bash call so the poke piggybacks the single tool result.
   // Without the explicit "no replies" instruction the model might
   // soft-commit; that resets the silence clock but a single >75s
-  // sleep still pushes post-commit silence past the threshold.
+  // wait still pushes post-commit silence past the threshold.
+  //
+  // Use python time.sleep, NOT the `sleep` command — Claude Code's bash
+  // sandbox blocks standalone `sleep` ("foreground sleep is sandboxed
+  // away to prevent burning cache windows"), so a `sleep 80` prompt made
+  // the agent reply instantly instead of going silent, breaking this
+  // case. python3 time.sleep is a genuine foreground wait the sandbox
+  // doesn't special-case.
   const prompt =
-    `Run exactly one Bash tool call: \`sleep ${sleepSeconds}\`. Do NOT ` +
-    `send any reply before the sleep completes — no soft commit, no ` +
-    `mid-turn updates. When the sleep returns, send one brief 'done' ` +
-    `reply.`;
+    `Run exactly one Bash tool call: \`python3 -c 'import time; ` +
+    `time.sleep(${sleepSeconds})'\`. Do NOT send any reply before it ` +
+    `completes — no soft commit, no mid-turn updates. When it returns, ` +
+    `send one brief 'done' reply.`;
   await scenario.sendDM(prompt);