npm - switchroom - Versions diffs - 0.13.9 → 0.13.11 - Mend

switchroom 0.13.9 → 0.13.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/dist/cli/switchroom.js +38 -14
package/dist/host-control/main.js +222 -7
package/examples/switchroom.yaml +25 -7
package/package.json +1 -1
package/profiles/_shared/telegram-style.md.hbs +1 -1
package/telegram-plugin/dist/bridge/bridge.js +23 -4
package/telegram-plugin/dist/gateway/gateway.js +540 -147
package/telegram-plugin/dist/server.js +23 -4
package/telegram-plugin/gateway/config-approval-handler.test.ts +246 -0
package/telegram-plugin/gateway/config-approval-handler.ts +284 -0
package/telegram-plugin/gateway/gateway.ts +218 -25
package/telegram-plugin/gateway/ipc-protocol.ts +72 -2
package/telegram-plugin/gateway/ipc-server.ts +101 -0
package/telegram-plugin/gateway/subagent-handback-inbound-builder.ts +185 -0
package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +69 -0
package/telegram-plugin/model-unavailable.ts +11 -1
package/telegram-plugin/operator-events.fixtures.json +14 -24
package/telegram-plugin/operator-events.ts +11 -2
package/telegram-plugin/session-tail.ts +71 -4
package/telegram-plugin/subagent-watcher.ts +39 -0
package/telegram-plugin/tests/model-unavailable.test.ts +15 -2
package/telegram-plugin/tests/operator-events-session-tail.test.ts +53 -2
package/telegram-plugin/tests/operator-events.test.ts +14 -7
package/telegram-plugin/tests/subagent-handback-decision.test.ts +112 -0
package/telegram-plugin/tests/subagent-handback-inbound-builder.test.ts +105 -0
package/telegram-plugin/tests/subagent-tracker-hooks.test.ts +61 -0
package/telegram-plugin/tests/subagent-watcher.test.ts +67 -1
package/telegram-plugin/uat/scenarios/jtbd-subagent-handback-dm.test.ts +95 -0
package/profiles/default/CLAUDE.md +0 -193

package/telegram-plugin/tests/operator-events-session-tail.test.ts CHANGED Viewed

@@ -56,13 +56,64 @@ describe('detectErrorInTranscriptLine — error detection', () => {
     expect(result!.kind).toBe('credit-exhausted')
   })
-  it('classifies overloaded_error as quota-exhausted', () => {
+  it('classifies overloaded_error as rate-limited (transient), NOT quota-exhausted', () => {
+    // A 529 "overloaded" is transient Anthropic server-capacity
+    // pressure — orthogonal to account quota. Classifying it
+    // quota-exhausted fired a false "Model unavailable" card + a
+    // self-cancelling fleet auto-fallback on every 529.
     const line = JSON.stringify({
       type: 'api_error',
       error: { type: 'overloaded_error', message: 'Overloaded' },
     })
     const result = detectErrorInTranscriptLine(line)
-    expect(result!.kind).toBe('quota-exhausted')
+    expect(result!.kind).toBe('rate-limited')
+    expect(result!.transient).toBe(true)
+    // An explicit `type:"api_error"` line (no retry state) = Claude
+    // surfaced the failure → terminal.
+    expect(result!.terminal).toBe(true)
+  })
+  it('marks an in-flight 529 retry transient + NOT terminal (suppressed)', () => {
+    // Real on-disk shape: a 529 Claude Code is internally retrying,
+    // annotated with retryAttempt < maxRetries.
+    const line = JSON.stringify({
+      type: 'system',
+      subtype: 'api_error',
+      error: { status: 529, type: 'overloaded_error', message: 'Overloaded' },
+      retryAttempt: 9,
+      maxRetries: 10,
+      retryInMs: 34479,
+    })
+    const result = detectErrorInTranscriptLine(line)
+    expect(result!.kind).toBe('rate-limited')
+    expect(result!.transient).toBe(true)
+    // 9 < 10 — still retrying → in-flight → the caller suppresses it.
+    expect(result!.terminal).toBe(false)
+  })
+  it('marks an exhausted 529 retry terminal (escalates)', () => {
+    const line = JSON.stringify({
+      type: 'system',
+      subtype: 'api_error',
+      error: { status: 529, type: 'overloaded_error', message: 'Overloaded' },
+      retryAttempt: 10,
+      maxRetries: 10,
+    })
+    const result = detectErrorInTranscriptLine(line)
+    expect(result!.kind).toBe('rate-limited')
+    expect(result!.transient).toBe(true)
+    // retries exhausted → terminal → escalates.
+    expect(result!.terminal).toBe(true)
+  })
+  it('marks non-transient errors terminal (always escalate)', () => {
+    const line = JSON.stringify({
+      type: 'api_error',
+      error: { type: 'authentication_error', message: 'expired' },
+    })
+    const result = detectErrorInTranscriptLine(line)
+    expect(result!.transient).toBe(false)
+    expect(result!.terminal).toBe(true)
   })
   it('returns null for lines without error field', () => {

package/telegram-plugin/tests/operator-events.test.ts CHANGED Viewed

@@ -57,13 +57,20 @@ describe('classifyClaudeError — credit-exhausted fixtures', () => {
   }
 })
-describe('classifyClaudeError — quota-exhausted fixtures', () => {
-  for (const fixture of fixtures['quota-exhausted']) {
-    it(`classifies: ${fixture._source}`, () => {
-      const input = '_value' in fixture ? fixture._value : fixture
-      expect(classifyClaudeError(input)).toBe('quota-exhausted')
-    })
-  }
+describe('classifyClaudeError — quota-exhausted', () => {
+  // classifyClaudeError is type/code/status-based and intentionally
+  // does NOT self-classify quota-exhausted: a genuine subscription
+  // usage-limit hit has no reliable Anthropic error TYPE — it is
+  // detected from the response TEXT. session-tail's `isApiErrorMessage`
+  // 429 branch + the `detectModelUnavailable` text path own quota
+  // detection. (`overloaded_error` used to be mapped here — wrongly;
+  // a 529 overload is transient server capacity, now `rate-limited`.)
+  it('no error TYPE maps to quota-exhausted (the text path owns it)', () => {
+    expect(fixtures['quota-exhausted']).toHaveLength(0)
+    expect(
+      classifyClaudeError({ type: 'overloaded_error', message: 'Overloaded' }),
+    ).not.toBe('quota-exhausted')
+  })
 })
 describe('classifyClaudeError — rate-limited fixtures', () => {

package/telegram-plugin/tests/subagent-handback-decision.test.ts ADDED Viewed

@@ -0,0 +1,112 @@
+/**
+ * Regression coverage for `decideSubagentHandback` — the gate the
+ * gateway's subagent-watcher `onFinish` callback runs to decide whether
+ * a finished sub-agent gets a handback turn injected.
+ *
+ * This is the highest-risk surface of the handback feature (#1650): it
+ * injects a fresh turn. Before this suite the decision lived inline in
+ * the gateway's `onFinish` closure with no automated test — a refactor
+ * that broke the `isBackground` gate would have fired handbacks for
+ * foreground sub-agents (double messages) with nothing to catch it.
+ * The decision is now a pure function; these cases pin every gate.
+ */
+import { describe, it, expect } from 'vitest'
+import { decideSubagentHandback } from '../gateway/subagent-handback-inbound-builder.js'
+const FIXED_NOW = 1_700_000_000_000
+const base = {
+  handbackEnvValue: undefined as string | undefined,
+  outcome: 'completed' as 'completed' | 'failed' | 'orphan',
+  isBackground: true,
+  fleetChatId: '777',
+  ownerChatId: '999',
+  taskDescription: 'Do the thing',
+  resultText: 'Done.',
+  nowMs: FIXED_NOW,
+}
+describe('decideSubagentHandback', () => {
+  it('delivers for a background completed sub-agent', () => {
+    const d = decideSubagentHandback({ ...base })
+    expect(d.deliver).toBe(true)
+    if (d.deliver) {
+      expect(d.chatId).toBe('777')
+      expect(d.inbound.meta.source).toBe('subagent_handback')
+      expect(d.inbound.chatId).toBe('777')
+    }
+  })
+  it('delivers for a background FAILED sub-agent', () => {
+    const d = decideSubagentHandback({ ...base, outcome: 'failed' })
+    expect(d.deliver).toBe(true)
+    if (d.deliver) expect(d.inbound.meta.outcome).toBe('failed')
+  })
+  it('skips a foreground sub-agent (handed back natively in-turn)', () => {
+    const d = decideSubagentHandback({ ...base, isBackground: false })
+    expect(d).toEqual({ deliver: false, reason: 'foreground' })
+  })
+  it("skips an 'orphan' outcome (stale historical-at-boot row)", () => {
+    const d = decideSubagentHandback({ ...base, outcome: 'orphan' })
+    expect(d).toEqual({ deliver: false, reason: 'outcome-not-terminal' })
+  })
+  it('skips when the kill-switch is set (SWITCHROOM_SUBAGENT_HANDBACK=0)', () => {
+    const d = decideSubagentHandback({ ...base, handbackEnvValue: '0' })
+    expect(d).toEqual({ deliver: false, reason: 'env-disabled' })
+  })
+  it('treats any non-"0" env value (incl. undefined) as enabled', () => {
+    expect(decideSubagentHandback({ ...base, handbackEnvValue: undefined }).deliver).toBe(true)
+    expect(decideSubagentHandback({ ...base, handbackEnvValue: '1' }).deliver).toBe(true)
+    expect(decideSubagentHandback({ ...base, handbackEnvValue: '' }).deliver).toBe(true)
+  })
+  it('falls back to the owner chat when the fleet entry is gone', () => {
+    const d = decideSubagentHandback({ ...base, fleetChatId: '' })
+    expect(d.deliver).toBe(true)
+    if (d.deliver) {
+      expect(d.chatId).toBe('999')
+      expect(d.inbound.chatId).toBe('999')
+    }
+  })
+  it('prefers the fleet chat id over the owner chat when both are present', () => {
+    const d = decideSubagentHandback({ ...base, fleetChatId: '777', ownerChatId: '999' })
+    expect(d.deliver).toBe(true)
+    if (d.deliver) expect(d.chatId).toBe('777')
+  })
+  it('skips when no chat resolves at all', () => {
+    const d = decideSubagentHandback({ ...base, fleetChatId: '', ownerChatId: '' })
+    expect(d).toEqual({ deliver: false, reason: 'no-chat' })
+  })
+  it('gate order: kill-switch wins over every other condition', () => {
+    // env-disabled even though it is a deliverable background completion.
+    const d = decideSubagentHandback({ ...base, handbackEnvValue: '0', isBackground: true })
+    expect(d).toEqual({ deliver: false, reason: 'env-disabled' })
+  })
+  it('gate order: outcome filter applies before the foreground check', () => {
+    // orphan + foreground — outcome filter is checked first.
+    const d = decideSubagentHandback({ ...base, outcome: 'orphan', isBackground: false })
+    expect(d).toEqual({ deliver: false, reason: 'outcome-not-terminal' })
+  })
+  it('carries the task description and result text into the inbound', () => {
+    const d = decideSubagentHandback({
+      ...base,
+      taskDescription: 'Migrate the DB',
+      resultText: 'Applied 3 migrations, 0 rows dropped.',
+    })
+    expect(d.deliver).toBe(true)
+    if (d.deliver) {
+      expect(d.inbound.text).toContain('Migrate the DB')
+      expect(d.inbound.text).toContain('Applied 3 migrations')
+    }
+  })
+})

package/telegram-plugin/tests/subagent-handback-inbound-builder.test.ts ADDED Viewed

@@ -0,0 +1,105 @@
+/**
+ * Pin the InboundMessage shape the gateway synthesizes when a
+ * *background* sub-agent finishes (conversational-pacing beat 4 — the
+ * handback). The `meta.source` string is load-bearing: the MCP channel
+ * notification wraps it as `<channel source="subagent_handback">`, and
+ * the agent prompt's beat 4 keys on exactly that tag. A regression that
+ * changes the source string silently breaks the wake-up — the model
+ * wouldn't recognise the turn as a handback cue.
+ */
+import { describe, it, expect } from 'vitest'
+import {
+  buildSubagentHandbackInbound,
+  HANDBACK_RESULT_MAX,
+  HANDBACK_DESC_MAX,
+} from '../gateway/subagent-handback-inbound-builder.js'
+const FIXED_NOW = 1_700_000_000_000
+describe('buildSubagentHandbackInbound', () => {
+  it('builds a completed-worker handback with the load-bearing meta.source', () => {
+    const inbound = buildSubagentHandbackInbound({
+      ctx: {
+        chatId: '12345',
+        taskDescription: 'Refactor the auth module',
+        resultText: 'Done — refactored, 4 tests added, all green.',
+        outcome: 'completed',
+      },
+      nowMs: FIXED_NOW,
+    })
+    expect(inbound.type).toBe('inbound')
+    expect(inbound.chatId).toBe('12345')
+    expect(inbound.userId).toBe(0)
+    expect(inbound.user).toBe('subagent-watcher')
+    expect(inbound.ts).toBe(FIXED_NOW)
+    expect(inbound.messageId).toBe(FIXED_NOW)
+    // The wake-up contract: bridge renders <channel source="subagent_handback">.
+    expect(inbound.meta.source).toBe('subagent_handback')
+    expect(inbound.meta.outcome).toBe('completed')
+    // Text carries the task, the result, and the beat-4 steer.
+    expect(inbound.text).toContain('Refactor the auth module')
+    expect(inbound.text).toContain('4 tests added, all green')
+    expect(inbound.text).toContain('beat 4')
+    expect(inbound.text).toMatch(/synthesise|synthesize/i)
+  })
+  it('builds a failed-worker handback that steers an honest report', () => {
+    const inbound = buildSubagentHandbackInbound({
+      ctx: {
+        chatId: '99',
+        taskDescription: 'Migrate the DB',
+        resultText: 'Hit a lock timeout on step 3.',
+        outcome: 'failed',
+      },
+      nowMs: FIXED_NOW,
+    })
+    expect(inbound.meta.source).toBe('subagent_handback')
+    expect(inbound.meta.outcome).toBe('failed')
+    expect(inbound.text).toContain('FAILED')
+    expect(inbound.text).toContain('lock timeout on step 3')
+    expect(inbound.text).toMatch(/did not complete|did not/i)
+  })
+  it('tolerates an empty result text (worker emitted no narrative)', () => {
+    const inbound = buildSubagentHandbackInbound({
+      ctx: {
+        chatId: '99',
+        taskDescription: 'Quiet task',
+        resultText: '',
+        outcome: 'completed',
+      },
+      nowMs: FIXED_NOW,
+    })
+    expect(inbound.meta.source).toBe('subagent_handback')
+    expect(inbound.text).toContain('left no summary')
+    // Still steers a handback even with no result text.
+    expect(inbound.text).toContain('beat 4')
+  })
+  it('caps an over-long result text and description', () => {
+    const inbound = buildSubagentHandbackInbound({
+      ctx: {
+        chatId: '99',
+        taskDescription: 'D'.repeat(HANDBACK_DESC_MAX + 500),
+        resultText: 'R'.repeat(HANDBACK_RESULT_MAX + 5000),
+        outcome: 'completed',
+      },
+      nowMs: FIXED_NOW,
+    })
+    // Body stays bounded — cap + the surrounding steer prose, well under
+    // Claude Code's hook/context limits.
+    expect(inbound.text.length).toBeLessThan(
+      HANDBACK_RESULT_MAX + HANDBACK_DESC_MAX + 800,
+    )
+    expect(inbound.text).toContain('…')
+  })
+  it('falls back to a placeholder when the description is blank', () => {
+    const inbound = buildSubagentHandbackInbound({
+      ctx: { chatId: '99', taskDescription: '   ', resultText: 'x', outcome: 'completed' },
+      nowMs: FIXED_NOW,
+    })
+    expect(inbound.text).toContain('(no description)')
+  })
+})

package/telegram-plugin/tests/subagent-tracker-hooks.test.ts CHANGED Viewed

@@ -210,6 +210,67 @@ describe('subagent-tracker-posttool', () => {
       | undefined
     expect(row?.status).toBe('failed')
   })
+  it('emits a foreground handback nudge for a foreground sub-agent', () => {
+    // conversational-pacing beat 4: a FOREGROUND sub-agent's PostToolUse
+    // fires at real completion, mid-parent-turn — emit an
+    // additionalContext nudge steering the parent to synthesise a
+    // handback.
+    runHook(PRETOOL_SCRIPT, {
+      session_id: 's-fg',
+      tool_name: 'Agent',
+      tool_use_id: 'toolu_fg001',
+      tool_input: { description: 'A foreground task', run_in_background: false },
+    })
+    const postResult = runHook(POSTTOOL_SCRIPT, {
+      tool_name: 'Agent',
+      tool_use_id: 'toolu_fg001',
+      tool_response: { result: 'Foreground work complete.', is_error: false },
+    })
+    expect(postResult.status).toBe(0)
+    expect(postResult.stdout).toContain('additionalContext')
+    expect(postResult.stdout).toContain('handback')
+    expect(postResult.stdout).toContain('PostToolUse')
+  })
+  it('does NOT emit a handback nudge for a background sub-agent', () => {
+    // A background sub-agent's PostToolUse fires on the launch ACK, not
+    // on completion — nudging "synthesise the handback" there is wrong.
+    // The gateway's subagent-watcher onFinish path owns background.
+    runHook(PRETOOL_SCRIPT, {
+      session_id: 's-bg',
+      tool_name: 'Agent',
+      tool_use_id: 'toolu_bg001',
+      tool_input: { description: 'A background task', run_in_background: true },
+    })
+    const postResult = runHook(POSTTOOL_SCRIPT, {
+      tool_name: 'Agent',
+      tool_use_id: 'toolu_bg001',
+      tool_response: { result: 'launched', is_error: false },
+    })
+    expect(postResult.status).toBe(0)
+    expect(postResult.stdout).not.toContain('additionalContext')
+  })
+  it('does NOT emit a handback nudge when SWITCHROOM_SUBAGENT_HANDBACK=0', () => {
+    runHook(PRETOOL_SCRIPT, {
+      session_id: 's-off',
+      tool_name: 'Agent',
+      tool_use_id: 'toolu_off001',
+      tool_input: { description: 'A foreground task', run_in_background: false },
+    })
+    const postResult = runHook(
+      POSTTOOL_SCRIPT,
+      {
+        tool_name: 'Agent',
+        tool_use_id: 'toolu_off001',
+        tool_response: { result: 'done', is_error: false },
+      },
+      { SWITCHROOM_SUBAGENT_HANDBACK: '0' },
+    )
+    expect(postResult.status).toBe(0)
+    expect(postResult.stdout).not.toContain('additionalContext')
+  })
 })
 describe('agent-dir resolution (RFC §Bug 2)', () => {

package/telegram-plugin/tests/subagent-watcher.test.ts CHANGED Viewed

@@ -367,7 +367,10 @@ describe('startSubagentWatcher', () => {
       return { agentDir, jsonlPath }
     }
-    function startWatcherSync(opts: { agentDir: string }): {
+    function startWatcherSync(opts: {
+      agentDir: string
+      onFinish?: Parameters<typeof startSubagentWatcher>[0]['onFinish']
+    }): {
       notifications: string[]
       poll: () => void
       watcher: ReturnType<typeof startSubagentWatcher>
@@ -380,6 +383,7 @@ describe('startSubagentWatcher', () => {
       const watcher = startSubagentWatcher({
         agentDir: opts.agentDir,
         sendNotification: (text) => notifications.push(text),
+        ...(opts.onFinish ? { onFinish: opts.onFinish } : {}),
         stallThresholdMs: 60_000,
         rescanMs: 500,
         now: () => Date.now(),
@@ -465,6 +469,68 @@ describe('startSubagentWatcher', () => {
       expect(entry?.toolCount).toBe(3)
     })
+    it('captures the full last narrative line into lastResultText (handback)', () => {
+      // lastSummaryLine keeps only the first line, 120 chars — a progress
+      // preview. lastResultText keeps the full last narrative emission:
+      // for a worker that IS its result summary, fed to the gateway's
+      // subagent_handback inbound (conversational-pacing beat 4).
+      const fullResult =
+        'Done. I refactored the auth module, added 4 tests, and all green.\n' +
+        'One caveat: the legacy token path still needs a follow-up.'
+      const content = buildJSONL(
+        subAgentUserMsg('Refactor auth'),
+        subAgentAssistantText(fullResult),
+      )
+      const { agentDir } = setupRealFs(content, 'deadbeef')
+      const h = startWatcherSync({ agentDir })
+      h.poll()
+      const entry = h.watcher.getRegistry().get('deadbeef')
+      expect(entry).toBeDefined()
+      // lastSummaryLine is the truncated first line only.
+      expect(entry?.lastSummaryLine).not.toMatch(/follow-up/)
+      // lastResultText keeps the whole thing — multi-line, both sentences.
+      expect(entry?.lastResultText).toContain('refactored the auth module')
+      expect(entry?.lastResultText).toContain('legacy token path still needs a follow-up')
+    })
+    it('onFinish carries description + resultText for the handback', () => {
+      // onFinish fires only on a POST-boot transition (a file already
+      // done at startup is historical and short-circuits). So: register
+      // the running sub-agent first, then append turn_duration.
+      const finishes: Array<{ description: string; resultText: string; outcome: string }> = []
+      const agentDir = join(tmpRoot, 'agent')
+      const subagentsDir = join(agentDir, '.claude', 'projects', 'p1', 'session-abc', 'subagents')
+      mkdirSync(subagentsDir, { recursive: true })
+      const jsonlPath = join(subagentsDir, 'agent-deadbeef.jsonl')
+      const h = startWatcherSync({
+        agentDir,
+        onFinish: ({ description, resultText, outcome }) => {
+          finishes.push({ description, resultText, outcome })
+        },
+      })
+      // Register the sub-agent as running (post-boot, not historical).
+      writeFileSync(
+        jsonlPath,
+        buildJSONL(
+          subAgentUserMsg('Run a long task'),
+          subAgentAssistantText('All set — migration applied cleanly, 0 rows dropped.'),
+        ),
+      )
+      h.poll()
+      expect(h.watcher.getRegistry().get('deadbeef')?.state).toBe('running')
+      // Now it finishes — onFinish must carry the result text.
+      appendFileSync(jsonlPath, buildJSONL(subAgentTurnDuration()))
+      h.poll()
+      expect(finishes.length).toBe(1)
+      expect(finishes[0].outcome).toBe('completed')
+      expect(finishes[0].resultText).toContain('migration applied cleanly')
+      // description stays the dispatch description, never the narrative.
+      expect(finishes[0].description).not.toMatch(/migration applied/)
+    })
     it('does NOT emit completion notification for a file already done at startup', () => {
       // File pre-exists with turn_end already written — agent was done before
       // the watcher started. No completion notification should fire.

package/telegram-plugin/uat/scenarios/jtbd-subagent-handback-dm.test.ts ADDED Viewed

@@ -0,0 +1,95 @@
+/**
+ * JTBD: "talking to my agent feels like talking to a capable person."
+ * Conversational-pacing beat 4 — the sub-agent handback.
+ *
+ * The gap this closes: a *background* sub-agent finishes decoupled from
+ * any turn boundary. The parent agent is idle when it completes, with no
+ * turn to receive the result — so without a deterministic nudge the user
+ * never hears back until they send the next message themselves. The
+ * agent looks like it dropped the delegated work on the floor.
+ *
+ * The fix (Option B): the gateway's subagent-watcher `onFinish` fires a
+ * `subagent_handback` inbound carrying the worker's result; the idle
+ * agent wakes and synthesises a user-facing handback in its own voice.
+ *
+ * What this scenario asserts: after the parent dispatches a background
+ * worker and ends its turn, a SECOND, unprompted bot message arrives —
+ * the handback — without the driver sending anything further. That
+ * second message is the whole point: proactive "the worker's done,
+ * here's what it found".
+ *
+ * Prompt strategy: explicit tool-naming (Option 1, mirroring
+ * `bg-sub-agent-dispatch-dm.test.ts`) — the scenario verifies the
+ * handback INFRA, not the model's delegation judgment, so the dispatch
+ * is pinned deterministic.
+ *
+ * Requires the standard DM-scenario env (see uat/SETUP.md §3-6). The
+ * test-harness override `SWITCHROOM_SUBAGENT_STALL_*` (switchroom.yaml)
+ * compresses the watcher's terminal-synthesis window so a background
+ * worker that never writes an explicit `turn_end` still terminates
+ * (and hands back) within the scenario budget instead of 5 min.
+ */
+import { describe, expect, it } from "vitest";
+import { spinUp } from "../harness.js";
+const BG_DISPATCH_PROMPT =
+  `Use the Agent tool with subagent_type "general-purpose" and ` +
+  `run_in_background: true to dispatch a worker with this exact task: ` +
+  `"Run \`echo HANDBACK-PROBE-OK\` via the Bash tool, then return a ` +
+  `one-line summary of what you did." After dispatching, send me a ` +
+  `brief one-line reply saying you have kicked off the background ` +
+  `worker, then END YOUR TURN — do NOT wait for the worker and do NOT ` +
+  `do the echo yourself.`;
+describe("uat: sub-agent handback — proactive beat-4 communication", () => {
+  it(
+    "delivers an unprompted handback message after a background worker finishes",
+    async () => {
+      const sc = await spinUp({ agent: "test-harness" });
+      try {
+        await sc.sendDM(BG_DISPATCH_PROMPT);
+        // Beat 1/5 of the dispatch turn: the parent acks that it kicked
+        // off the worker, then ends its turn. Generous timeout — a cold
+        // first turn plus the Agent dispatch can run long.
+        const ack = await sc.expectMessage(/.+/, {
+          from: "bot",
+          timeout: 60_000,
+        });
+        expect(ack.messageId).toBeGreaterThan(0);
+        // THE TEST: a second, distinct bot message arrives — the
+        // handback — WITHOUT the driver sending anything further. This
+        // is the deterministic beat-4 win: the watcher's onFinish fired
+        // a `subagent_handback` inbound, the idle agent woke, and it
+        // synthesised a user-facing report.
+        //
+        // Match: a bot message that is NOT the ack and reads like a
+        // completion report. The handback inbound steers the model to
+        // report what the worker found; we accept any of the natural
+        // wordings rather than pinning exact prose (the model owns the
+        // words — determinism contract).
+        const handback = await sc.expectMessage(
+          (m) =>
+            m.messageId !== ack.messageId &&
+            /\b(done|finished|complete|completed|wrapped up|worker|back|result)\b/i.test(
+              m.text,
+            ),
+          { from: "bot", timeout: 180_000 },
+        );
+        expect(handback.messageId).not.toBe(ack.messageId);
+        // The handback must be a real synthesised message, not an echo
+        // of the raw `<channel source="subagent_handback">` envelope or
+        // the steering text verbatim.
+        expect(handback.text).not.toMatch(/<channel/i);
+        expect(handback.text).not.toMatch(/source="subagent_handback"/i);
+        expect(handback.text.length).toBeGreaterThan(0);
+      } finally {
+        await sc.tearDown();
+      }
+    },
+    240_000,
+  );
+});