switchroom 0.13.9 → 0.13.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/dist/cli/switchroom.js +38 -14
  2. package/dist/host-control/main.js +222 -7
  3. package/examples/switchroom.yaml +25 -7
  4. package/package.json +1 -1
  5. package/profiles/_shared/telegram-style.md.hbs +1 -1
  6. package/telegram-plugin/dist/bridge/bridge.js +23 -4
  7. package/telegram-plugin/dist/gateway/gateway.js +540 -147
  8. package/telegram-plugin/dist/server.js +23 -4
  9. package/telegram-plugin/gateway/config-approval-handler.test.ts +246 -0
  10. package/telegram-plugin/gateway/config-approval-handler.ts +284 -0
  11. package/telegram-plugin/gateway/gateway.ts +218 -25
  12. package/telegram-plugin/gateway/ipc-protocol.ts +72 -2
  13. package/telegram-plugin/gateway/ipc-server.ts +101 -0
  14. package/telegram-plugin/gateway/subagent-handback-inbound-builder.ts +185 -0
  15. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +69 -0
  16. package/telegram-plugin/model-unavailable.ts +11 -1
  17. package/telegram-plugin/operator-events.fixtures.json +14 -24
  18. package/telegram-plugin/operator-events.ts +11 -2
  19. package/telegram-plugin/session-tail.ts +71 -4
  20. package/telegram-plugin/subagent-watcher.ts +39 -0
  21. package/telegram-plugin/tests/model-unavailable.test.ts +15 -2
  22. package/telegram-plugin/tests/operator-events-session-tail.test.ts +53 -2
  23. package/telegram-plugin/tests/operator-events.test.ts +14 -7
  24. package/telegram-plugin/tests/subagent-handback-decision.test.ts +112 -0
  25. package/telegram-plugin/tests/subagent-handback-inbound-builder.test.ts +105 -0
  26. package/telegram-plugin/tests/subagent-tracker-hooks.test.ts +61 -0
  27. package/telegram-plugin/tests/subagent-watcher.test.ts +67 -1
  28. package/telegram-plugin/uat/scenarios/jtbd-subagent-handback-dm.test.ts +95 -0
  29. package/profiles/default/CLAUDE.md +0 -193
@@ -56,13 +56,64 @@ describe('detectErrorInTranscriptLine — error detection', () => {
56
56
  expect(result!.kind).toBe('credit-exhausted')
57
57
  })
58
58
 
59
- it('classifies overloaded_error as quota-exhausted', () => {
59
+ it('classifies overloaded_error as rate-limited (transient), NOT quota-exhausted', () => {
60
+ // A 529 "overloaded" is transient Anthropic server-capacity
61
+ // pressure — orthogonal to account quota. Classifying it
62
+ // quota-exhausted fired a false "Model unavailable" card + a
63
+ // self-cancelling fleet auto-fallback on every 529.
60
64
  const line = JSON.stringify({
61
65
  type: 'api_error',
62
66
  error: { type: 'overloaded_error', message: 'Overloaded' },
63
67
  })
64
68
  const result = detectErrorInTranscriptLine(line)
65
- expect(result!.kind).toBe('quota-exhausted')
69
+ expect(result!.kind).toBe('rate-limited')
70
+ expect(result!.transient).toBe(true)
71
+ // An explicit `type:"api_error"` line (no retry state) = Claude
72
+ // surfaced the failure → terminal.
73
+ expect(result!.terminal).toBe(true)
74
+ })
75
+
76
+ it('marks an in-flight 529 retry transient + NOT terminal (suppressed)', () => {
77
+ // Real on-disk shape: a 529 Claude Code is internally retrying,
78
+ // annotated with retryAttempt < maxRetries.
79
+ const line = JSON.stringify({
80
+ type: 'system',
81
+ subtype: 'api_error',
82
+ error: { status: 529, type: 'overloaded_error', message: 'Overloaded' },
83
+ retryAttempt: 9,
84
+ maxRetries: 10,
85
+ retryInMs: 34479,
86
+ })
87
+ const result = detectErrorInTranscriptLine(line)
88
+ expect(result!.kind).toBe('rate-limited')
89
+ expect(result!.transient).toBe(true)
90
+ // 9 < 10 — still retrying → in-flight → the caller suppresses it.
91
+ expect(result!.terminal).toBe(false)
92
+ })
93
+
94
+ it('marks an exhausted 529 retry terminal (escalates)', () => {
95
+ const line = JSON.stringify({
96
+ type: 'system',
97
+ subtype: 'api_error',
98
+ error: { status: 529, type: 'overloaded_error', message: 'Overloaded' },
99
+ retryAttempt: 10,
100
+ maxRetries: 10,
101
+ })
102
+ const result = detectErrorInTranscriptLine(line)
103
+ expect(result!.kind).toBe('rate-limited')
104
+ expect(result!.transient).toBe(true)
105
+ // retries exhausted → terminal → escalates.
106
+ expect(result!.terminal).toBe(true)
107
+ })
108
+
109
+ it('marks non-transient errors terminal (always escalate)', () => {
110
+ const line = JSON.stringify({
111
+ type: 'api_error',
112
+ error: { type: 'authentication_error', message: 'expired' },
113
+ })
114
+ const result = detectErrorInTranscriptLine(line)
115
+ expect(result!.transient).toBe(false)
116
+ expect(result!.terminal).toBe(true)
66
117
  })
67
118
 
68
119
  it('returns null for lines without error field', () => {
@@ -57,13 +57,20 @@ describe('classifyClaudeError — credit-exhausted fixtures', () => {
57
57
  }
58
58
  })
59
59
 
60
- describe('classifyClaudeError — quota-exhausted fixtures', () => {
61
- for (const fixture of fixtures['quota-exhausted']) {
62
- it(`classifies: ${fixture._source}`, () => {
63
- const input = '_value' in fixture ? fixture._value : fixture
64
- expect(classifyClaudeError(input)).toBe('quota-exhausted')
65
- })
66
- }
60
+ describe('classifyClaudeError — quota-exhausted', () => {
61
+ // classifyClaudeError is type/code/status-based and intentionally
62
+ // does NOT self-classify quota-exhausted: a genuine subscription
63
+ // usage-limit hit has no reliable Anthropic error TYPE — it is
64
+ // detected from the response TEXT. session-tail's `isApiErrorMessage`
65
+ // 429 branch + the `detectModelUnavailable` text path own quota
66
+ // detection. (`overloaded_error` used to be mapped here — wrongly;
67
+ // a 529 overload is transient server capacity, now `rate-limited`.)
68
+ it('no error TYPE maps to quota-exhausted (the text path owns it)', () => {
69
+ expect(fixtures['quota-exhausted']).toHaveLength(0)
70
+ expect(
71
+ classifyClaudeError({ type: 'overloaded_error', message: 'Overloaded' }),
72
+ ).not.toBe('quota-exhausted')
73
+ })
67
74
  })
68
75
 
69
76
  describe('classifyClaudeError — rate-limited fixtures', () => {
@@ -0,0 +1,112 @@
1
+ /**
2
+ * Regression coverage for `decideSubagentHandback` — the gate the
3
+ * gateway's subagent-watcher `onFinish` callback runs to decide whether
4
+ * a finished sub-agent gets a handback turn injected.
5
+ *
6
+ * This is the highest-risk surface of the handback feature (#1650): it
7
+ * injects a fresh turn. Before this suite the decision lived inline in
8
+ * the gateway's `onFinish` closure with no automated test — a refactor
9
+ * that broke the `isBackground` gate would have fired handbacks for
10
+ * foreground sub-agents (double messages) with nothing to catch it.
11
+ * The decision is now a pure function; these cases pin every gate.
12
+ */
13
+
14
+ import { describe, it, expect } from 'vitest'
15
+ import { decideSubagentHandback } from '../gateway/subagent-handback-inbound-builder.js'
16
+
17
+ const FIXED_NOW = 1_700_000_000_000
18
+
19
+ const base = {
20
+ handbackEnvValue: undefined as string | undefined,
21
+ outcome: 'completed' as 'completed' | 'failed' | 'orphan',
22
+ isBackground: true,
23
+ fleetChatId: '777',
24
+ ownerChatId: '999',
25
+ taskDescription: 'Do the thing',
26
+ resultText: 'Done.',
27
+ nowMs: FIXED_NOW,
28
+ }
29
+
30
+ describe('decideSubagentHandback', () => {
31
+ it('delivers for a background completed sub-agent', () => {
32
+ const d = decideSubagentHandback({ ...base })
33
+ expect(d.deliver).toBe(true)
34
+ if (d.deliver) {
35
+ expect(d.chatId).toBe('777')
36
+ expect(d.inbound.meta.source).toBe('subagent_handback')
37
+ expect(d.inbound.chatId).toBe('777')
38
+ }
39
+ })
40
+
41
+ it('delivers for a background FAILED sub-agent', () => {
42
+ const d = decideSubagentHandback({ ...base, outcome: 'failed' })
43
+ expect(d.deliver).toBe(true)
44
+ if (d.deliver) expect(d.inbound.meta.outcome).toBe('failed')
45
+ })
46
+
47
+ it('skips a foreground sub-agent (handed back natively in-turn)', () => {
48
+ const d = decideSubagentHandback({ ...base, isBackground: false })
49
+ expect(d).toEqual({ deliver: false, reason: 'foreground' })
50
+ })
51
+
52
+ it("skips an 'orphan' outcome (stale historical-at-boot row)", () => {
53
+ const d = decideSubagentHandback({ ...base, outcome: 'orphan' })
54
+ expect(d).toEqual({ deliver: false, reason: 'outcome-not-terminal' })
55
+ })
56
+
57
+ it('skips when the kill-switch is set (SWITCHROOM_SUBAGENT_HANDBACK=0)', () => {
58
+ const d = decideSubagentHandback({ ...base, handbackEnvValue: '0' })
59
+ expect(d).toEqual({ deliver: false, reason: 'env-disabled' })
60
+ })
61
+
62
+ it('treats any non-"0" env value (incl. undefined) as enabled', () => {
63
+ expect(decideSubagentHandback({ ...base, handbackEnvValue: undefined }).deliver).toBe(true)
64
+ expect(decideSubagentHandback({ ...base, handbackEnvValue: '1' }).deliver).toBe(true)
65
+ expect(decideSubagentHandback({ ...base, handbackEnvValue: '' }).deliver).toBe(true)
66
+ })
67
+
68
+ it('falls back to the owner chat when the fleet entry is gone', () => {
69
+ const d = decideSubagentHandback({ ...base, fleetChatId: '' })
70
+ expect(d.deliver).toBe(true)
71
+ if (d.deliver) {
72
+ expect(d.chatId).toBe('999')
73
+ expect(d.inbound.chatId).toBe('999')
74
+ }
75
+ })
76
+
77
+ it('prefers the fleet chat id over the owner chat when both are present', () => {
78
+ const d = decideSubagentHandback({ ...base, fleetChatId: '777', ownerChatId: '999' })
79
+ expect(d.deliver).toBe(true)
80
+ if (d.deliver) expect(d.chatId).toBe('777')
81
+ })
82
+
83
+ it('skips when no chat resolves at all', () => {
84
+ const d = decideSubagentHandback({ ...base, fleetChatId: '', ownerChatId: '' })
85
+ expect(d).toEqual({ deliver: false, reason: 'no-chat' })
86
+ })
87
+
88
+ it('gate order: kill-switch wins over every other condition', () => {
89
+ // env-disabled even though it is a deliverable background completion.
90
+ const d = decideSubagentHandback({ ...base, handbackEnvValue: '0', isBackground: true })
91
+ expect(d).toEqual({ deliver: false, reason: 'env-disabled' })
92
+ })
93
+
94
+ it('gate order: outcome filter applies before the foreground check', () => {
95
+ // orphan + foreground — outcome filter is checked first.
96
+ const d = decideSubagentHandback({ ...base, outcome: 'orphan', isBackground: false })
97
+ expect(d).toEqual({ deliver: false, reason: 'outcome-not-terminal' })
98
+ })
99
+
100
+ it('carries the task description and result text into the inbound', () => {
101
+ const d = decideSubagentHandback({
102
+ ...base,
103
+ taskDescription: 'Migrate the DB',
104
+ resultText: 'Applied 3 migrations, 0 rows dropped.',
105
+ })
106
+ expect(d.deliver).toBe(true)
107
+ if (d.deliver) {
108
+ expect(d.inbound.text).toContain('Migrate the DB')
109
+ expect(d.inbound.text).toContain('Applied 3 migrations')
110
+ }
111
+ })
112
+ })
@@ -0,0 +1,105 @@
1
+ /**
2
+ * Pin the InboundMessage shape the gateway synthesizes when a
3
+ * *background* sub-agent finishes (conversational-pacing beat 4 — the
4
+ * handback). The `meta.source` string is load-bearing: the MCP channel
5
+ * notification wraps it as `<channel source="subagent_handback">`, and
6
+ * the agent prompt's beat 4 keys on exactly that tag. A regression that
7
+ * changes the source string silently breaks the wake-up — the model
8
+ * wouldn't recognise the turn as a handback cue.
9
+ */
10
+
11
+ import { describe, it, expect } from 'vitest'
12
+ import {
13
+ buildSubagentHandbackInbound,
14
+ HANDBACK_RESULT_MAX,
15
+ HANDBACK_DESC_MAX,
16
+ } from '../gateway/subagent-handback-inbound-builder.js'
17
+
18
+ const FIXED_NOW = 1_700_000_000_000
19
+
20
+ describe('buildSubagentHandbackInbound', () => {
21
+ it('builds a completed-worker handback with the load-bearing meta.source', () => {
22
+ const inbound = buildSubagentHandbackInbound({
23
+ ctx: {
24
+ chatId: '12345',
25
+ taskDescription: 'Refactor the auth module',
26
+ resultText: 'Done — refactored, 4 tests added, all green.',
27
+ outcome: 'completed',
28
+ },
29
+ nowMs: FIXED_NOW,
30
+ })
31
+ expect(inbound.type).toBe('inbound')
32
+ expect(inbound.chatId).toBe('12345')
33
+ expect(inbound.userId).toBe(0)
34
+ expect(inbound.user).toBe('subagent-watcher')
35
+ expect(inbound.ts).toBe(FIXED_NOW)
36
+ expect(inbound.messageId).toBe(FIXED_NOW)
37
+ // The wake-up contract: bridge renders <channel source="subagent_handback">.
38
+ expect(inbound.meta.source).toBe('subagent_handback')
39
+ expect(inbound.meta.outcome).toBe('completed')
40
+ // Text carries the task, the result, and the beat-4 steer.
41
+ expect(inbound.text).toContain('Refactor the auth module')
42
+ expect(inbound.text).toContain('4 tests added, all green')
43
+ expect(inbound.text).toContain('beat 4')
44
+ expect(inbound.text).toMatch(/synthesise|synthesize/i)
45
+ })
46
+
47
+ it('builds a failed-worker handback that steers an honest report', () => {
48
+ const inbound = buildSubagentHandbackInbound({
49
+ ctx: {
50
+ chatId: '99',
51
+ taskDescription: 'Migrate the DB',
52
+ resultText: 'Hit a lock timeout on step 3.',
53
+ outcome: 'failed',
54
+ },
55
+ nowMs: FIXED_NOW,
56
+ })
57
+ expect(inbound.meta.source).toBe('subagent_handback')
58
+ expect(inbound.meta.outcome).toBe('failed')
59
+ expect(inbound.text).toContain('FAILED')
60
+ expect(inbound.text).toContain('lock timeout on step 3')
61
+ expect(inbound.text).toMatch(/did not complete|did not/i)
62
+ })
63
+
64
+ it('tolerates an empty result text (worker emitted no narrative)', () => {
65
+ const inbound = buildSubagentHandbackInbound({
66
+ ctx: {
67
+ chatId: '99',
68
+ taskDescription: 'Quiet task',
69
+ resultText: '',
70
+ outcome: 'completed',
71
+ },
72
+ nowMs: FIXED_NOW,
73
+ })
74
+ expect(inbound.meta.source).toBe('subagent_handback')
75
+ expect(inbound.text).toContain('left no summary')
76
+ // Still steers a handback even with no result text.
77
+ expect(inbound.text).toContain('beat 4')
78
+ })
79
+
80
+ it('caps an over-long result text and description', () => {
81
+ const inbound = buildSubagentHandbackInbound({
82
+ ctx: {
83
+ chatId: '99',
84
+ taskDescription: 'D'.repeat(HANDBACK_DESC_MAX + 500),
85
+ resultText: 'R'.repeat(HANDBACK_RESULT_MAX + 5000),
86
+ outcome: 'completed',
87
+ },
88
+ nowMs: FIXED_NOW,
89
+ })
90
+ // Body stays bounded — cap + the surrounding steer prose, well under
91
+ // Claude Code's hook/context limits.
92
+ expect(inbound.text.length).toBeLessThan(
93
+ HANDBACK_RESULT_MAX + HANDBACK_DESC_MAX + 800,
94
+ )
95
+ expect(inbound.text).toContain('…')
96
+ })
97
+
98
+ it('falls back to a placeholder when the description is blank', () => {
99
+ const inbound = buildSubagentHandbackInbound({
100
+ ctx: { chatId: '99', taskDescription: ' ', resultText: 'x', outcome: 'completed' },
101
+ nowMs: FIXED_NOW,
102
+ })
103
+ expect(inbound.text).toContain('(no description)')
104
+ })
105
+ })
@@ -210,6 +210,67 @@ describe('subagent-tracker-posttool', () => {
210
210
  | undefined
211
211
  expect(row?.status).toBe('failed')
212
212
  })
213
+
214
+ it('emits a foreground handback nudge for a foreground sub-agent', () => {
215
+ // conversational-pacing beat 4: a FOREGROUND sub-agent's PostToolUse
216
+ // fires at real completion, mid-parent-turn — emit an
217
+ // additionalContext nudge steering the parent to synthesise a
218
+ // handback.
219
+ runHook(PRETOOL_SCRIPT, {
220
+ session_id: 's-fg',
221
+ tool_name: 'Agent',
222
+ tool_use_id: 'toolu_fg001',
223
+ tool_input: { description: 'A foreground task', run_in_background: false },
224
+ })
225
+ const postResult = runHook(POSTTOOL_SCRIPT, {
226
+ tool_name: 'Agent',
227
+ tool_use_id: 'toolu_fg001',
228
+ tool_response: { result: 'Foreground work complete.', is_error: false },
229
+ })
230
+ expect(postResult.status).toBe(0)
231
+ expect(postResult.stdout).toContain('additionalContext')
232
+ expect(postResult.stdout).toContain('handback')
233
+ expect(postResult.stdout).toContain('PostToolUse')
234
+ })
235
+
236
+ it('does NOT emit a handback nudge for a background sub-agent', () => {
237
+ // A background sub-agent's PostToolUse fires on the launch ACK, not
238
+ // on completion — nudging "synthesise the handback" there is wrong.
239
+ // The gateway's subagent-watcher onFinish path owns background.
240
+ runHook(PRETOOL_SCRIPT, {
241
+ session_id: 's-bg',
242
+ tool_name: 'Agent',
243
+ tool_use_id: 'toolu_bg001',
244
+ tool_input: { description: 'A background task', run_in_background: true },
245
+ })
246
+ const postResult = runHook(POSTTOOL_SCRIPT, {
247
+ tool_name: 'Agent',
248
+ tool_use_id: 'toolu_bg001',
249
+ tool_response: { result: 'launched', is_error: false },
250
+ })
251
+ expect(postResult.status).toBe(0)
252
+ expect(postResult.stdout).not.toContain('additionalContext')
253
+ })
254
+
255
+ it('does NOT emit a handback nudge when SWITCHROOM_SUBAGENT_HANDBACK=0', () => {
256
+ runHook(PRETOOL_SCRIPT, {
257
+ session_id: 's-off',
258
+ tool_name: 'Agent',
259
+ tool_use_id: 'toolu_off001',
260
+ tool_input: { description: 'A foreground task', run_in_background: false },
261
+ })
262
+ const postResult = runHook(
263
+ POSTTOOL_SCRIPT,
264
+ {
265
+ tool_name: 'Agent',
266
+ tool_use_id: 'toolu_off001',
267
+ tool_response: { result: 'done', is_error: false },
268
+ },
269
+ { SWITCHROOM_SUBAGENT_HANDBACK: '0' },
270
+ )
271
+ expect(postResult.status).toBe(0)
272
+ expect(postResult.stdout).not.toContain('additionalContext')
273
+ })
213
274
  })
214
275
 
215
276
  describe('agent-dir resolution (RFC §Bug 2)', () => {
@@ -367,7 +367,10 @@ describe('startSubagentWatcher', () => {
367
367
  return { agentDir, jsonlPath }
368
368
  }
369
369
 
370
- function startWatcherSync(opts: { agentDir: string }): {
370
+ function startWatcherSync(opts: {
371
+ agentDir: string
372
+ onFinish?: Parameters<typeof startSubagentWatcher>[0]['onFinish']
373
+ }): {
371
374
  notifications: string[]
372
375
  poll: () => void
373
376
  watcher: ReturnType<typeof startSubagentWatcher>
@@ -380,6 +383,7 @@ describe('startSubagentWatcher', () => {
380
383
  const watcher = startSubagentWatcher({
381
384
  agentDir: opts.agentDir,
382
385
  sendNotification: (text) => notifications.push(text),
386
+ ...(opts.onFinish ? { onFinish: opts.onFinish } : {}),
383
387
  stallThresholdMs: 60_000,
384
388
  rescanMs: 500,
385
389
  now: () => Date.now(),
@@ -465,6 +469,68 @@ describe('startSubagentWatcher', () => {
465
469
  expect(entry?.toolCount).toBe(3)
466
470
  })
467
471
 
472
+ it('captures the full last narrative line into lastResultText (handback)', () => {
473
+ // lastSummaryLine keeps only the first line, 120 chars — a progress
474
+ // preview. lastResultText keeps the full last narrative emission:
475
+ // for a worker that IS its result summary, fed to the gateway's
476
+ // subagent_handback inbound (conversational-pacing beat 4).
477
+ const fullResult =
478
+ 'Done. I refactored the auth module, added 4 tests, and all green.\n' +
479
+ 'One caveat: the legacy token path still needs a follow-up.'
480
+ const content = buildJSONL(
481
+ subAgentUserMsg('Refactor auth'),
482
+ subAgentAssistantText(fullResult),
483
+ )
484
+ const { agentDir } = setupRealFs(content, 'deadbeef')
485
+ const h = startWatcherSync({ agentDir })
486
+ h.poll()
487
+ const entry = h.watcher.getRegistry().get('deadbeef')
488
+ expect(entry).toBeDefined()
489
+ // lastSummaryLine is the truncated first line only.
490
+ expect(entry?.lastSummaryLine).not.toMatch(/follow-up/)
491
+ // lastResultText keeps the whole thing — multi-line, both sentences.
492
+ expect(entry?.lastResultText).toContain('refactored the auth module')
493
+ expect(entry?.lastResultText).toContain('legacy token path still needs a follow-up')
494
+ })
495
+
496
+ it('onFinish carries description + resultText for the handback', () => {
497
+ // onFinish fires only on a POST-boot transition (a file already
498
+ // done at startup is historical and short-circuits). So: register
499
+ // the running sub-agent first, then append turn_duration.
500
+ const finishes: Array<{ description: string; resultText: string; outcome: string }> = []
501
+ const agentDir = join(tmpRoot, 'agent')
502
+ const subagentsDir = join(agentDir, '.claude', 'projects', 'p1', 'session-abc', 'subagents')
503
+ mkdirSync(subagentsDir, { recursive: true })
504
+ const jsonlPath = join(subagentsDir, 'agent-deadbeef.jsonl')
505
+
506
+ const h = startWatcherSync({
507
+ agentDir,
508
+ onFinish: ({ description, resultText, outcome }) => {
509
+ finishes.push({ description, resultText, outcome })
510
+ },
511
+ })
512
+ // Register the sub-agent as running (post-boot, not historical).
513
+ writeFileSync(
514
+ jsonlPath,
515
+ buildJSONL(
516
+ subAgentUserMsg('Run a long task'),
517
+ subAgentAssistantText('All set — migration applied cleanly, 0 rows dropped.'),
518
+ ),
519
+ )
520
+ h.poll()
521
+ expect(h.watcher.getRegistry().get('deadbeef')?.state).toBe('running')
522
+
523
+ // Now it finishes — onFinish must carry the result text.
524
+ appendFileSync(jsonlPath, buildJSONL(subAgentTurnDuration()))
525
+ h.poll()
526
+
527
+ expect(finishes.length).toBe(1)
528
+ expect(finishes[0].outcome).toBe('completed')
529
+ expect(finishes[0].resultText).toContain('migration applied cleanly')
530
+ // description stays the dispatch description, never the narrative.
531
+ expect(finishes[0].description).not.toMatch(/migration applied/)
532
+ })
533
+
468
534
  it('does NOT emit completion notification for a file already done at startup', () => {
469
535
  // File pre-exists with turn_end already written — agent was done before
470
536
  // the watcher started. No completion notification should fire.
@@ -0,0 +1,95 @@
1
+ /**
2
+ * JTBD: "talking to my agent feels like talking to a capable person."
3
+ * Conversational-pacing beat 4 — the sub-agent handback.
4
+ *
5
+ * The gap this closes: a *background* sub-agent finishes decoupled from
6
+ * any turn boundary. The parent agent is idle when it completes, with no
7
+ * turn to receive the result — so without a deterministic nudge the user
8
+ * never hears back until they send the next message themselves. The
9
+ * agent looks like it dropped the delegated work on the floor.
10
+ *
11
+ * The fix (Option B): the gateway's subagent-watcher `onFinish` fires a
12
+ * `subagent_handback` inbound carrying the worker's result; the idle
13
+ * agent wakes and synthesises a user-facing handback in its own voice.
14
+ *
15
+ * What this scenario asserts: after the parent dispatches a background
16
+ * worker and ends its turn, a SECOND, unprompted bot message arrives —
17
+ * the handback — without the driver sending anything further. That
18
+ * second message is the whole point: proactive "the worker's done,
19
+ * here's what it found".
20
+ *
21
+ * Prompt strategy: explicit tool-naming (Option 1, mirroring
22
+ * `bg-sub-agent-dispatch-dm.test.ts`) — the scenario verifies the
23
+ * handback INFRA, not the model's delegation judgment, so the dispatch
24
+ * is pinned deterministic.
25
+ *
26
+ * Requires the standard DM-scenario env (see uat/SETUP.md §3-6). The
27
+ * test-harness override `SWITCHROOM_SUBAGENT_STALL_*` (switchroom.yaml)
28
+ * compresses the watcher's terminal-synthesis window so a background
29
+ * worker that never writes an explicit `turn_end` still terminates
30
+ * (and hands back) within the scenario budget instead of 5 min.
31
+ */
32
+
33
+ import { describe, expect, it } from "vitest";
34
+ import { spinUp } from "../harness.js";
35
+
36
+ const BG_DISPATCH_PROMPT =
37
+ `Use the Agent tool with subagent_type "general-purpose" and ` +
38
+ `run_in_background: true to dispatch a worker with this exact task: ` +
39
+ `"Run \`echo HANDBACK-PROBE-OK\` via the Bash tool, then return a ` +
40
+ `one-line summary of what you did." After dispatching, send me a ` +
41
+ `brief one-line reply saying you have kicked off the background ` +
42
+ `worker, then END YOUR TURN — do NOT wait for the worker and do NOT ` +
43
+ `do the echo yourself.`;
44
+
45
+ describe("uat: sub-agent handback — proactive beat-4 communication", () => {
46
+ it(
47
+ "delivers an unprompted handback message after a background worker finishes",
48
+ async () => {
49
+ const sc = await spinUp({ agent: "test-harness" });
50
+ try {
51
+ await sc.sendDM(BG_DISPATCH_PROMPT);
52
+
53
+ // Beat 1/5 of the dispatch turn: the parent acks that it kicked
54
+ // off the worker, then ends its turn. Generous timeout — a cold
55
+ // first turn plus the Agent dispatch can run long.
56
+ const ack = await sc.expectMessage(/.+/, {
57
+ from: "bot",
58
+ timeout: 60_000,
59
+ });
60
+ expect(ack.messageId).toBeGreaterThan(0);
61
+
62
+ // THE TEST: a second, distinct bot message arrives — the
63
+ // handback — WITHOUT the driver sending anything further. This
64
+ // is the deterministic beat-4 win: the watcher's onFinish fired
65
+ // a `subagent_handback` inbound, the idle agent woke, and it
66
+ // synthesised a user-facing report.
67
+ //
68
+ // Match: a bot message that is NOT the ack and reads like a
69
+ // completion report. The handback inbound steers the model to
70
+ // report what the worker found; we accept any of the natural
71
+ // wordings rather than pinning exact prose (the model owns the
72
+ // words — determinism contract).
73
+ const handback = await sc.expectMessage(
74
+ (m) =>
75
+ m.messageId !== ack.messageId &&
76
+ /\b(done|finished|complete|completed|wrapped up|worker|back|result)\b/i.test(
77
+ m.text,
78
+ ),
79
+ { from: "bot", timeout: 180_000 },
80
+ );
81
+
82
+ expect(handback.messageId).not.toBe(ack.messageId);
83
+ // The handback must be a real synthesised message, not an echo
84
+ // of the raw `<channel source="subagent_handback">` envelope or
85
+ // the steering text verbatim.
86
+ expect(handback.text).not.toMatch(/<channel/i);
87
+ expect(handback.text).not.toMatch(/source="subagent_handback"/i);
88
+ expect(handback.text.length).toBeGreaterThan(0);
89
+ } finally {
90
+ await sc.tearDown();
91
+ }
92
+ },
93
+ 240_000,
94
+ );
95
+ });