switchroom 0.13.8 → 0.13.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +53 -25
- package/dist/host-control/main.js +222 -7
- package/examples/switchroom.yaml +25 -7
- package/package.json +1 -1
- package/profiles/_shared/telegram-style.md.hbs +2 -2
- package/telegram-plugin/dist/gateway/gateway.js +514 -143
- package/telegram-plugin/gateway/config-approval-handler.test.ts +246 -0
- package/telegram-plugin/gateway/config-approval-handler.ts +284 -0
- package/telegram-plugin/gateway/gateway.ts +206 -21
- package/telegram-plugin/gateway/ipc-protocol.ts +72 -2
- package/telegram-plugin/gateway/ipc-server.ts +101 -0
- package/telegram-plugin/gateway/subagent-handback-inbound-builder.ts +103 -0
- package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +69 -0
- package/telegram-plugin/subagent-watcher.ts +39 -0
- package/telegram-plugin/tests/subagent-handback-inbound-builder.test.ts +105 -0
- package/telegram-plugin/tests/subagent-tracker-hooks.test.ts +61 -0
- package/telegram-plugin/tests/subagent-watcher.test.ts +67 -1
- package/telegram-plugin/uat/scenarios/jtbd-subagent-handback-dm.test.ts +95 -0
- package/profiles/default/CLAUDE.md +0 -193
|
@@ -249,6 +249,58 @@ function updateRow(dbPath, { id, status, resultSummary, now }, done) {
|
|
|
249
249
|
})
|
|
250
250
|
}
|
|
251
251
|
|
|
252
|
+
// ---------------------------------------------------------------------------
|
|
253
|
+
// Foreground handback nudge (conversational-pacing beat 4)
|
|
254
|
+
// ---------------------------------------------------------------------------
|
|
255
|
+
|
|
256
|
+
/**
 * Synchronously read the `background` flag of a single row in the
 * `subagents` table.
 *
 * Used to gate the foreground handback nudge: a background sub-agent's
 * PostToolUse fires on the launch ACK rather than on completion, so it
 * must NOT be nudged from this hook (the gateway's subagent-watcher owns
 * the background handback).
 *
 * The lookup is fail-silent: every failure mode collapses to `null`, which
 * callers treat as "unknown — skip the nudge".
 *
 * @param {string} dbPath - Path of the SQLite database file to open.
 * @param {*} id - Value bound to the `id = ?` placeholder of the query.
 * @returns {0 | 1 | null} `1` when the stored flag is exactly `1`
 *   (background), `0` for any other stored value (foreground), `null` when
 *   no sync SQLite driver is available, the row does not exist, or the
 *   open/query throws.
 */
function readBackgroundFlagSync(dbPath, id) {
  const DatabaseSync = resolveSyncSqlite()
  if (DatabaseSync == null) return null

  let db = null
  try {
    db = new DatabaseSync(dbPath)
    const row = db.prepare('SELECT background FROM subagents WHERE id = ?').get(id)
    if (row == null) return null
    return row.background === 1 ? 1 : 0
  } catch {
    // Open/prepare/get failed (missing table, locked file, ...): unknown.
    return null
  } finally {
    // Always release the handle, including when the query threw. `db` is
    // still null if the constructor itself failed. A failing close() must
    // not mask a flag that was already read successfully.
    try {
      db?.close()
    } catch {
      /* best-effort close */
    }
  }
}
|
|
277
|
+
|
|
278
|
+
/**
 * Print the foreground handback nudge as a PostToolUse hook result.
 *
 * Writes one newline-terminated JSON object to stdout whose
 * `hookSpecificOutput.additionalContext` tells the parent agent to send
 * the user a synthesised reply (beat 4) rather than pasting the
 * sub-agent's raw report or continuing silently. For a foreground
 * sub-agent the hook runs at real completion, mid-parent-turn, with the
 * result already in the parent's context. This is the same channel
 * `sandbox-hint-posttool` uses; the message is a short fixed string, well
 * under Claude Code's 10k hook-output limit.
 *
 * Takes no arguments and returns nothing. Never throws: a failing write is
 * swallowed so the tool flow is not blocked.
 */
function emitForegroundHandbackNudge() {
  const nudgeText = [
    'A sub-agent you dispatched just returned. Beat 4 — the handback: ',
    'before you move on, send the user a reply in your own voice that ',
    'synthesises what the sub-agent found and your next step. Do not ',
    'paste its raw report and do not go silent.',
  ].join('')

  try {
    const payload = JSON.stringify({
      hookSpecificOutput: {
        hookEventName: 'PostToolUse',
        additionalContext: nudgeText,
      },
    })
    process.stdout.write(`${payload}\n`)
  } catch {
    // Deliberately ignored: stdout write failures never block the tool flow.
  }
}
|
|
303
|
+
|
|
252
304
|
// ---------------------------------------------------------------------------
|
|
253
305
|
// main
|
|
254
306
|
// ---------------------------------------------------------------------------
|
|
@@ -292,6 +344,23 @@ function main() {
|
|
|
292
344
|
if (!existsSync(dbPath)) process.exit(0)
|
|
293
345
|
|
|
294
346
|
const toolResponse = event.tool_response ?? null
|
|
347
|
+
|
|
348
|
+
// conversational-pacing beat 4 (foreground half). A foreground
|
|
349
|
+
// sub-agent's PostToolUse fires at real completion, mid-parent-turn,
|
|
350
|
+
// with its result in tool_response — nudge the parent to synthesise a
|
|
351
|
+
// user-facing handback. Background sub-agents are gated OUT: their
|
|
352
|
+
// PostToolUse fires on the launch ACK (BACKGROUND_SQL leaves status
|
|
353
|
+
// untouched for that reason), and their handback is driven by the
|
|
354
|
+
// gateway's subagent-watcher onFinish path instead. Fail-silent: an
|
|
355
|
+
// unknown background flag (null) skips the nudge.
|
|
356
|
+
if (
|
|
357
|
+
process.env.SWITCHROOM_SUBAGENT_HANDBACK !== '0'
|
|
358
|
+
&& detectStatus(toolResponse) === 'completed'
|
|
359
|
+
&& readBackgroundFlagSync(dbPath, id) === 0
|
|
360
|
+
) {
|
|
361
|
+
emitForegroundHandbackNudge()
|
|
362
|
+
}
|
|
363
|
+
|
|
295
364
|
updateRow(
|
|
296
365
|
dbPath,
|
|
297
366
|
{
|
|
@@ -105,6 +105,15 @@ export interface WorkerEntry {
|
|
|
105
105
|
stallTerminalSynthesised: boolean
|
|
106
106
|
/** Short summary from last completed tool / narrative, for completion message. */
|
|
107
107
|
lastSummaryLine: string
|
|
108
|
+
/**
|
|
109
|
+
* Full text (capped at SUBAGENT_RESULT_TEXT_MAX) of the most recent
|
|
110
|
+
* `sub_agent_text` emission. For a worker the final such line before
|
|
111
|
+
* `turn_end` is its result summary. Carried to the gateway via
|
|
112
|
+
* `onFinish` so a background sub-agent's result can be handed back to
|
|
113
|
+
* the user (conversational-pacing beat 4). Empty until the first
|
|
114
|
+
* narrative line.
|
|
115
|
+
*/
|
|
116
|
+
lastResultText: string
|
|
108
117
|
/**
|
|
109
118
|
* Most recent tool call observed on this sub-agent's JSONL tail —
|
|
110
119
|
* tool name + sanitised arg for fleet-row display (P0 of #662). Null
|
|
@@ -270,6 +279,12 @@ export interface SubagentWatcherConfig {
|
|
|
270
279
|
outcome: 'completed' | 'failed' | 'orphan'
|
|
271
280
|
toolCount: number
|
|
272
281
|
durationMs: number
|
|
282
|
+
/** Dispatch-time task description, for the handback envelope. */
|
|
283
|
+
description: string
|
|
284
|
+
/** The worker's final narrative emission (capped). May be empty if
|
|
285
|
+
* no `sub_agent_text` line was ever observed. Feeds the
|
|
286
|
+
* `subagent_handback` inbound. */
|
|
287
|
+
resultText: string
|
|
273
288
|
}) => void
|
|
274
289
|
/** `Date.now` override for tests. */
|
|
275
290
|
now?: () => number
|
|
@@ -321,6 +336,15 @@ const DEFAULT_SILENT_SYNTHESIS_STALL_THRESHOLD_MS = 300_000
|
|
|
321
336
|
*/
|
|
322
337
|
const DEFAULT_SILENT_STALL_TERMINAL_MS = 300_000
|
|
323
338
|
|
|
339
|
+
/**
|
|
340
|
+
* Cap on the result text retained per sub-agent (`entry.lastResultText`)
|
|
341
|
+
* and carried to the gateway via `onFinish`. The gateway feeds this into
|
|
342
|
+
* the `subagent_handback` inbound; the model synthesises a fresh
|
|
343
|
+
* user-facing summary from it, so the full transcript is never needed
|
|
344
|
+
* and an unbounded retain would bloat the parent's context.
|
|
345
|
+
*/
|
|
346
|
+
const SUBAGENT_RESULT_TEXT_MAX = 3000
|
|
347
|
+
|
|
324
348
|
/**
|
|
325
349
|
* Resolve a threshold-knob env var (e.g.
|
|
326
350
|
* `SWITCHROOM_SUBAGENT_STALL_TERMINAL_MS`) to a positive integer ms
|
|
@@ -580,6 +604,16 @@ function readSubTail(
|
|
|
580
604
|
// and must remain stable. Overwriting it with the sub-agent's first
|
|
581
605
|
// narrative line caused a race-condition-dependent display (issue #352).
|
|
582
606
|
entry.lastSummaryLine = ev.text.split('\n')[0].trim().slice(0, 120)
|
|
607
|
+
// Retain the full text of the most recent narrative emission —
|
|
608
|
+
// for a worker the final such line before turn_end IS its
|
|
609
|
+
// result summary (the worker prompt asks it to "return a
|
|
610
|
+
// concise summary"). Carried to the gateway via onFinish so a
|
|
611
|
+
// *background* sub-agent's result can be handed back to the
|
|
612
|
+
// user (conversational-pacing beat 4). Replace-on-write +
|
|
613
|
+
// capped: this is the worker's intended output, never tool
|
|
614
|
+
// args or file content — consistent with the watcher's
|
|
615
|
+
// "descriptions only" privacy posture.
|
|
616
|
+
entry.lastResultText = ev.text.trim().slice(0, SUBAGENT_RESULT_TEXT_MAX)
|
|
583
617
|
} else if (ev.kind === 'sub_agent_turn_end') {
|
|
584
618
|
if (entry.state === 'running') {
|
|
585
619
|
entry.state = 'done'
|
|
@@ -750,6 +784,7 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
|
|
|
750
784
|
completionNotified: false,
|
|
751
785
|
stallTerminalSynthesised: false,
|
|
752
786
|
lastSummaryLine: '',
|
|
787
|
+
lastResultText: '',
|
|
753
788
|
lastTool: null,
|
|
754
789
|
historical: isHistorical,
|
|
755
790
|
}
|
|
@@ -850,6 +885,8 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
|
|
|
850
885
|
outcome: entry.historical ? 'orphan' : 'completed',
|
|
851
886
|
toolCount: entry.toolCount,
|
|
852
887
|
durationMs: nowFn() - entry.dispatchedAt,
|
|
888
|
+
description: entry.description,
|
|
889
|
+
resultText: entry.lastResultText,
|
|
853
890
|
})
|
|
854
891
|
} catch (cbErr) {
|
|
855
892
|
log?.(`subagent-watcher: onFinish callback error ${agentId}: ${(cbErr as Error).message}`)
|
|
@@ -869,6 +906,8 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
|
|
|
869
906
|
outcome: 'failed',
|
|
870
907
|
toolCount: entry.toolCount,
|
|
871
908
|
durationMs: nowFn() - entry.dispatchedAt,
|
|
909
|
+
description: entry.description,
|
|
910
|
+
resultText: entry.lastResultText,
|
|
872
911
|
})
|
|
873
912
|
} catch (cbErr) {
|
|
874
913
|
log?.(`subagent-watcher: onFinish callback error ${agentId}: ${(cbErr as Error).message}`)
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pin the InboundMessage shape the gateway synthesizes when a
|
|
3
|
+
* *background* sub-agent finishes (conversational-pacing beat 4 — the
|
|
4
|
+
* handback). The `meta.source` string is load-bearing: the MCP channel
|
|
5
|
+
* notification wraps it as `<channel source="subagent_handback">`, and
|
|
6
|
+
* the agent prompt's beat 4 keys on exactly that tag. A regression that
|
|
7
|
+
* changes the source string silently breaks the wake-up — the model
|
|
8
|
+
* wouldn't recognise the turn as a handback cue.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { describe, it, expect } from 'vitest'
|
|
12
|
+
import {
|
|
13
|
+
buildSubagentHandbackInbound,
|
|
14
|
+
HANDBACK_RESULT_MAX,
|
|
15
|
+
HANDBACK_DESC_MAX,
|
|
16
|
+
} from '../gateway/subagent-handback-inbound-builder.js'
|
|
17
|
+
|
|
18
|
+
const FIXED_NOW = 1_700_000_000_000
|
|
19
|
+
|
|
20
|
+
describe('buildSubagentHandbackInbound', () => {
  // Every case pins the clock to FIXED_NOW; only the handback context varies.
  type HandbackCtx = Parameters<typeof buildSubagentHandbackInbound>[0]['ctx']
  const handbackFor = (ctx: HandbackCtx) =>
    buildSubagentHandbackInbound({ ctx, nowMs: FIXED_NOW })

  it('builds a completed-worker handback with the load-bearing meta.source', () => {
    const msg = handbackFor({
      chatId: '12345',
      taskDescription: 'Refactor the auth module',
      resultText: 'Done — refactored, 4 tests added, all green.',
      outcome: 'completed',
    })

    // Envelope fields.
    expect(msg.type).toBe('inbound')
    expect(msg.chatId).toBe('12345')
    expect(msg.userId).toBe(0)
    expect(msg.user).toBe('subagent-watcher')
    expect(msg.ts).toBe(FIXED_NOW)
    expect(msg.messageId).toBe(FIXED_NOW)

    // Wake-up contract: the bridge renders <channel source="subagent_handback">.
    expect(msg.meta.source).toBe('subagent_handback')
    expect(msg.meta.outcome).toBe('completed')

    // The body carries the task, the result, and the beat-4 steer.
    const body = msg.text
    expect(body).toContain('Refactor the auth module')
    expect(body).toContain('4 tests added, all green')
    expect(body).toContain('beat 4')
    expect(body).toMatch(/synthesise|synthesize/i)
  })

  it('builds a failed-worker handback that steers an honest report', () => {
    const { meta, text } = handbackFor({
      chatId: '99',
      taskDescription: 'Migrate the DB',
      resultText: 'Hit a lock timeout on step 3.',
      outcome: 'failed',
    })

    expect(meta.source).toBe('subagent_handback')
    expect(meta.outcome).toBe('failed')
    expect(text).toContain('FAILED')
    expect(text).toContain('lock timeout on step 3')
    expect(text).toMatch(/did not complete|did not/i)
  })

  it('tolerates an empty result text (worker emitted no narrative)', () => {
    const { meta, text } = handbackFor({
      chatId: '99',
      taskDescription: 'Quiet task',
      resultText: '',
      outcome: 'completed',
    })

    expect(meta.source).toBe('subagent_handback')
    expect(text).toContain('left no summary')
    // Even without result text the handback steer is present.
    expect(text).toContain('beat 4')
  })

  it('caps an over-long result text and description', () => {
    const oversizedDescription = 'D'.repeat(HANDBACK_DESC_MAX + 500)
    const oversizedResult = 'R'.repeat(HANDBACK_RESULT_MAX + 5000)
    const { text } = handbackFor({
      chatId: '99',
      taskDescription: oversizedDescription,
      resultText: oversizedResult,
      outcome: 'completed',
    })

    // The body is bounded by both caps plus the surrounding steer prose,
    // keeping it well under Claude Code's hook/context limits.
    const upperBound = HANDBACK_RESULT_MAX + HANDBACK_DESC_MAX + 800
    expect(text.length).toBeLessThan(upperBound)
    expect(text).toContain('…')
  })

  it('falls back to a placeholder when the description is blank', () => {
    const { text } = handbackFor({
      chatId: '99',
      taskDescription: ' ',
      resultText: 'x',
      outcome: 'completed',
    })

    expect(text).toContain('(no description)')
  })
})
|
|
@@ -210,6 +210,67 @@ describe('subagent-tracker-posttool', () => {
|
|
|
210
210
|
| undefined
|
|
211
211
|
expect(row?.status).toBe('failed')
|
|
212
212
|
})
|
|
213
|
+
|
|
214
|
+
it('emits a foreground handback nudge for a foreground sub-agent', () => {
  // conversational-pacing beat 4: for a FOREGROUND sub-agent the
  // PostToolUse hook runs at real completion, mid-parent-turn, so the hook
  // must emit an additionalContext nudge steering the parent to synthesise
  // a handback.
  const toolUseId = 'toolu_fg001'

  // Dispatch: record the sub-agent as a foreground run.
  runHook(PRETOOL_SCRIPT, {
    session_id: 's-fg',
    tool_name: 'Agent',
    tool_use_id: toolUseId,
    tool_input: { description: 'A foreground task', run_in_background: false },
  })

  // Completion: the PostToolUse hook for the same tool call.
  const { status, stdout } = runHook(POSTTOOL_SCRIPT, {
    tool_name: 'Agent',
    tool_use_id: toolUseId,
    tool_response: { result: 'Foreground work complete.', is_error: false },
  })

  expect(status).toBe(0)
  for (const needle of ['additionalContext', 'handback', 'PostToolUse']) {
    expect(stdout).toContain(needle)
  }
})
|
|
235
|
+
|
|
236
|
+
it('does NOT emit a handback nudge for a background sub-agent', () => {
  // For a background sub-agent the PostToolUse hook runs on the launch ACK,
  // not on completion, so a "synthesise the handback" nudge there would be
  // wrong. Background handbacks belong to the gateway's subagent-watcher
  // onFinish path.
  const toolUseId = 'toolu_bg001'

  runHook(PRETOOL_SCRIPT, {
    session_id: 's-bg',
    tool_name: 'Agent',
    tool_use_id: toolUseId,
    tool_input: { description: 'A background task', run_in_background: true },
  })

  const { status, stdout } = runHook(POSTTOOL_SCRIPT, {
    tool_name: 'Agent',
    tool_use_id: toolUseId,
    tool_response: { result: 'launched', is_error: false },
  })

  expect(status).toBe(0)
  expect(stdout).not.toContain('additionalContext')
})
|
|
254
|
+
|
|
255
|
+
it('does NOT emit a handback nudge when SWITCHROOM_SUBAGENT_HANDBACK=0', () => {
  // Same foreground flow that would normally nudge, but with the feature
  // switched off through the environment.
  const toolUseId = 'toolu_off001'
  const disabledEnv = { SWITCHROOM_SUBAGENT_HANDBACK: '0' }

  runHook(PRETOOL_SCRIPT, {
    session_id: 's-off',
    tool_name: 'Agent',
    tool_use_id: toolUseId,
    tool_input: { description: 'A foreground task', run_in_background: false },
  })

  const completionEvent = {
    tool_name: 'Agent',
    tool_use_id: toolUseId,
    tool_response: { result: 'done', is_error: false },
  }
  const { status, stdout } = runHook(POSTTOOL_SCRIPT, completionEvent, disabledEnv)

  expect(status).toBe(0)
  expect(stdout).not.toContain('additionalContext')
})
|
|
213
274
|
})
|
|
214
275
|
|
|
215
276
|
describe('agent-dir resolution (RFC §Bug 2)', () => {
|
|
@@ -367,7 +367,10 @@ describe('startSubagentWatcher', () => {
|
|
|
367
367
|
return { agentDir, jsonlPath }
|
|
368
368
|
}
|
|
369
369
|
|
|
370
|
-
function startWatcherSync(opts: {
|
|
370
|
+
function startWatcherSync(opts: {
|
|
371
|
+
agentDir: string
|
|
372
|
+
onFinish?: Parameters<typeof startSubagentWatcher>[0]['onFinish']
|
|
373
|
+
}): {
|
|
371
374
|
notifications: string[]
|
|
372
375
|
poll: () => void
|
|
373
376
|
watcher: ReturnType<typeof startSubagentWatcher>
|
|
@@ -380,6 +383,7 @@ describe('startSubagentWatcher', () => {
|
|
|
380
383
|
const watcher = startSubagentWatcher({
|
|
381
384
|
agentDir: opts.agentDir,
|
|
382
385
|
sendNotification: (text) => notifications.push(text),
|
|
386
|
+
...(opts.onFinish ? { onFinish: opts.onFinish } : {}),
|
|
383
387
|
stallThresholdMs: 60_000,
|
|
384
388
|
rescanMs: 500,
|
|
385
389
|
now: () => Date.now(),
|
|
@@ -465,6 +469,68 @@ describe('startSubagentWatcher', () => {
|
|
|
465
469
|
expect(entry?.toolCount).toBe(3)
|
|
466
470
|
})
|
|
467
471
|
|
|
472
|
+
it('captures the full last narrative line into lastResultText (handback)', () => {
  // lastSummaryLine is a progress preview: first line only, 120 chars.
  // lastResultText holds the complete last narrative emission, which for a
  // worker IS its result summary and feeds the gateway's subagent_handback
  // inbound (conversational-pacing beat 4).
  const firstLine = 'Done. I refactored the auth module, added 4 tests, and all green.'
  const secondLine = 'One caveat: the legacy token path still needs a follow-up.'
  const narrative = `${firstLine}\n${secondLine}`

  const transcript = buildJSONL(
    subAgentUserMsg('Refactor auth'),
    subAgentAssistantText(narrative),
  )
  const { agentDir } = setupRealFs(transcript, 'deadbeef')
  const harness = startWatcherSync({ agentDir })
  harness.poll()

  const entry = harness.watcher.getRegistry().get('deadbeef')
  expect(entry).toBeDefined()
  // The summary line stops at the first line, so the caveat is absent.
  expect(entry?.lastSummaryLine).not.toMatch(/follow-up/)
  // The result text keeps both sentences across the line break.
  expect(entry?.lastResultText).toContain('refactored the auth module')
  expect(entry?.lastResultText).toContain('legacy token path still needs a follow-up')
})
|
|
495
|
+
|
|
496
|
+
it('onFinish carries description + resultText for the handback', () => {
  // onFinish only fires for a POST-boot transition: a file that is already
  // done at startup is historical and short-circuits. So the sub-agent is
  // first registered as running, and turn_duration is appended afterwards.
  type FinishRecord = { description: string; resultText: string; outcome: string }
  const finishes: FinishRecord[] = []

  const agentDir = join(tmpRoot, 'agent')
  const subagentsDir = join(agentDir, '.claude', 'projects', 'p1', 'session-abc', 'subagents')
  mkdirSync(subagentsDir, { recursive: true })
  const jsonlPath = join(subagentsDir, 'agent-deadbeef.jsonl')

  const harness = startWatcherSync({
    agentDir,
    onFinish: (info) => {
      finishes.push({
        description: info.description,
        resultText: info.resultText,
        outcome: info.outcome,
      })
    },
  })

  // Step 1: the transcript appears after boot, so the entry is live (not
  // historical) and still running.
  const runningTranscript = buildJSONL(
    subAgentUserMsg('Run a long task'),
    subAgentAssistantText('All set — migration applied cleanly, 0 rows dropped.'),
  )
  writeFileSync(jsonlPath, runningTranscript)
  harness.poll()
  expect(harness.watcher.getRegistry().get('deadbeef')?.state).toBe('running')

  // Step 2: the worker finishes; onFinish must deliver the result text.
  appendFileSync(jsonlPath, buildJSONL(subAgentTurnDuration()))
  harness.poll()

  expect(finishes.length).toBe(1)
  const finished = finishes[0]
  expect(finished.outcome).toBe('completed')
  expect(finished.resultText).toContain('migration applied cleanly')
  // The description remains the dispatch description, never the narrative.
  expect(finished.description).not.toMatch(/migration applied/)
})
|
|
533
|
+
|
|
468
534
|
it('does NOT emit completion notification for a file already done at startup', () => {
|
|
469
535
|
// File pre-exists with turn_end already written — agent was done before
|
|
470
536
|
// the watcher started. No completion notification should fire.
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JTBD: "talking to my agent feels like talking to a capable person."
|
|
3
|
+
* Conversational-pacing beat 4 — the sub-agent handback.
|
|
4
|
+
*
|
|
5
|
+
* The gap this closes: a *background* sub-agent finishes decoupled from
|
|
6
|
+
* any turn boundary. The parent agent is idle when it completes, with no
|
|
7
|
+
* turn to receive the result — so without a deterministic nudge the user
|
|
8
|
+
* never hears back until they send the next message themselves. The
|
|
9
|
+
* agent looks like it dropped the delegated work on the floor.
|
|
10
|
+
*
|
|
11
|
+
* The fix (Option B): the gateway's subagent-watcher `onFinish` fires a
|
|
12
|
+
* `subagent_handback` inbound carrying the worker's result; the idle
|
|
13
|
+
* agent wakes and synthesises a user-facing handback in its own voice.
|
|
14
|
+
*
|
|
15
|
+
* What this scenario asserts: after the parent dispatches a background
|
|
16
|
+
* worker and ends its turn, a SECOND, unprompted bot message arrives —
|
|
17
|
+
* the handback — without the driver sending anything further. That
|
|
18
|
+
* second message is the whole point: proactive "the worker's done,
|
|
19
|
+
* here's what it found".
|
|
20
|
+
*
|
|
21
|
+
* Prompt strategy: explicit tool-naming (Option 1, mirroring
|
|
22
|
+
* `bg-sub-agent-dispatch-dm.test.ts`) — the scenario verifies the
|
|
23
|
+
* handback INFRA, not the model's delegation judgment, so the dispatch
|
|
24
|
+
* is pinned deterministic.
|
|
25
|
+
*
|
|
26
|
+
* Requires the standard DM-scenario env (see uat/SETUP.md §3-6). The
|
|
27
|
+
* test-harness override `SWITCHROOM_SUBAGENT_STALL_*` (switchroom.yaml)
|
|
28
|
+
* compresses the watcher's terminal-synthesis window so a background
|
|
29
|
+
* worker that never writes an explicit `turn_end` still terminates
|
|
30
|
+
* (and hands back) within the scenario budget instead of 5 min.
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
import { describe, expect, it } from "vitest";
|
|
34
|
+
import { spinUp } from "../harness.js";
|
|
35
|
+
|
|
36
|
+
const BG_DISPATCH_PROMPT =
|
|
37
|
+
`Use the Agent tool with subagent_type "general-purpose" and ` +
|
|
38
|
+
`run_in_background: true to dispatch a worker with this exact task: ` +
|
|
39
|
+
`"Run \`echo HANDBACK-PROBE-OK\` via the Bash tool, then return a ` +
|
|
40
|
+
`one-line summary of what you did." After dispatching, send me a ` +
|
|
41
|
+
`brief one-line reply saying you have kicked off the background ` +
|
|
42
|
+
`worker, then END YOUR TURN — do NOT wait for the worker and do NOT ` +
|
|
43
|
+
`do the echo yourself.`;
|
|
44
|
+
|
|
45
|
+
describe("uat: sub-agent handback — proactive beat-4 communication", () => {
  // Natural wordings of a completion report. The model owns the exact prose
  // (determinism contract), so any of these is accepted.
  const COMPLETION_WORDING =
    /\b(done|finished|complete|completed|wrapped up|worker|back|result)\b/i;
  const ACK_TIMEOUT_MS = 60_000;
  const HANDBACK_TIMEOUT_MS = 180_000;
  const SCENARIO_TIMEOUT_MS = 240_000;

  it(
    "delivers an unprompted handback message after a background worker finishes",
    async () => {
      const scenario = await spinUp({ agent: "test-harness" });
      try {
        await scenario.sendDM(BG_DISPATCH_PROMPT);

        // Dispatch turn (beat 1/5): the parent acknowledges that the worker
        // was kicked off and then ends its turn. The timeout is generous
        // because a cold first turn plus the Agent dispatch can run long.
        const ack = await scenario.expectMessage(/.+/, {
          from: "bot",
          timeout: ACK_TIMEOUT_MS,
        });
        expect(ack.messageId).toBeGreaterThan(0);

        // THE TEST: with the driver sending nothing further, a second and
        // distinct bot message must arrive. That is the handback: the
        // watcher's onFinish fired a `subagent_handback` inbound, the idle
        // agent woke, and it synthesised a user-facing report.
        const handback = await scenario.expectMessage(
          (m) => m.messageId !== ack.messageId && COMPLETION_WORDING.test(m.text),
          { from: "bot", timeout: HANDBACK_TIMEOUT_MS },
        );

        expect(handback.messageId).not.toBe(ack.messageId);
        // It has to be a genuinely synthesised message, not the raw
        // `<channel source="subagent_handback">` envelope or the steering
        // text echoed back verbatim.
        expect(handback.text).not.toMatch(/<channel/i);
        expect(handback.text).not.toMatch(/source="subagent_handback"/i);
        expect(handback.text.length).toBeGreaterThan(0);
      } finally {
        await scenario.tearDown();
      }
    },
    SCENARIO_TIMEOUT_MS,
  );
});
|