switchroom 0.14.23 → 0.14.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +60 -20
- package/telegram-plugin/gateway/foreground-nesting.ts +65 -0
- package/telegram-plugin/gateway/gateway.ts +34 -12
- package/telegram-plugin/subagent-watcher.ts +82 -12
- package/telegram-plugin/tests/foreground-nesting.test.ts +98 -0
- package/telegram-plugin/tests/subagent-watcher-handback-gaps.test.ts +88 -1
- package/telegram-plugin/uat/scenarios/jtbd-foreground-subagent-activity-dm.test.ts +121 -0
package/dist/cli/switchroom.js
CHANGED
|
@@ -49420,8 +49420,8 @@ var {
|
|
|
49420
49420
|
} = import__.default;
|
|
49421
49421
|
|
|
49422
49422
|
// src/build-info.ts
|
|
49423
|
-
var VERSION = "0.14.23";
|
|
49424
|
-
var COMMIT_SHA = "
|
|
49423
|
+
var VERSION = "0.14.25";
|
|
49424
|
+
var COMMIT_SHA = "f75f4f25";
|
|
49425
49425
|
|
|
49426
49426
|
// src/cli/agent.ts
|
|
49427
49427
|
init_source();
|
package/package.json
CHANGED
package/telegram-plugin/dist/gateway/gateway.js
CHANGED
|
@@ -47505,6 +47505,18 @@ function decideSubagentProgress(input) {
|
|
|
47505
47505
|
return { deliver: true, chatId, bucketIdx, inbound };
|
|
47506
47506
|
}
|
|
47507
47507
|
|
|
47508
|
+
// gateway/foreground-nesting.ts
|
|
47509
|
+
function shouldRenderForegroundProgress(g) {
|
|
47510
|
+
return g.nestingEnabled;
|
|
47511
|
+
}
|
|
47512
|
+
function foregroundFinishAction(i) {
|
|
47513
|
+
if (!i.removed)
|
|
47514
|
+
return "none";
|
|
47515
|
+
if (i.replyCalled && i.remainingForeground === 0)
|
|
47516
|
+
return "handoff-clear";
|
|
47517
|
+
return "recompose";
|
|
47518
|
+
}
|
|
47519
|
+
|
|
47508
47520
|
// gateway/poll-health.ts
|
|
47509
47521
|
var DEFAULT_LOG = (msg) => {
|
|
47510
47522
|
process.stderr.write(msg.endsWith(`
|
|
@@ -49225,6 +49237,7 @@ var DEFAULT_RESCAN_MS = 1000;
|
|
|
49225
49237
|
var DEFAULT_STALL_THRESHOLD_MS = 60000;
|
|
49226
49238
|
var DEFAULT_SILENT_SYNTHESIS_STALL_THRESHOLD_MS = 300000;
|
|
49227
49239
|
var DEFAULT_SILENT_STALL_TERMINAL_MS = 300000;
|
|
49240
|
+
var DEFAULT_INFLIGHT_PROMOTE_MAX_AGE_MS = 15 * 60000;
|
|
49228
49241
|
var SUBAGENT_RESULT_TEXT_MAX = 3000;
|
|
49229
49242
|
function parseEnvMs(varName) {
|
|
49230
49243
|
const raw = process.env[varName];
|
|
@@ -49415,6 +49428,8 @@ function startSubagentWatcher(config) {
|
|
|
49415
49428
|
const stallThresholdMs = config.stallThresholdMs ?? parseEnvMs("SWITCHROOM_SUBAGENT_STALL_MS") ?? DEFAULT_STALL_THRESHOLD_MS;
|
|
49416
49429
|
const silentSynthesisStallThresholdMs = config.silentSynthesisStallThresholdMs ?? parseEnvMs("SWITCHROOM_SUBAGENT_SILENT_SYNTH_STALL_MS") ?? DEFAULT_SILENT_SYNTHESIS_STALL_THRESHOLD_MS;
|
|
49417
49430
|
const silentStallTerminalMs = config.silentStallTerminalMs ?? parseEnvMs("SWITCHROOM_SUBAGENT_STALL_TERMINAL_MS") ?? DEFAULT_SILENT_STALL_TERMINAL_MS;
|
|
49431
|
+
const inflightPromoteMaxAgeMs = config.inflightPromoteMaxAgeMs ?? parseEnvMs("SWITCHROOM_SUBAGENT_INFLIGHT_MAX_AGE_MS") ?? DEFAULT_INFLIGHT_PROMOTE_MAX_AGE_MS;
|
|
49432
|
+
const bootPromoteEnabled = config.bootPromoteEnabled ?? process.env.SWITCHROOM_SUBAGENT_BOOT_PROMOTE !== "0";
|
|
49418
49433
|
const reaperTtlMs = config.reaperTtlMs ?? DEFAULT_REAPER_TTL_MS;
|
|
49419
49434
|
const reaperIntervalMs = config.reaperIntervalMs ?? DEFAULT_REAPER_INTERVAL_MS;
|
|
49420
49435
|
const rescanMs = config.rescanMs ?? DEFAULT_RESCAN_MS;
|
|
@@ -49497,13 +49512,25 @@ function startSubagentWatcher(config) {
|
|
|
49497
49512
|
log?.(`subagent-watcher: description updated for ${agentId}: ${desc}`);
|
|
49498
49513
|
}, fs2, log, db2, parentStateDir, config.onUnstall, undefined, config.onProgress);
|
|
49499
49514
|
if (isHistorical && entry.state === "running") {
|
|
49500
|
-
|
|
49501
|
-
|
|
49502
|
-
|
|
49503
|
-
|
|
49504
|
-
|
|
49505
|
-
|
|
49506
|
-
|
|
49515
|
+
let fileAgeMs = Infinity;
|
|
49516
|
+
try {
|
|
49517
|
+
const st = fs2.statSync(filePath);
|
|
49518
|
+
if (typeof st.mtimeMs === "number")
|
|
49519
|
+
fileAgeMs = n - st.mtimeMs;
|
|
49520
|
+
} catch {}
|
|
49521
|
+
if (!bootPromoteEnabled) {
|
|
49522
|
+
log?.(`subagent-watcher: ${agentId} running at boot but promotion disabled (SWITCHROOM_SUBAGENT_BOOT_PROMOTE=0) \u2014 leaving historical`);
|
|
49523
|
+
} else if (fileAgeMs > inflightPromoteMaxAgeMs) {
|
|
49524
|
+
log?.(`subagent-watcher: ${agentId} running at boot but stale (last write ${Math.round(fileAgeMs / 1000)}s ago > ${Math.round(inflightPromoteMaxAgeMs / 1000)}s) \u2014 leaving historical (dead prior-session worker, not in-flight)`);
|
|
49525
|
+
} else {
|
|
49526
|
+
entry.historical = false;
|
|
49527
|
+
log?.(`subagent-watcher: ${agentId} was in-flight at boot \u2014 promoting to live (last write ${Math.round(fileAgeMs / 1000)}s ago; user still awaiting handback)`);
|
|
49528
|
+
if (db2 != null) {
|
|
49529
|
+
try {
|
|
49530
|
+
backfillJsonlAgentId(db2, filePath, agentId, log);
|
|
49531
|
+
} catch (err) {
|
|
49532
|
+
log?.(`subagent-watcher: backfill error for ${agentId}: ${err.message}`);
|
|
49533
|
+
}
|
|
49507
49534
|
}
|
|
49508
49535
|
}
|
|
49509
49536
|
}
|
|
@@ -51442,10 +51469,10 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
|
|
|
51442
51469
|
}
|
|
51443
51470
|
|
|
51444
51471
|
// ../src/build-info.ts
|
|
51445
|
-
var VERSION = "0.14.23";
|
|
51446
|
-
var COMMIT_SHA = "
|
|
51447
|
-
var COMMIT_DATE = "2026-
|
|
51448
|
-
var LATEST_PR =
|
|
51472
|
+
var VERSION = "0.14.25";
|
|
51473
|
+
var COMMIT_SHA = "f75f4f25";
|
|
51474
|
+
var COMMIT_DATE = "2026-06-01T00:05:32Z";
|
|
51475
|
+
var LATEST_PR = 2038;
|
|
51449
51476
|
var COMMITS_AHEAD_OF_TAG = 0;
|
|
51450
51477
|
|
|
51451
51478
|
// gateway/boot-version.ts
|
|
@@ -61961,12 +61988,22 @@ var didOneTimeSetup = false;
|
|
|
61961
61988
|
const isBackground = dispatch.isBackground;
|
|
61962
61989
|
if (!isBackground) {
|
|
61963
61990
|
const turn = currentTurn;
|
|
61964
|
-
|
|
61965
|
-
|
|
61966
|
-
|
|
61967
|
-
|
|
61968
|
-
|
|
61969
|
-
|
|
61991
|
+
const removed = turn != null && turn.foregroundSubAgents.delete(agentId);
|
|
61992
|
+
if (turn != null && removed) {
|
|
61993
|
+
const action = foregroundFinishAction({
|
|
61994
|
+
removed,
|
|
61995
|
+
replyCalled: turn.replyCalled,
|
|
61996
|
+
remainingForeground: turn.foregroundSubAgents.size
|
|
61997
|
+
});
|
|
61998
|
+
if (action === "handoff-clear") {
|
|
61999
|
+
clearActivitySummary(turn);
|
|
62000
|
+
} else if (action === "recompose") {
|
|
62001
|
+
const rendered = composeTurnActivity(turn);
|
|
62002
|
+
if (rendered != null) {
|
|
62003
|
+
turn.activityPendingRender = rendered;
|
|
62004
|
+
if (turn.activityInFlight == null) {
|
|
62005
|
+
turn.activityInFlight = drainActivitySummary(turn);
|
|
62006
|
+
}
|
|
61970
62007
|
}
|
|
61971
62008
|
}
|
|
61972
62009
|
}
|
|
@@ -62033,10 +62070,13 @@ var didOneTimeSetup = false;
|
|
|
62033
62070
|
}
|
|
62034
62071
|
const isBackground = dispatch.isBackground;
|
|
62035
62072
|
if (!isBackground) {
|
|
62036
|
-
if (!foregroundNestingEnabled)
|
|
62037
|
-
return;
|
|
62038
62073
|
const turn = currentTurn;
|
|
62039
|
-
if (turn == null || turn.replyCalled)
|
|
62074
|
+
if (turn == null)
|
|
62075
|
+
return;
|
|
62076
|
+
if (!shouldRenderForegroundProgress({
|
|
62077
|
+
nestingEnabled: foregroundNestingEnabled,
|
|
62078
|
+
replyCalled: turn.replyCalled
|
|
62079
|
+
}))
|
|
62040
62080
|
return;
|
|
62041
62081
|
const child = latestSummary.trim().slice(0, 120);
|
|
62042
62082
|
if (child.length === 0)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure state-transition helpers for Model A foreground sub-agent activity
|
|
3
|
+
* nesting (#2027). Extracted from gateway.ts so the `replyCalled` gate is
|
|
4
|
+
* unit-testable on its own — the exact seam #2027 shipped without, which let
|
|
5
|
+
* the feature silently no-op for every ack-first turn (reply "On it…" then
|
|
6
|
+
* delegate) and go unnoticed in production. See
|
|
7
|
+
* `tests/foreground-nesting.test.ts`.
|
|
8
|
+
*
|
|
9
|
+
* Subscription-honest: every caller is pure jsonl-tail → render, no model
|
|
10
|
+
* call. These functions decide *whether/what* to render; the gateway owns the
|
|
11
|
+
* `currentTurn` mutation and the Telegram edit.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
export interface ForegroundProgressGate {
|
|
15
|
+
/** `SWITCHROOM_FOREGROUND_SUBAGENT_NESTING !== '0'` — the kill-switch. */
|
|
16
|
+
nestingEnabled: boolean
|
|
17
|
+
/**
|
|
18
|
+
* Whether the parent turn has already emitted a reply. Accepted as input
|
|
19
|
+
* but intentionally NOT a gate: a foreground `Task` is a *blocking* call,
|
|
20
|
+
* so the parent cannot have issued its FINAL answer while the sub-agent is
|
|
21
|
+
* still running — any `replyCalled === true` observed here is therefore an
|
|
22
|
+
* *interim* ack. The pre-fix code bailed on this flag, which is precisely
|
|
23
|
+
* why ack-first turns showed zero live foreground activity.
|
|
24
|
+
*/
|
|
25
|
+
replyCalled: boolean
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Whether a foreground sub-agent progress tick should accumulate its narrative
|
|
30
|
+
* and re-render the activity feed. Gated ONLY by the kill-switch — never by
|
|
31
|
+
* `replyCalled` (see the field doc above).
|
|
32
|
+
*/
|
|
33
|
+
export function shouldRenderForegroundProgress(g: ForegroundProgressGate): boolean {
|
|
34
|
+
return g.nestingEnabled
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
export type ForegroundFinishAction = 'recompose' | 'handoff-clear' | 'none'
|
|
38
|
+
|
|
39
|
+
export interface ForegroundFinishInput {
|
|
40
|
+
/** Was this agentId actually an active foreground sub-agent we tracked? */
|
|
41
|
+
removed: boolean
|
|
42
|
+
/** Has the parent turn already emitted a reply (an interim ack)? */
|
|
43
|
+
replyCalled: boolean
|
|
44
|
+
/** Foreground sub-agents still active AFTER removing the finished one. */
|
|
45
|
+
remainingForeground: number
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* What the gateway should do when a foreground sub-agent finishes:
|
|
50
|
+
*
|
|
51
|
+
* - `'none'` — it wasn't one we were tracking (e.g. background, or a
|
|
52
|
+
* stale boot row); leave the feed alone.
|
|
53
|
+
* - `'handoff-clear'`— it was the LAST foreground sub-agent and the parent
|
|
54
|
+
* has already acked. The re-opened post-ack feed now
|
|
55
|
+
* hands off to the imminent final answer, mirroring the
|
|
56
|
+
* first-reply hand-off. (turn_end is the safety net if
|
|
57
|
+
* this is somehow missed.)
|
|
58
|
+
* - `'recompose'` — collapse the finished sub-agent's block and re-render
|
|
59
|
+
* (pre-ack flow, or other sub-agents still running).
|
|
60
|
+
*/
|
|
61
|
+
export function foregroundFinishAction(i: ForegroundFinishInput): ForegroundFinishAction {
|
|
62
|
+
if (!i.removed) return 'none'
|
|
63
|
+
if (i.replyCalled && i.remainingForeground === 0) return 'handoff-clear'
|
|
64
|
+
return 'recompose'
|
|
65
|
+
}
|
|
@@ -303,6 +303,10 @@ import {
|
|
|
303
303
|
decideSubagentProgress,
|
|
304
304
|
DEFAULT_PROGRESS_INTERVAL_MS,
|
|
305
305
|
} from './subagent-progress-inbound-builder.js'
|
|
306
|
+
import {
|
|
307
|
+
shouldRenderForegroundProgress,
|
|
308
|
+
foregroundFinishAction,
|
|
309
|
+
} from './foreground-nesting.js'
|
|
306
310
|
import { createPollHealthCheck, type PollHealthCheckHandle } from './poll-health.js'
|
|
307
311
|
import type {
|
|
308
312
|
ToolCallMessage,
|
|
@@ -17843,16 +17847,26 @@ void (async () => {
|
|
|
17843
17847
|
// tool result, so there's no handback to deliver. Reaction
|
|
17844
17848
|
// promotion already ran above.
|
|
17845
17849
|
const turn = currentTurn
|
|
17846
|
-
|
|
17847
|
-
|
|
17848
|
-
|
|
17849
|
-
|
|
17850
|
-
|
|
17851
|
-
|
|
17852
|
-
|
|
17853
|
-
|
|
17854
|
-
|
|
17855
|
-
|
|
17850
|
+
const removed = turn != null && turn.foregroundSubAgents.delete(agentId)
|
|
17851
|
+
if (turn != null && removed) {
|
|
17852
|
+
const action = foregroundFinishAction({
|
|
17853
|
+
removed,
|
|
17854
|
+
replyCalled: turn.replyCalled,
|
|
17855
|
+
remainingForeground: turn.foregroundSubAgents.size,
|
|
17856
|
+
})
|
|
17857
|
+
if (action === 'handoff-clear') {
|
|
17858
|
+
// Post-ack: the last foreground sub-agent finished and
|
|
17859
|
+
// the parent will now produce its answer inline. Hand
|
|
17860
|
+
// the re-opened feed off to the answer, mirroring the
|
|
17861
|
+
// first-reply clear (turn_end is the safety net).
|
|
17862
|
+
clearActivitySummary(turn)
|
|
17863
|
+
} else if (action === 'recompose') {
|
|
17864
|
+
const rendered = composeTurnActivity(turn)
|
|
17865
|
+
if (rendered != null) {
|
|
17866
|
+
turn.activityPendingRender = rendered
|
|
17867
|
+
if (turn.activityInFlight == null) {
|
|
17868
|
+
turn.activityInFlight = drainActivitySummary(turn)
|
|
17869
|
+
}
|
|
17856
17870
|
}
|
|
17857
17871
|
}
|
|
17858
17872
|
}
|
|
@@ -17972,9 +17986,17 @@ void (async () => {
|
|
|
17972
17986
|
// activity draft rather than a separate worker message. Pure
|
|
17973
17987
|
// jsonl-tail → render (no model call), inside the
|
|
17974
17988
|
// subscription-honest boundary.
|
|
17975
|
-
if (!foregroundNestingEnabled) return // kill-switch: skip overhead
|
|
17976
17989
|
const turn = currentTurn
|
|
17977
|
-
if (turn == null || turn.replyCalled) return
|
|
17990
|
+
if (turn == null) return
|
|
17991
|
+
// Render regardless of `replyCalled` — a foreground Task
|
|
17992
|
+
// blocks the parent, so any reply seen while it runs is an
|
|
17993
|
+
// interim ack, never the final answer. Gating on replyCalled
|
|
17994
|
+
// (pre-#2032) made ack-first turns show zero live foreground
|
|
17995
|
+
// activity. Kill-switch lives in the predicate.
|
|
17996
|
+
if (!shouldRenderForegroundProgress({
|
|
17997
|
+
nestingEnabled: foregroundNestingEnabled,
|
|
17998
|
+
replyCalled: turn.replyCalled,
|
|
17999
|
+
})) return
|
|
17978
18000
|
const child = latestSummary.trim().slice(0, 120)
|
|
17979
18001
|
if (child.length === 0) return
|
|
17980
18002
|
let narrative = turn.foregroundSubAgents.get(agentId)
|
|
@@ -208,6 +208,23 @@ export interface SubagentWatcherConfig {
|
|
|
208
208
|
* synthesis; tests use a tiny value to exercise the path.
|
|
209
209
|
*/
|
|
210
210
|
silentStallTerminalMs?: number
|
|
211
|
+
/**
|
|
212
|
+
* Freshness window (ms) for promoting a running-at-boot worker file to
|
|
213
|
+
* live. A file whose last write (mtime) is older than this is treated as
|
|
214
|
+
* a dead prior-session worker and stays historical/suppressed, NOT
|
|
215
|
+
* promoted. Default 15 min (DEFAULT_INFLIGHT_PROMOTE_MAX_AGE_MS); env
|
|
216
|
+
* override `SWITCHROOM_SUBAGENT_INFLIGHT_MAX_AGE_MS`. Guards the v0.14.23
|
|
217
|
+
* stale-handback replay regression.
|
|
218
|
+
*/
|
|
219
|
+
inflightPromoteMaxAgeMs?: number
|
|
220
|
+
/**
|
|
221
|
+
* Kill-switch for the boot-scan promotion path. When false, a
|
|
222
|
+
* running-at-boot worker is never promoted — the watcher reverts to the
|
|
223
|
+
* pre-v0.14.23 behaviour of leaving every boot-scan file historical
|
|
224
|
+
* (suppressed). Default true; env `SWITCHROOM_SUBAGENT_BOOT_PROMOTE=0`
|
|
225
|
+
* disables it fleet-wide without a code change (emergency lever).
|
|
226
|
+
*/
|
|
227
|
+
bootPromoteEnabled?: boolean
|
|
211
228
|
/**
|
|
212
229
|
* Reaper TTL (ms): background rows in `status='running'` whose
|
|
213
230
|
* `last_activity_at` (or `started_at` if liveness never wrote) is older
|
|
@@ -382,6 +399,29 @@ const DEFAULT_SILENT_SYNTHESIS_STALL_THRESHOLD_MS = 300_000
|
|
|
382
399
|
*/
|
|
383
400
|
const DEFAULT_SILENT_STALL_TERMINAL_MS = 300_000
|
|
384
401
|
|
|
402
|
+
/**
|
|
403
|
+
* Freshness window for the boot-scan "in-flight at boot → promote to
|
|
404
|
+
* live" path. A worker file still in `running` state at boot is only
|
|
405
|
+
* promoted (un-suppressed) if its last write (file mtime) is within this
|
|
406
|
+
* window of now. The signal cleanly separates the two populations:
|
|
407
|
+
*
|
|
408
|
+
* - A worker genuinely in-flight across a restart / fleet rollout was
|
|
409
|
+
* writing right up until the container was recreated, so its mtime is
|
|
410
|
+
* seconds-to-minutes before the new gateway boots — well inside the
|
|
411
|
+
* window. The user is still awaiting it; promote it.
|
|
412
|
+
* - A worker that died in a PRIOR session without writing a terminal
|
|
413
|
+
* `turn_end` is also `running` in the file, but its mtime is hours-to-
|
|
414
|
+
* weeks old. These accumulate by the dozen-to-hundred in a long-lived
|
|
415
|
+
* agent's subagents dir. Promoting them replays stale handbacks
|
|
416
|
+
* (often `failed`, from old error lines) on every boot — the v0.14.23
|
|
417
|
+
* regression. Leave them historical/suppressed, exactly as before.
|
|
418
|
+
*
|
|
419
|
+
* 15 min is generous for any plausible restart gap (container recreate +
|
|
420
|
+
* image pull) yet far below the staleness of a dead prior-session file.
|
|
421
|
+
* Override with `SWITCHROOM_SUBAGENT_INFLIGHT_MAX_AGE_MS`.
|
|
422
|
+
*/
|
|
423
|
+
const DEFAULT_INFLIGHT_PROMOTE_MAX_AGE_MS = 15 * 60_000
|
|
424
|
+
|
|
385
425
|
/**
|
|
386
426
|
* Cap on the result text retained per sub-agent (`entry.lastResultText`)
|
|
387
427
|
* and carried to the gateway via `onFinish`. The gateway feeds this into
|
|
@@ -810,6 +850,14 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
|
|
|
810
850
|
config.silentStallTerminalMs
|
|
811
851
|
?? parseEnvMs('SWITCHROOM_SUBAGENT_STALL_TERMINAL_MS')
|
|
812
852
|
?? DEFAULT_SILENT_STALL_TERMINAL_MS
|
|
853
|
+
const inflightPromoteMaxAgeMs =
|
|
854
|
+
config.inflightPromoteMaxAgeMs
|
|
855
|
+
?? parseEnvMs('SWITCHROOM_SUBAGENT_INFLIGHT_MAX_AGE_MS')
|
|
856
|
+
?? DEFAULT_INFLIGHT_PROMOTE_MAX_AGE_MS
|
|
857
|
+
// Kill-switch: not parseEnvMs (which rejects `0`) — an explicit `=0`
|
|
858
|
+
// here MUST disable promotion (revert to pre-v0.14.23 suppression).
|
|
859
|
+
const bootPromoteEnabled =
|
|
860
|
+
config.bootPromoteEnabled ?? (process.env.SWITCHROOM_SUBAGENT_BOOT_PROMOTE !== '0')
|
|
813
861
|
const reaperTtlMs = config.reaperTtlMs ?? DEFAULT_REAPER_TTL_MS
|
|
814
862
|
const reaperIntervalMs = config.reaperIntervalMs ?? DEFAULT_REAPER_INTERVAL_MS
|
|
815
863
|
const rescanMs = config.rescanMs ?? DEFAULT_RESCAN_MS
|
|
@@ -961,18 +1009,40 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
|
|
|
961
1009
|
// already `done` at boot stays historical and is short-circuited just
|
|
962
1010
|
// below — it finished before this session.)
|
|
963
1011
|
if (isHistorical && entry.state === 'running') {
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
//
|
|
967
|
-
//
|
|
968
|
-
//
|
|
969
|
-
//
|
|
970
|
-
//
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
1012
|
+
// Freshness gate (v0.14.24): only promote a file whose LAST WRITE is
|
|
1013
|
+
// recent. A genuinely in-flight-across-a-restart worker was writing
|
|
1014
|
+
// until the container was recreated (mtime seconds-to-minutes old); a
|
|
1015
|
+
// dead prior-session worker that never wrote a terminal turn_end is
|
|
1016
|
+
// also `running` but hours-to-weeks stale. Promoting the latter
|
|
1017
|
+
// replayed stale `failed` handbacks on every boot (the v0.14.23
|
|
1018
|
+
// fleet-wide regression). Unreadable mtime → treat as stale (suppress
|
|
1019
|
+
// rather than risk re-spamming). The kill-switch reverts to pre-fix
|
|
1020
|
+
// suppression entirely.
|
|
1021
|
+
let fileAgeMs = Infinity
|
|
1022
|
+
try {
|
|
1023
|
+
const st = fs.statSync(filePath)
|
|
1024
|
+
if (typeof st.mtimeMs === 'number') fileAgeMs = n - st.mtimeMs
|
|
1025
|
+
} catch {
|
|
1026
|
+
/* unreadable → Infinity → treated as stale below */
|
|
1027
|
+
}
|
|
1028
|
+
if (!bootPromoteEnabled) {
|
|
1029
|
+
log?.(`subagent-watcher: ${agentId} running at boot but promotion disabled (SWITCHROOM_SUBAGENT_BOOT_PROMOTE=0) — leaving historical`)
|
|
1030
|
+
} else if (fileAgeMs > inflightPromoteMaxAgeMs) {
|
|
1031
|
+
log?.(`subagent-watcher: ${agentId} running at boot but stale (last write ${Math.round(fileAgeMs / 1000)}s ago > ${Math.round(inflightPromoteMaxAgeMs / 1000)}s) — leaving historical (dead prior-session worker, not in-flight)`)
|
|
1032
|
+
} else {
|
|
1033
|
+
entry.historical = false
|
|
1034
|
+
log?.(`subagent-watcher: ${agentId} was in-flight at boot — promoting to live (last write ${Math.round(fileAgeMs / 1000)}s ago; user still awaiting handback)`)
|
|
1035
|
+
// The prior gateway life's registration normally linked
|
|
1036
|
+
// jsonl_agent_id already, but re-run the backfill idempotently in
|
|
1037
|
+
// case that life crashed before the link persisted — the handback's
|
|
1038
|
+
// isBackground lookup is keyed on jsonl_agent_id, and an unlinked row
|
|
1039
|
+
// would mis-resolve the worker as foreground and drop the handback.
|
|
1040
|
+
if (db != null) {
|
|
1041
|
+
try {
|
|
1042
|
+
backfillJsonlAgentId(db, filePath, agentId, log)
|
|
1043
|
+
} catch (err) {
|
|
1044
|
+
log?.(`subagent-watcher: backfill error for ${agentId}: ${(err as Error).message}`)
|
|
1045
|
+
}
|
|
976
1046
|
}
|
|
977
1047
|
}
|
|
978
1048
|
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Regression guard for the Model A foreground sub-agent nesting gate (#2032).
|
|
3
|
+
*
|
|
4
|
+
* #2027 shipped foreground nesting but gated every render path on
|
|
5
|
+
* `turn.replyCalled`. Because the framework's ack-first pattern replies
|
|
6
|
+
* "On it…" FIRST and then delegates, `replyCalled` was already true before any
|
|
7
|
+
* foreground sub-agent ran — so the feature silently produced ZERO live
|
|
8
|
+
* foreground activity for the exact case it was built for, and that went
|
|
9
|
+
* unnoticed because #2027 only tested the pure renderer, never the gate.
|
|
10
|
+
*
|
|
11
|
+
* These tests pin the gate decisions directly so the replyCalled-independence
|
|
12
|
+
* can never regress silently again.
|
|
13
|
+
*/
|
|
14
|
+
import { describe, it, expect } from 'vitest'
|
|
15
|
+
import {
|
|
16
|
+
shouldRenderForegroundProgress,
|
|
17
|
+
foregroundFinishAction,
|
|
18
|
+
} from '../gateway/foreground-nesting.js'
|
|
19
|
+
import { renderActivityFeedWithNested } from '../tool-activity-summary.js'
|
|
20
|
+
|
|
21
|
+
describe('shouldRenderForegroundProgress', () => {
|
|
22
|
+
it('renders even after the parent has acked (the #2027 blindspot)', () => {
|
|
23
|
+
// THE regression guard: ack-first sets replyCalled=true before the
|
|
24
|
+
// foreground sub-agent runs. It MUST still render.
|
|
25
|
+
expect(
|
|
26
|
+
shouldRenderForegroundProgress({ nestingEnabled: true, replyCalled: true }),
|
|
27
|
+
).toBe(true)
|
|
28
|
+
})
|
|
29
|
+
|
|
30
|
+
it('renders before any reply (pre-ack flow, unchanged)', () => {
|
|
31
|
+
expect(
|
|
32
|
+
shouldRenderForegroundProgress({ nestingEnabled: true, replyCalled: false }),
|
|
33
|
+
).toBe(true)
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
it('is independent of replyCalled — the flag never flips the outcome', () => {
|
|
37
|
+
const on = shouldRenderForegroundProgress({ nestingEnabled: true, replyCalled: true })
|
|
38
|
+
const off = shouldRenderForegroundProgress({ nestingEnabled: true, replyCalled: false })
|
|
39
|
+
expect(on).toBe(off)
|
|
40
|
+
})
|
|
41
|
+
|
|
42
|
+
it('honours the kill-switch regardless of replyCalled', () => {
|
|
43
|
+
expect(
|
|
44
|
+
shouldRenderForegroundProgress({ nestingEnabled: false, replyCalled: false }),
|
|
45
|
+
).toBe(false)
|
|
46
|
+
expect(
|
|
47
|
+
shouldRenderForegroundProgress({ nestingEnabled: false, replyCalled: true }),
|
|
48
|
+
).toBe(false)
|
|
49
|
+
})
|
|
50
|
+
})
|
|
51
|
+
|
|
52
|
+
describe('foregroundFinishAction', () => {
|
|
53
|
+
it('hands off to the answer when the last sub-agent finishes post-ack', () => {
|
|
54
|
+
expect(
|
|
55
|
+
foregroundFinishAction({ removed: true, replyCalled: true, remainingForeground: 0 }),
|
|
56
|
+
).toBe('handoff-clear')
|
|
57
|
+
})
|
|
58
|
+
|
|
59
|
+
it('recomposes when other foreground sub-agents are still running post-ack', () => {
|
|
60
|
+
expect(
|
|
61
|
+
foregroundFinishAction({ removed: true, replyCalled: true, remainingForeground: 1 }),
|
|
62
|
+
).toBe('recompose')
|
|
63
|
+
})
|
|
64
|
+
|
|
65
|
+
it('recomposes pre-ack (original behaviour preserved)', () => {
|
|
66
|
+
expect(
|
|
67
|
+
foregroundFinishAction({ removed: true, replyCalled: false, remainingForeground: 0 }),
|
|
68
|
+
).toBe('recompose')
|
|
69
|
+
})
|
|
70
|
+
|
|
71
|
+
it('does nothing for an agent it was not tracking', () => {
|
|
72
|
+
expect(
|
|
73
|
+
foregroundFinishAction({ removed: false, replyCalled: true, remainingForeground: 0 }),
|
|
74
|
+
).toBe('none')
|
|
75
|
+
})
|
|
76
|
+
})
|
|
77
|
+
|
|
78
|
+
describe('end-to-end render shape under ack-first', () => {
|
|
79
|
+
it('produces a live nested block from a post-ack foreground narrative', () => {
|
|
80
|
+
// marko's real scenario: parent acked with no prior steps (empty mirror),
|
|
81
|
+
// then a foreground researcher emits progress. The gate now ALLOWS this
|
|
82
|
+
// render; prove the render is a real, non-empty nested feed with a live
|
|
83
|
+
// bold "→ current" line — i.e. the user would actually see activity.
|
|
84
|
+
const html = renderActivityFeedWithNested(
|
|
85
|
+
[],
|
|
86
|
+
[
|
|
87
|
+
'searching Unsplash CC0 desk',
|
|
88
|
+
'checking license on 4 candidates',
|
|
89
|
+
'ranking by resolution',
|
|
90
|
+
],
|
|
91
|
+
)
|
|
92
|
+
expect(html).not.toBeNull()
|
|
93
|
+
// newest child is the live, bold current step
|
|
94
|
+
expect(html).toContain('<b>→ ranking by resolution</b>')
|
|
95
|
+
// an earlier child is present as a done/italic step
|
|
96
|
+
expect(html).toContain('checking license on 4 candidates')
|
|
97
|
+
})
|
|
98
|
+
})
|
|
@@ -80,6 +80,14 @@ function makeHarness(opts: {
|
|
|
80
80
|
stallThresholdMs?: number
|
|
81
81
|
silentStallTerminalMs?: number
|
|
82
82
|
rescanMs?: number
|
|
83
|
+
/** How long ago (ms) the boot file was last written, i.e. its mtime is
|
|
84
|
+
* `currentTime - bootFileAgeMs` at registration. Default 0 (fresh, so the
|
|
85
|
+
* freshness gate promotes it). Set large to simulate a dead prior-session
|
|
86
|
+
* worker that must NOT be promoted. */
|
|
87
|
+
bootFileAgeMs?: number
|
|
88
|
+
/** Kill-switch passthrough; default true (promotion enabled). */
|
|
89
|
+
bootPromoteEnabled?: boolean
|
|
90
|
+
inflightPromoteMaxAgeMs?: number
|
|
83
91
|
}): Harness {
|
|
84
92
|
const {
|
|
85
93
|
agentId = 'gap-agent',
|
|
@@ -87,6 +95,9 @@ function makeHarness(opts: {
|
|
|
87
95
|
stallThresholdMs = 60_000,
|
|
88
96
|
silentStallTerminalMs = 300_000,
|
|
89
97
|
rescanMs = 500,
|
|
98
|
+
bootFileAgeMs = 0,
|
|
99
|
+
bootPromoteEnabled = true,
|
|
100
|
+
inflightPromoteMaxAgeMs,
|
|
90
101
|
} = opts
|
|
91
102
|
|
|
92
103
|
let currentTime = 1000
|
|
@@ -104,6 +115,10 @@ function makeHarness(opts: {
|
|
|
104
115
|
|
|
105
116
|
const fileContents = new Map<string, Buffer>()
|
|
106
117
|
fileContents.set(jsonlPath, Buffer.from(buildJSONL(...bootLines), 'utf-8'))
|
|
118
|
+
// Per-file mtime (ms). The boot file's last write is `bootFileAgeMs` in the
|
|
119
|
+
// past; appends bump it to currentTime. The freshness gate reads this.
|
|
120
|
+
const fileMtimes = new Map<string, number>()
|
|
121
|
+
fileMtimes.set(jsonlPath, 1000 - bootFileAgeMs)
|
|
107
122
|
|
|
108
123
|
let lastOpenedPath: string | null = null
|
|
109
124
|
const mockFs = {
|
|
@@ -121,7 +136,7 @@ function makeHarness(opts: {
|
|
|
121
136
|
if (ps === subagentsDir) return [`agent-${agentId}.jsonl`]
|
|
122
137
|
return []
|
|
123
138
|
}) as unknown as typeof fs.readdirSync,
|
|
124
|
-
statSync: ((p: fs.PathLike) => ({ size: fileContents.get(String(p))?.length ?? 0 }) as fs.Stats) as typeof fs.statSync,
|
|
139
|
+
statSync: ((p: fs.PathLike) => ({ size: fileContents.get(String(p))?.length ?? 0, mtimeMs: fileMtimes.get(String(p)) ?? currentTime }) as fs.Stats) as typeof fs.statSync,
|
|
125
140
|
openSync: ((p: fs.PathLike) => {
|
|
126
141
|
lastOpenedPath = String(p)
|
|
127
142
|
return 42
|
|
@@ -153,6 +168,8 @@ function makeHarness(opts: {
|
|
|
153
168
|
silentSynthesisStallThresholdMs: stallThresholdMs,
|
|
154
169
|
silentStallTerminalMs,
|
|
155
170
|
rescanMs,
|
|
171
|
+
bootPromoteEnabled,
|
|
172
|
+
...(inflightPromoteMaxAgeMs != null ? { inflightPromoteMaxAgeMs } : {}),
|
|
156
173
|
onStallTerminal: (id) => stallTerminalCalls.push({ agentId: id }),
|
|
157
174
|
onFinish: ({ agentId: id, outcome, resultText }) =>
|
|
158
175
|
finishCalls.push({ agentId: id, outcome, resultText }),
|
|
@@ -186,6 +203,7 @@ function makeHarness(opts: {
|
|
|
186
203
|
const cur = fileContents.get(jsonlPath) ?? Buffer.alloc(0)
|
|
187
204
|
const more = buildJSONL(...lines)
|
|
188
205
|
fileContents.set(jsonlPath, Buffer.concat([cur, Buffer.from(more, 'utf-8')]))
|
|
206
|
+
fileMtimes.set(jsonlPath, currentTime)
|
|
189
207
|
}
|
|
190
208
|
|
|
191
209
|
return { stallTerminalCalls, finishCalls, logs, advance, watcher, fileContents, jsonlPath, append }
|
|
@@ -245,6 +263,75 @@ describe('Gap 1 — background worker in-flight across a gateway restart', () =>
|
|
|
245
263
|
})
|
|
246
264
|
})
|
|
247
265
|
|
|
266
|
+
describe('Gap 1 freshness gate — v0.14.24 stale-replay regression', () => {
|
|
267
|
+
// The v0.14.23 regression: promoting EVERY running-at-boot file replayed
|
|
268
|
+
// weeks-old dead prior-session workers as handbacks (often `failed`, from
|
|
269
|
+
// old error lines) on every boot, spamming the whole fleet. The gate
|
|
270
|
+
// promotes only files whose last write is recent.
|
|
271
|
+
|
|
272
|
+
it('a STALE running-at-boot worker (weeks-old mtime) is NOT promoted — no handback, no stall', () => {
|
|
273
|
+
const h = makeHarness({
|
|
274
|
+
agentId: 'gap1-stale-running',
|
|
275
|
+
bootLines: [subAgentUserMsg('bg task from weeks ago')], // running: no turn_end
|
|
276
|
+
bootFileAgeMs: 21 * 24 * 60 * 60_000, // 21 days old — clearly dead
|
|
277
|
+
silentStallTerminalMs: 120_000,
|
|
278
|
+
})
|
|
279
|
+
|
|
280
|
+
h.advance(600)
|
|
281
|
+
h.advance(600_000) // far past every stall/synthesis window
|
|
282
|
+
expect(h.finishCalls).toHaveLength(0) // pre-fix: a spurious (often failed) handback
|
|
283
|
+
expect(h.stallTerminalCalls).toHaveLength(0)
|
|
284
|
+
expect(h.logs.some((l) => l.includes('stale') && l.includes('leaving historical'))).toBe(true)
|
|
285
|
+
})
|
|
286
|
+
|
|
287
|
+
it('a FRESH running-at-boot worker (recent mtime) IS still promoted and hands back', () => {
|
|
288
|
+
// Preserve the genuine Gap 1 fix: a worker in-flight across a restart
|
|
289
|
+
// (wrote moments before the bounce) must still get promoted + handed back.
|
|
290
|
+
const h = makeHarness({
|
|
291
|
+
agentId: 'gap1-fresh-running',
|
|
292
|
+
bootLines: [subAgentUserMsg('bg task')],
|
|
293
|
+
bootFileAgeMs: 30_000, // 30s old — in-flight across a quick restart
|
|
294
|
+
})
|
|
295
|
+
|
|
296
|
+
h.append(subAgentText('Finished the migration'), subAgentTurnEnd())
|
|
297
|
+
h.advance(600)
|
|
298
|
+
|
|
299
|
+
expect(h.finishCalls).toHaveLength(1)
|
|
300
|
+
expect(h.finishCalls[0].outcome).toBe('completed')
|
|
301
|
+
expect(h.logs.some((l) => l.includes('promoting to live'))).toBe(true)
|
|
302
|
+
})
|
|
303
|
+
|
|
304
|
+
it('kill-switch (bootPromoteEnabled=false) suppresses even a fresh running-at-boot worker', () => {
|
|
305
|
+
const h = makeHarness({
|
|
306
|
+
agentId: 'gap1-killswitch',
|
|
307
|
+
bootLines: [subAgentUserMsg('bg task')],
|
|
308
|
+
bootFileAgeMs: 5_000, // fresh — would normally promote
|
|
309
|
+
bootPromoteEnabled: false,
|
|
310
|
+
silentStallTerminalMs: 120_000,
|
|
311
|
+
})
|
|
312
|
+
|
|
313
|
+
h.advance(600)
|
|
314
|
+
h.advance(600_000)
|
|
315
|
+
expect(h.finishCalls).toHaveLength(0)
|
|
316
|
+
expect(h.logs.some((l) => l.includes('promotion disabled'))).toBe(true)
|
|
317
|
+
})
|
|
318
|
+
|
|
319
|
+
it('a worker just past the freshness window is NOT promoted (boundary)', () => {
|
|
320
|
+
const h = makeHarness({
|
|
321
|
+
agentId: 'gap1-boundary',
|
|
322
|
+
bootLines: [subAgentUserMsg('bg task')],
|
|
323
|
+
inflightPromoteMaxAgeMs: 60_000, // 60s window
|
|
324
|
+
bootFileAgeMs: 90_000, // 90s old → just stale
|
|
325
|
+
silentStallTerminalMs: 120_000,
|
|
326
|
+
})
|
|
327
|
+
|
|
328
|
+
h.advance(600)
|
|
329
|
+
h.advance(600_000)
|
|
330
|
+
expect(h.finishCalls).toHaveLength(0)
|
|
331
|
+
expect(h.logs.some((l) => l.includes('stale'))).toBe(true)
|
|
332
|
+
})
|
|
333
|
+
})
|
|
334
|
+
|
|
248
335
|
describe('Gap 2 — failure honesty', () => {
|
|
249
336
|
it('a terminal error line flips the outcome to failed and carries the detail', () => {
|
|
250
337
|
const h = makeHarness({ agentId: 'gap2-failed', bootLines: [subAgentUserMsg('bg task')] })
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Foreground sub-agent live activity nesting (#2032) — UAT.
|
|
3
|
+
*
|
|
4
|
+
* Background (#2027): a FOREGROUND sub-agent (Agent/Task WITHOUT
|
|
5
|
+
* run_in_background) runs INSIDE the parent turn, blocking it. Its live steps
|
|
6
|
+
* were meant to nest into the parent's activity-summary feed (Model A). But
|
|
7
|
+
* every render path bailed on `turn.replyCalled`, and the framework's
|
|
8
|
+
* ack-first pattern replies "On it…" FIRST — so the sub-agent ran with
|
|
9
|
+
* replyCalled already true and NOTHING ever painted. #2027 only tested the
|
|
10
|
+
* pure renderer, so the regression shipped silently (the "marko's researcher
|
|
11
|
+
* showed no Telegram activity" report). #2032 renders foreground progress
|
|
12
|
+
* regardless of replyCalled.
|
|
13
|
+
*
|
|
14
|
+
* This scenario forces the exact broken shape end-to-end:
|
|
15
|
+
* 1. Prompt the agent to send a quick ack FIRST (sets replyCalled=true),
|
|
16
|
+
* 2. THEN dispatch a FOREGROUND sub-agent (run_in_background:false) that
|
|
17
|
+
* narrates eight paced steps so its jsonl keeps ticking under the
|
|
18
|
+
* test-harness 5s stall floor and the feed can paint + edit,
|
|
19
|
+
* 3. then report done.
|
|
20
|
+
*
|
|
21
|
+
* Asserts the load-bearing proof: an activity-summary feed message appears
|
|
22
|
+
* carrying the NESTED foreground marker ("↳") with the sub-agent's narration —
|
|
23
|
+
* i.e. live foreground activity surfaced AFTER the ack. Pre-#2032 this message
|
|
24
|
+
* never existed. Logs every observed body so a human can read the real UX.
|
|
25
|
+
*
|
|
26
|
+
* NOT a draft: the activity-summary feed is a real sendMessage/editMessageText
|
|
27
|
+
* (gateway.ts drainActivitySummary), so mtcute can observe it — unlike the
|
|
28
|
+
* answer-stream draft, which mtcute cannot see.
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import { describe, expect, it } from "vitest";
|
|
32
|
+
import { spinUp } from "../harness.js";
|
|
33
|
+
|
|
34
|
+
// Same paced-narration discipline as jtbd-worker-activity-feed-dm: each step
|
|
35
|
+
// is its own Bash call with a one-line narration so the sub-agent emits a
|
|
36
|
+
// `sub_agent_text` line every ~2s — under SWITCHROOM_SUBAGENT_STALL_MS=5000,
|
|
37
|
+
// so the watcher never synth-terminates it mid-flight (which would suppress
|
|
38
|
+
// onProgress and the feed would never paint). run_in_background:false makes it
|
|
39
|
+
// FOREGROUND — the whole point.
|
|
40
|
+
const FG_DISPATCH_PROMPT =
|
|
41
|
+
`First, immediately send me a one-line acknowledgement that you're starting ` +
|
|
42
|
+
`(just "On it — running a check now."). Then use the Agent tool with ` +
|
|
43
|
+
`subagent_type "general-purpose" and run_in_background: false (a FOREGROUND ` +
|
|
44
|
+
`sub-agent) with this exact task: "Do eight steps, ONE AT A TIME, k = 1 ` +
|
|
45
|
+
`through 8. Before each step write a brief one-sentence narration of what ` +
|
|
46
|
+
`you are about to do, then run \`sleep 2\` via the Bash tool, then run ` +
|
|
47
|
+
`\`echo step-k\` via the Bash tool (substitute the real number for k). Run ` +
|
|
48
|
+
`every sleep and every echo as its OWN separate Bash call — never batch or ` +
|
|
49
|
+
`chain them with && — and narrate before each so progress surfaces ` +
|
|
50
|
+
`incrementally. Do not stop early; complete all eight steps, then return a ` +
|
|
51
|
+
`one-line summary." Wait for the foreground sub-agent to finish, then send ` +
|
|
52
|
+
`me a brief reply telling me it's done.`;
|
|
53
|
+
|
|
54
|
+
// The nested foreground block renders each child line prefixed with "↳"
|
|
55
|
+
// (NESTED_PREFIX = " ↳ "), newest as a bold "→ …" current step. Telegram
|
|
56
|
+
// strips the bold but keeps the literal ↳ / → glyphs in message text.
|
|
57
|
+
const NESTED_RE = /↳/;
|
|
58
|
+
|
|
59
|
+
describe("uat: foreground sub-agent live activity nesting (#2032)", () => {
|
|
60
|
+
it(
|
|
61
|
+
"surfaces nested foreground activity in the feed AFTER the ack-first reply",
|
|
62
|
+
async () => {
|
|
63
|
+
const sc = await spinUp({ agent: "test-harness" });
|
|
64
|
+
try {
|
|
65
|
+
await sc.sendDM(FG_DISPATCH_PROMPT);
|
|
66
|
+
|
|
67
|
+
// Ack-first reply — establishes replyCalled=true BEFORE the
|
|
68
|
+
// foreground sub-agent runs. This is the condition that broke #2027.
|
|
69
|
+
const ack = await sc.expectMessage(/.+/, {
|
|
70
|
+
from: "bot",
|
|
71
|
+
timeout: 60_000,
|
|
72
|
+
});
|
|
73
|
+
console.log(`[fg-activity UAT] ack-first reply: ${JSON.stringify(ack.text)}`);
|
|
74
|
+
|
|
75
|
+
// The activity-summary feed carrying the NESTED foreground narrative.
|
|
76
|
+
// First paint waits for the sub-agent to dispatch + narrate (~8s
|
|
77
|
+
// first-paint throttle), so give it a generous window. Its presence
|
|
78
|
+
// is the load-bearing proof of the fix: post-ack foreground activity.
|
|
79
|
+
const feed = await sc.expectMessage(NESTED_RE, {
|
|
80
|
+
from: "bot",
|
|
81
|
+
timeout: 90_000,
|
|
82
|
+
});
|
|
83
|
+
console.log(
|
|
84
|
+
`[fg-activity UAT] nested feed paint (id=${feed.messageId}): ${JSON.stringify(feed.text)}`,
|
|
85
|
+
);
|
|
86
|
+
expect(feed.messageId).toBeGreaterThan(0);
|
|
87
|
+
expect(feed.text).toMatch(NESTED_RE);
|
|
88
|
+
|
|
89
|
+
// Live edit: re-fetch the SAME message after the throttle + a few
|
|
90
|
+
// sub-agent steps. Body should change as the nested narrative
|
|
91
|
+
// advances — proving it's a live feed, not a one-shot post. Soft:
|
|
92
|
+
// log either way; the nested-paint above is the load-bearing proof.
|
|
93
|
+
const before = feed.text;
|
|
94
|
+
await new Promise((r) => setTimeout(r, 10_000));
|
|
95
|
+
const mid = await sc.driver.getMessage(sc.botUserId, feed.messageId);
|
|
96
|
+
console.log(
|
|
97
|
+
`[fg-activity UAT] same feed after 10s (id=${feed.messageId}): ${JSON.stringify(mid?.text ?? null)}`,
|
|
98
|
+
);
|
|
99
|
+
|
|
100
|
+
// Final answer — the parent resumes after the foreground sub-agent
|
|
101
|
+
// returns and reports done. The feed hands off (clears) to this
|
|
102
|
+
// answer. Confirms the turn completes cleanly, not wedged.
|
|
103
|
+
const done = await sc.expectMessage(/done|complete|finished|step-8|wrapped/i, {
|
|
104
|
+
from: "bot",
|
|
105
|
+
timeout: 120_000,
|
|
106
|
+
});
|
|
107
|
+
console.log(`[fg-activity UAT] final answer: ${JSON.stringify(done.text)}`);
|
|
108
|
+
expect(done.text.length).toBeGreaterThan(0);
|
|
109
|
+
// Did the nested narrative actually move while in flight?
|
|
110
|
+
if (mid?.text != null) {
|
|
111
|
+
console.log(
|
|
112
|
+
`[fg-activity UAT] body moved in-flight: ${mid.text !== before}`,
|
|
113
|
+
);
|
|
114
|
+
}
|
|
115
|
+
} finally {
|
|
116
|
+
await sc.tearDown();
|
|
117
|
+
}
|
|
118
|
+
},
|
|
119
|
+
300_000,
|
|
120
|
+
);
|
|
121
|
+
});
|