switchroom 0.14.23 → 0.14.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -49420,8 +49420,8 @@ var {
49420
49420
  } = import__.default;
49421
49421
 
49422
49422
  // src/build-info.ts
49423
- var VERSION = "0.14.23";
49424
- var COMMIT_SHA = "8ac2987a";
49423
+ var VERSION = "0.14.25";
49424
+ var COMMIT_SHA = "f75f4f25";
49425
49425
 
49426
49426
  // src/cli/agent.ts
49427
49427
  init_source();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "switchroom",
3
- "version": "0.14.23",
3
+ "version": "0.14.25",
4
4
  "description": "Run Claude Code 24/7 on your Claude Pro/Max subscription over Telegram. Open-source alternative to OpenClaw and NanoClaw — no API keys.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -47505,6 +47505,18 @@ function decideSubagentProgress(input) {
47505
47505
  return { deliver: true, chatId, bucketIdx, inbound };
47506
47506
  }
47507
47507
 
47508
+ // gateway/foreground-nesting.ts
47509
// Foreground sub-agent progress is rendered whenever the nesting kill-switch
// is on; no other field of the gate object is consulted.
function shouldRenderForegroundProgress(g) {
  const { nestingEnabled } = g;
  return nestingEnabled;
}
47512
// Maps a finished foreground sub-agent to the feed action the gateway takes:
//   "none"          - the agent was not one being tracked
//   "handoff-clear" - it was the last tracked one and the parent already replied
//   "recompose"     - every other tracked case
function foregroundFinishAction(i) {
  if (i.removed) {
    const lastOneAfterAck = i.replyCalled && i.remainingForeground === 0;
    return lastOneAfterAck ? "handoff-clear" : "recompose";
  }
  return "none";
}
47519
+
47508
47520
  // gateway/poll-health.ts
47509
47521
  var DEFAULT_LOG = (msg) => {
47510
47522
  process.stderr.write(msg.endsWith(`
@@ -49225,6 +49237,7 @@ var DEFAULT_RESCAN_MS = 1000;
49225
49237
  var DEFAULT_STALL_THRESHOLD_MS = 60000;
49226
49238
  var DEFAULT_SILENT_SYNTHESIS_STALL_THRESHOLD_MS = 300000;
49227
49239
  var DEFAULT_SILENT_STALL_TERMINAL_MS = 300000;
49240
+ var DEFAULT_INFLIGHT_PROMOTE_MAX_AGE_MS = 15 * 60000;
49228
49241
  var SUBAGENT_RESULT_TEXT_MAX = 3000;
49229
49242
  function parseEnvMs(varName) {
49230
49243
  const raw = process.env[varName];
@@ -49415,6 +49428,8 @@ function startSubagentWatcher(config) {
49415
49428
  const stallThresholdMs = config.stallThresholdMs ?? parseEnvMs("SWITCHROOM_SUBAGENT_STALL_MS") ?? DEFAULT_STALL_THRESHOLD_MS;
49416
49429
  const silentSynthesisStallThresholdMs = config.silentSynthesisStallThresholdMs ?? parseEnvMs("SWITCHROOM_SUBAGENT_SILENT_SYNTH_STALL_MS") ?? DEFAULT_SILENT_SYNTHESIS_STALL_THRESHOLD_MS;
49417
49430
  const silentStallTerminalMs = config.silentStallTerminalMs ?? parseEnvMs("SWITCHROOM_SUBAGENT_STALL_TERMINAL_MS") ?? DEFAULT_SILENT_STALL_TERMINAL_MS;
49431
+ const inflightPromoteMaxAgeMs = config.inflightPromoteMaxAgeMs ?? parseEnvMs("SWITCHROOM_SUBAGENT_INFLIGHT_MAX_AGE_MS") ?? DEFAULT_INFLIGHT_PROMOTE_MAX_AGE_MS;
49432
+ const bootPromoteEnabled = config.bootPromoteEnabled ?? process.env.SWITCHROOM_SUBAGENT_BOOT_PROMOTE !== "0";
49418
49433
  const reaperTtlMs = config.reaperTtlMs ?? DEFAULT_REAPER_TTL_MS;
49419
49434
  const reaperIntervalMs = config.reaperIntervalMs ?? DEFAULT_REAPER_INTERVAL_MS;
49420
49435
  const rescanMs = config.rescanMs ?? DEFAULT_RESCAN_MS;
@@ -49497,13 +49512,25 @@ function startSubagentWatcher(config) {
49497
49512
  log?.(`subagent-watcher: description updated for ${agentId}: ${desc}`);
49498
49513
  }, fs2, log, db2, parentStateDir, config.onUnstall, undefined, config.onProgress);
49499
49514
  if (isHistorical && entry.state === "running") {
49500
- entry.historical = false;
49501
- log?.(`subagent-watcher: ${agentId} was in-flight at boot \u2014 promoting to live (predates watcher; user still awaiting handback)`);
49502
- if (db2 != null) {
49503
- try {
49504
- backfillJsonlAgentId(db2, filePath, agentId, log);
49505
- } catch (err) {
49506
- log?.(`subagent-watcher: backfill error for ${agentId}: ${err.message}`);
49515
+ let fileAgeMs = Infinity;
49516
+ try {
49517
+ const st = fs2.statSync(filePath);
49518
+ if (typeof st.mtimeMs === "number")
49519
+ fileAgeMs = n - st.mtimeMs;
49520
+ } catch {}
49521
+ if (!bootPromoteEnabled) {
49522
+ log?.(`subagent-watcher: ${agentId} running at boot but promotion disabled (SWITCHROOM_SUBAGENT_BOOT_PROMOTE=0) \u2014 leaving historical`);
49523
+ } else if (fileAgeMs > inflightPromoteMaxAgeMs) {
49524
+ log?.(`subagent-watcher: ${agentId} running at boot but stale (last write ${Math.round(fileAgeMs / 1000)}s ago > ${Math.round(inflightPromoteMaxAgeMs / 1000)}s) \u2014 leaving historical (dead prior-session worker, not in-flight)`);
49525
+ } else {
49526
+ entry.historical = false;
49527
+ log?.(`subagent-watcher: ${agentId} was in-flight at boot \u2014 promoting to live (last write ${Math.round(fileAgeMs / 1000)}s ago; user still awaiting handback)`);
49528
+ if (db2 != null) {
49529
+ try {
49530
+ backfillJsonlAgentId(db2, filePath, agentId, log);
49531
+ } catch (err) {
49532
+ log?.(`subagent-watcher: backfill error for ${agentId}: ${err.message}`);
49533
+ }
49507
49534
  }
49508
49535
  }
49509
49536
  }
@@ -51442,10 +51469,10 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
51442
51469
  }
51443
51470
 
51444
51471
  // ../src/build-info.ts
51445
- var VERSION = "0.14.23";
51446
- var COMMIT_SHA = "8ac2987a";
51447
- var COMMIT_DATE = "2026-05-31T22:03:26Z";
51448
- var LATEST_PR = 2031;
51472
+ var VERSION = "0.14.25";
51473
+ var COMMIT_SHA = "f75f4f25";
51474
+ var COMMIT_DATE = "2026-06-01T00:05:32Z";
51475
+ var LATEST_PR = 2038;
51449
51476
  var COMMITS_AHEAD_OF_TAG = 0;
51450
51477
 
51451
51478
  // gateway/boot-version.ts
@@ -61961,12 +61988,22 @@ var didOneTimeSetup = false;
61961
61988
  const isBackground = dispatch.isBackground;
61962
61989
  if (!isBackground) {
61963
61990
  const turn = currentTurn;
61964
- if (turn != null && turn.foregroundSubAgents.delete(agentId) && !turn.replyCalled) {
61965
- const rendered = composeTurnActivity(turn);
61966
- if (rendered != null) {
61967
- turn.activityPendingRender = rendered;
61968
- if (turn.activityInFlight == null) {
61969
- turn.activityInFlight = drainActivitySummary(turn);
61991
+ const removed = turn != null && turn.foregroundSubAgents.delete(agentId);
61992
+ if (turn != null && removed) {
61993
+ const action = foregroundFinishAction({
61994
+ removed,
61995
+ replyCalled: turn.replyCalled,
61996
+ remainingForeground: turn.foregroundSubAgents.size
61997
+ });
61998
+ if (action === "handoff-clear") {
61999
+ clearActivitySummary(turn);
62000
+ } else if (action === "recompose") {
62001
+ const rendered = composeTurnActivity(turn);
62002
+ if (rendered != null) {
62003
+ turn.activityPendingRender = rendered;
62004
+ if (turn.activityInFlight == null) {
62005
+ turn.activityInFlight = drainActivitySummary(turn);
62006
+ }
61970
62007
  }
61971
62008
  }
61972
62009
  }
@@ -62033,10 +62070,13 @@ var didOneTimeSetup = false;
62033
62070
  }
62034
62071
  const isBackground = dispatch.isBackground;
62035
62072
  if (!isBackground) {
62036
- if (!foregroundNestingEnabled)
62037
- return;
62038
62073
  const turn = currentTurn;
62039
- if (turn == null || turn.replyCalled)
62074
+ if (turn == null)
62075
+ return;
62076
+ if (!shouldRenderForegroundProgress({
62077
+ nestingEnabled: foregroundNestingEnabled,
62078
+ replyCalled: turn.replyCalled
62079
+ }))
62040
62080
  return;
62041
62081
  const child = latestSummary.trim().slice(0, 120);
62042
62082
  if (child.length === 0)
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Pure state-transition helpers for Model A foreground sub-agent activity
3
+ * nesting (#2027). Extracted from gateway.ts so the `replyCalled` gate is
4
+ * unit-testable on its own — the exact seam #2027 shipped without, which let
5
+ * the feature silently no-op for every ack-first turn (reply "On it…" then
6
+ * delegate) and go unnoticed in production. See
7
+ * `tests/foreground-nesting.test.ts`.
8
+ *
9
+ * Subscription-honest: every caller is pure jsonl-tail → render, no model
10
+ * call. These functions decide *whether/what* to render; the gateway owns the
11
+ * `currentTurn` mutation and the Telegram edit.
12
+ */
13
+
14
/** Inputs to the foreground-progress render decision. */
export interface ForegroundProgressGate {
  /** Kill-switch: `SWITCHROOM_FOREGROUND_SUBAGENT_NESTING !== '0'`. */
  nestingEnabled: boolean
  /**
   * True once the parent turn has emitted a reply. Carried in the input but
   * deliberately NOT consulted: a foreground `Task` is a *blocking* call, so
   * the parent cannot have sent its FINAL answer while the sub-agent is still
   * running — a `true` seen here can only be an *interim* ack. The pre-fix
   * code bailed on this flag, which is exactly why ack-first turns showed
   * zero live foreground activity.
   */
  replyCalled: boolean
}

/**
 * Decide whether a foreground sub-agent progress tick should accumulate its
 * narrative and re-render the activity feed.
 *
 * @param g - Gate inputs; only `nestingEnabled` influences the result.
 * @returns `true` exactly when the kill-switch leaves nesting enabled,
 *   whatever the value of `replyCalled`.
 */
export function shouldRenderForegroundProgress(g: ForegroundProgressGate): boolean {
  const { nestingEnabled } = g
  return nestingEnabled
}
36
+
37
/** The feed action the gateway takes when a foreground sub-agent finishes. */
export type ForegroundFinishAction = 'recompose' | 'handoff-clear' | 'none'

/** Snapshot of the turn state at the moment a sub-agent finishes. */
export interface ForegroundFinishInput {
  /** True when this agentId was an active foreground sub-agent being tracked. */
  removed: boolean
  /** True when the parent turn has already emitted a reply (an interim ack). */
  replyCalled: boolean
  /** Count of foreground sub-agents still active AFTER the finished one is removed. */
  remainingForeground: number
}

/**
 * Choose the gateway's reaction to a finished foreground sub-agent.
 *
 * @param i - Whether the agent was tracked, whether the parent has already
 *   replied, and how many foreground sub-agents remain.
 * @returns
 *   - `'none'` when the agent was not tracked (e.g. background, or a stale
 *     boot row): the feed is left alone.
 *   - `'handoff-clear'` when it was the LAST foreground sub-agent and the
 *     parent has already acked: the re-opened post-ack feed hands off to the
 *     imminent final answer, mirroring the first-reply hand-off (turn_end is
 *     the safety net if this is somehow missed).
 *   - `'recompose'` otherwise (pre-ack flow, or other sub-agents still
 *     running): collapse the finished block and re-render.
 */
export function foregroundFinishAction(i: ForegroundFinishInput): ForegroundFinishAction {
  const { removed, replyCalled, remainingForeground } = i
  if (removed) {
    const lastOneAfterAck = replyCalled && remainingForeground === 0
    return lastOneAfterAck ? 'handoff-clear' : 'recompose'
  }
  return 'none'
}
@@ -303,6 +303,10 @@ import {
303
303
  decideSubagentProgress,
304
304
  DEFAULT_PROGRESS_INTERVAL_MS,
305
305
  } from './subagent-progress-inbound-builder.js'
306
+ import {
307
+ shouldRenderForegroundProgress,
308
+ foregroundFinishAction,
309
+ } from './foreground-nesting.js'
306
310
  import { createPollHealthCheck, type PollHealthCheckHandle } from './poll-health.js'
307
311
  import type {
308
312
  ToolCallMessage,
@@ -17843,16 +17847,26 @@ void (async () => {
17843
17847
  // tool result, so there's no handback to deliver. Reaction
17844
17848
  // promotion already ran above.
17845
17849
  const turn = currentTurn
17846
- if (
17847
- turn != null &&
17848
- turn.foregroundSubAgents.delete(agentId) &&
17849
- !turn.replyCalled
17850
- ) {
17851
- const rendered = composeTurnActivity(turn)
17852
- if (rendered != null) {
17853
- turn.activityPendingRender = rendered
17854
- if (turn.activityInFlight == null) {
17855
- turn.activityInFlight = drainActivitySummary(turn)
17850
+ const removed = turn != null && turn.foregroundSubAgents.delete(agentId)
17851
+ if (turn != null && removed) {
17852
+ const action = foregroundFinishAction({
17853
+ removed,
17854
+ replyCalled: turn.replyCalled,
17855
+ remainingForeground: turn.foregroundSubAgents.size,
17856
+ })
17857
+ if (action === 'handoff-clear') {
17858
+ // Post-ack: the last foreground sub-agent finished and
17859
+ // the parent will now produce its answer inline. Hand
17860
+ // the re-opened feed off to the answer, mirroring the
17861
+ // first-reply clear (turn_end is the safety net).
17862
+ clearActivitySummary(turn)
17863
+ } else if (action === 'recompose') {
17864
+ const rendered = composeTurnActivity(turn)
17865
+ if (rendered != null) {
17866
+ turn.activityPendingRender = rendered
17867
+ if (turn.activityInFlight == null) {
17868
+ turn.activityInFlight = drainActivitySummary(turn)
17869
+ }
17856
17870
  }
17857
17871
  }
17858
17872
  }
@@ -17972,9 +17986,17 @@ void (async () => {
17972
17986
  // activity draft rather than a separate worker message. Pure
17973
17987
  // jsonl-tail → render (no model call), inside the
17974
17988
  // subscription-honest boundary.
17975
- if (!foregroundNestingEnabled) return // kill-switch: skip overhead
17976
17989
  const turn = currentTurn
17977
- if (turn == null || turn.replyCalled) return
17990
+ if (turn == null) return
17991
+ // Render regardless of `replyCalled` — a foreground Task
17992
+ // blocks the parent, so any reply seen while it runs is an
17993
+ // interim ack, never the final answer. Gating on replyCalled
17994
+ // (pre-#2032) made ack-first turns show zero live foreground
17995
+ // activity. Kill-switch lives in the predicate.
17996
+ if (!shouldRenderForegroundProgress({
17997
+ nestingEnabled: foregroundNestingEnabled,
17998
+ replyCalled: turn.replyCalled,
17999
+ })) return
17978
18000
  const child = latestSummary.trim().slice(0, 120)
17979
18001
  if (child.length === 0) return
17980
18002
  let narrative = turn.foregroundSubAgents.get(agentId)
@@ -208,6 +208,23 @@ export interface SubagentWatcherConfig {
208
208
  * synthesis; tests use a tiny value to exercise the path.
209
209
  */
210
210
  silentStallTerminalMs?: number
211
+ /**
212
+ * Freshness window (ms) for promoting a running-at-boot worker file to
213
+ * live. A file whose last write (mtime) is older than this is treated as
214
+ * a dead prior-session worker and stays historical/suppressed, NOT
215
+ * promoted. Default 15 min (DEFAULT_INFLIGHT_PROMOTE_MAX_AGE_MS); env
216
+ * override `SWITCHROOM_SUBAGENT_INFLIGHT_MAX_AGE_MS`. Guards the v0.14.23
217
+ * stale-handback replay regression.
218
+ */
219
+ inflightPromoteMaxAgeMs?: number
220
+ /**
221
+ * Kill-switch for the boot-scan promotion path. When false, a
222
+ * running-at-boot worker is never promoted — the watcher reverts to the
223
+ * pre-v0.14.23 behaviour of leaving every boot-scan file historical
224
+ * (suppressed). Default true; env `SWITCHROOM_SUBAGENT_BOOT_PROMOTE=0`
225
+ * disables it fleet-wide without a code change (emergency lever).
226
+ */
227
+ bootPromoteEnabled?: boolean
211
228
  /**
212
229
  * Reaper TTL (ms): background rows in `status='running'` whose
213
230
  * `last_activity_at` (or `started_at` if liveness never wrote) is older
@@ -382,6 +399,29 @@ const DEFAULT_SILENT_SYNTHESIS_STALL_THRESHOLD_MS = 300_000
382
399
  */
383
400
  const DEFAULT_SILENT_STALL_TERMINAL_MS = 300_000
384
401
 
402
/**
 * Default freshness window (ms) for the boot-scan "in-flight at boot →
 * promote to live" path. A worker file found in `running` state at boot is
 * promoted (un-suppressed) only when its last write (file mtime) lies within
 * this window of now. The mtime separates two populations:
 *
 *   - Genuinely in-flight across a restart / fleet rollout: the worker was
 *     writing until the container was recreated, so its mtime is
 *     seconds-to-minutes old when the new gateway boots. The user is still
 *     waiting on it, so it is promoted.
 *   - Died in a PRIOR session without a terminal `turn_end`: the file also
 *     reads as `running`, but its mtime is hours-to-weeks old. Such files
 *     pile up by the dozen-to-hundred in a long-lived agent's subagents dir;
 *     promoting them replays stale handbacks (often `failed`, from old error
 *     lines) on every boot — the v0.14.23 regression. They stay
 *     historical/suppressed, exactly as before.
 *
 * Fifteen minutes is generous for any plausible restart gap (container
 * recreate + image pull) yet far below the staleness of a dead prior-session
 * file. Override with `SWITCHROOM_SUBAGENT_INFLIGHT_MAX_AGE_MS`.
 */
const DEFAULT_INFLIGHT_PROMOTE_MAX_AGE_MS = 15 * 60 * 1_000
424
+
385
425
  /**
386
426
  * Cap on the result text retained per sub-agent (`entry.lastResultText`)
387
427
  * and carried to the gateway via `onFinish`. The gateway feeds this into
@@ -810,6 +850,14 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
810
850
  config.silentStallTerminalMs
811
851
  ?? parseEnvMs('SWITCHROOM_SUBAGENT_STALL_TERMINAL_MS')
812
852
  ?? DEFAULT_SILENT_STALL_TERMINAL_MS
853
+ const inflightPromoteMaxAgeMs =
854
+ config.inflightPromoteMaxAgeMs
855
+ ?? parseEnvMs('SWITCHROOM_SUBAGENT_INFLIGHT_MAX_AGE_MS')
856
+ ?? DEFAULT_INFLIGHT_PROMOTE_MAX_AGE_MS
857
+ // Kill-switch: not parseEnvMs (which rejects `0`) — an explicit `=0`
858
+ // here MUST disable promotion (revert to pre-v0.14.23 suppression).
859
+ const bootPromoteEnabled =
860
+ config.bootPromoteEnabled ?? (process.env.SWITCHROOM_SUBAGENT_BOOT_PROMOTE !== '0')
813
861
  const reaperTtlMs = config.reaperTtlMs ?? DEFAULT_REAPER_TTL_MS
814
862
  const reaperIntervalMs = config.reaperIntervalMs ?? DEFAULT_REAPER_INTERVAL_MS
815
863
  const rescanMs = config.rescanMs ?? DEFAULT_RESCAN_MS
@@ -961,18 +1009,40 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
961
1009
  // already `done` at boot stays historical and is short-circuited just
962
1010
  // below — it finished before this session.)
963
1011
  if (isHistorical && entry.state === 'running') {
964
- entry.historical = false
965
- log?.(`subagent-watcher: ${agentId} was in-flight at boot — promoting to live (predates watcher; user still awaiting handback)`)
966
- // The prior gateway life's registration normally linked
967
- // jsonl_agent_id already, but re-run the backfill idempotently in
968
- // case that life crashed before the link persisted — the handback's
969
- // isBackground lookup is keyed on jsonl_agent_id, and an unlinked row
970
- // would mis-resolve the worker as foreground and drop the handback.
971
- if (db != null) {
972
- try {
973
- backfillJsonlAgentId(db, filePath, agentId, log)
974
- } catch (err) {
975
- log?.(`subagent-watcher: backfill error for ${agentId}: ${(err as Error).message}`)
1012
+ // Freshness gate (v0.14.24): only promote a file whose LAST WRITE is
1013
+ // recent. A genuinely in-flight-across-a-restart worker was writing
1014
+ // until the container was recreated (mtime seconds-to-minutes old); a
1015
+ // dead prior-session worker that never wrote a terminal turn_end is
1016
+ // also `running` but hours-to-weeks stale. Promoting the latter
1017
+ // replayed stale `failed` handbacks on every boot (the v0.14.23
1018
+ // fleet-wide regression). An unreadable mtime is treated as stale (suppress
1019
+ // rather than risk re-spamming). The kill-switch reverts to pre-fix
1020
+ // suppression entirely.
1021
+ let fileAgeMs = Infinity
1022
+ try {
1023
+ const st = fs.statSync(filePath)
1024
+ if (typeof st.mtimeMs === 'number') fileAgeMs = n - st.mtimeMs
1025
+ } catch {
1026
+ /* unreadable → Infinity → treated as stale below */
1027
+ }
1028
+ if (!bootPromoteEnabled) {
1029
+ log?.(`subagent-watcher: ${agentId} running at boot but promotion disabled (SWITCHROOM_SUBAGENT_BOOT_PROMOTE=0) — leaving historical`)
1030
+ } else if (fileAgeMs > inflightPromoteMaxAgeMs) {
1031
+ log?.(`subagent-watcher: ${agentId} running at boot but stale (last write ${Math.round(fileAgeMs / 1000)}s ago > ${Math.round(inflightPromoteMaxAgeMs / 1000)}s) — leaving historical (dead prior-session worker, not in-flight)`)
1032
+ } else {
1033
+ entry.historical = false
1034
+ log?.(`subagent-watcher: ${agentId} was in-flight at boot — promoting to live (last write ${Math.round(fileAgeMs / 1000)}s ago; user still awaiting handback)`)
1035
+ // The prior gateway life's registration normally linked
1036
+ // jsonl_agent_id already, but re-run the backfill idempotently in
1037
+ // case that life crashed before the link persisted — the handback's
1038
+ // isBackground lookup is keyed on jsonl_agent_id, and an unlinked row
1039
+ // would mis-resolve the worker as foreground and drop the handback.
1040
+ if (db != null) {
1041
+ try {
1042
+ backfillJsonlAgentId(db, filePath, agentId, log)
1043
+ } catch (err) {
1044
+ log?.(`subagent-watcher: backfill error for ${agentId}: ${(err as Error).message}`)
1045
+ }
976
1046
  }
977
1047
  }
978
1048
  }
@@ -0,0 +1,98 @@
1
+ /**
2
+ * Regression guard for the Model A foreground sub-agent nesting gate (#2032).
3
+ *
4
+ * #2027 shipped foreground nesting but gated every render path on
5
+ * `turn.replyCalled`. Because the framework's ack-first pattern replies
6
+ * "On it…" FIRST and then delegates, `replyCalled` was already true before any
7
+ * foreground sub-agent ran — so the feature silently produced ZERO live
8
+ * foreground activity for the exact case it was built for, and that went
9
+ * unnoticed because #2027 only tested the pure renderer, never the gate.
10
+ *
11
+ * These tests pin the gate decisions directly so the replyCalled-independence
12
+ * can never regress silently again.
13
+ */
14
+ import { describe, it, expect } from 'vitest'
15
+ import {
16
+ shouldRenderForegroundProgress,
17
+ foregroundFinishAction,
18
+ } from '../gateway/foreground-nesting.js'
19
+ import { renderActivityFeedWithNested } from '../tool-activity-summary.js'
20
+
21
describe('shouldRenderForegroundProgress', () => {
  // Shorthand so each case states only the two flags it cares about.
  const decide = (nestingEnabled: boolean, replyCalled: boolean): boolean =>
    shouldRenderForegroundProgress({ nestingEnabled, replyCalled })

  it('renders even after the parent has acked (the #2027 blindspot)', () => {
    // The core regression guard: in an ack-first turn replyCalled is already
    // true before the foreground sub-agent runs, and rendering MUST still
    // happen.
    expect(decide(true, true)).toBe(true)
  })

  it('renders before any reply (pre-ack flow, unchanged)', () => {
    expect(decide(true, false)).toBe(true)
  })

  it('is independent of replyCalled — the flag never flips the outcome', () => {
    const afterAck = decide(true, true)
    const beforeAck = decide(true, false)
    expect(afterAck).toBe(beforeAck)
  })

  it('honours the kill-switch regardless of replyCalled', () => {
    for (const replyCalled of [false, true]) {
      expect(decide(false, replyCalled)).toBe(false)
    }
  })
})
51
+
52
describe('foregroundFinishAction', () => {
  it('hands off to the answer when the last sub-agent finishes post-ack', () => {
    const action = foregroundFinishAction({
      removed: true,
      replyCalled: true,
      remainingForeground: 0,
    })
    expect(action).toBe('handoff-clear')
  })

  it('recomposes when other foreground sub-agents are still running post-ack', () => {
    const action = foregroundFinishAction({
      removed: true,
      replyCalled: true,
      remainingForeground: 1,
    })
    expect(action).toBe('recompose')
  })

  it('recomposes pre-ack (original behaviour preserved)', () => {
    const action = foregroundFinishAction({
      removed: true,
      replyCalled: false,
      remainingForeground: 0,
    })
    expect(action).toBe('recompose')
  })

  it('does nothing for an agent it was not tracking', () => {
    const action = foregroundFinishAction({
      removed: false,
      replyCalled: true,
      remainingForeground: 0,
    })
    expect(action).toBe('none')
  })
})
77
+
78
describe('end-to-end render shape under ack-first', () => {
  it('produces a live nested block from a post-ack foreground narrative', () => {
    // marko's real scenario: the parent acked with no prior steps (empty
    // mirror) and a foreground researcher then emitted progress. With the
    // gate now ALLOWING this render, check that the output is a real,
    // non-empty nested feed whose newest line is the bold "→ current" step —
    // i.e. the user would actually see activity.
    const foregroundNarrative = [
      'searching Unsplash CC0 desk',
      'checking license on 4 candidates',
      'ranking by resolution',
    ]

    const html = renderActivityFeedWithNested([], foregroundNarrative)

    expect(html).not.toBeNull()
    // The newest child is rendered as the live, bold current step.
    expect(html).toContain('<b>→ ranking by resolution</b>')
    // An earlier child is still present as a done/italic step.
    expect(html).toContain('checking license on 4 candidates')
  })
})
@@ -80,6 +80,14 @@ function makeHarness(opts: {
80
80
  stallThresholdMs?: number
81
81
  silentStallTerminalMs?: number
82
82
  rescanMs?: number
83
+ /** How long ago (ms) the boot file was last written, i.e. its mtime is
84
+ * `currentTime - bootFileAgeMs` at registration. Default 0 (fresh, so the
85
+ * freshness gate promotes it). Set large to simulate a dead prior-session
86
+ * worker that must NOT be promoted. */
87
+ bootFileAgeMs?: number
88
+ /** Kill-switch passthrough; default true (promotion enabled). */
89
+ bootPromoteEnabled?: boolean
90
+ inflightPromoteMaxAgeMs?: number
83
91
  }): Harness {
84
92
  const {
85
93
  agentId = 'gap-agent',
@@ -87,6 +95,9 @@ function makeHarness(opts: {
87
95
  stallThresholdMs = 60_000,
88
96
  silentStallTerminalMs = 300_000,
89
97
  rescanMs = 500,
98
+ bootFileAgeMs = 0,
99
+ bootPromoteEnabled = true,
100
+ inflightPromoteMaxAgeMs,
90
101
  } = opts
91
102
 
92
103
  let currentTime = 1000
@@ -104,6 +115,10 @@ function makeHarness(opts: {
104
115
 
105
116
  const fileContents = new Map<string, Buffer>()
106
117
  fileContents.set(jsonlPath, Buffer.from(buildJSONL(...bootLines), 'utf-8'))
118
+ // Per-file mtime (ms). The boot file's last write is `bootFileAgeMs` in the
119
+ // past; appends bump it to currentTime. The freshness gate reads this.
120
+ const fileMtimes = new Map<string, number>()
121
+ fileMtimes.set(jsonlPath, 1000 - bootFileAgeMs)
107
122
 
108
123
  let lastOpenedPath: string | null = null
109
124
  const mockFs = {
@@ -121,7 +136,7 @@ function makeHarness(opts: {
121
136
  if (ps === subagentsDir) return [`agent-${agentId}.jsonl`]
122
137
  return []
123
138
  }) as unknown as typeof fs.readdirSync,
124
- statSync: ((p: fs.PathLike) => ({ size: fileContents.get(String(p))?.length ?? 0 }) as fs.Stats) as typeof fs.statSync,
139
+ statSync: ((p: fs.PathLike) => ({ size: fileContents.get(String(p))?.length ?? 0, mtimeMs: fileMtimes.get(String(p)) ?? currentTime }) as fs.Stats) as typeof fs.statSync,
125
140
  openSync: ((p: fs.PathLike) => {
126
141
  lastOpenedPath = String(p)
127
142
  return 42
@@ -153,6 +168,8 @@ function makeHarness(opts: {
153
168
  silentSynthesisStallThresholdMs: stallThresholdMs,
154
169
  silentStallTerminalMs,
155
170
  rescanMs,
171
+ bootPromoteEnabled,
172
+ ...(inflightPromoteMaxAgeMs != null ? { inflightPromoteMaxAgeMs } : {}),
156
173
  onStallTerminal: (id) => stallTerminalCalls.push({ agentId: id }),
157
174
  onFinish: ({ agentId: id, outcome, resultText }) =>
158
175
  finishCalls.push({ agentId: id, outcome, resultText }),
@@ -186,6 +203,7 @@ function makeHarness(opts: {
186
203
  const cur = fileContents.get(jsonlPath) ?? Buffer.alloc(0)
187
204
  const more = buildJSONL(...lines)
188
205
  fileContents.set(jsonlPath, Buffer.concat([cur, Buffer.from(more, 'utf-8')]))
206
+ fileMtimes.set(jsonlPath, currentTime)
189
207
  }
190
208
 
191
209
  return { stallTerminalCalls, finishCalls, logs, advance, watcher, fileContents, jsonlPath, append }
@@ -245,6 +263,75 @@ describe('Gap 1 — background worker in-flight across a gateway restart', () =>
245
263
  })
246
264
  })
247
265
 
266
describe('Gap 1 freshness gate — v0.14.24 stale-replay regression', () => {
  // Background: v0.14.23 promoted EVERY running-at-boot file, so weeks-old
  // dead prior-session workers were replayed as handbacks (often `failed`,
  // from old error lines) on every boot, spamming the whole fleet. The gate
  // promotes only files whose last write is recent.

  it('a STALE running-at-boot worker (weeks-old mtime) is NOT promoted — no handback, no stall', () => {
    const threeWeeksMs = 21 * 24 * 60 * 60_000 // clearly dead
    const h = makeHarness({
      agentId: 'gap1-stale-running',
      bootLines: [subAgentUserMsg('bg task from weeks ago')], // no turn_end → still running
      bootFileAgeMs: threeWeeksMs,
      silentStallTerminalMs: 120_000,
    })

    h.advance(600)
    h.advance(600_000) // well beyond every stall/synthesis window

    const loggedStaleSkip = h.logs.some(
      (line) => line.includes('stale') && line.includes('leaving historical'),
    )
    expect(h.finishCalls).toHaveLength(0) // pre-fix: a spurious (often failed) handback
    expect(h.stallTerminalCalls).toHaveLength(0)
    expect(loggedStaleSkip).toBe(true)
  })

  it('a FRESH running-at-boot worker (recent mtime) IS still promoted and hands back', () => {
    // The genuine Gap 1 fix must survive: a worker that was in flight across
    // a restart (it wrote moments before the bounce) still gets promoted and
    // handed back.
    const h = makeHarness({
      agentId: 'gap1-fresh-running',
      bootLines: [subAgentUserMsg('bg task')],
      bootFileAgeMs: 30_000, // 30s old — in flight across a quick restart
    })

    h.append(subAgentText('Finished the migration'), subAgentTurnEnd())
    h.advance(600)

    const loggedPromotion = h.logs.some((line) => line.includes('promoting to live'))
    expect(h.finishCalls).toHaveLength(1)
    expect(h.finishCalls[0].outcome).toBe('completed')
    expect(loggedPromotion).toBe(true)
  })

  it('kill-switch (bootPromoteEnabled=false) suppresses even a fresh running-at-boot worker', () => {
    const h = makeHarness({
      agentId: 'gap1-killswitch',
      bootLines: [subAgentUserMsg('bg task')],
      bootFileAgeMs: 5_000, // fresh — would otherwise be promoted
      bootPromoteEnabled: false,
      silentStallTerminalMs: 120_000,
    })

    h.advance(600)
    h.advance(600_000)

    const loggedDisabled = h.logs.some((line) => line.includes('promotion disabled'))
    expect(h.finishCalls).toHaveLength(0)
    expect(loggedDisabled).toBe(true)
  })

  it('a worker just past the freshness window is NOT promoted (boundary)', () => {
    const h = makeHarness({
      agentId: 'gap1-boundary',
      bootLines: [subAgentUserMsg('bg task')],
      inflightPromoteMaxAgeMs: 60_000, // 60s window
      bootFileAgeMs: 90_000, // 90s old → outside the window
      silentStallTerminalMs: 120_000,
    })

    h.advance(600)
    h.advance(600_000)

    const loggedStale = h.logs.some((line) => line.includes('stale'))
    expect(h.finishCalls).toHaveLength(0)
    expect(loggedStale).toBe(true)
  })
})
334
+
248
335
  describe('Gap 2 — failure honesty', () => {
249
336
  it('a terminal error line flips the outcome to failed and carries the detail', () => {
250
337
  const h = makeHarness({ agentId: 'gap2-failed', bootLines: [subAgentUserMsg('bg task')] })
@@ -0,0 +1,121 @@
1
+ /**
2
+ * Foreground sub-agent live activity nesting (#2032) — UAT.
3
+ *
4
+ * Background (#2027): a FOREGROUND sub-agent (Agent/Task WITHOUT
5
+ * run_in_background) runs INSIDE the parent turn, blocking it. Its live steps
6
+ * were meant to nest into the parent's activity-summary feed (Model A). But
7
+ * every render path bailed on `turn.replyCalled`, and the framework's
8
+ * ack-first pattern replies "On it…" FIRST — so the sub-agent ran with
9
+ * replyCalled already true and NOTHING ever painted. #2027 only tested the
10
+ * pure renderer, so the regression shipped silently (the "marko's researcher
11
+ * showed no Telegram activity" report). #2032 renders foreground progress
12
+ * regardless of replyCalled.
13
+ *
14
+ * This scenario forces the exact broken shape end-to-end:
15
+ * 1. Prompt the agent to send a quick ack FIRST (sets replyCalled=true),
16
+ * 2. THEN dispatch a FOREGROUND sub-agent (run_in_background:false) that
17
+ * narrates eight paced steps so its jsonl keeps ticking under the
18
+ * test-harness 5s stall floor and the feed can paint + edit,
19
+ * 3. then report done.
20
+ *
21
+ * Asserts the load-bearing proof: an activity-summary feed message appears
22
+ * carrying the NESTED foreground marker ("↳") with the sub-agent's narration —
23
+ * i.e. live foreground activity surfaced AFTER the ack. Pre-#2032 this message
24
+ * never existed. Logs every observed body so a human can read the real UX.
25
+ *
26
+ * NOT a draft: the activity-summary feed is a real sendMessage/editMessageText
27
+ * (gateway.ts drainActivitySummary), so mtcute can observe it — unlike the
28
+ * answer-stream draft, which mtcute cannot see.
29
+ */
30
+
31
+ import { describe, expect, it } from "vitest";
32
+ import { spinUp } from "../harness.js";
33
+
34
+ // Same paced-narration discipline as jtbd-worker-activity-feed-dm: each step
35
+ // is its own Bash call with a one-line narration so the sub-agent emits a
36
+ // `sub_agent_text` line every ~2s — under SWITCHROOM_SUBAGENT_STALL_MS=5000,
37
+ // so the watcher never synth-terminates it mid-flight (which would suppress
38
+ // onProgress and the feed would never paint). run_in_background:false makes it
39
+ // FOREGROUND — the whole point.
40
+ const FG_DISPATCH_PROMPT =
41
+ `First, immediately send me a one-line acknowledgement that you're starting ` +
42
+ `(just "On it — running a check now."). Then use the Agent tool with ` +
43
+ `subagent_type "general-purpose" and run_in_background: false (a FOREGROUND ` +
44
+ `sub-agent) with this exact task: "Do eight steps, ONE AT A TIME, k = 1 ` +
45
+ `through 8. Before each step write a brief one-sentence narration of what ` +
46
+ `you are about to do, then run \`sleep 2\` via the Bash tool, then run ` +
47
+ `\`echo step-k\` via the Bash tool (substitute the real number for k). Run ` +
48
+ `every sleep and every echo as its OWN separate Bash call — never batch or ` +
49
+ `chain them with && — and narrate before each so progress surfaces ` +
50
+ `incrementally. Do not stop early; complete all eight steps, then return a ` +
51
+ `one-line summary." Wait for the foreground sub-agent to finish, then send ` +
52
+ `me a brief reply telling me it's done.`;
53
+
54
+ // The nested foreground block renders each child line prefixed with "↳"
55
+ // (NESTED_PREFIX = " ↳ "), newest as a bold "→ …" current step. Telegram
56
+ // strips the bold but keeps the literal ↳ / → glyphs in message text.
57
+ const NESTED_RE = /↳/;
58
+
59
+ describe("uat: foreground sub-agent live activity nesting (#2032)", () => {
60
+ it(
61
+ "surfaces nested foreground activity in the feed AFTER the ack-first reply",
62
+ async () => {
63
+ const sc = await spinUp({ agent: "test-harness" });
64
+ try {
65
+ await sc.sendDM(FG_DISPATCH_PROMPT);
66
+
67
+ // Ack-first reply — establishes replyCalled=true BEFORE the
68
+ // foreground sub-agent runs. This is the condition that broke #2027.
69
+ const ack = await sc.expectMessage(/.+/, {
70
+ from: "bot",
71
+ timeout: 60_000,
72
+ });
73
+ console.log(`[fg-activity UAT] ack-first reply: ${JSON.stringify(ack.text)}`);
74
+
75
+ // The activity-summary feed carrying the NESTED foreground narrative.
76
+ // First paint waits for the sub-agent to dispatch + narrate (~8s
77
+ // first-paint throttle), so give it a generous window. Its presence
78
+ // is the load-bearing proof of the fix: post-ack foreground activity.
79
+ const feed = await sc.expectMessage(NESTED_RE, {
80
+ from: "bot",
81
+ timeout: 90_000,
82
+ });
83
+ console.log(
84
+ `[fg-activity UAT] nested feed paint (id=${feed.messageId}): ${JSON.stringify(feed.text)}`,
85
+ );
86
+ expect(feed.messageId).toBeGreaterThan(0);
87
+ expect(feed.text).toMatch(NESTED_RE);
88
+
89
+ // Live edit: re-fetch the SAME message after the throttle + a few
90
+ // sub-agent steps. Body should change as the nested narrative
91
+ // advances — proving it's a live feed, not a one-shot post. Soft:
92
+ // log either way; the nested-paint above is the load-bearing proof.
93
+ const before = feed.text;
94
+ await new Promise((r) => setTimeout(r, 10_000));
95
+ const mid = await sc.driver.getMessage(sc.botUserId, feed.messageId);
96
+ console.log(
97
+ `[fg-activity UAT] same feed after 10s (id=${feed.messageId}): ${JSON.stringify(mid?.text ?? null)}`,
98
+ );
99
+
100
+ // Final answer — the parent resumes after the foreground sub-agent
101
+ // returns and reports done. The feed hands off (clears) to this
102
+ // answer. Confirms the turn completes cleanly, not wedged.
103
+ const done = await sc.expectMessage(/done|complete|finished|step-8|wrapped/i, {
104
+ from: "bot",
105
+ timeout: 120_000,
106
+ });
107
+ console.log(`[fg-activity UAT] final answer: ${JSON.stringify(done.text)}`);
108
+ expect(done.text.length).toBeGreaterThan(0);
109
+ // Did the nested narrative actually move while in flight?
110
+ if (mid?.text != null) {
111
+ console.log(
112
+ `[fg-activity UAT] body moved in-flight: ${mid.text !== before}`,
113
+ );
114
+ }
115
+ } finally {
116
+ await sc.tearDown();
117
+ }
118
+ },
119
+ 300_000,
120
+ );
121
+ });