switchroom 0.13.26 → 0.13.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/dist/cli/switchroom.js +2 -2
  2. package/package.json +1 -1
  3. package/telegram-plugin/active-reactions-sweep.ts +4 -4
  4. package/telegram-plugin/dist/gateway/gateway.js +240 -64
  5. package/telegram-plugin/docs/waiting-ux-spec.md +17 -1
  6. package/telegram-plugin/gateway/disconnect-flush.ts +10 -6
  7. package/telegram-plugin/gateway/gateway.ts +199 -61
  8. package/telegram-plugin/gateway/inbound-spool.ts +69 -2
  9. package/telegram-plugin/gateway/subagent-handback-inbound-builder.ts +14 -0
  10. package/telegram-plugin/gateway/subagent-progress-inbound-builder.ts +256 -0
  11. package/telegram-plugin/pending-work-progress.ts +5 -1
  12. package/telegram-plugin/status-reactions.ts +70 -58
  13. package/telegram-plugin/stream-reply-handler.ts +7 -36
  14. package/telegram-plugin/subagent-watcher.ts +64 -3
  15. package/telegram-plugin/tests/gateway-disconnect-flush.test.ts +5 -3
  16. package/telegram-plugin/tests/inbound-spool-progress.test.ts +213 -0
  17. package/telegram-plugin/tests/inbound-spool.test.ts +62 -0
  18. package/telegram-plugin/tests/multi-turn-continuity.test.ts +0 -1
  19. package/telegram-plugin/tests/outbound-ordering.test.ts +0 -1
  20. package/telegram-plugin/tests/parse-mode-rotation.test.ts +0 -1
  21. package/telegram-plugin/tests/reply-terminal-reaction.test.ts +119 -133
  22. package/telegram-plugin/tests/status-accent.test.ts +0 -1
  23. package/telegram-plugin/tests/status-reactions.test.ts +56 -27
  24. package/telegram-plugin/tests/stream-reply-error-paths.test.ts +0 -1
  25. package/telegram-plugin/tests/stream-reply-handler.test.ts +9 -25
  26. package/telegram-plugin/tests/streaming-e2e.test.ts +0 -1
  27. package/telegram-plugin/tests/streaming-orchestration.test.ts +0 -1
  28. package/telegram-plugin/tests/subagent-handback-inbound-builder.test.ts +22 -0
  29. package/telegram-plugin/tests/subagent-progress-inbound-builder.test.ts +269 -0
  30. package/telegram-plugin/uat/scenarios/jtbd-reflective-status-reaction-dm.test.ts +204 -0
@@ -27,7 +27,7 @@
27
27
  * needing to spin up the whole gateway.
28
28
  */
29
29
 
30
- export interface DisconnectFlushDeps<Ctrl extends { setDone: () => void }, Stream extends { isFinal: () => boolean; finalize: () => Promise<void> }> {
30
+ export interface DisconnectFlushDeps<Ctrl extends { finalize: (reason?: 'done' | 'error') => void }, Stream extends { isFinal: () => boolean; finalize: () => Promise<void> }> {
31
31
  /** The disconnecting client's agentName. `null` ⇒ anonymous (never registered). */
32
32
  agentName: string | null
33
33
 
@@ -50,7 +50,7 @@ export interface DisconnectFlushDeps<Ctrl extends { setDone: () => void }, Strea
50
50
 
51
51
  /** Optional: called when the registered-agent disconnect found dangling
52
52
  * `activeTurnStartedAt` entries the controller loop did not clear (i.e.
53
- * `setDone()` already ran on the canonical reply path, leaving
53
+ * `finalize()` already ran on the canonical reply path, leaving
54
54
  * `activeStatusReactions` empty but `activeTurnStartedAt` populated).
55
55
  * The gateway uses this to null its module-scope `currentTurn` — the
56
56
  * bridge that owned that turn just died. Without this, the next
@@ -70,7 +70,7 @@ export interface DisconnectFlushDeps<Ctrl extends { setDone: () => void }, Strea
70
70
  * client). The boolean is for tests + observability — callers can ignore it.
71
71
  */
72
72
  export function flushOnAgentDisconnect<
73
- Ctrl extends { setDone: () => void },
73
+ Ctrl extends { finalize: (reason?: 'done' | 'error') => void },
74
74
  Stream extends { isFinal: () => boolean; finalize: () => Promise<void> },
75
75
  >(deps: DisconnectFlushDeps<Ctrl, Stream>): boolean {
76
76
  const {
@@ -96,8 +96,12 @@ export function flushOnAgentDisconnect<
96
96
  // Real agent disconnect (e.g. the claude bridge crashed/restarted). Flush
97
97
  // all in-flight status reactions to 👍 so user messages don't stay stuck on
98
98
  // intermediate emoji (🤔, 🔥, etc.) after an agent crash/restart.
99
+ // #1713: route through finalize() — single terminal path for the
100
+ // status-reaction controller. Disconnect implies the agent bridge
101
+ // died mid-turn; treat as a clean terminal so the user's emoji
102
+ // doesn't stay stuck on an intermediate working state.
99
103
  for (const [key, ctrl] of activeStatusReactions.entries()) {
100
- ctrl.setDone()
104
+ ctrl.finalize('done')
101
105
  activeStatusReactions.delete(key)
102
106
  activeReactionMsgIds.delete(key)
103
107
  activeTurnStartedAt.delete(key)
@@ -107,7 +111,7 @@ export function flushOnAgentDisconnect<
107
111
  // Defense-in-depth — sweep any `activeTurnStartedAt` keys the controller
108
112
  // loop above did not touch. The bridge has crashed; any turn it owned is
109
113
  // dead by definition, regardless of whether `activeStatusReactions`
110
- // still tracks it. The race that motivates this: `setDone()` already
114
+ // still tracks it. The race that motivates this: `finalize()` already
111
115
  // fired on the canonical reply path (clearing the reaction controller)
112
116
  // BUT the disconnect arrived BEFORE `purgeReactionTracking` ran the
113
117
  // `activeTurnStartedAt.delete` line for that key. Without this sweep,
@@ -123,7 +127,7 @@ export function flushOnAgentDisconnect<
123
127
  }
124
128
  log(
125
129
  `telegram gateway: disconnect-flush swept ${danglingKeys.length} dangling turn key(s) ` +
126
- `post-bridge-death (controller loop missed — setDone raced disconnect)`,
130
+ `post-bridge-death (controller loop missed — finalize raced disconnect)`,
127
131
  )
128
132
  onDanglingTurnsSwept?.(danglingKeys)
129
133
  }
@@ -288,6 +288,10 @@ import {
288
288
  buildVaultSaveDiscardedInbound,
289
289
  } from './vault-grant-inbound-builders.js'
290
290
  import { decideSubagentHandback } from './subagent-handback-inbound-builder.js'
291
+ import {
292
+ decideSubagentProgress,
293
+ DEFAULT_PROGRESS_INTERVAL_MS,
294
+ } from './subagent-progress-inbound-builder.js'
291
295
  import { createPollHealthCheck, type PollHealthCheckHandle } from './poll-health.js'
292
296
  import type {
293
297
  ToolCallMessage,
@@ -1515,6 +1519,13 @@ function maybeProactiveCompact(): void {
1515
1519
  void resolveCompactCard('superseded', occupancy);
1516
1520
  }
1517
1521
  void postCompactCard(occupancy, cap);
1522
+ // #1713: compaction is a reflective working state — paint ✍ on
1523
+ // every in-flight inbound's status reaction so the user can see
1524
+ // the agent is doing compaction work, not stuck. Non-terminal:
1525
+ // post-compact transitions back to thinking/tool resume normally.
1526
+ for (const ctrl of activeStatusReactions.values()) {
1527
+ ctrl.setCompacting()
1528
+ }
1518
1529
  }
1519
1530
 
1520
1531
  if (!decision.fire) return;
@@ -1642,15 +1653,40 @@ async function resolveCompactCard(
1642
1653
  }
1643
1654
  }
1644
1655
 
1645
- function endStatusReaction(chatId: string, threadId: number | undefined, outcome: 'done' | 'error'): void {
1656
+ /**
1657
+ * Terminal-only reaction helper — routes through `finalize()` per #1713.
1658
+ *
1659
+ * Only the `turn_end` IPC handler, disconnect-flush, and boot-sweep
1660
+ * should call this — plus the #1728 final-answer carve-out in `executeReply` and the context-exhaustion bail path. Mid-turn replies and stream-done events are
1661
+ * NON-EVENTS for the reaction (the reaction reflects current turn
1662
+ * activity, not delivery state). See `reference/know-what-my-agent-is-
1663
+ * doing.md` for the user-perceived contract.
1664
+ */
1665
+ function finalizeStatusReaction(
1666
+ chatId: string,
1667
+ threadId: number | undefined,
1668
+ reason: 'done' | 'error' = 'done',
1669
+ ): void {
1646
1670
  const key = statusKey(chatId, threadId)
1647
1671
  const ctrl = activeStatusReactions.get(key)
1648
1672
  if (!ctrl) return
1649
- if (outcome === 'done') ctrl.setDone()
1650
- else ctrl.setError()
1673
+ ctrl.finalize(reason)
1651
1674
  purgeReactionTracking(key)
1652
1675
  }
1653
1676
 
1677
+ /**
1678
+ * Non-terminal error paint (😱). Distinct from `finalize('error')` —
1679
+ * recovery to a working state is allowed after this (#1713). Mid-turn
1680
+ * 5xx surfaces use this; the terminal turn_end handler decides whether
1681
+ * the turn actually ends in error.
1682
+ */
1683
+ function paintStatusReactionError(chatId: string, threadId: number | undefined): void {
1684
+ const key = statusKey(chatId, threadId)
1685
+ const ctrl = activeStatusReactions.get(key)
1686
+ if (!ctrl) return
1687
+ ctrl.setError()
1688
+ }
1689
+
1654
1690
  function resolveThreadId(chat_id: string, explicit?: string | number | null): number | undefined {
1655
1691
  if (explicit != null) return Number(explicit)
1656
1692
  return chatThreadMap.get(chat_id)
@@ -4895,35 +4931,49 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
4895
4931
  } catch { /* best-effort signal */ }
4896
4932
  // #203: fresh sendMessage from reply tool is a user-visible signal.
4897
4933
  signalTracker.noteSignal(statusKey(chat_id, threadId), Date.now())
4898
- // PR #602 follow-up: fire the terminal 👍 here so plain `reply`-only
4899
- // turns get the same delivery-confirmed reaction as stream_reply
4900
- // (Bug Z). Pre-follow-up, the dedup-suppress branch in the gateway
4901
- // turn_end handler was the sole 👍 emitter for reply-tool-only
4902
- // turns; removing its setDone call (Bug D) left those turns with no
4903
- // 👍 at all. Mirror the stream_reply contract: only fire after at
4904
- // least one sendMessage has resolved successfully (sentIds.length>0
4905
- // guarantees this), so the emoji means "the reply landed in
4906
- // Telegram", not "the reply tool was invoked". The reply tool has
4907
- // no lane concept — every reply is the user-visible answer — so no
4908
- // lane gate is needed (unlike stream_reply where named lanes are
4909
- // internal driver emits).
4910
- try {
4911
- endStatusReaction(chat_id, threadId, 'done')
4912
- } catch (err) {
4913
- process.stderr.write(`telegram gateway: reply: endStatusReaction hook threw: ${err}\n`)
4914
- }
4915
- // #1664 — mark the turn's final answer as delivered when this reply
4916
- // looks like the real answer rather than an interim ack. The
4917
- // classification (notification-bearing OR substantive length) lives
4918
- // in `isFinalAnswerReply`. Without this, a turn that ack'd then ended
4919
- // with the real answer as plain transcript text (#1664) would look
4920
- // "delivered" because replyCalled is true — and the silent-end
4921
- // re-prompt would never engage. `rawText` is the model's own answer
4922
- // text, measured before HTML conversion / Telegraph-link
4923
- // substitution. Writes `turn` (pinned at executeReply entry) so the
4924
- // flag always lands on the turn this reply belongs to.
4934
+ // #1713: the reply tool is a NON-EVENT for the status reaction
4935
+ // WHEN IT'S AN INTERIM ACK. The reaction reflects current turn
4936
+ // activity, not delivery state — interim acks must not collapse
4937
+ // the working-state ladder to 👍.
4938
+ //
4939
+ // #1728 carve-out (2026-05-24): when this reply IS the final
4940
+ // answer (`isFinalAnswerReply` returns true — same classifier
4941
+ // #1664 uses for silent-end re-prompt gating), it IS effectively
4942
+ // turn-end and we MUST finalize here. Rationale: Claude Code's
4943
+ // `turn_duration` system event is unreliable for the trivial-
4944
+ // prompt happy path (driver sends "what's 2+2", model replies
4945
+ // "4", no `turn_duration` ever lands in the JSONL session tail).
4946
+ // Pre-#1718 this wedge was masked by the legacy
4947
+ // `endStatusReaction` shim running unconditionally on every
4948
+ // reply (outcome='done'); #1718 removed that call site
4949
+ // intending `turn_end` to be the sole terminal trigger. The
4950
+ // contract was right in spirit but `turn_end` doesn't fire 100%
4951
+ // of the time, so the buffer gate (activeTurnStartedAt) stays
4952
+ // set forever and every subsequent inbound gets `held mid-turn`
4953
+ // and never delivered. v0.13.27 shipped + reverted on this
4954
+ // failure mode (#1728).
4955
+ //
4956
+ // Net contract:
4957
+ // - interim ack reply (isFinalAnswerReply === false)
4958
+ // → non-event, no reaction finalize, buffer gate stays
4959
+ // - final-answer reply (isFinalAnswerReply === true)
4960
+ // → finalize reaction (debounced 👍) + release buffer
4961
+ // gate via purgeReactionTracking (called inside
4962
+ // finalizeStatusReaction). currentTurn stays alive so
4963
+ // a subsequent `turn_end` still cleans up its share
4964
+ // idempotently.
4965
+ //
4966
+ // #1664 — `turn.finalAnswerDelivered = true` keeps the silent-
4967
+ // end re-prompt from spuriously firing on a delivered final.
4925
4968
  if (turn != null && isFinalAnswerReply({ text: rawText, disableNotification })) {
4926
4969
  turn.finalAnswerDelivered = true
4970
+ // #1728: release the buffer gate + emit terminal 👍. Mid-turn
4971
+ // acks bypass this branch and remain non-events for the
4972
+ // reaction (preserves #1713). The full turn-state teardown
4973
+ // (nulling `currentTurn`, the per-turn cleanup) still runs in
4974
+ // the `turn_end` handler when it lands; this only fires the
4975
+ // observable side effects that #1718 deferred unconditionally.
4976
+ finalizeStatusReaction(chat_id, threadId, 'done')
4927
4977
  }
4928
4978
  }
4929
4979
 
@@ -5065,7 +5115,6 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
5065
5115
  disableLinkPreview: access.disableLinkPreview !== false,
5066
5116
  defaultFormat: access.parseMode ?? 'html',
5067
5117
  logStreamingEvent,
5068
- endStatusReaction,
5069
5118
  isPrivateChat: streamIsPrivate,
5070
5119
  isForumTopic: streamIsForumTopic,
5071
5120
  ...(sendMessageDraftFn != null ? { sendMessageDraft: sendMessageDraftFn } : {}),
@@ -6523,10 +6572,12 @@ function handleSessionEvent(ev: SessionEvent): void {
6523
6572
  ...(threadId != null ? { threadId } : {}),
6524
6573
  },
6525
6574
  )
6526
- const ceKey = statusKey(chatId, threadId)
6527
- const ctrl = activeStatusReactions.get(ceKey)
6528
- if (ctrl) ctrl.setError()
6529
- purgeReactionTracking(ceKey)
6575
+ // #1713: context-exhaustion is a terminal failure path — paint 😱
6576
+ // and finalize the controller. `setError` alone is non-terminal
6577
+ // (recovery permitted); since this turn is genuinely ending, route
6578
+ // through `finalize('error')` so the emoji lands and the controller
6579
+ // stops accepting further transitions.
6580
+ finalizeStatusReaction(chatId, threadId, 'error')
6530
6581
  // Surfaced during CC-5 investigation (`docs/status-ask-cause-classes.md`):
6531
6582
  // the context-exhaust bail path teardown was missing
6532
6583
  // `silencePoke.endTurn(key)`. Without it, the silence-poke state for
@@ -6537,6 +6588,7 @@ function handleSessionEvent(ev: SessionEvent): void {
6537
6588
  // dead and has already told the user is over (the ⚠️ Context window
6538
6589
  // full message above). Match the pattern used at the regular
6539
6590
  // turn-end path (line ~5039) and the wedged-turn path (~5290).
6591
+ const ceKey = statusKey(chatId, threadId)
6540
6592
  silencePoke.endTurn(ceKey)
6541
6593
  pendingProgress.noteTurnEnd(ceKey)
6542
6594
  // Issue #195: tear down the answer-lane stream on context-exhaustion
@@ -6729,10 +6781,11 @@ function handleSessionEvent(ev: SessionEvent): void {
6729
6781
  }
6730
6782
  // Unpin without editing the message so no orphaned card lingers.
6731
6783
  unpinProgressCardForChat?.(chatId, threadId)
6732
- // Fall through to normal state cleanup (ctrl.setDone, purge, etc.)
6784
+ // Fall through to normal state cleanup (finalize, purge, etc.)
6733
6785
  // but skip the regular closeProgressLane so we don't re-finalize.
6734
- if (ctrl) ctrl.setDone()
6735
- purgeReactionTracking(statusKey(chatId, threadId))
6786
+ // #1713: silent-marker turns still finalize to 👍 — turn_end is
6787
+ // the terminal trigger regardless of whether a reply landed.
6788
+ finalizeStatusReaction(chatId, threadId, 'done')
6736
6789
  // Match the normal turn_end path's telemetry so silent-marker turns
6737
6790
  // still appear in turn-duration graphs.
6738
6791
  {
@@ -6889,24 +6942,12 @@ function handleSessionEvent(ev: SessionEvent): void {
6889
6942
  const recentCount = getRecentOutboundCount(backstopChatId, 2)
6890
6943
  if (recentCount > 0) {
6891
6944
  process.stderr.write(`telegram gateway: turn-flush suppressed — reply tool sent ${recentCount} message(s) within 2s\n`)
6892
- // Bug D fix: do NOT fire setDone here. The previous code
6893
- // assumed `recentCount > 0` was sufficient proof of delivery
6894
- // — and it is, since recordOutbound is called synchronously
6895
- // after sendMessage success. But firing setDone here races
6896
- // with the stream_reply done=true callback (Bug Z) which now
6897
- // fires endStatusReaction after finalize() resolves (i.e.
6898
- // after the final edit lands in Telegram). Both racing on
6899
- // setDone is harmless (setDone is idempotent post-terminal),
6900
- // but the dedup branch firing FIRST means we'd be claiming
6901
- // delivery from a 500ms-lagged read of local history rather
6902
- // than from the actual API confirmation. Letting Bug Z's
6903
- // post-finalize callback own the 👍 transition keeps the
6904
- // emoji tied to true delivery. The plain `reply` tool path
6905
- // (PR #602 follow-up) now also fires endStatusReaction
6906
- // directly from executeReply after sendMessage resolves,
6907
- // mirroring this contract — so reply-only turns transition
6908
- // to terminal 👍 in their own success path rather than
6909
- // relying on this dedup heuristic.
6945
+ // Do NOT finalize the status reaction here. As of #1713
6946
+ // the reaction is only finalized by the `turn_end` IPC
6947
+ // handler — mid-turn delivery proofs (local history,
6948
+ // stream finalize callbacks, executeReply post-send) no
6949
+ // longer transition the emoji. This branch just purges
6950
+ // the per-turn reaction tracking entry and returns.
6910
6951
  purgeReactionTracking(statusKey(backstopChatId, backstopThreadId))
6911
6952
  return
6912
6953
  }
@@ -7018,7 +7059,9 @@ function handleSessionEvent(ev: SessionEvent): void {
7018
7059
  Date.now(),
7019
7060
  currentTurn?.registryKey ?? null,
7020
7061
  )
7021
- if (backstopCtrl) backstopCtrl.setDone()
7062
+ // #1713: route the backstop terminal through finalize() —
7063
+ // single terminal path keeps the controller contract clean.
7064
+ if (backstopCtrl) backstopCtrl.finalize('done')
7022
7065
  // Unpin the card. completeTurn cleans up pinMgr's per-turn
7023
7066
  // state and unpins via the API. If we didn't take over a
7024
7067
  // turn (cardTakeover.turnKey == null), fall back to the
@@ -7034,7 +7077,9 @@ function handleSessionEvent(ev: SessionEvent): void {
7034
7077
  }
7035
7078
  } catch (err) {
7036
7079
  process.stderr.write(`telegram gateway: turn-flush send failed: ${(err as Error).message}\n`)
7037
- if (backstopCtrl) backstopCtrl.setError()
7080
+ // #1713: backstop send failed — finalize as error so the
7081
+ // turn ends cleanly with 😱 rather than leaving it open.
7082
+ if (backstopCtrl) backstopCtrl.finalize('error')
7038
7083
  } finally {
7039
7084
  purgeReactionTracking(statusKey(backstopChatId, backstopThreadId))
7040
7085
  }
@@ -7042,8 +7087,11 @@ function handleSessionEvent(ev: SessionEvent): void {
7042
7087
  return
7043
7088
  }
7044
7089
 
7045
- if (ctrl) ctrl.setDone()
7046
- purgeReactionTracking(statusKey(chatId, threadId))
7090
+ // #1713: turn_end is THE terminal trigger. Finalize via the
7091
+ // single terminal path (👍). Any prior intermediate states
7092
+ // pending in the debounce window are flushed by `finalize()`
7093
+ // before the terminal emoji emits.
7094
+ finalizeStatusReaction(chatId, threadId, 'done')
7047
7095
  {
7048
7096
  const sKey = streamKey(chatId, threadId)
7049
7097
  const turnDurationMs = turn.startedAt > 0 ? Date.now() - turn.startedAt : 0
@@ -7255,7 +7303,6 @@ function handlePtyActivity(text: string): void {
7255
7303
  disableLinkPreview: access.disableLinkPreview !== false,
7256
7304
  defaultFormat: 'text',
7257
7305
  logStreamingEvent,
7258
- endStatusReaction,
7259
7306
  historyEnabled: false,
7260
7307
  recordOutbound,
7261
7308
  writeError: (line) => process.stderr.write(line),
@@ -15796,6 +15843,10 @@ void (async () => {
15796
15843
  ownerChatId: loadAccess().allowFrom[0] ?? '',
15797
15844
  taskDescription: description,
15798
15845
  resultText,
15846
+ // Plumb the JSONL agent id so the spool can mint a
15847
+ // deterministic dedup key — closes the #1719
15848
+ // re-fire-on-restart class.
15849
+ jsonlAgentId: agentId,
15799
15850
  })
15800
15851
  if (!decision.deliver) {
15801
15852
  if (decision.reason === 'no-chat') {
@@ -15806,6 +15857,28 @@ void (async () => {
15806
15857
  return
15807
15858
  }
15808
15859
 
15860
+ // #1720: when the handback is queued, sweep any still-
15861
+ // live progress envelopes for the SAME sub-agent out of
15862
+ // the spool. Without this a progress envelope queued
15863
+ // moments before the worker finished could land on top
15864
+ // of the handback turn, producing a duplicated /
15865
+ // contradictory "still running" line. Prefix match on
15866
+ // `s:progress:<jsonl_agent_id>:` — see `inbound-spool.ts`
15867
+ // spoolId branch.
15868
+ try {
15869
+ const progressPrefix = `s:progress:${agentId}:`
15870
+ const dropped = inboundSpool?.dropMatching((id) => id.startsWith(progressPrefix)) ?? 0
15871
+ if (dropped > 0) {
15872
+ process.stderr.write(
15873
+ `telegram gateway: subagent-handback ${agentId} swept ${dropped} live progress envelope(s) from spool\n`,
15874
+ )
15875
+ }
15876
+ } catch (err) {
15877
+ process.stderr.write(
15878
+ `telegram gateway: subagent-handback ${agentId} progress-sweep error: ${(err as Error).message}\n`,
15879
+ )
15880
+ }
15881
+
15809
15882
  // Deliver via pendingInboundBuffer + the idle-drain tick.
15810
15883
  // The drain only releases at an idle prompt (no active
15811
15884
  // turn), so the handback always lands as a clean fresh
@@ -15815,6 +15888,71 @@ void (async () => {
15815
15888
  `telegram gateway: subagent-handback queued agent=${agentId} outcome=${outcome} chat=${decision.chatId} resultChars=${resultText.length}\n`,
15816
15889
  )
15817
15890
  },
15891
+ // conversational-pacing beat 3 — mid-flight progress for
15892
+ // background workers (#1720). Fires on every
15893
+ // `sub_agent_text` event; the pure `decideSubagentProgress`
15894
+ // gates on (a) background flag, (b) bucket-not-yet-fired
15895
+ // (deterministic `floor(elapsed / interval)`), (c) chat
15896
+ // resolves. Envelope spoolId is
15897
+ // `s:progress:<jsonl_agent_id>:<bucketIdx>` so a re-fire
15898
+ // within the same bucket — or across a gateway restart —
15899
+ // collapses to one live entry. TTL on `meta.expiresAt`
15900
+ // suppresses stale-after-restart delivery (a 4-h-old
15901
+ // "still working (5m)" would be a lie). Sweep on handback
15902
+ // lives in the `onFinish` block just above.
15903
+ onProgress: ({ agentId, description, latestSummary, elapsedMs, prevBucketIdx, setBucketIdx }) => {
15904
+ let fleetChatId = ''
15905
+ let isBackground = false
15906
+ try {
15907
+ const fleets = progressDriver?.peekAllFleets() ?? []
15908
+ for (const f of fleets) {
15909
+ if (f.fleet.has(agentId)) {
15910
+ fleetChatId = f.chatId ?? ''
15911
+ break
15912
+ }
15913
+ }
15914
+ } catch { /* peek failures non-fatal */ }
15915
+ if (turnsDb != null) {
15916
+ try {
15917
+ const row = turnsDb
15918
+ .prepare('SELECT background FROM subagents WHERE jsonl_agent_id = ?')
15919
+ .get(agentId) as { background: number } | undefined
15920
+ if (row != null) isBackground = row.background === 1
15921
+ } catch { /* best-effort */ }
15922
+ }
15923
+ if (!isBackground) return // skip overhead for foreground
15924
+
15925
+ const decision = decideSubagentProgress({
15926
+ disableEnvValue: process.env.SWITCHROOM_DISABLE_SUBAGENT_PROGRESS,
15927
+ isBackground,
15928
+ fleetChatId,
15929
+ ownerChatId: loadAccess().allowFrom[0] ?? '',
15930
+ subagentJsonlId: agentId,
15931
+ taskDescription: description,
15932
+ latestSummary,
15933
+ elapsedMs,
15934
+ progressIntervalMs: DEFAULT_PROGRESS_INTERVAL_MS,
15935
+ lastBucketIdx: prevBucketIdx,
15936
+ })
15937
+ if (!decision.deliver) return
15938
+
15939
+ setBucketIdx(decision.bucketIdx)
15940
+ pendingInboundBuffer.push(process.env.SWITCHROOM_AGENT_NAME ?? '', decision.inbound)
15941
+ // #1725 follow-up: yield the cross-turn ambient ticker
15942
+ // for this chat. With the progress envelope queued, the
15943
+ // model is about to compose an explicit in-voice
15944
+ // progress line — letting the "— still working (Nm)"
15945
+ // edit fire in parallel would double-surface the
15946
+ // signal. Progress envelopes target the chat level
15947
+ // (no thread id), matching how the inbound lands.
15948
+ pendingProgress.clearPending(
15949
+ statusKey(decision.chatId, undefined),
15950
+ 'progress',
15951
+ )
15952
+ process.stderr.write(
15953
+ `telegram gateway: subagent-progress queued agent=${agentId} bucket=${decision.bucketIdx} elapsed_ms=${elapsedMs} chat=${decision.chatId}\n`,
15954
+ )
15955
+ },
15818
15956
  })
15819
15957
  process.stderr.write('telegram gateway: subagent-watcher active\n')
15820
15958
  }
@@ -49,6 +49,36 @@ import type { InboundMessage } from './ipc-protocol.js'
49
49
  * synthetics of the SAME logical event dedup, but distinct events
50
50
  * (different ts) do not collapse. */
51
51
  export function spoolId(msg: InboundMessage): string {
52
+ // Subagent handbacks (#1719): the JSONL agent id is unique per
53
+ // Claude Code spawn, so use it as the dedup key. This makes the id
54
+ // stable across the watcher's onFinish race AND across a
55
+ // gateway/container restart — so a re-built handback envelope for
56
+ // the same finished sub-agent collapses against the live spool
57
+ // entry (or its tombstone) instead of minting a fresh ts-derived
58
+ // id and re-firing the turn. See issue #1719.
59
+ if (
60
+ msg.meta?.source === 'subagent_handback' &&
61
+ typeof msg.meta?.subagent_jsonl_id === 'string' &&
62
+ msg.meta.subagent_jsonl_id.length > 0
63
+ ) {
64
+ return `s:handback:${msg.meta.subagent_jsonl_id}`
65
+ }
66
+ // Subagent progress envelopes (#1720): deterministic per (jsonl id,
67
+ // bucket idx) — every elapsed bucket collapses to one live entry, so
68
+ // a re-fire within the same bucket window (or after a gateway
69
+ // restart) is a structural no-op. The bucket idx is computed by the
70
+ // gateway from `floor(elapsedMs / progressIntervalMs)` so a worker
71
+ // that emits narrative lines every 30s only produces one envelope
72
+ // per bucket. Mirrors the #1719 handback-spoolId pattern.
73
+ if (
74
+ msg.meta?.source === 'subagent_progress' &&
75
+ typeof msg.meta?.subagent_jsonl_id === 'string' &&
76
+ msg.meta.subagent_jsonl_id.length > 0 &&
77
+ typeof msg.meta?.bucket_idx === 'string' &&
78
+ msg.meta.bucket_idx.length > 0
79
+ ) {
80
+ return `s:progress:${msg.meta.subagent_jsonl_id}:${msg.meta.bucket_idx}`
81
+ }
52
82
  if (typeof msg.messageId === 'number' && msg.messageId > 0) {
53
83
  return `m:${msg.chatId}:${msg.messageId}`
54
84
  }
@@ -104,8 +134,22 @@ export interface InboundSpool {
104
134
  * registered bridge. Idempotent. */
105
135
  ack: (msg: InboundMessage) => void
106
136
  /** Live (un-acked) entries, oldest first. Used at boot to re-push
107
- * into the in-memory buffer. Pure read — does not mutate. */
137
+ * into the in-memory buffer. Pure read — does not mutate.
138
+ *
139
+ * TTL (#1720): an entry whose `msg.meta.expiresAt` is a string-encoded ms
140
+ * epoch in the past is OMITTED from the result. Progress envelopes
141
+ * carry a TTL because stale progress lies ("still working (5m)"
142
+ * delivered 4h after the worker finished is worse than no progress);
143
+ * handback envelopes never set `expiresAt` so this is a no-op for
144
+ * them. */
108
145
  liveEntries: () => ReplayEntry[]
146
+ /** Drop every live entry whose spool id matches the predicate. Used
147
+ * by the handback path (#1720) to sweep stale progress envelopes
148
+ * for the same sub-agent at the moment the handback is queued —
149
+ * otherwise a progress envelope queued moments before the worker
150
+ * finished could land on top of the handback turn. Tombstones the
151
+ * dropped entries durably. */
152
+ dropMatching: (predicate: (id: string) => boolean) => number
109
153
  /** Escalate+drop entries older than `escalateAfterMs`. Calls
110
154
  * `onEscalate` once per dropped entry (post the "couldn't deliver"
111
155
  * card there). Returns the count escalated. Safe to call on a timer. */
@@ -243,7 +287,30 @@ export function createInboundSpool(opts: InboundSpoolOptions): InboundSpool {
243
287
  },
244
288
  liveEntries() {
245
289
  // Insertion order = Map iteration order = oldest first.
246
- return [...live.values()].map((e) => ({ agent: e.agent, msg: e.msg }))
290
+ // TTL filter (#1720): skip entries whose meta.expiresAt is in the
291
+ // past. The on-disk log keeps them (cheap); compaction sweeps.
292
+ const cutoff = now()
293
+ const out: ReplayEntry[] = []
294
+ for (const e of live.values()) {
295
+ const expRaw = e.msg.meta?.expiresAt
296
+ if (typeof expRaw === 'string' && expRaw.length > 0) {
297
+ const exp = Number(expRaw)
298
+ if (Number.isFinite(exp) && exp <= cutoff) continue
299
+ }
300
+ out.push({ agent: e.agent, msg: e.msg })
301
+ }
302
+ return out
303
+ },
304
+ dropMatching(predicate) {
305
+ let n = 0
306
+ for (const [id, _e] of [...live.entries()]) {
307
+ if (!predicate(id)) continue
308
+ live.delete(id)
309
+ appendRecord({ t: 'ack', id })
310
+ n++
311
+ }
312
+ if (n > 0) maybeCompact()
313
+ return n
247
314
  },
248
315
  sweepEscalations(onEscalate) {
249
316
  const cutoff = now() - escalateAfterMs
@@ -48,6 +48,14 @@ export interface SubagentHandbackContext {
48
48
  resultText: string
49
49
  /** Terminal outcome as classified by the watcher. */
50
50
  outcome: 'completed' | 'failed'
51
+ /** JSONL filename stem for this Claude Code spawn — unique per
52
+ * sub-agent run. Plumbed into `meta.subagent_jsonl_id` so the
53
+ * spool can mint a deterministic dedup id (`s:handback:<id>`),
54
+ * closing the #1719 re-fire-on-restart class. Optional only for
55
+ * back-compat with older builder callers — when present the
56
+ * spoolId branch fires, when absent the spool falls back to the
57
+ * legacy ts-based id (status-quo behaviour). */
58
+ jsonlAgentId?: string
51
59
  }
52
60
 
53
61
  function truncate(s: string, max: number): string {
@@ -98,6 +106,7 @@ export function buildSubagentHandbackInbound(opts: {
98
106
  meta: {
99
107
  source: 'subagent_handback',
100
108
  outcome: opts.ctx.outcome,
109
+ ...(opts.ctx.jsonlAgentId ? { subagent_jsonl_id: opts.ctx.jsonlAgentId } : {}),
101
110
  },
102
111
  }
103
112
  }
@@ -128,6 +137,10 @@ export interface SubagentHandbackDecisionInput {
128
137
  ownerChatId: string
129
138
  taskDescription: string
130
139
  resultText: string
140
+ /** JSONL filename stem for this Claude Code spawn — forwarded into
141
+ * the built inbound's `meta.subagent_jsonl_id`. See
142
+ * `SubagentHandbackContext.jsonlAgentId` for the dedup rationale. */
143
+ jsonlAgentId?: string
131
144
  /** Deterministic clock for tests. */
132
145
  nowMs?: number
133
146
  }
@@ -178,6 +191,7 @@ export function decideSubagentHandback(
178
191
  taskDescription: input.taskDescription,
179
192
  resultText: input.resultText,
180
193
  outcome: input.outcome,
194
+ ...(input.jsonlAgentId ? { jsonlAgentId: input.jsonlAgentId } : {}),
181
195
  },
182
196
  ...(input.nowMs !== undefined ? { nowMs: input.nowMs } : {}),
183
197
  })