switchroom 0.13.26 → 0.13.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/dist/cli/switchroom.js +2 -2
  2. package/package.json +1 -1
  3. package/telegram-plugin/active-reactions-sweep.ts +4 -4
  4. package/telegram-plugin/dist/gateway/gateway.js +239 -64
  5. package/telegram-plugin/docs/waiting-ux-spec.md +17 -1
  6. package/telegram-plugin/gateway/disconnect-flush.ts +10 -6
  7. package/telegram-plugin/gateway/gateway.ts +166 -51
  8. package/telegram-plugin/gateway/inbound-spool.ts +69 -2
  9. package/telegram-plugin/gateway/subagent-handback-inbound-builder.ts +14 -0
  10. package/telegram-plugin/gateway/subagent-progress-inbound-builder.ts +256 -0
  11. package/telegram-plugin/pending-work-progress.ts +5 -1
  12. package/telegram-plugin/status-reactions.ts +70 -58
  13. package/telegram-plugin/stream-reply-handler.ts +7 -36
  14. package/telegram-plugin/subagent-watcher.ts +64 -3
  15. package/telegram-plugin/tests/gateway-disconnect-flush.test.ts +5 -3
  16. package/telegram-plugin/tests/inbound-spool-progress.test.ts +213 -0
  17. package/telegram-plugin/tests/inbound-spool.test.ts +62 -0
  18. package/telegram-plugin/tests/multi-turn-continuity.test.ts +0 -1
  19. package/telegram-plugin/tests/outbound-ordering.test.ts +0 -1
  20. package/telegram-plugin/tests/parse-mode-rotation.test.ts +0 -1
  21. package/telegram-plugin/tests/reply-terminal-reaction.test.ts +78 -135
  22. package/telegram-plugin/tests/status-accent.test.ts +0 -1
  23. package/telegram-plugin/tests/status-reactions.test.ts +56 -27
  24. package/telegram-plugin/tests/stream-reply-error-paths.test.ts +0 -1
  25. package/telegram-plugin/tests/stream-reply-handler.test.ts +9 -25
  26. package/telegram-plugin/tests/streaming-e2e.test.ts +0 -1
  27. package/telegram-plugin/tests/streaming-orchestration.test.ts +0 -1
  28. package/telegram-plugin/tests/subagent-handback-inbound-builder.test.ts +22 -0
  29. package/telegram-plugin/tests/subagent-progress-inbound-builder.test.ts +269 -0
  30. package/telegram-plugin/uat/scenarios/jtbd-reflective-status-reaction-dm.test.ts +204 -0
@@ -27,7 +27,7 @@
27
27
  * needing to spin up the whole gateway.
28
28
  */
29
29
 
30
- export interface DisconnectFlushDeps<Ctrl extends { setDone: () => void }, Stream extends { isFinal: () => boolean; finalize: () => Promise<void> }> {
30
+ export interface DisconnectFlushDeps<Ctrl extends { finalize: (reason?: 'done' | 'error') => void }, Stream extends { isFinal: () => boolean; finalize: () => Promise<void> }> {
31
31
  /** The disconnecting client's agentName. `null` ⇒ anonymous (never registered). */
32
32
  agentName: string | null
33
33
 
@@ -50,7 +50,7 @@ export interface DisconnectFlushDeps<Ctrl extends { setDone: () => void }, Strea
50
50
 
51
51
  /** Optional: called when the registered-agent disconnect found dangling
52
52
  * `activeTurnStartedAt` entries the controller loop did not clear (i.e.
53
- * `setDone()` already ran on the canonical reply path, leaving
53
+ * `finalize()` already ran on the canonical reply path, leaving
54
54
  * `activeStatusReactions` empty but `activeTurnStartedAt` populated).
55
55
  * The gateway uses this to null its module-scope `currentTurn` — the
56
56
  * bridge that owned that turn just died. Without this, the next
@@ -70,7 +70,7 @@ export interface DisconnectFlushDeps<Ctrl extends { setDone: () => void }, Strea
70
70
  * client). The boolean is for tests + observability — callers can ignore it.
71
71
  */
72
72
  export function flushOnAgentDisconnect<
73
- Ctrl extends { setDone: () => void },
73
+ Ctrl extends { finalize: (reason?: 'done' | 'error') => void },
74
74
  Stream extends { isFinal: () => boolean; finalize: () => Promise<void> },
75
75
  >(deps: DisconnectFlushDeps<Ctrl, Stream>): boolean {
76
76
  const {
@@ -96,8 +96,12 @@ export function flushOnAgentDisconnect<
96
96
  // Real agent disconnect (e.g. the claude bridge crashed/restarted). Flush
97
97
  // all in-flight status reactions to 👍 so user messages don't stay stuck on
98
98
  // intermediate emoji (🤔, 🔥, etc.) after an agent crash/restart.
99
+ // #1713: route through finalize() — single terminal path for the
100
+ // status-reaction controller. Disconnect implies the agent bridge
101
+ // died mid-turn; treat as a clean terminal so the user's emoji
102
+ // doesn't stay stuck on an intermediate working state.
99
103
  for (const [key, ctrl] of activeStatusReactions.entries()) {
100
- ctrl.setDone()
104
+ ctrl.finalize('done')
101
105
  activeStatusReactions.delete(key)
102
106
  activeReactionMsgIds.delete(key)
103
107
  activeTurnStartedAt.delete(key)
@@ -107,7 +111,7 @@ export function flushOnAgentDisconnect<
107
111
  // Defense-in-depth — sweep any `activeTurnStartedAt` keys the controller
108
112
  // loop above did not touch. The bridge has crashed; any turn it owned is
109
113
  // dead by definition, regardless of whether `activeStatusReactions`
110
- // still tracks it. The race that motivates this: `setDone()` already
114
+ // still tracks it. The race that motivates this: `finalize()` already
111
115
  // fired on the canonical reply path (clearing the reaction controller)
112
116
  // BUT the disconnect arrived BEFORE `purgeReactionTracking` ran the
113
117
  // `activeTurnStartedAt.delete` line for that key. Without this sweep,
@@ -123,7 +127,7 @@ export function flushOnAgentDisconnect<
123
127
  }
124
128
  log(
125
129
  `telegram gateway: disconnect-flush swept ${danglingKeys.length} dangling turn key(s) ` +
126
- `post-bridge-death (controller loop missed — setDone raced disconnect)`,
130
+ `post-bridge-death (controller loop missed — finalize raced disconnect)`,
127
131
  )
128
132
  onDanglingTurnsSwept?.(danglingKeys)
129
133
  }
@@ -288,6 +288,10 @@ import {
288
288
  buildVaultSaveDiscardedInbound,
289
289
  } from './vault-grant-inbound-builders.js'
290
290
  import { decideSubagentHandback } from './subagent-handback-inbound-builder.js'
291
+ import {
292
+ decideSubagentProgress,
293
+ DEFAULT_PROGRESS_INTERVAL_MS,
294
+ } from './subagent-progress-inbound-builder.js'
291
295
  import { createPollHealthCheck, type PollHealthCheckHandle } from './poll-health.js'
292
296
  import type {
293
297
  ToolCallMessage,
@@ -1515,6 +1519,13 @@ function maybeProactiveCompact(): void {
1515
1519
  void resolveCompactCard('superseded', occupancy);
1516
1520
  }
1517
1521
  void postCompactCard(occupancy, cap);
1522
+ // #1713: compaction is a reflective working state — paint ✍ on
1523
+ // every in-flight inbound's status reaction so the user can see
1524
+ // the agent is doing compaction work, not stuck. Non-terminal:
1525
+ // post-compact transitions back to thinking/tool resume normally.
1526
+ for (const ctrl of activeStatusReactions.values()) {
1527
+ ctrl.setCompacting()
1528
+ }
1518
1529
  }
1519
1530
 
1520
1531
  if (!decision.fire) return;
@@ -1642,15 +1653,40 @@ async function resolveCompactCard(
1642
1653
  }
1643
1654
  }
1644
1655
 
1645
- function endStatusReaction(chatId: string, threadId: number | undefined, outcome: 'done' | 'error'): void {
1656
+ /**
1657
+ * Terminal-only reaction helper — routes through `finalize()` per #1713.
1658
+ *
1659
+ * Only the `turn_end` IPC handler, disconnect-flush, and boot-sweep
1660
+ * should call this. Mid-turn replies and stream-done events are
1661
+ * NON-EVENTS for the reaction (the reaction reflects current turn
1662
+ * activity, not delivery state). See `reference/know-what-my-agent-is-
1663
+ * doing.md` for the user-perceived contract.
1664
+ */
1665
+ function finalizeStatusReaction(
1666
+ chatId: string,
1667
+ threadId: number | undefined,
1668
+ reason: 'done' | 'error' = 'done',
1669
+ ): void {
1646
1670
  const key = statusKey(chatId, threadId)
1647
1671
  const ctrl = activeStatusReactions.get(key)
1648
1672
  if (!ctrl) return
1649
- if (outcome === 'done') ctrl.setDone()
1650
- else ctrl.setError()
1673
+ ctrl.finalize(reason)
1651
1674
  purgeReactionTracking(key)
1652
1675
  }
1653
1676
 
1677
+ /**
1678
+ * Non-terminal error paint (😱). Distinct from `finalize('error')` —
1679
+ * recovery to a working state is allowed after this (#1713). Mid-turn
1680
+ * 5xx surfaces use this; the terminal turn_end handler decides whether
1681
+ * the turn actually ends in error.
1682
+ */
1683
+ function paintStatusReactionError(chatId: string, threadId: number | undefined): void {
1684
+ const key = statusKey(chatId, threadId)
1685
+ const ctrl = activeStatusReactions.get(key)
1686
+ if (!ctrl) return
1687
+ ctrl.setError()
1688
+ }
1689
+
1654
1690
  function resolveThreadId(chat_id: string, explicit?: string | number | null): number | undefined {
1655
1691
  if (explicit != null) return Number(explicit)
1656
1692
  return chatThreadMap.get(chat_id)
@@ -4895,23 +4931,14 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
4895
4931
  } catch { /* best-effort signal */ }
4896
4932
  // #203: fresh sendMessage from reply tool is a user-visible signal.
4897
4933
  signalTracker.noteSignal(statusKey(chat_id, threadId), Date.now())
4898
- // PR #602 follow-up: fire the terminal 👍 here so plain `reply`-only
4899
- // turns get the same delivery-confirmed reaction as stream_reply
4900
- // (Bug Z). Pre-follow-up, the dedup-suppress branch in the gateway
4901
- // turn_end handler was the sole 👍 emitter for reply-tool-only
4902
- // turns; removing its setDone call (Bug D) left those turns with no
4903
- // 👍 at all. Mirror the stream_reply contract: only fire after at
4904
- // least one sendMessage has resolved successfully (sentIds.length>0
4905
- // guarantees this), so the emoji means "the reply landed in
4906
- // Telegram", not "the reply tool was invoked". The reply tool has
4907
- // no lane concept — every reply is the user-visible answer — so no
4908
- // lane gate is needed (unlike stream_reply where named lanes are
4909
- // internal driver emits).
4910
- try {
4911
- endStatusReaction(chat_id, threadId, 'done')
4912
- } catch (err) {
4913
- process.stderr.write(`telegram gateway: reply: endStatusReaction hook threw: ${err}\n`)
4914
- }
4934
+ // #1713: the reply tool is a NON-EVENT for the status reaction.
4935
+ // The reaction reflects current turn activity, not delivery state —
4936
+ // only the `turn_end` IPC handler finalizes (👍). A plain `reply`
4937
+ // mid-turn or as the final answer does not change the emoji on the
4938
+ // user's inbound message; the next turn_end does. This is a
4939
+ // deliberate revert of the PR #602 follow-up that wired delivery-
4940
+ // confirmation into the terminal path (see #1713 issue body for
4941
+ // the rationale: "delivery confirmation ≠ turn end").
4915
4942
  // #1664 — mark the turn's final answer as delivered when this reply
4916
4943
  // looks like the real answer rather than an interim ack. The
4917
4944
  // classification (notification-bearing OR substantive length) lives
@@ -5065,7 +5092,6 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
5065
5092
  disableLinkPreview: access.disableLinkPreview !== false,
5066
5093
  defaultFormat: access.parseMode ?? 'html',
5067
5094
  logStreamingEvent,
5068
- endStatusReaction,
5069
5095
  isPrivateChat: streamIsPrivate,
5070
5096
  isForumTopic: streamIsForumTopic,
5071
5097
  ...(sendMessageDraftFn != null ? { sendMessageDraft: sendMessageDraftFn } : {}),
@@ -6523,10 +6549,12 @@ function handleSessionEvent(ev: SessionEvent): void {
6523
6549
  ...(threadId != null ? { threadId } : {}),
6524
6550
  },
6525
6551
  )
6526
- const ceKey = statusKey(chatId, threadId)
6527
- const ctrl = activeStatusReactions.get(ceKey)
6528
- if (ctrl) ctrl.setError()
6529
- purgeReactionTracking(ceKey)
6552
+ // #1713: context-exhaustion is a terminal failure path — paint 😱
6553
+ // and finalize the controller. `setError` alone is non-terminal
6554
+ // (recovery permitted); since this turn is genuinely ending, route
6555
+ // through `finalize('error')` so the emoji lands and the controller
6556
+ // stops accepting further transitions.
6557
+ finalizeStatusReaction(chatId, threadId, 'error')
6530
6558
  // Surfaced during CC-5 investigation (`docs/status-ask-cause-classes.md`):
6531
6559
  // the context-exhaust bail path teardown was missing
6532
6560
  // `silencePoke.endTurn(key)`. Without it, the silence-poke state for
@@ -6537,6 +6565,7 @@ function handleSessionEvent(ev: SessionEvent): void {
6537
6565
  // dead and has already told the user is over (the ⚠️ Context window
6538
6566
  // full message above). Match the pattern used at the regular
6539
6567
  // turn-end path (line ~5039) and the wedged-turn path (~5290).
6568
+ const ceKey = statusKey(chatId, threadId)
6540
6569
  silencePoke.endTurn(ceKey)
6541
6570
  pendingProgress.noteTurnEnd(ceKey)
6542
6571
  // Issue #195: tear down the answer-lane stream on context-exhaustion
@@ -6729,10 +6758,11 @@ function handleSessionEvent(ev: SessionEvent): void {
6729
6758
  }
6730
6759
  // Unpin without editing the message so no orphaned card lingers.
6731
6760
  unpinProgressCardForChat?.(chatId, threadId)
6732
- // Fall through to normal state cleanup (ctrl.setDone, purge, etc.)
6761
+ // Fall through to normal state cleanup (finalize, purge, etc.)
6733
6762
  // but skip the regular closeProgressLane so we don't re-finalize.
6734
- if (ctrl) ctrl.setDone()
6735
- purgeReactionTracking(statusKey(chatId, threadId))
6763
+ // #1713: silent-marker turns still finalize to 👍 — turn_end is
6764
+ // the terminal trigger regardless of whether a reply landed.
6765
+ finalizeStatusReaction(chatId, threadId, 'done')
6736
6766
  // Match the normal turn_end path's telemetry so silent-marker turns
6737
6767
  // still appear in turn-duration graphs.
6738
6768
  {
@@ -6889,24 +6919,12 @@ function handleSessionEvent(ev: SessionEvent): void {
6889
6919
  const recentCount = getRecentOutboundCount(backstopChatId, 2)
6890
6920
  if (recentCount > 0) {
6891
6921
  process.stderr.write(`telegram gateway: turn-flush suppressed — reply tool sent ${recentCount} message(s) within 2s\n`)
6892
- // Bug D fix: do NOT fire setDone here. The previous code
6893
- // assumed `recentCount > 0` was sufficient proof of delivery
6894
- // — and it is, since recordOutbound is called synchronously
6895
- // after sendMessage success. But firing setDone here races
6896
- // with the stream_reply done=true callback (Bug Z) which now
6897
- // fires endStatusReaction after finalize() resolves (i.e.
6898
- // after the final edit lands in Telegram). Both racing on
6899
- // setDone is harmless (setDone is idempotent post-terminal),
6900
- // but the dedup branch firing FIRST means we'd be claiming
6901
- // delivery from a 500ms-lagged read of local history rather
6902
- // than from the actual API confirmation. Letting Bug Z's
6903
- // post-finalize callback own the 👍 transition keeps the
6904
- // emoji tied to true delivery. The plain `reply` tool path
6905
- // (PR #602 follow-up) now also fires endStatusReaction
6906
- // directly from executeReply after sendMessage resolves,
6907
- // mirroring this contract — so reply-only turns transition
6908
- // to terminal 👍 in their own success path rather than
6909
- // relying on this dedup heuristic.
6922
+ // Do NOT finalize the status reaction here. As of #1713
6923
+ // the reaction is only finalized by the `turn_end` IPC
6924
+ // handler — mid-turn delivery proofs (local history,
6925
+ // stream finalize callbacks, executeReply post-send) no
6926
+ // longer transition the emoji. This branch just purges
6927
+ // the per-turn reaction tracking entry and returns.
6910
6928
  purgeReactionTracking(statusKey(backstopChatId, backstopThreadId))
6911
6929
  return
6912
6930
  }
@@ -7018,7 +7036,9 @@ function handleSessionEvent(ev: SessionEvent): void {
7018
7036
  Date.now(),
7019
7037
  currentTurn?.registryKey ?? null,
7020
7038
  )
7021
- if (backstopCtrl) backstopCtrl.setDone()
7039
+ // #1713: route the backstop terminal through finalize() —
7040
+ // single terminal path keeps the controller contract clean.
7041
+ if (backstopCtrl) backstopCtrl.finalize('done')
7022
7042
  // Unpin the card. completeTurn cleans up pinMgr's per-turn
7023
7043
  // state and unpins via the API. If we didn't take over a
7024
7044
  // turn (cardTakeover.turnKey == null), fall back to the
@@ -7034,7 +7054,9 @@ function handleSessionEvent(ev: SessionEvent): void {
7034
7054
  }
7035
7055
  } catch (err) {
7036
7056
  process.stderr.write(`telegram gateway: turn-flush send failed: ${(err as Error).message}\n`)
7037
- if (backstopCtrl) backstopCtrl.setError()
7057
+ // #1713: backstop send failed — finalize as error so the
7058
+ // turn ends cleanly with 😱 rather than leaving it open.
7059
+ if (backstopCtrl) backstopCtrl.finalize('error')
7038
7060
  } finally {
7039
7061
  purgeReactionTracking(statusKey(backstopChatId, backstopThreadId))
7040
7062
  }
@@ -7042,8 +7064,11 @@ function handleSessionEvent(ev: SessionEvent): void {
7042
7064
  return
7043
7065
  }
7044
7066
 
7045
- if (ctrl) ctrl.setDone()
7046
- purgeReactionTracking(statusKey(chatId, threadId))
7067
+ // #1713: turn_end is THE terminal trigger. Finalize via the
7068
+ // single terminal path (👍). Any prior intermediate states
7069
+ // pending in the debounce window are flushed by `finalize()`
7070
+ // before the terminal emoji emits.
7071
+ finalizeStatusReaction(chatId, threadId, 'done')
7047
7072
  {
7048
7073
  const sKey = streamKey(chatId, threadId)
7049
7074
  const turnDurationMs = turn.startedAt > 0 ? Date.now() - turn.startedAt : 0
@@ -7255,7 +7280,6 @@ function handlePtyActivity(text: string): void {
7255
7280
  disableLinkPreview: access.disableLinkPreview !== false,
7256
7281
  defaultFormat: 'text',
7257
7282
  logStreamingEvent,
7258
- endStatusReaction,
7259
7283
  historyEnabled: false,
7260
7284
  recordOutbound,
7261
7285
  writeError: (line) => process.stderr.write(line),
@@ -15796,6 +15820,10 @@ void (async () => {
15796
15820
  ownerChatId: loadAccess().allowFrom[0] ?? '',
15797
15821
  taskDescription: description,
15798
15822
  resultText,
15823
+ // Plumb the JSONL agent id so the spool can mint a
15824
+ // deterministic dedup key — closes the #1719
15825
+ // re-fire-on-restart class.
15826
+ jsonlAgentId: agentId,
15799
15827
  })
15800
15828
  if (!decision.deliver) {
15801
15829
  if (decision.reason === 'no-chat') {
@@ -15806,6 +15834,28 @@ void (async () => {
15806
15834
  return
15807
15835
  }
15808
15836
 
15837
+ // #1720: when the handback is queued, sweep any still-
15838
+ // live progress envelopes for the SAME sub-agent out of
15839
+ // the spool. Without this a progress envelope queued
15840
+ // moments before the worker finished could land on top
15841
+ // of the handback turn, producing a duplicated /
15842
+ // contradictory "still running" line. Prefix match on
15843
+ // `s:progress:<jsonl_agent_id>:` — see `inbound-spool.ts`
15844
+ // spoolId branch.
15845
+ try {
15846
+ const progressPrefix = `s:progress:${agentId}:`
15847
+ const dropped = inboundSpool?.dropMatching((id) => id.startsWith(progressPrefix)) ?? 0
15848
+ if (dropped > 0) {
15849
+ process.stderr.write(
15850
+ `telegram gateway: subagent-handback ${agentId} swept ${dropped} live progress envelope(s) from spool\n`,
15851
+ )
15852
+ }
15853
+ } catch (err) {
15854
+ process.stderr.write(
15855
+ `telegram gateway: subagent-handback ${agentId} progress-sweep error: ${(err as Error).message}\n`,
15856
+ )
15857
+ }
15858
+
15809
15859
  // Deliver via pendingInboundBuffer + the idle-drain tick.
15810
15860
  // The drain only releases at an idle prompt (no active
15811
15861
  // turn), so the handback always lands as a clean fresh
@@ -15815,6 +15865,71 @@ void (async () => {
15815
15865
  `telegram gateway: subagent-handback queued agent=${agentId} outcome=${outcome} chat=${decision.chatId} resultChars=${resultText.length}\n`,
15816
15866
  )
15817
15867
  },
15868
+ // conversational-pacing beat 3 — mid-flight progress for
15869
+ // background workers (#1720). Fires on every
15870
+ // `sub_agent_text` event; the pure `decideSubagentProgress`
15871
+ // gates on (a) background flag, (b) bucket-not-yet-fired
15872
+ // (deterministic `floor(elapsed / interval)`), (c) chat
15873
+ // resolves. Envelope spoolId is
15874
+ // `s:progress:<jsonl_agent_id>:<bucketIdx>` so a re-fire
15875
+ // within the same bucket — or across a gateway restart —
15876
+ // collapses to one live entry. TTL on `meta.expiresAt`
15877
+ // suppresses stale-after-restart delivery (a 4-h-old
15878
+ // "still working (5m)" would be a lie). Sweep on handback
15879
+ // lives in the `onFinish` block just above.
15880
+ onProgress: ({ agentId, description, latestSummary, elapsedMs, prevBucketIdx, setBucketIdx }) => {
15881
+ let fleetChatId = ''
15882
+ let isBackground = false
15883
+ try {
15884
+ const fleets = progressDriver?.peekAllFleets() ?? []
15885
+ for (const f of fleets) {
15886
+ if (f.fleet.has(agentId)) {
15887
+ fleetChatId = f.chatId ?? ''
15888
+ break
15889
+ }
15890
+ }
15891
+ } catch { /* peek failures non-fatal */ }
15892
+ if (turnsDb != null) {
15893
+ try {
15894
+ const row = turnsDb
15895
+ .prepare('SELECT background FROM subagents WHERE jsonl_agent_id = ?')
15896
+ .get(agentId) as { background: number } | undefined
15897
+ if (row != null) isBackground = row.background === 1
15898
+ } catch { /* best-effort */ }
15899
+ }
15900
+ if (!isBackground) return // skip overhead for foreground
15901
+
15902
+ const decision = decideSubagentProgress({
15903
+ disableEnvValue: process.env.SWITCHROOM_DISABLE_SUBAGENT_PROGRESS,
15904
+ isBackground,
15905
+ fleetChatId,
15906
+ ownerChatId: loadAccess().allowFrom[0] ?? '',
15907
+ subagentJsonlId: agentId,
15908
+ taskDescription: description,
15909
+ latestSummary,
15910
+ elapsedMs,
15911
+ progressIntervalMs: DEFAULT_PROGRESS_INTERVAL_MS,
15912
+ lastBucketIdx: prevBucketIdx,
15913
+ })
15914
+ if (!decision.deliver) return
15915
+
15916
+ setBucketIdx(decision.bucketIdx)
15917
+ pendingInboundBuffer.push(process.env.SWITCHROOM_AGENT_NAME ?? '', decision.inbound)
15918
+ // #1725 follow-up: yield the cross-turn ambient ticker
15919
+ // for this chat. With the progress envelope queued, the
15920
+ // model is about to compose an explicit in-voice
15921
+ // progress line — letting the "— still working (Nm)"
15922
+ // edit fire in parallel would double-surface the
15923
+ // signal. Progress envelopes target the chat level
15924
+ // (no thread id), matching how the inbound lands.
15925
+ pendingProgress.clearPending(
15926
+ statusKey(decision.chatId, undefined),
15927
+ 'progress',
15928
+ )
15929
+ process.stderr.write(
15930
+ `telegram gateway: subagent-progress queued agent=${agentId} bucket=${decision.bucketIdx} elapsed_ms=${elapsedMs} chat=${decision.chatId}\n`,
15931
+ )
15932
+ },
15818
15933
  })
15819
15934
  process.stderr.write('telegram gateway: subagent-watcher active\n')
15820
15935
  }
@@ -49,6 +49,36 @@ import type { InboundMessage } from './ipc-protocol.js'
49
49
  * synthetics of the SAME logical event dedup, but distinct events
50
50
  * (different ts) do not collapse. */
51
51
  export function spoolId(msg: InboundMessage): string {
52
+ // Subagent handbacks (#1719): the JSONL agent id is unique per
53
+ // Claude Code spawn, so use it as the dedup key. This makes the id
54
+ // stable across the watcher's onFinish race AND across a
55
+ // gateway/container restart — so a re-built handback envelope for
56
+ // the same finished sub-agent collapses against the live spool
57
+ // entry (or its tombstone) instead of minting a fresh ts-derived
58
+ // id and re-firing the turn. See issue #1719.
59
+ if (
60
+ msg.meta?.source === 'subagent_handback' &&
61
+ typeof msg.meta?.subagent_jsonl_id === 'string' &&
62
+ msg.meta.subagent_jsonl_id.length > 0
63
+ ) {
64
+ return `s:handback:${msg.meta.subagent_jsonl_id}`
65
+ }
66
+ // Subagent progress envelopes (#1720): deterministic per (jsonl id,
67
+ // bucket idx) — every elapsed bucket collapses to one live entry, so
68
+ // a re-fire within the same bucket window (or after a gateway
69
+ // restart) is a structural no-op. The bucket idx is computed by the
70
+ // gateway from `floor(elapsedMs / progressIntervalMs)` so a worker
71
+ // that emits narrative lines every 30s only produces one envelope
72
+ // per bucket. Mirrors the #1719 handback-spoolId pattern.
73
+ if (
74
+ msg.meta?.source === 'subagent_progress' &&
75
+ typeof msg.meta?.subagent_jsonl_id === 'string' &&
76
+ msg.meta.subagent_jsonl_id.length > 0 &&
77
+ typeof msg.meta?.bucket_idx === 'string' &&
78
+ msg.meta.bucket_idx.length > 0
79
+ ) {
80
+ return `s:progress:${msg.meta.subagent_jsonl_id}:${msg.meta.bucket_idx}`
81
+ }
52
82
  if (typeof msg.messageId === 'number' && msg.messageId > 0) {
53
83
  return `m:${msg.chatId}:${msg.messageId}`
54
84
  }
@@ -104,8 +134,22 @@ export interface InboundSpool {
104
134
  * registered bridge. Idempotent. */
105
135
  ack: (msg: InboundMessage) => void
106
136
  /** Live (un-acked) entries, oldest first. Used at boot to re-push
107
- * into the in-memory buffer. Pure read — does not mutate. */
137
+ * into the in-memory buffer. Pure read — does not mutate.
138
+ *
139
+ * TTL (#1720): an entry whose `msg.meta.expiresAt` is a numeric ms
140
+ * epoch (stored as a numeric string) in the past is OMITTED from the result. Progress envelopes
141
+ * carry a TTL because stale progress lies ("still working (5m)"
142
+ * delivered 4h after the worker finished is worse than no progress);
143
+ * handback envelopes never set `expiresAt` so this is a no-op for
144
+ * them. */
108
145
  liveEntries: () => ReplayEntry[]
146
+ /** Drop every live entry whose spool id matches the predicate. Used
147
+ * by the handback path (#1720) to sweep stale progress envelopes
148
+ * for the same sub-agent at the moment the handback is queued —
149
+ * otherwise a progress envelope queued moments before the worker
150
+ * finished could land on top of the handback turn. Tombstones the
151
+ * dropped entries durably. */
152
+ dropMatching: (predicate: (id: string) => boolean) => number
109
153
  /** Escalate+drop entries older than `escalateAfterMs`. Calls
110
154
  * `onEscalate` once per dropped entry (post the "couldn't deliver"
111
155
  * card there). Returns the count escalated. Safe to call on a timer. */
@@ -243,7 +287,30 @@ export function createInboundSpool(opts: InboundSpoolOptions): InboundSpool {
243
287
  },
244
288
  liveEntries() {
245
289
  // Insertion order = Map iteration order = oldest first.
246
- return [...live.values()].map((e) => ({ agent: e.agent, msg: e.msg }))
290
+ // TTL filter (#1720): skip entries whose meta.expiresAt is in the
291
+ // past. The on-disk log keeps them (cheap); compaction sweeps.
292
+ const cutoff = now()
293
+ const out: ReplayEntry[] = []
294
+ for (const e of live.values()) {
295
+ const expRaw = e.msg.meta?.expiresAt
296
+ if (typeof expRaw === 'string' && expRaw.length > 0) {
297
+ const exp = Number(expRaw)
298
+ if (Number.isFinite(exp) && exp <= cutoff) continue
299
+ }
300
+ out.push({ agent: e.agent, msg: e.msg })
301
+ }
302
+ return out
303
+ },
304
+ dropMatching(predicate) {
305
+ let n = 0
306
+ for (const [id, _e] of [...live.entries()]) {
307
+ if (!predicate(id)) continue
308
+ live.delete(id)
309
+ appendRecord({ t: 'ack', id })
310
+ n++
311
+ }
312
+ if (n > 0) maybeCompact()
313
+ return n
247
314
  },
248
315
  sweepEscalations(onEscalate) {
249
316
  const cutoff = now() - escalateAfterMs
@@ -48,6 +48,14 @@ export interface SubagentHandbackContext {
48
48
  resultText: string
49
49
  /** Terminal outcome as classified by the watcher. */
50
50
  outcome: 'completed' | 'failed'
51
+ /** JSONL filename stem for this Claude Code spawn — unique per
52
+ * sub-agent run. Plumbed into `meta.subagent_jsonl_id` so the
53
+ * spool can mint a deterministic dedup id (`s:handback:<id>`),
54
+ * closing the #1719 re-fire-on-restart class. Optional only for
55
+ * back-compat with older builder callers — when present the
56
+ * spoolId branch fires, when absent the spool falls back to the
57
+ * legacy ts-based id (status-quo behaviour). */
58
+ jsonlAgentId?: string
51
59
  }
52
60
 
53
61
  function truncate(s: string, max: number): string {
@@ -98,6 +106,7 @@ export function buildSubagentHandbackInbound(opts: {
98
106
  meta: {
99
107
  source: 'subagent_handback',
100
108
  outcome: opts.ctx.outcome,
109
+ ...(opts.ctx.jsonlAgentId ? { subagent_jsonl_id: opts.ctx.jsonlAgentId } : {}),
101
110
  },
102
111
  }
103
112
  }
@@ -128,6 +137,10 @@ export interface SubagentHandbackDecisionInput {
128
137
  ownerChatId: string
129
138
  taskDescription: string
130
139
  resultText: string
140
+ /** JSONL filename stem for this Claude Code spawn — forwarded into
141
+ * the built inbound's `meta.subagent_jsonl_id`. See
142
+ * `SubagentHandbackContext.jsonlAgentId` for the dedup rationale. */
143
+ jsonlAgentId?: string
131
144
  /** Deterministic clock for tests. */
132
145
  nowMs?: number
133
146
  }
@@ -178,6 +191,7 @@ export function decideSubagentHandback(
178
191
  taskDescription: input.taskDescription,
179
192
  resultText: input.resultText,
180
193
  outcome: input.outcome,
194
+ ...(input.jsonlAgentId ? { jsonlAgentId: input.jsonlAgentId } : {}),
181
195
  },
182
196
  ...(input.nowMs !== undefined ? { nowMs: input.nowMs } : {}),
183
197
  })