npm - switchroom - Versions diffs - 0.13.26 → 0.13.28 - Mend

switchroom 0.13.26 → 0.13.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/telegram-plugin/gateway/disconnect-flush.ts CHANGED Viewed

@@ -27,7 +27,7 @@
  * needing to spin up the whole gateway.
  */
-export interface DisconnectFlushDeps<Ctrl extends { setDone: () => void }, Stream extends { isFinal: () => boolean; finalize: () => Promise<void> }> {
+export interface DisconnectFlushDeps<Ctrl extends { finalize: (reason?: 'done' | 'error') => void }, Stream extends { isFinal: () => boolean; finalize: () => Promise<void> }> {
   /** The disconnecting client's agentName. `null` ⇒ anonymous (never registered). */
   agentName: string | null
@@ -50,7 +50,7 @@ export interface DisconnectFlushDeps<Ctrl extends { setDone: () => void }, Strea
   /** Optional: called when the registered-agent disconnect found dangling
    *  `activeTurnStartedAt` entries the controller loop did not clear (i.e.
-   *  `setDone()` already ran on the canonical reply path, leaving
+   *  `finalize()` already ran on the canonical reply path, leaving
    *  `activeStatusReactions` empty but `activeTurnStartedAt` populated).
    *  The gateway uses this to null its module-scope `currentTurn` — the
    *  bridge that owned that turn just died. Without this, the next
@@ -70,7 +70,7 @@ export interface DisconnectFlushDeps<Ctrl extends { setDone: () => void }, Strea
  * client). The boolean is for tests + observability — callers can ignore it.
  */
 export function flushOnAgentDisconnect<
-  Ctrl extends { setDone: () => void },
+  Ctrl extends { finalize: (reason?: 'done' | 'error') => void },
   Stream extends { isFinal: () => boolean; finalize: () => Promise<void> },
 >(deps: DisconnectFlushDeps<Ctrl, Stream>): boolean {
   const {
@@ -96,8 +96,12 @@ export function flushOnAgentDisconnect<
   // Real agent disconnect (e.g. the claude bridge crashed/restarted). Flush
   // all in-flight status reactions to 👍 so user messages don't stay stuck on
   // intermediate emoji (🤔, 🔥, etc.) after an agent crash/restart.
+  // #1713: route through finalize() — single terminal path for the
+  // status-reaction controller. Disconnect implies the agent bridge
+  // died mid-turn; treat as a clean terminal so the user's emoji
+  // doesn't stay stuck on an intermediate working state.
   for (const [key, ctrl] of activeStatusReactions.entries()) {
-    ctrl.setDone()
+    ctrl.finalize('done')
     activeStatusReactions.delete(key)
     activeReactionMsgIds.delete(key)
     activeTurnStartedAt.delete(key)
@@ -107,7 +111,7 @@ export function flushOnAgentDisconnect<
   // Defense-in-depth — sweep any `activeTurnStartedAt` keys the controller
   // loop above did not touch. The bridge has crashed; any turn it owned is
   // dead by definition, regardless of whether `activeStatusReactions`
-  // still tracks it. The race that motivates this: `setDone()` already
+  // still tracks it. The race that motivates this: `finalize()` already
   // fired on the canonical reply path (clearing the reaction controller)
   // BUT the disconnect arrived BEFORE `purgeReactionTracking` ran the
   // `activeTurnStartedAt.delete` line for that key. Without this sweep,
@@ -123,7 +127,7 @@ export function flushOnAgentDisconnect<
     }
     log(
       `telegram gateway: disconnect-flush swept ${danglingKeys.length} dangling turn key(s) ` +
-      `post-bridge-death (controller loop missed — setDone raced disconnect)`,
+      `post-bridge-death (controller loop missed — finalize raced disconnect)`,
     )
     onDanglingTurnsSwept?.(danglingKeys)
   }

package/telegram-plugin/gateway/gateway.ts CHANGED Viewed

@@ -288,6 +288,10 @@ import {
   buildVaultSaveDiscardedInbound,
 } from './vault-grant-inbound-builders.js'
 import { decideSubagentHandback } from './subagent-handback-inbound-builder.js'
+import {
+  decideSubagentProgress,
+  DEFAULT_PROGRESS_INTERVAL_MS,
+} from './subagent-progress-inbound-builder.js'
 import { createPollHealthCheck, type PollHealthCheckHandle } from './poll-health.js'
 import type {
   ToolCallMessage,
@@ -1515,6 +1519,13 @@ function maybeProactiveCompact(): void {
       void resolveCompactCard('superseded', occupancy);
     }
     void postCompactCard(occupancy, cap);
+    // #1713: compaction is a reflective working state — paint ✍ on
+    // every in-flight inbound's status reaction so the user can see
+    // the agent is doing compaction work, not stuck. Non-terminal:
+    // post-compact transitions back to thinking/tool resume normally.
+    for (const ctrl of activeStatusReactions.values()) {
+      ctrl.setCompacting()
+    }
   }
   if (!decision.fire) return;
@@ -1642,15 +1653,40 @@ async function resolveCompactCard(
   }
 }
-function endStatusReaction(chatId: string, threadId: number | undefined, outcome: 'done' | 'error'): void {
+/**
+ * Terminal-only reaction helper — routes through `finalize()` per #1713.
+ *
+ * Call this only from terminal paths: the `turn_end` IPC handler,
+ * disconnect-flush, boot-sweep, the context-exhaustion bail path, and
+ * — per the #1728 carve-out — a `reply` classified as the final answer.
+ * Interim-ack replies and stream-done events are NON-EVENTS for the
+ * reaction (the reaction reflects current turn activity, not delivery
+ * state). See `reference/know-what-my-agent-is-doing.md` for the
+ * user-perceived contract.
+ */
+function finalizeStatusReaction(
+  chatId: string,
+  threadId: number | undefined,
+  reason: 'done' | 'error' = 'done',
+): void {
   const key = statusKey(chatId, threadId)
   const ctrl = activeStatusReactions.get(key)
   if (!ctrl) return
-  if (outcome === 'done') ctrl.setDone()
-  else ctrl.setError()
+  ctrl.finalize(reason)
   purgeReactionTracking(key)
 }
+/**
+ * Non-terminal error paint (😱). Distinct from `finalize('error')` —
+ * recovery to a working state is allowed after this (#1713). Mid-turn
+ * 5xx surfaces use this; the terminal turn_end handler decides whether
+ * the turn actually ends in error.
+ */
+function paintStatusReactionError(chatId: string, threadId: number | undefined): void {
+  const key = statusKey(chatId, threadId)
+  const ctrl = activeStatusReactions.get(key)
+  if (!ctrl) return
+  ctrl.setError()
+}
 function resolveThreadId(chat_id: string, explicit?: string | number | null): number | undefined {
   if (explicit != null) return Number(explicit)
   return chatThreadMap.get(chat_id)
@@ -4895,35 +4931,49 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
     } catch { /* best-effort signal */ }
     // #203: fresh sendMessage from reply tool is a user-visible signal.
     signalTracker.noteSignal(statusKey(chat_id, threadId), Date.now())
-    // PR #602 follow-up: fire the terminal 👍 here so plain `reply`-only
-    // turns get the same delivery-confirmed reaction as stream_reply
-    // (Bug Z). Pre-follow-up, the dedup-suppress branch in the gateway
-    // turn_end handler was the sole 👍 emitter for reply-tool-only
-    // turns; removing its setDone call (Bug D) left those turns with no
-    // 👍 at all. Mirror the stream_reply contract: only fire after at
-    // least one sendMessage has resolved successfully (sentIds.length>0
-    // guarantees this), so the emoji means "the reply landed in
-    // Telegram", not "the reply tool was invoked". The reply tool has
-    // no lane concept — every reply is the user-visible answer — so no
-    // lane gate is needed (unlike stream_reply where named lanes are
-    // internal driver emits).
-    try {
-      endStatusReaction(chat_id, threadId, 'done')
-    } catch (err) {
-      process.stderr.write(`telegram gateway: reply: endStatusReaction hook threw: ${err}\n`)
-    }
-    // #1664 — mark the turn's final answer as delivered when this reply
-    // looks like the real answer rather than an interim ack. The
-    // classification (notification-bearing OR substantive length) lives
-    // in `isFinalAnswerReply`. Without this, a turn that ack'd then ended
-    // with the real answer as plain transcript text (#1664) would look
-    // "delivered" because replyCalled is true — and the silent-end
-    // re-prompt would never engage. `rawText` is the model's own answer
-    // text, measured before HTML conversion / Telegraph-link
-    // substitution. Writes `turn` (pinned at executeReply entry) so the
-    // flag always lands on the turn this reply belongs to.
+    // #1713: the reply tool is a NON-EVENT for the status reaction
+    // WHEN IT'S AN INTERIM ACK. The reaction reflects current turn
+    // activity, not delivery state — interim acks must not collapse
+    // the working-state ladder to 👍.
+    //
+    // #1728 carve-out (2026-05-24): when this reply IS the final
+    // answer (`isFinalAnswerReply` returns true — same classifier
+    // #1664 uses for silent-end re-prompt gating), it IS effectively
+    // turn-end and we MUST finalize here. Rationale: Claude Code's
+    // `turn_duration` system event is unreliable for the trivial-
+    // prompt happy path (driver sends "what's 2+2", model replies
+    // "4", no `turn_duration` ever lands in the JSONL session tail).
+    // Pre-#1718 this wedge was masked by the legacy
+    // `endStatusReaction` shim running unconditionally on every
+    // reply (outcome='done'); #1718 removed that call site
+    // intending `turn_end` to be the sole terminal trigger. The
+    // contract was right in spirit but `turn_end` doesn't fire 100%
+    // of the time, so the buffer gate (activeTurnStartedAt) stays
+    // set forever and every subsequent inbound gets `held mid-turn`
+    // and never delivered. v0.13.27 shipped + reverted on this
+    // failure mode (#1728).
+    //
+    // Net contract:
+    //   - interim ack reply (isFinalAnswerReply === false)
+    //         → non-event, no reaction finalize, buffer gate stays
+    //   - final-answer reply (isFinalAnswerReply === true)
+    //         → finalize reaction (debounced 👍) + release buffer
+    //           gate via purgeReactionTracking (called inside
+    //           finalizeStatusReaction). currentTurn stays alive so
+    //           a subsequent `turn_end` still cleans up its share
+    //           idempotently.
+    //
+    // #1664 — `turn.finalAnswerDelivered = true` keeps the silent-
+    // end re-prompt from spuriously firing on a delivered final.
     if (turn != null && isFinalAnswerReply({ text: rawText, disableNotification })) {
       turn.finalAnswerDelivered = true
+      // #1728: release the buffer gate + emit terminal 👍. Mid-turn
+      // acks bypass this branch and remain non-events for the
+      // reaction (preserves #1713). The full turn-state teardown
+      // (nulling `currentTurn`, the per-turn cleanup) still runs in
+      // the `turn_end` handler when it lands; this only fires the
+      // observable side effects that #1718 deferred unconditionally.
+      finalizeStatusReaction(chat_id, threadId, 'done')
     }
   }
@@ -5065,7 +5115,6 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
       disableLinkPreview: access.disableLinkPreview !== false,
       defaultFormat: access.parseMode ?? 'html',
       logStreamingEvent,
-      endStatusReaction,
       isPrivateChat: streamIsPrivate,
       isForumTopic: streamIsForumTopic,
       ...(sendMessageDraftFn != null ? { sendMessageDraft: sendMessageDraftFn } : {}),
@@ -6523,10 +6572,12 @@ function handleSessionEvent(ev: SessionEvent): void {
             ...(threadId != null ? { threadId } : {}),
           },
         )
-        const ceKey = statusKey(chatId, threadId)
-        const ctrl = activeStatusReactions.get(ceKey)
-        if (ctrl) ctrl.setError()
-        purgeReactionTracking(ceKey)
+        // #1713: context-exhaustion is a terminal failure path — paint 😱
+        // and finalize the controller. `setError` alone is non-terminal
+        // (recovery permitted); since this turn is genuinely ending, route
+        // through `finalize('error')` so the emoji lands and the controller
+        // stops accepting further transitions.
+        finalizeStatusReaction(chatId, threadId, 'error')
         // Surfaced during CC-5 investigation (`docs/status-ask-cause-classes.md`):
         // the context-exhaust bail path teardown was missing
         // `silencePoke.endTurn(key)`. Without it, the silence-poke state for
@@ -6537,6 +6588,7 @@ function handleSessionEvent(ev: SessionEvent): void {
         // dead and has already told the user is over (the ⚠️ Context window
         // full message above). Match the pattern used at the regular
         // turn-end path (line ~5039) and the wedged-turn path (~5290).
+        const ceKey = statusKey(chatId, threadId)
         silencePoke.endTurn(ceKey)
         pendingProgress.noteTurnEnd(ceKey)
         // Issue #195: tear down the answer-lane stream on context-exhaustion
@@ -6729,10 +6781,11 @@ function handleSessionEvent(ev: SessionEvent): void {
         }
         // Unpin without editing the message so no orphaned card lingers.
         unpinProgressCardForChat?.(chatId, threadId)
-        // Fall through to normal state cleanup (ctrl.setDone, purge, etc.)
+        // Fall through to normal state cleanup (finalize, purge, etc.)
         // but skip the regular closeProgressLane so we don't re-finalize.
-        if (ctrl) ctrl.setDone()
-        purgeReactionTracking(statusKey(chatId, threadId))
+        // #1713: silent-marker turns still finalize to 👍 — turn_end is
+        // the terminal trigger regardless of whether a reply landed.
+        finalizeStatusReaction(chatId, threadId, 'done')
         // Match the normal turn_end path's telemetry so silent-marker turns
         // still appear in turn-duration graphs.
         {
@@ -6889,24 +6942,12 @@ function handleSessionEvent(ev: SessionEvent): void {
               const recentCount = getRecentOutboundCount(backstopChatId, 2)
               if (recentCount > 0) {
                 process.stderr.write(`telegram gateway: turn-flush suppressed — reply tool sent ${recentCount} message(s) within 2s\n`)
-                // Bug D fix: do NOT fire setDone here. The previous code
-                // assumed `recentCount > 0` was sufficient proof of delivery
-                // — and it is, since recordOutbound is called synchronously
-                // after sendMessage success. But firing setDone here races
-                // with the stream_reply done=true callback (Bug Z) which now
-                // fires endStatusReaction after finalize() resolves (i.e.
-                // after the final edit lands in Telegram). Both racing on
-                // setDone is harmless (setDone is idempotent post-terminal),
-                // but the dedup branch firing FIRST means we'd be claiming
-                // delivery from a 500ms-lagged read of local history rather
-                // than from the actual API confirmation. Letting Bug Z's
-                // post-finalize callback own the 👍 transition keeps the
-                // emoji tied to true delivery. The plain `reply` tool path
-                // (PR #602 follow-up) now also fires endStatusReaction
-                // directly from executeReply after sendMessage resolves,
-                // mirroring this contract — so reply-only turns transition
-                // to terminal 👍 in their own success path rather than
-                // relying on this dedup heuristic.
+                // Do NOT finalize the status reaction here. As of #1713
+                // mid-turn delivery proofs (local history, stream
+                // finalize callbacks, interim-ack replies) no longer
+                // transition the emoji; finalization belongs to the
+                // terminal paths (`turn_end`, or a final-answer reply
+                // per the #1728 carve-out in executeReply). This branch
+                // just purges the per-turn reaction tracking entry and
+                // returns.
                 purgeReactionTracking(statusKey(backstopChatId, backstopThreadId))
                 return
               }
@@ -7018,7 +7059,9 @@ function handleSessionEvent(ev: SessionEvent): void {
               Date.now(),
               currentTurn?.registryKey ?? null,
             )
-            if (backstopCtrl) backstopCtrl.setDone()
+            // #1713: route the backstop terminal through finalize() —
+            // single terminal path keeps the controller contract clean.
+            if (backstopCtrl) backstopCtrl.finalize('done')
             // Unpin the card. completeTurn cleans up pinMgr's per-turn
             // state and unpins via the API. If we didn't take over a
             // turn (cardTakeover.turnKey == null), fall back to the
@@ -7034,7 +7077,9 @@ function handleSessionEvent(ev: SessionEvent): void {
             }
           } catch (err) {
             process.stderr.write(`telegram gateway: turn-flush send failed: ${(err as Error).message}\n`)
-            if (backstopCtrl) backstopCtrl.setError()
+            // #1713: backstop send failed — finalize as error so the
+            // turn ends cleanly with 😱 rather than leaving it open.
+            if (backstopCtrl) backstopCtrl.finalize('error')
           } finally {
             purgeReactionTracking(statusKey(backstopChatId, backstopThreadId))
           }
@@ -7042,8 +7087,11 @@ function handleSessionEvent(ev: SessionEvent): void {
         return
       }
-      if (ctrl) ctrl.setDone()
-      purgeReactionTracking(statusKey(chatId, threadId))
+      // #1713: turn_end is THE terminal trigger. Finalize via the
+      // single terminal path (👍). Any prior intermediate states
+      // pending in the debounce window are flushed by `finalize()`
+      // before the terminal emoji emits.
+      finalizeStatusReaction(chatId, threadId, 'done')
       {
         const sKey = streamKey(chatId, threadId)
         const turnDurationMs = turn.startedAt > 0 ? Date.now() - turn.startedAt : 0
@@ -7255,7 +7303,6 @@ function handlePtyActivity(text: string): void {
       disableLinkPreview: access.disableLinkPreview !== false,
       defaultFormat: 'text',
       logStreamingEvent,
-      endStatusReaction,
       historyEnabled: false,
       recordOutbound,
       writeError: (line) => process.stderr.write(line),
@@ -15796,6 +15843,10 @@ void (async () => {
                   ownerChatId: loadAccess().allowFrom[0] ?? '',
                   taskDescription: description,
                   resultText,
+                  // Plumb the JSONL agent id so the spool can mint a
+                  // deterministic dedup key — closes the #1719
+                  // re-fire-on-restart class.
+                  jsonlAgentId: agentId,
                 })
                 if (!decision.deliver) {
                   if (decision.reason === 'no-chat') {
@@ -15806,6 +15857,28 @@ void (async () => {
                   return
                 }
+                // #1720: when the handback is queued, sweep any still-
+                // live progress envelopes for the SAME sub-agent out of
+                // the spool. Without this a progress envelope queued
+                // moments before the worker finished could land on top
+                // of the handback turn, producing a duplicated /
+                // contradictory "still running" line. Prefix match on
+                // `s:progress:<jsonl_agent_id>:` — see `inbound-spool.ts`
+                // spoolId branch.
+                try {
+                  const progressPrefix = `s:progress:${agentId}:`
+                  const dropped = inboundSpool?.dropMatching((id) => id.startsWith(progressPrefix)) ?? 0
+                  if (dropped > 0) {
+                    process.stderr.write(
+                      `telegram gateway: subagent-handback ${agentId} swept ${dropped} live progress envelope(s) from spool\n`,
+                    )
+                  }
+                } catch (err) {
+                  process.stderr.write(
+                    `telegram gateway: subagent-handback ${agentId} progress-sweep error: ${(err as Error).message}\n`,
+                  )
+                }
                 // Deliver via pendingInboundBuffer + the idle-drain tick.
                 // The drain only releases at an idle prompt (no active
                 // turn), so the handback always lands as a clean fresh
@@ -15815,6 +15888,71 @@ void (async () => {
                   `telegram gateway: subagent-handback queued agent=${agentId} outcome=${outcome} chat=${decision.chatId} resultChars=${resultText.length}\n`,
                 )
               },
+              // conversational-pacing beat 3 — mid-flight progress for
+              // background workers (#1720). Fires on every
+              // `sub_agent_text` event; the pure `decideSubagentProgress`
+              // gates on (a) background flag, (b) bucket-not-yet-fired
+              // (deterministic `floor(elapsed / interval)`), (c) chat
+              // resolves. Envelope spoolId is
+              // `s:progress:<jsonl_agent_id>:<bucketIdx>` so a re-fire
+              // within the same bucket — or across a gateway restart —
+              // collapses to one live entry. TTL on `meta.expiresAt`
+              // suppresses stale-after-restart delivery (a 4-h-old
+              // "still working (5m)" would be a lie). Sweep on handback
+              // lives in the `onFinish` block just above.
+              onProgress: ({ agentId, description, latestSummary, elapsedMs, prevBucketIdx, setBucketIdx }) => {
+                let fleetChatId = ''
+                let isBackground = false
+                try {
+                  const fleets = progressDriver?.peekAllFleets() ?? []
+                  for (const f of fleets) {
+                    if (f.fleet.has(agentId)) {
+                      fleetChatId = f.chatId ?? ''
+                      break
+                    }
+                  }
+                } catch { /* peek failures non-fatal */ }
+                if (turnsDb != null) {
+                  try {
+                    const row = turnsDb
+                      .prepare('SELECT background FROM subagents WHERE jsonl_agent_id = ?')
+                      .get(agentId) as { background: number } | undefined
+                    if (row != null) isBackground = row.background === 1
+                  } catch { /* best-effort */ }
+                }
+                if (!isBackground) return // skip overhead for foreground
+                const decision = decideSubagentProgress({
+                  disableEnvValue: process.env.SWITCHROOM_DISABLE_SUBAGENT_PROGRESS,
+                  isBackground,
+                  fleetChatId,
+                  ownerChatId: loadAccess().allowFrom[0] ?? '',
+                  subagentJsonlId: agentId,
+                  taskDescription: description,
+                  latestSummary,
+                  elapsedMs,
+                  progressIntervalMs: DEFAULT_PROGRESS_INTERVAL_MS,
+                  lastBucketIdx: prevBucketIdx,
+                })
+                if (!decision.deliver) return
+                setBucketIdx(decision.bucketIdx)
+                pendingInboundBuffer.push(process.env.SWITCHROOM_AGENT_NAME ?? '', decision.inbound)
+                // #1725 follow-up: yield the cross-turn ambient ticker
+                // for this chat. With the progress envelope queued, the
+                // model is about to compose an explicit in-voice
+                // progress line — letting the "— still working (Nm)"
+                // edit fire in parallel would double-surface the
+                // signal. Progress envelopes target the chat level
+                // (no thread id), matching how the inbound lands.
+                pendingProgress.clearPending(
+                  statusKey(decision.chatId, undefined),
+                  'progress',
+                )
+                process.stderr.write(
+                  `telegram gateway: subagent-progress queued agent=${agentId} bucket=${decision.bucketIdx} elapsed_ms=${elapsedMs} chat=${decision.chatId}\n`,
+                )
+              },
             })
             process.stderr.write('telegram gateway: subagent-watcher active\n')
           }

package/telegram-plugin/gateway/inbound-spool.ts CHANGED Viewed

@@ -49,6 +49,36 @@ import type { InboundMessage } from './ipc-protocol.js'
  *  synthetics of the SAME logical event dedup, but distinct events
  *  (different ts) do not collapse. */
 export function spoolId(msg: InboundMessage): string {
+  // Subagent handbacks (#1719): the JSONL agent id is unique per
+  // Claude Code spawn, so use it as the dedup key. This makes the id
+  // stable across the watcher's onFinish race AND across a
+  // gateway/container restart — so a re-built handback envelope for
+  // the same finished sub-agent collapses against the live spool
+  // entry (or its tombstone) instead of minting a fresh ts-derived
+  // id and re-firing the turn. See issue #1719.
+  if (
+    msg.meta?.source === 'subagent_handback' &&
+    typeof msg.meta?.subagent_jsonl_id === 'string' &&
+    msg.meta.subagent_jsonl_id.length > 0
+  ) {
+    return `s:handback:${msg.meta.subagent_jsonl_id}`
+  }
+  // Subagent progress envelopes (#1720): deterministic per (jsonl id,
+  // bucket idx) — every elapsed bucket collapses to one live entry, so
+  // a re-fire within the same bucket window (or after a gateway
+  // restart) is a structural no-op. The bucket idx is computed by the
+  // gateway from `floor(elapsedMs / progressIntervalMs)` so a worker
+  // that emits narrative lines every 30s only produces one envelope
+  // per bucket. Mirrors the #1719 handback-spoolId pattern.
+  if (
+    msg.meta?.source === 'subagent_progress' &&
+    typeof msg.meta?.subagent_jsonl_id === 'string' &&
+    msg.meta.subagent_jsonl_id.length > 0 &&
+    typeof msg.meta?.bucket_idx === 'string' &&
+    msg.meta.bucket_idx.length > 0
+  ) {
+    return `s:progress:${msg.meta.subagent_jsonl_id}:${msg.meta.bucket_idx}`
+  }
   if (typeof msg.messageId === 'number' && msg.messageId > 0) {
     return `m:${msg.chatId}:${msg.messageId}`
   }
@@ -104,8 +134,22 @@ export interface InboundSpool {
    *  registered bridge. Idempotent. */
   ack: (msg: InboundMessage) => void
   /** Live (un-acked) entries, oldest first. Used at boot to re-push
-   *  into the in-memory buffer. Pure read — does not mutate. */
+   *  into the in-memory buffer. Pure read — does not mutate.
+   *
+   *  TTL (#1720): an entry whose `msg.meta.expiresAt` is a string-encoded
+   *  ms epoch at or before the current time is OMITTED from the result
+   *  (non-string, empty, or non-numeric values are ignored, so the entry
+   *  is kept). Progress envelopes carry a TTL because stale progress lies
+   *  ("still working (5m)" delivered 4h after the worker finished is worse
+   *  than no progress); handback envelopes never set `expiresAt` so this
+   *  is a no-op for them. */
   liveEntries: () => ReplayEntry[]
+  /** Drop every live entry whose spool id matches the predicate. Used
+   *  by the handback path (#1720) to sweep stale progress envelopes
+   *  for the same sub-agent at the moment the handback is queued —
+   *  otherwise a progress envelope queued moments before the worker
+   *  finished could land on top of the handback turn. Tombstones the
+   *  dropped entries durably. */
+  dropMatching: (predicate: (id: string) => boolean) => number
   /** Escalate+drop entries older than `escalateAfterMs`. Calls
    *  `onEscalate` once per dropped entry (post the "couldn't deliver"
    *  card there). Returns the count escalated. Safe to call on a timer. */
@@ -243,7 +287,30 @@ export function createInboundSpool(opts: InboundSpoolOptions): InboundSpool {
     },
     liveEntries() {
       // Insertion order = Map iteration order = oldest first.
-      return [...live.values()].map((e) => ({ agent: e.agent, msg: e.msg }))
+      // TTL filter (#1720): skip entries whose meta.expiresAt is in the
+      // past. The on-disk log keeps them (cheap); compaction sweeps.
+      const cutoff = now()
+      const out: ReplayEntry[] = []
+      for (const e of live.values()) {
+        const expRaw = e.msg.meta?.expiresAt
+        if (typeof expRaw === 'string' && expRaw.length > 0) {
+          const exp = Number(expRaw)
+          if (Number.isFinite(exp) && exp <= cutoff) continue
+        }
+        out.push({ agent: e.agent, msg: e.msg })
+      }
+      return out
+    },
+    dropMatching(predicate) {
+      let n = 0
+      for (const [id, _e] of [...live.entries()]) {
+        if (!predicate(id)) continue
+        live.delete(id)
+        appendRecord({ t: 'ack', id })
+        n++
+      }
+      if (n > 0) maybeCompact()
+      return n
     },
     sweepEscalations(onEscalate) {
       const cutoff = now() - escalateAfterMs

package/telegram-plugin/gateway/subagent-handback-inbound-builder.ts CHANGED Viewed

@@ -48,6 +48,14 @@ export interface SubagentHandbackContext {
   resultText: string
   /** Terminal outcome as classified by the watcher. */
   outcome: 'completed' | 'failed'
+  /** JSONL filename stem for this Claude Code spawn — unique per
+   *  sub-agent run. Plumbed into `meta.subagent_jsonl_id` so the
+   *  spool can mint a deterministic dedup id (`s:handback:<id>`),
+   *  closing the #1719 re-fire-on-restart class. Optional only for
+   *  back-compat with older builder callers — when present the
+   *  spoolId branch fires, when absent the spool falls back to the
+   *  legacy ts-based id (status-quo behaviour). */
+  jsonlAgentId?: string
 }
 function truncate(s: string, max: number): string {
@@ -98,6 +106,7 @@ export function buildSubagentHandbackInbound(opts: {
     meta: {
       source: 'subagent_handback',
       outcome: opts.ctx.outcome,
+      ...(opts.ctx.jsonlAgentId ? { subagent_jsonl_id: opts.ctx.jsonlAgentId } : {}),
     },
   }
 }
@@ -128,6 +137,10 @@ export interface SubagentHandbackDecisionInput {
   ownerChatId: string
   taskDescription: string
   resultText: string
+  /** JSONL filename stem for this Claude Code spawn — forwarded into
+   *  the built inbound's `meta.subagent_jsonl_id`. See
+   *  `SubagentHandbackContext.jsonlAgentId` for the dedup rationale. */
+  jsonlAgentId?: string
   /** Deterministic clock for tests. */
   nowMs?: number
 }
@@ -178,6 +191,7 @@ export function decideSubagentHandback(
       taskDescription: input.taskDescription,
       resultText: input.resultText,
       outcome: input.outcome,
+      ...(input.jsonlAgentId ? { jsonlAgentId: input.jsonlAgentId } : {}),
     },
     ...(input.nowMs !== undefined ? { nowMs: input.nowMs } : {}),
   })