switchroom 0.13.11 → 0.13.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -76,7 +76,8 @@ import {
76
76
  import { emitRuntimeMetric } from '../runtime-metrics.js'
77
77
  import { classifyInbound } from '../inbound-classifier.js'
78
78
  import * as silencePoke from '../silence-poke.js'
79
- import { writeSilentEndState, clearSilentEndState, recordSilentTurnEnd } from '../silent-end.js'
79
+ import { writeSilentEndState, clearSilentEndState, recordUndeliveredTurnEnd } from '../silent-end.js'
80
+ import { isFinalAnswerReply } from '../final-answer-detect.js'
80
81
  import { createAnswerStream, type AnswerStreamHandle } from '../answer-stream.js'
81
82
  import { type SessionEvent } from '../session-tail.js'
82
83
  import {
@@ -1191,15 +1192,20 @@ type CurrentTurn = {
1191
1192
  startedAt: number
1192
1193
  gatewayReceiveAt: number
1193
1194
  replyCalled: boolean
1195
+ // #1664 — whether the model has delivered its *final answer* this turn
1196
+ // (as opposed to only an interim ack). `replyCalled` flips on the first
1197
+ // reply / stream_reply tool_use and stays true for the rest of the turn,
1198
+ // so it cannot tell "ack only" from "ack + real answer". This flag is the
1199
+ // finer signal the silent-end re-prompt needs: it is set only when a reply
1200
+ // actually lands AND `isFinalAnswerReply` (final-answer-detect.ts)
1201
+ // classifies it as the final answer — notification-bearing, or long
1202
+ // enough to be substantive, or a stream_reply done=true — OR when the
1203
+ // turn-flush safety net legitimately emits the model's terminal text. A
1204
+ // turn that ends with this still `false` triggers the silent-end re-prompt
1205
+ // even though `replyCalled` is true — the #1664 case where the real answer
1206
+ // ended up as plain transcript text rendered into an ephemeral draft.
1207
+ finalAnswerDelivered: boolean
1194
1208
  capturedText: string[]
1195
- // #1291: snapshot of capturedText.length at the moment of the most
1196
- // recent reply / stream_reply tool call. Used by decideTurnFlush to
1197
- // isolate the post-reply tail (e.g. a soft-commit reply followed by
1198
- // the real substantive answer in terminal text only) and flush it as
1199
- // a follow-up message. Pre-#1291 the existence of ANY reply call
1200
- // suppressed flush entirely — that lost long terminal-only answers
1201
- // after a "let me check" interim reply.
1202
- capturedTextLenAtLastReply: number
1203
1209
  orphanedReplyTimeoutId: ReturnType<typeof setTimeout> | null
1204
1210
  registryKey: string | null
1205
1211
  // Last assistant outbound message id for the current turn — populated
@@ -4074,6 +4080,13 @@ async function executeUpdateChecklist(args: Record<string, unknown>): Promise<{
4074
4080
  }
4075
4081
 
4076
4082
  async function executeReply(args: Record<string, unknown>): Promise<{ content: Array<{ type: string; text: string }> }> {
4083
+ // #1664 — pin the turn this reply belongs to at entry. The
4084
+ // finalAnswerDelivered write near the end of this function runs after
4085
+ // several awaits; turn-pinning (the #1067 pattern used across the
4086
+ // gateway) keeps the write attributed to THIS turn rather than reading
4087
+ // module-scope currentTurn, which a future refactor could let roll over
4088
+ // mid-call.
4089
+ const turn = currentTurn
4077
4090
  const chat_id = args.chat_id as string
4078
4091
  if (!chat_id) throw new Error('reply: chat_id is required')
4079
4092
  const rawText = args.text as string | undefined
@@ -4496,6 +4509,19 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
4496
4509
  } catch (err) {
4497
4510
  process.stderr.write(`telegram gateway: reply: endStatusReaction hook threw: ${err}\n`)
4498
4511
  }
4512
+ // #1664 — mark the turn's final answer as delivered when this reply
4513
+ // looks like the real answer rather than an interim ack. The
4514
+ // classification (notification-bearing OR substantive length) lives
4515
+ // in `isFinalAnswerReply`. Without this, a turn that ack'd then ended
4516
+ // with the real answer as plain transcript text (#1664) would look
4517
+ // "delivered" because replyCalled is true — and the silent-end
4518
+ // re-prompt would never engage. `rawText` is the model's own answer
4519
+ // text, measured before HTML conversion / Telegraph-link
4520
+ // substitution. Writes `turn` (pinned at executeReply entry) so the
4521
+ // flag always lands on the turn this reply belongs to.
4522
+ if (turn != null && isFinalAnswerReply({ text: rawText, disableNotification })) {
4523
+ turn.finalAnswerDelivered = true
4524
+ }
4499
4525
  }
4500
4526
 
4501
4527
  process.stderr.write(`telegram channel: reply: finalized chatId=${chat_id} messageIds=[${sentIds.join(',')}] chunks=${chunks.length}\n`)
@@ -4509,6 +4535,8 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
4509
4535
  }
4510
4536
 
4511
4537
  async function executeStreamReply(args: Record<string, unknown>): Promise<unknown> {
4538
+ // #1664 — pin the turn at entry; see executeReply for the rationale.
4539
+ const turn = currentTurn
4512
4540
  if (!args.chat_id) throw new Error('stream_reply: chat_id is required')
4513
4541
  if (args.text == null || args.text === '') throw new Error('stream_reply: text is required and cannot be empty')
4514
4542
 
@@ -4688,6 +4716,23 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
4688
4716
  const sThreadId = args.message_thread_id != null ? Number(args.message_thread_id) : undefined
4689
4717
  outboundDedup.record(sChatId, sThreadId, args.text as string, Date.now())
4690
4718
  }
4719
+ // #1664 — mark the turn's final answer as delivered. For stream_reply a
4720
+ // call with done=true IS the final answer by definition (the model
4721
+ // explicitly closed the stream). A non-terminal stream_reply chunk also
4722
+ // counts when it carries the final-answer signals — notification-bearing
4723
+ // OR substantive length — via the same `isFinalAnswerReply` predicate
4724
+ // executeReply uses. See the CurrentTurn.finalAnswerDelivered doc-comment
4725
+ // for why replyCalled is not a sufficient signal here.
4726
+ if (
4727
+ turn != null &&
4728
+ isFinalAnswerReply({
4729
+ text: (args.text as string | undefined) ?? '',
4730
+ disableNotification: args.disable_notification === true,
4731
+ done: args.done === true,
4732
+ })
4733
+ ) {
4734
+ turn.finalAnswerDelivered = true
4735
+ }
4691
4736
  return { content: [{ type: 'text', text: `${result.status} (id: ${result.messageId ?? 'pending'})` }] }
4692
4737
  }
4693
4738
 
@@ -5705,8 +5750,8 @@ function handleSessionEvent(ev: SessionEvent): void {
5705
5750
  startedAt,
5706
5751
  gatewayReceiveAt: startedAt,
5707
5752
  replyCalled: false,
5753
+ finalAnswerDelivered: false,
5708
5754
  capturedText: [],
5709
- capturedTextLenAtLastReply: 0,
5710
5755
  orphanedReplyTimeoutId: null,
5711
5756
  registryKey: null,
5712
5757
  lastAssistantMsgId: null,
@@ -5807,12 +5852,6 @@ function handleSessionEvent(ev: SessionEvent): void {
5807
5852
  // placeholder-heartbeat label, which has been retired.
5808
5853
  if (isTelegramReplyTool(name)) {
5809
5854
  turn.replyCalled = true
5810
- // #1291: pin the captured-text index at the moment of this reply
5811
- // tool call. Anything pushed into capturedText after this point
5812
- // is the post-reply tail (e.g. the substantive answer composed
5813
- // in terminal text after a soft-commit "on it, back in a few").
5814
- // decideTurnFlush slices from this index to flush the tail.
5815
- turn.capturedTextLenAtLastReply = turn.capturedText.length
5816
5855
  if (turn.orphanedReplyTimeoutId != null) {
5817
5856
  clearTimeout(turn.orphanedReplyTimeoutId)
5818
5857
  turn.orphanedReplyTimeoutId = null
@@ -5830,6 +5869,22 @@ function handleSessionEvent(ev: SessionEvent): void {
5830
5869
  // #1067: snapshot at entry. The answer-stream creation closures
5831
5870
  // below also read `turn` instead of currentTurn so they pin to
5832
5871
  // this turn's chat for the stream's lifetime.
5872
+ //
5873
+ // #1664 ordering note: a `text` event can arrive AFTER turn_end has
5874
+ // nulled currentTurn (the issue observed `answer_lane_update
5875
+ // transport:"draft"` firing post-turn_end). Such a late event is
5876
+ // dropped here by the `turn != null` guard — it is NOT folded back
5877
+ // into the just-ended turn. That is deliberate and safe: by the
5878
+ // time this fires, the turn atom has been handed to
5879
+ // endCurrentTurnAtomic and turn_end has already run its flush /
5880
+ // silent-end decision; re-opening a closed turn (re-creating an
5881
+ // answer stream, re-evaluating decideTurnFlush) would be a large,
5882
+ // race-prone change. The #1664 safety net does not depend on
5883
+ // catching the late text: a turn whose real answer lost the race
5884
+ // ends with finalAnswerDelivered=false, so recordUndeliveredTurnEnd
5885
+ // engages the Stop-hook re-prompt and the model re-delivers the
5886
+ // answer through the reply tool. The dropped draft text is
5887
+ // recovered by re-prompt, not by post-hoc materialization.
5833
5888
  const turn = currentTurn
5834
5889
  if (turn != null) {
5835
5890
  turn.capturedText.push(ev.text)
@@ -6072,20 +6127,8 @@ function handleSessionEvent(ev: SessionEvent): void {
6072
6127
  chatId: turn.sessionChatId,
6073
6128
  replyCalled: turn.replyCalled,
6074
6129
  capturedText: turn.capturedText,
6075
- capturedTextLenAtLastReply: turn.capturedTextLenAtLastReply,
6076
6130
  flushEnabled: TURN_FLUSH_SAFETY_ENABLED,
6077
6131
  })
6078
- // #1291: when the model emitted a soft-commit reply followed by a
6079
- // substantive terminal-only answer, decideTurnFlush returns
6080
- // kind:'flush' with the post-reply tail. Log WARN so this case is
6081
- // auditable — the model SHOULD have called reply for the tail, but
6082
- // didn't, and the framework is covering for it.
6083
- if (flushDecision.kind === 'flush' && turn.replyCalled) {
6084
- process.stderr.write(
6085
- `telegram gateway: WARN post-reply-tail flush (#1291) — model emitted ${flushDecision.text.length} chars after a prior reply call without a follow-up reply tool` +
6086
- ` chat=${chatId} turnStartedAt=${turn.startedAt}\n`,
6087
- )
6088
- }
6089
6132
  if (flushDecision.kind === 'skip' && flushDecision.reason !== 'reply-called') {
6090
6133
  process.stderr.write(
6091
6134
  `telegram gateway: turn-flush skipped — reason=${flushDecision.reason}\n`,
@@ -6208,6 +6251,18 @@ function handleSessionEvent(ev: SessionEvent): void {
6208
6251
  const backstopThreadId = threadId
6209
6252
  const backstopCtrl = ctrl
6210
6253
 
6254
+ // #1664 — turn-flush only fires when !replyCalled (decideTurnFlush
6255
+ // returns 'reply-called' otherwise). It legitimately delivers the
6256
+ // model's terminal text as the answer, so the turn IS answered.
6257
+ // Mark it now so the early-return below skips the silent-end
6258
+ // re-prompt for a turn whose answer is genuinely on its way out.
6259
+ // (The IIFE that actually sends runs after this branch's `return`;
6260
+ // since the silent-end block is on the sibling reply-called path
6261
+ // that this branch never reaches, this set is belt-and-braces —
6262
+ // it keeps the captured `turn` atom internally consistent for any
6263
+ // future reader.)
6264
+ turn.finalAnswerDelivered = true
6265
+
6211
6266
  // #654 deterministic double-message fix. Hand off the pinned
6212
6267
  // progress card BEFORE state reset so the driver doesn't keep
6213
6268
  // editing it while turn-flush is rewriting it with the answer.
@@ -6440,17 +6495,31 @@ function handleSessionEvent(ev: SessionEvent): void {
6440
6495
  longest_silent_gap_ms: outboundMetrics.longestOutboundGapMs,
6441
6496
  ended_via: outboundMetrics.outboundCount > 0 ? 'reply' : 'silent',
6442
6497
  })
6443
- // #1122 PR4 / #1161: deterministic silent-end handling (see the
6444
- // silent-marker path above for the rationale).
6445
- // - first silent-end → recordSilentTurnEnd writes the state
6446
- // file so the Stop hook (silent-end-interrupt-stop.mjs)
6447
- // blocks the session-end and re-prompts the agent to reply.
6498
+ // #1122 PR4 / #1161 / #1664: deterministic undelivered-turn
6499
+ // handling (see the silent-marker path above for the rationale).
6500
+ // - first undelivered turn-end → recordSilentTurnEnd writes the
6501
+ // state file so the Stop hook (silent-end-interrupt-stop.mjs)
6502
+ // blocks the session-end and re-prompts the agent to deliver.
6448
6503
  // - the Stop-hook re-prompt is already spent and the agent is
6449
- // STILL silent → recordSilentTurnEnd returns exhausted:true;
6450
- // deliver a user-facing fallback so the turn never just
6451
- // vanishes (the user otherwise only sees the card disappear).
6452
- if (outboundMetrics.outboundCount === 0) {
6453
- const silentEnd = recordSilentTurnEnd({
6504
+ // STILL undelivered → recordSilentTurnEnd returns
6505
+ // exhausted:true; deliver a user-facing fallback so the turn
6506
+ // never just vanishes (the user otherwise only sees the card
6507
+ // disappear).
6508
+ //
6509
+ // #1664 — the trigger is "no final answer delivered", not "zero
6510
+ // outbound". `outboundCount === 0` is now just the special case
6511
+ // where nothing landed at all. The added case: the model sent an
6512
+ // interim ack via reply/stream_reply (outboundCount > 0,
6513
+ // replyCalled = true) but ended the turn with its real answer as
6514
+ // plain transcript text — rendered into an ephemeral answer-lane
6515
+ // draft and retracted at turn_end, never finalized. finalAnswer-
6516
+ // Delivered stays false there, so the re-prompt engages and the
6517
+ // model re-delivers the answer through the reply tool. NO_REPLY /
6518
+ // HEARTBEAT_OK silent-marker turns return earlier and never reach
6519
+ // this path. The turn-flush 'flush' branch also returns earlier
6520
+ // (and sets finalAnswerDelivered=true defensively).
6521
+ if (turn.finalAnswerDelivered === false) {
6522
+ const silentEnd = recordUndeliveredTurnEnd({
6454
6523
  chatId,
6455
6524
  threadId: threadId ?? null,
6456
6525
  turnKey: tKey,
@@ -14983,26 +15052,11 @@ void (async () => {
14983
15052
  // inside the sub-agent. Belt-and-braces with PR #557's
14984
15053
  // multi-signal progress gate.
14985
15054
  parentStateDir: STATE_DIR,
14986
- sendNotification: (text: string) => {
14987
- const ownerChatId = loadAccess().allowFrom[0]
14988
- if (!ownerChatId) return
14989
- // #1075: thread-id-bearing route through swallowingApiCall
14990
- // so a deleted TOPIC_ID forum thread doesn't crash the
14991
- // gateway. Notifications are best-effort.
14992
- void swallowingApiCall(
14993
- () =>
14994
- lockedBot.api.sendMessage(ownerChatId, text, {
14995
- parse_mode: 'HTML',
14996
- link_preview_options: { is_disabled: true },
14997
- ...(TOPIC_ID != null ? { message_thread_id: TOPIC_ID } : {}),
14998
- }),
14999
- {
15000
- chat_id: ownerChatId,
15001
- verb: 'subagent-watcher-notification',
15002
- ...(TOPIC_ID != null ? { threadId: TOPIC_ID } : {}),
15003
- },
15004
- )
15005
- },
15055
+ // No user-facing notification callback: the card-era
15056
+ // "✓ Worker done" message was retired with the progress
15057
+ // card (#1122). Sub-agent completion reaches the user as
15058
+ // the model's own beat-4 handback reply; the watcher's
15059
+ // role here is registry liveness + the `onFinish` cue.
15006
15060
  log: (msg) => process.stderr.write(`telegram gateway: ${msg}\n`),
15007
15061
  // Option C (#393): route stall detections into the progress-card
15008
15062
  // driver so the pinned card re-renders with a ⚠️ indicator even
@@ -2,12 +2,20 @@
2
2
  /**
3
3
  * Stop hook — auto-interrupt for silent-end turns.
4
4
  *
5
- * When a Claude Code session ends without the agent having called reply or
6
- * stream_reply (a "silent-end"), the Telegram gateway writes a state file at
5
+ * When a Claude Code session ends without the agent delivering a final
6
+ * answer to the user, the Telegram gateway writes a state file at
7
7
  * $TELEGRAM_STATE_DIR/silent-end-pending.json. This hook reads that file and,
8
8
  * if a first-time silent-end is detected (retryCount === 0), returns a
9
9
  * decision:block to re-prompt the agent instead of letting the session close.
10
10
  *
11
+ * #1664 — "no final answer delivered" covers two cases: (a) the turn ended
12
+ * with zero outbound (the original case), and (b) the model sent only an
13
+ * interim ack via reply/stream_reply but left its real answer as plain
14
+ * transcript text, which the gateway renders into an ephemeral draft and
15
+ * never finalizes. The re-prompt below tells the model to send its answer
16
+ * through the reply tool, or reply NO_REPLY if it genuinely has nothing to
17
+ * add / already delivered.
18
+ *
11
19
  * On the second silent-end (retryCount >= MAX_RETRIES), the hook allows the
12
20
  * stop. The gateway's turn-end path (recordSilentTurnEnd in silent-end.ts)
13
21
  * detects the exhausted re-prompt and delivers a user-facing fallback
@@ -104,9 +112,13 @@ function main() {
104
112
  JSON.stringify({
105
113
  decision: 'block',
106
114
  reason:
107
- 'You ran tools but never sent a reply to the user. ' +
108
- 'Call mcp__switchroom-telegram__reply or mcp__switchroom-telegram__stream_reply (with done=true) ' +
109
- 'to send your final answer now.',
115
+ 'This turn is ending without your final answer reaching the user. ' +
116
+ 'If you wrote an answer as plain text (not via a tool), the user ' +
117
+ 'cannot see it only text sent through the reply tool is delivered. ' +
118
+ 'Send your final answer now by calling mcp__switchroom-telegram__reply ' +
119
+ '(or mcp__switchroom-telegram__stream_reply with done=true). ' +
120
+ 'If your final answer has already reached the user, or you ' +
121
+ 'intentionally have nothing to add, reply with exactly NO_REPLY.',
110
122
  }),
111
123
  )
112
124
  process.exit(0)
@@ -182,22 +182,39 @@ export function readSilentEndState(deps?: SilentEndDeps): SilentEndState | null
182
182
  }
183
183
 
184
184
  /**
185
- * Record a user-message turn that ended with zero outbound messages and
186
- * report whether the deterministic re-prompt has been exhausted. This is
187
- * the gateway's single entry point for the main turn-end path.
185
+ * Record a user-message turn that ended WITHOUT the model delivering a
186
+ * final answer, and report whether the deterministic re-prompt has been
187
+ * exhausted. This is the gateway's single entry point for the main
188
+ * turn-end path.
188
189
  *
189
- * - First silent-end of a turn (no prior state, or prior `retryCount`
190
+ * #1664 — the trigger generalized from "zero outbound" to "no final
191
+ * answer delivered". Two cases reach here now:
192
+ * 1. Zero outbound — the turn ended with nothing sent at all (the
193
+ * original #1122/#1161 silent-end case).
194
+ * 2. Interim-ack only — the model sent an ack via reply/stream_reply
195
+ * but ended the turn with its real answer as plain transcript text
196
+ * (rendered into an ephemeral answer-lane draft that gets retracted
197
+ * at turn_end, never finalized). The gateway tracks this via
198
+ * `CurrentTurn.finalAnswerDelivered`; case 1 is just the subset
199
+ * where that flag is false because nothing landed.
200
+ * In both cases the model still owes the user an answer, so the same
201
+ * re-prompt safety net applies — the framework re-prompts; the model
202
+ * re-delivers via the reply tool (never the framework materializing a
203
+ * message from the draft — see `reference/principles.md`).
204
+ *
205
+ * - First undelivered turn-end (no prior state, or prior `retryCount`
190
206
  * still below `SILENT_END_MAX_RETRIES`) → writes the state file via
191
207
  * `writeSilentEndState`, so `silent-end-interrupt-stop.mjs` blocks
192
208
  * the stop and re-prompts the agent. Returns `{ exhausted: false }`.
193
209
  *
194
- * - A silent-end where the prior state for the SAME turn already shows
195
- * `retryCount >= SILENT_END_MAX_RETRIES` → the Stop hook already
196
- * spent its re-prompt and the agent is STILL silent. Recovery has
197
- * failed. Clears the state file (so the Stop hook on this final turn
198
- * finds nothing pending and allows the stop cleanly) and returns
199
- * `{ exhausted: true }` — the caller MUST then deliver a user-facing
200
- * fallback so the turn never just vanishes (#1161).
210
+ * - An undelivered turn-end where the prior state for the SAME turn
211
+ * already shows `retryCount >= SILENT_END_MAX_RETRIES` → the Stop
212
+ * hook already spent its re-prompt and the agent is STILL
213
+ * undelivered. Recovery has failed. Clears the state file (so the
214
+ * Stop hook on this final turn finds nothing pending and allows the
215
+ * stop cleanly) and returns `{ exhausted: true }` — the caller MUST
216
+ * then deliver a user-facing fallback so the turn never just
217
+ * vanishes (#1161).
201
218
  *
202
219
  * Chat-less autonomous wakeup turns never reach here: the gateway only
203
220
  * creates a `currentTurn` (and therefore only runs a turn-end handler)
@@ -228,3 +245,12 @@ export function recordSilentTurnEnd(
228
245
  writeSilentEndState(args, deps)
229
246
  return { exhausted: false }
230
247
  }
248
+
249
+ /**
250
+ * #1664 — semantic alias for `recordSilentTurnEnd`. The trigger is now
251
+ * "no final answer delivered", of which "zero outbound" is one case; new
252
+ * callsites should prefer this name so the intent reads correctly. The
253
+ * behaviour, retry semantics, and `{exhausted}` contract are identical —
254
+ * `recordSilentTurnEnd` is kept for the existing callers and tests.
255
+ */
256
+ export const recordUndeliveredTurnEnd = recordSilentTurnEnd
@@ -146,11 +146,6 @@ export interface SubagentWatcherConfig {
146
146
  * an agent's home pollutes the watcher with phantom registrations).
147
147
  */
148
148
  agentCwd?: string
149
- /**
150
- * Send a fresh (non-edit) Telegram message. For stall / completion
151
- * state-transition notifications.
152
- */
153
- sendNotification: (text: string) => void
154
149
  /**
155
150
  * How often to re-scan for new subagent dirs (ms). Default 1000.
156
151
  */
@@ -862,21 +857,19 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
862
857
 
863
858
  if (entry.state === 'done' && !entry.completionNotified) {
864
859
  entry.completionNotified = true
865
- const desc = escapeHtml(truncate(entry.description, 80))
866
- const summary = entry.lastSummaryLine
867
- ? ` — ${escapeHtml(truncate(entry.lastSummaryLine, 120))}`
868
- : ''
869
- const tools = entry.toolCount > 0 ? ` (${entry.toolCount} tools)` : ''
870
- try {
871
- config.sendNotification(`✓ Worker done: ${desc}${tools}${summary}`)
872
- } catch (err) {
873
- log?.(`subagent-watcher: completion notification error: ${(err as Error).message}`)
874
- }
875
- // Symmetric `sub_agent_finished` surface (#card-audit-log). Emit
876
- // before the deferred cleanup runs so the callback always sees a
877
- // live registry entry. Historical entries that already-completed at
878
- // boot get their `completionNotified=true` shortcut in registerAgent
879
- // and skip this path entirely — only post-boot transitions fire.
860
+ // Card retired (#1122): the watcher no longer sends a user-facing
861
+ // "✓ Worker done" message. A framework-authored status line is a
862
+ // conversational-pacing anti-pattern, and the heuristic that drove
863
+ // it (silent-stall synthesis) fired on a worker mid-`Bash` as
864
+ // readily as on a finished one. The user-facing handback is the
865
+ // model's own beat-4 reply, woken by Claude Code's native
866
+ // background-task notification. Completion is surfaced here only
867
+ // via the structured `onFinish` callback — emitted before the
868
+ // deferred cleanup runs so the callback always sees a live
869
+ // registry entry. Historical entries that already-completed at
870
+ // boot get their `completionNotified=true` shortcut in
871
+ // registerAgent and skip this path — only post-boot transitions
872
+ // fire.
880
873
  if (config.onFinish) {
881
874
  try {
882
875
  config.onFinish({
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Unit coverage for the #1664 final-answer detection predicate.
3
+ *
4
+ * `isFinalAnswerReply` is the finer signal the silent-end re-prompt needs:
5
+ * the gateway's `replyCalled` flag flips on the first reply / stream_reply
6
+ * tool use and cannot tell an interim ack from the real answer. This
7
+ * predicate classifies each reply so a turn whose every reply was "interim"
8
+ * (and whose real answer ended up as plain transcript text) ends with
9
+ * `finalAnswerDelivered === false` and triggers the re-prompt — the #1664
10
+ * bug (streamed answers rendered to a draft, retracted at turn_end, lost).
11
+ *
12
+ * These tests pin the pure predicate. The gateway wires it into
13
+ * executeReply / executeStreamReply (covered by the gateway integration
14
+ * surface); pinning the policy here keeps it auditable without importing
15
+ * the multi-thousand-line gateway module.
16
+ */
17
+
18
+ import { describe, it, expect } from 'vitest'
19
+ import { isFinalAnswerReply, FINAL_ANSWER_MIN_CHARS } from '../final-answer-detect.js'
20
+
21
+ describe('isFinalAnswerReply — #1664 final-answer classification', () => {
22
+ it('classifies a notification-bearing reply as the final answer', () => {
23
+ // disable_notification:false is the pacing contract's "final answer"
24
+ // signal — interim updates pass disable_notification:true.
25
+ expect(
26
+ isFinalAnswerReply({ text: 'short answer', disableNotification: false }),
27
+ ).toBe(true)
28
+ })
29
+
30
+ it('classifies a short interim ack (disable_notification:true) as NOT final', () => {
31
+ expect(
32
+ isFinalAnswerReply({ text: 'on it…', disableNotification: true }),
33
+ ).toBe(false)
34
+ })
35
+
36
+ it('length backstop: a long reply mis-marked interim still counts as final', () => {
37
+ const longText = 'x'.repeat(FINAL_ANSWER_MIN_CHARS)
38
+ expect(
39
+ isFinalAnswerReply({ text: longText, disableNotification: true }),
40
+ ).toBe(true)
41
+ })
42
+
43
+ it('length backstop is inclusive at exactly FINAL_ANSWER_MIN_CHARS', () => {
44
+ expect(
45
+ isFinalAnswerReply({
46
+ text: 'x'.repeat(FINAL_ANSWER_MIN_CHARS),
47
+ disableNotification: true,
48
+ }),
49
+ ).toBe(true)
50
+ // One char under the threshold and marked interim → still interim.
51
+ expect(
52
+ isFinalAnswerReply({
53
+ text: 'x'.repeat(FINAL_ANSWER_MIN_CHARS - 1),
54
+ disableNotification: true,
55
+ }),
56
+ ).toBe(false)
57
+ })
58
+
59
+ it('stream_reply done=true is always the final answer, even short + interim', () => {
60
+ // A done=true call explicitly closes the stream — it IS the answer,
61
+ // regardless of length or the notification flag.
62
+ expect(
63
+ isFinalAnswerReply({ text: 'ok', disableNotification: true, done: true }),
64
+ ).toBe(true)
65
+ })
66
+
67
+ it('a non-terminal stream_reply chunk (done=false) is classified like a plain reply', () => {
68
+ // Short interim chunk → not final.
69
+ expect(
70
+ isFinalAnswerReply({ text: 'thinking…', disableNotification: true, done: false }),
71
+ ).toBe(false)
72
+ // Notification-bearing chunk → final.
73
+ expect(
74
+ isFinalAnswerReply({ text: 'here it is', disableNotification: false, done: false }),
75
+ ).toBe(true)
76
+ })
77
+
78
+ it('an empty reply marked interim is NOT the final answer', () => {
79
+ expect(
80
+ isFinalAnswerReply({ text: '', disableNotification: true }),
81
+ ).toBe(false)
82
+ })
83
+
84
+ it('FINAL_ANSWER_MIN_CHARS is the documented 200-char backstop', () => {
85
+ // Guards the constant against silent drift — the value is referenced
86
+ // in the CurrentTurn doc-comment and the Stop-hook rationale.
87
+ expect(FINAL_ANSWER_MIN_CHARS).toBe(200)
88
+ })
89
+ })
@@ -50,7 +50,6 @@ describe('subagent-watcher: WorkerEntry.lastTool', () => {
50
50
  const intervals: Array<{ fn: () => void }> = []
51
51
  const w = startSubagentWatcher({
52
52
  agentDir,
53
- sendNotification: () => {},
54
53
  stallThresholdMs: 60_000,
55
54
  rescanMs: 500,
56
55
  now: () => Date.now(),
@@ -8,8 +8,10 @@ import {
8
8
  clearSilentEndState,
9
9
  readSilentEndState,
10
10
  recordSilentTurnEnd,
11
+ recordUndeliveredTurnEnd,
11
12
  SILENT_END_MAX_RETRIES,
12
13
  } from '../silent-end.js'
14
+ import { isFinalAnswerReply } from '../final-answer-detect.js'
13
15
 
14
16
  let stateDir: string
15
17
  const ORIG_ENV = process.env.TELEGRAM_STATE_DIR
@@ -187,6 +189,118 @@ describe('recordSilentTurnEnd — #1161 exhaustion detection', () => {
187
189
  })
188
190
  })
189
191
 
192
+ describe('recordUndeliveredTurnEnd — #1664 extended trigger', () => {
193
+ it('is the same function as recordSilentTurnEnd (semantic alias)', () => {
194
+ expect(recordUndeliveredTurnEnd).toBe(recordSilentTurnEnd)
195
+ })
196
+
197
+ // The gateway computes `finalAnswerDelivered` by OR-ing isFinalAnswerReply
198
+ // across every reply landed this turn, then engages the re-prompt iff the
199
+ // flag is still false at turn_end. These tests reproduce that exact
200
+ // decision: classify the turn's replies, then call recordUndeliveredTurnEnd
201
+ // only when no reply qualified.
202
+ function simulateTurnEnd(
203
+ replies: Array<{ text: string; disableNotification: boolean; done?: boolean }>,
204
+ turnKey: string,
205
+ ): { finalAnswerDelivered: boolean; rePromptEngaged: boolean } {
206
+ const finalAnswerDelivered = replies.some((r) =>
207
+ isFinalAnswerReply(r),
208
+ )
209
+ let rePromptEngaged = false
210
+ if (finalAnswerDelivered === false) {
211
+ recordUndeliveredTurnEnd({ chatId: 'c', threadId: null, turnKey })
212
+ rePromptEngaged = true
213
+ }
214
+ return { finalAnswerDelivered, rePromptEngaged }
215
+ }
216
+
217
+ it('#1664 regression: ack reply + answer-as-transcript → re-prompt fires', () => {
218
+ // The exact #1664 shape: the model sent a short interim ack via the
219
+ // reply tool (disable_notification:true), then ended the turn with its
220
+ // real answer as plain transcript text — which the gateway renders into
221
+ // an ephemeral draft and retracts at turn_end, never finalized. No
222
+ // reply qualified as the final answer, so the turn is undelivered.
223
+ const r = simulateTurnEnd(
224
+ [{ text: 'On it — give me a moment.', disableNotification: true }],
225
+ 'c:1664',
226
+ )
227
+ expect(r.finalAnswerDelivered).toBe(false)
228
+ expect(r.rePromptEngaged).toBe(true)
229
+ // State file written so silent-end-interrupt-stop.mjs blocks the stop.
230
+ expect(readSilentEndState()).toMatchObject({ turnKey: 'c:1664', retryCount: 0 })
231
+ })
232
+
233
+ it('a turn with a final-answer reply (notification-bearing) → re-prompt NOT engaged', () => {
234
+ const r = simulateTurnEnd(
235
+ [{ text: 'Here is the answer.', disableNotification: false }],
236
+ 'c:final',
237
+ )
238
+ expect(r.finalAnswerDelivered).toBe(true)
239
+ expect(r.rePromptEngaged).toBe(false)
240
+ expect(readSilentEndState()).toBeNull()
241
+ })
242
+
243
+ it('a long reply mis-marked interim → re-prompt NOT engaged (length backstop)', () => {
244
+ const r = simulateTurnEnd(
245
+ [{ text: 'x'.repeat(500), disableNotification: true }],
246
+ 'c:long',
247
+ )
248
+ expect(r.finalAnswerDelivered).toBe(true)
249
+ expect(r.rePromptEngaged).toBe(false)
250
+ expect(readSilentEndState()).toBeNull()
251
+ })
252
+
253
+ it('zero-outbound turn → re-prompt still engaged (regression of the original case)', () => {
254
+ // No replies at all — the original #1122 silent-end case is now just
255
+ // the subset of "no final answer delivered" where nothing landed.
256
+ const r = simulateTurnEnd([], 'c:zero')
257
+ expect(r.finalAnswerDelivered).toBe(false)
258
+ expect(r.rePromptEngaged).toBe(true)
259
+ expect(readSilentEndState()).toMatchObject({ turnKey: 'c:zero', retryCount: 0 })
260
+ })
261
+
262
+ it('interim ack followed by a final-answer reply in the same turn → NOT engaged', () => {
263
+ // The model ack'd first then properly delivered — finalAnswerDelivered
264
+ // latches true on the second reply; the turn is answered.
265
+ const r = simulateTurnEnd(
266
+ [
267
+ { text: 'Looking into it…', disableNotification: true },
268
+ { text: 'Done — the result is 42.', disableNotification: false },
269
+ ],
270
+ 'c:ack-then-final',
271
+ )
272
+ expect(r.finalAnswerDelivered).toBe(true)
273
+ expect(r.rePromptEngaged).toBe(false)
274
+ expect(readSilentEndState()).toBeNull()
275
+ })
276
+
277
+ it('stream_reply done=true counts as the final answer → NOT engaged', () => {
278
+ const r = simulateTurnEnd(
279
+ [{ text: 'ok', disableNotification: true, done: true }],
280
+ 'c:stream-done',
281
+ )
282
+ expect(r.finalAnswerDelivered).toBe(true)
283
+ expect(r.rePromptEngaged).toBe(false)
284
+ expect(readSilentEndState()).toBeNull()
285
+ })
286
+
287
+ it('exhaustion still applies on the #1664 path after the Stop-hook re-prompt', () => {
288
+ // First undelivered turn-end writes state.
289
+ expect(simulateTurnEnd(
290
+ [{ text: 'one sec', disableNotification: true }],
291
+ 'c:exhaust',
292
+ ).rePromptEngaged).toBe(true)
293
+ // Stop hook blocks once and bumps retryCount (simulated).
294
+ const path = join(stateDir, 'silent-end-pending.json')
295
+ const s = readSilentEndState()!
296
+ writeFileSync(path, JSON.stringify({ ...s, retryCount: s.retryCount + 1 }))
297
+ // Re-prompted turn STILL ends with only an interim ack → exhausted.
298
+ const second = recordUndeliveredTurnEnd({ chatId: 'c', threadId: null, turnKey: 'c:exhaust' })
299
+ expect(second.exhausted).toBe(true)
300
+ expect(readSilentEndState()).toBeNull()
301
+ })
302
+ })
303
+
190
304
  describe('silent-end-interrupt-stop hook — integration', () => {
191
305
  const hookPath = join(__dirname, '..', 'hooks', 'silent-end-interrupt-stop.mjs')
192
306
 
@@ -222,6 +336,10 @@ describe('silent-end-interrupt-stop hook — integration', () => {
222
336
  const out = JSON.parse(r.stdout.trim())
223
337
  expect(out.decision).toBe('block')
224
338
  expect(out.reason).toContain('reply')
339
+ // #1664 — the re-prompt must offer the NO_REPLY escape hatch so a
340
+ // model that already delivered (or intentionally has nothing to add)
341
+ // can end the turn cleanly instead of being forced to re-send.
342
+ expect(out.reason).toContain('NO_REPLY')
225
343
  // retryCount must have been incremented to 1
226
344
  expect(readSilentEndState()!.retryCount).toBe(1)
227
345
  })