switchroom 0.13.11 → 0.13.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +60 -5
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +34 -52
- package/telegram-plugin/final-answer-detect.ts +83 -0
- package/telegram-plugin/gateway/gateway.ts +112 -58
- package/telegram-plugin/hooks/silent-end-interrupt-stop.mjs +17 -5
- package/telegram-plugin/silent-end.ts +37 -11
- package/telegram-plugin/subagent-watcher.ts +13 -20
- package/telegram-plugin/tests/final-answer-detect.test.ts +89 -0
- package/telegram-plugin/tests/fleet-state-watcher.test.ts +0 -1
- package/telegram-plugin/tests/silent-end.test.ts +118 -0
- package/telegram-plugin/tests/subagent-registry-bugs.test.ts +1 -3
- package/telegram-plugin/tests/subagent-watcher-env-thresholds.test.ts +0 -1
- package/telegram-plugin/tests/subagent-watcher-parent-marker.test.ts +0 -1
- package/telegram-plugin/tests/subagent-watcher-stall-notification.test.ts +1 -4
- package/telegram-plugin/tests/subagent-watcher-stall-terminal.test.ts +0 -1
- package/telegram-plugin/tests/subagent-watcher.test.ts +15 -5
- package/telegram-plugin/tests/turn-flush-safety.test.ts +29 -81
- package/telegram-plugin/turn-flush-safety.ts +23 -53
|
@@ -76,7 +76,8 @@ import {
|
|
|
76
76
|
import { emitRuntimeMetric } from '../runtime-metrics.js'
|
|
77
77
|
import { classifyInbound } from '../inbound-classifier.js'
|
|
78
78
|
import * as silencePoke from '../silence-poke.js'
|
|
79
|
-
import { writeSilentEndState, clearSilentEndState,
|
|
79
|
+
import { writeSilentEndState, clearSilentEndState, recordUndeliveredTurnEnd } from '../silent-end.js'
|
|
80
|
+
import { isFinalAnswerReply } from '../final-answer-detect.js'
|
|
80
81
|
import { createAnswerStream, type AnswerStreamHandle } from '../answer-stream.js'
|
|
81
82
|
import { type SessionEvent } from '../session-tail.js'
|
|
82
83
|
import {
|
|
@@ -1191,15 +1192,20 @@ type CurrentTurn = {
|
|
|
1191
1192
|
startedAt: number
|
|
1192
1193
|
gatewayReceiveAt: number
|
|
1193
1194
|
replyCalled: boolean
|
|
1195
|
+
// #1664 — whether the model has delivered its *final answer* this turn
|
|
1196
|
+
// (as opposed to only an interim ack). `replyCalled` flips on the first
|
|
1197
|
+
// reply / stream_reply tool_use and stays true for the rest of the turn,
|
|
1198
|
+
// so it cannot tell "ack only" from "ack + real answer". This flag is the
|
|
1199
|
+
// finer signal the silent-end re-prompt needs: it is set only when a reply
|
|
1200
|
+
// actually lands AND `isFinalAnswerReply` (final-answer-detect.ts)
|
|
1201
|
+
// classifies it as the final answer — notification-bearing, or long
|
|
1202
|
+
// enough to be substantive, or a stream_reply done=true — OR when the
|
|
1203
|
+
// turn-flush safety net legitimately emits the model's terminal text. A
|
|
1204
|
+
// turn that ends with this still `false` triggers the silent-end re-prompt
|
|
1205
|
+
// even though `replyCalled` is true — the #1664 case where the real answer
|
|
1206
|
+
// ended up as plain transcript text rendered into an ephemeral draft.
|
|
1207
|
+
finalAnswerDelivered: boolean
|
|
1194
1208
|
capturedText: string[]
|
|
1195
|
-
// #1291: snapshot of capturedText.length at the moment of the most
|
|
1196
|
-
// recent reply / stream_reply tool call. Used by decideTurnFlush to
|
|
1197
|
-
// isolate the post-reply tail (e.g. a soft-commit reply followed by
|
|
1198
|
-
// the real substantive answer in terminal text only) and flush it as
|
|
1199
|
-
// a follow-up message. Pre-#1291 the existence of ANY reply call
|
|
1200
|
-
// suppressed flush entirely — that lost long terminal-only answers
|
|
1201
|
-
// after a "let me check" interim reply.
|
|
1202
|
-
capturedTextLenAtLastReply: number
|
|
1203
1209
|
orphanedReplyTimeoutId: ReturnType<typeof setTimeout> | null
|
|
1204
1210
|
registryKey: string | null
|
|
1205
1211
|
// Last assistant outbound message id for the current turn — populated
|
|
@@ -4074,6 +4080,13 @@ async function executeUpdateChecklist(args: Record<string, unknown>): Promise<{
|
|
|
4074
4080
|
}
|
|
4075
4081
|
|
|
4076
4082
|
async function executeReply(args: Record<string, unknown>): Promise<{ content: Array<{ type: string; text: string }> }> {
|
|
4083
|
+
// #1664 — pin the turn this reply belongs to at entry. The
|
|
4084
|
+
// finalAnswerDelivered write near the end of this function runs after
|
|
4085
|
+
// several awaits; turn-pinning (the #1067 pattern used across the
|
|
4086
|
+
// gateway) keeps the write attributed to THIS turn rather than reading
|
|
4087
|
+
// module-scope currentTurn, which a future refactor could let roll over
|
|
4088
|
+
// mid-call.
|
|
4089
|
+
const turn = currentTurn
|
|
4077
4090
|
const chat_id = args.chat_id as string
|
|
4078
4091
|
if (!chat_id) throw new Error('reply: chat_id is required')
|
|
4079
4092
|
const rawText = args.text as string | undefined
|
|
@@ -4496,6 +4509,19 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
|
|
|
4496
4509
|
} catch (err) {
|
|
4497
4510
|
process.stderr.write(`telegram gateway: reply: endStatusReaction hook threw: ${err}\n`)
|
|
4498
4511
|
}
|
|
4512
|
+
// #1664 — mark the turn's final answer as delivered when this reply
|
|
4513
|
+
// looks like the real answer rather than an interim ack. The
|
|
4514
|
+
// classification (notification-bearing OR substantive length) lives
|
|
4515
|
+
// in `isFinalAnswerReply`. Without this, a turn that ack'd then ended
|
|
4516
|
+
// with the real answer as plain transcript text (#1664) would look
|
|
4517
|
+
// "delivered" because replyCalled is true — and the silent-end
|
|
4518
|
+
// re-prompt would never engage. `rawText` is the model's own answer
|
|
4519
|
+
// text, measured before HTML conversion / Telegraph-link
|
|
4520
|
+
// substitution. Writes `turn` (pinned at executeReply entry) so the
|
|
4521
|
+
// flag always lands on the turn this reply belongs to.
|
|
4522
|
+
if (turn != null && isFinalAnswerReply({ text: rawText, disableNotification })) {
|
|
4523
|
+
turn.finalAnswerDelivered = true
|
|
4524
|
+
}
|
|
4499
4525
|
}
|
|
4500
4526
|
|
|
4501
4527
|
process.stderr.write(`telegram channel: reply: finalized chatId=${chat_id} messageIds=[${sentIds.join(',')}] chunks=${chunks.length}\n`)
|
|
@@ -4509,6 +4535,8 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
|
|
|
4509
4535
|
}
|
|
4510
4536
|
|
|
4511
4537
|
async function executeStreamReply(args: Record<string, unknown>): Promise<unknown> {
|
|
4538
|
+
// #1664 — pin the turn at entry; see executeReply for the rationale.
|
|
4539
|
+
const turn = currentTurn
|
|
4512
4540
|
if (!args.chat_id) throw new Error('stream_reply: chat_id is required')
|
|
4513
4541
|
if (args.text == null || args.text === '') throw new Error('stream_reply: text is required and cannot be empty')
|
|
4514
4542
|
|
|
@@ -4688,6 +4716,23 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
|
|
|
4688
4716
|
const sThreadId = args.message_thread_id != null ? Number(args.message_thread_id) : undefined
|
|
4689
4717
|
outboundDedup.record(sChatId, sThreadId, args.text as string, Date.now())
|
|
4690
4718
|
}
|
|
4719
|
+
// #1664 — mark the turn's final answer as delivered. For stream_reply a
|
|
4720
|
+
// call with done=true IS the final answer by definition (the model
|
|
4721
|
+
// explicitly closed the stream). A non-terminal stream_reply chunk also
|
|
4722
|
+
// counts when it carries the final-answer signals — notification-bearing
|
|
4723
|
+
// OR substantive length — via the same `isFinalAnswerReply` predicate
|
|
4724
|
+
// executeReply uses. See the CurrentTurn.finalAnswerDelivered doc-comment
|
|
4725
|
+
// for why replyCalled is not a sufficient signal here.
|
|
4726
|
+
if (
|
|
4727
|
+
turn != null &&
|
|
4728
|
+
isFinalAnswerReply({
|
|
4729
|
+
text: (args.text as string | undefined) ?? '',
|
|
4730
|
+
disableNotification: args.disable_notification === true,
|
|
4731
|
+
done: args.done === true,
|
|
4732
|
+
})
|
|
4733
|
+
) {
|
|
4734
|
+
turn.finalAnswerDelivered = true
|
|
4735
|
+
}
|
|
4691
4736
|
return { content: [{ type: 'text', text: `${result.status} (id: ${result.messageId ?? 'pending'})` }] }
|
|
4692
4737
|
}
|
|
4693
4738
|
|
|
@@ -5705,8 +5750,8 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
5705
5750
|
startedAt,
|
|
5706
5751
|
gatewayReceiveAt: startedAt,
|
|
5707
5752
|
replyCalled: false,
|
|
5753
|
+
finalAnswerDelivered: false,
|
|
5708
5754
|
capturedText: [],
|
|
5709
|
-
capturedTextLenAtLastReply: 0,
|
|
5710
5755
|
orphanedReplyTimeoutId: null,
|
|
5711
5756
|
registryKey: null,
|
|
5712
5757
|
lastAssistantMsgId: null,
|
|
@@ -5807,12 +5852,6 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
5807
5852
|
// placeholder-heartbeat label, which has been retired.
|
|
5808
5853
|
if (isTelegramReplyTool(name)) {
|
|
5809
5854
|
turn.replyCalled = true
|
|
5810
|
-
// #1291: pin the captured-text index at the moment of this reply
|
|
5811
|
-
// tool call. Anything pushed into capturedText after this point
|
|
5812
|
-
// is the post-reply tail (e.g. the substantive answer composed
|
|
5813
|
-
// in terminal text after a soft-commit "on it, back in a few").
|
|
5814
|
-
// decideTurnFlush slices from this index to flush the tail.
|
|
5815
|
-
turn.capturedTextLenAtLastReply = turn.capturedText.length
|
|
5816
5855
|
if (turn.orphanedReplyTimeoutId != null) {
|
|
5817
5856
|
clearTimeout(turn.orphanedReplyTimeoutId)
|
|
5818
5857
|
turn.orphanedReplyTimeoutId = null
|
|
@@ -5830,6 +5869,22 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
5830
5869
|
// #1067: snapshot at entry. The answer-stream creation closures
|
|
5831
5870
|
// below also read `turn` instead of currentTurn so they pin to
|
|
5832
5871
|
// this turn's chat for the stream's lifetime.
|
|
5872
|
+
//
|
|
5873
|
+
// #1664 ordering note: a `text` event can arrive AFTER turn_end has
|
|
5874
|
+
// nulled currentTurn (the issue observed `answer_lane_update
|
|
5875
|
+
// transport:"draft"` firing post-turn_end). Such a late event is
|
|
5876
|
+
// dropped here by the `turn != null` guard — it is NOT folded back
|
|
5877
|
+
// into the just-ended turn. That is deliberate and safe: by the
|
|
5878
|
+
// time this fires, the turn atom has been handed to
|
|
5879
|
+
// endCurrentTurnAtomic and turn_end has already run its flush /
|
|
5880
|
+
// silent-end decision; re-opening a closed turn (re-creating an
|
|
5881
|
+
// answer stream, re-evaluating decideTurnFlush) would be a large,
|
|
5882
|
+
// race-prone change. The #1664 safety net does not depend on
|
|
5883
|
+
// catching the late text: a turn whose real answer lost the race
|
|
5884
|
+
// ends with finalAnswerDelivered=false, so recordUndeliveredTurnEnd
|
|
5885
|
+
// engages the Stop-hook re-prompt and the model re-delivers the
|
|
5886
|
+
// answer through the reply tool. The dropped draft text is
|
|
5887
|
+
// recovered by re-prompt, not by post-hoc materialization.
|
|
5833
5888
|
const turn = currentTurn
|
|
5834
5889
|
if (turn != null) {
|
|
5835
5890
|
turn.capturedText.push(ev.text)
|
|
@@ -6072,20 +6127,8 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
6072
6127
|
chatId: turn.sessionChatId,
|
|
6073
6128
|
replyCalled: turn.replyCalled,
|
|
6074
6129
|
capturedText: turn.capturedText,
|
|
6075
|
-
capturedTextLenAtLastReply: turn.capturedTextLenAtLastReply,
|
|
6076
6130
|
flushEnabled: TURN_FLUSH_SAFETY_ENABLED,
|
|
6077
6131
|
})
|
|
6078
|
-
// #1291: when the model emitted a soft-commit reply followed by a
|
|
6079
|
-
// substantive terminal-only answer, decideTurnFlush returns
|
|
6080
|
-
// kind:'flush' with the post-reply tail. Log WARN so this case is
|
|
6081
|
-
// auditable — the model SHOULD have called reply for the tail, but
|
|
6082
|
-
// didn't, and the framework is covering for it.
|
|
6083
|
-
if (flushDecision.kind === 'flush' && turn.replyCalled) {
|
|
6084
|
-
process.stderr.write(
|
|
6085
|
-
`telegram gateway: WARN post-reply-tail flush (#1291) — model emitted ${flushDecision.text.length} chars after a prior reply call without a follow-up reply tool` +
|
|
6086
|
-
` chat=${chatId} turnStartedAt=${turn.startedAt}\n`,
|
|
6087
|
-
)
|
|
6088
|
-
}
|
|
6089
6132
|
if (flushDecision.kind === 'skip' && flushDecision.reason !== 'reply-called') {
|
|
6090
6133
|
process.stderr.write(
|
|
6091
6134
|
`telegram gateway: turn-flush skipped — reason=${flushDecision.reason}\n`,
|
|
@@ -6208,6 +6251,18 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
6208
6251
|
const backstopThreadId = threadId
|
|
6209
6252
|
const backstopCtrl = ctrl
|
|
6210
6253
|
|
|
6254
|
+
// #1664 — turn-flush only fires when !replyCalled (decideTurnFlush
|
|
6255
|
+
// returns 'reply-called' otherwise). It legitimately delivers the
|
|
6256
|
+
// model's terminal text as the answer, so the turn IS answered.
|
|
6257
|
+
// Mark it now, before the early-return below, so the turn atom shows
|
|
6258
|
+
// the answer as delivered: it is genuinely on its way out.
|
|
6259
|
+
// (The IIFE that actually sends runs after this branch's `return`;
|
|
6260
|
+
// since the silent-end block is on the sibling reply-called path
|
|
6261
|
+
// that this branch never reaches, this set is belt-and-braces —
|
|
6262
|
+
// it keeps the captured `turn` atom internally consistent for any
|
|
6263
|
+
// future reader.)
|
|
6264
|
+
turn.finalAnswerDelivered = true
|
|
6265
|
+
|
|
6211
6266
|
// #654 deterministic double-message fix. Hand off the pinned
|
|
6212
6267
|
// progress card BEFORE state reset so the driver doesn't keep
|
|
6213
6268
|
// editing it while turn-flush is rewriting it with the answer.
|
|
@@ -6440,17 +6495,31 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
6440
6495
|
longest_silent_gap_ms: outboundMetrics.longestOutboundGapMs,
|
|
6441
6496
|
ended_via: outboundMetrics.outboundCount > 0 ? 'reply' : 'silent',
|
|
6442
6497
|
})
|
|
6443
|
-
// #1122 PR4 / #1161: deterministic
|
|
6444
|
-
// silent-marker path above for the rationale).
|
|
6445
|
-
// - first
|
|
6446
|
-
// file so the Stop hook (silent-end-interrupt-stop.mjs)
|
|
6447
|
-
// blocks the session-end and re-prompts the agent to
|
|
6498
|
+
// #1122 PR4 / #1161 / #1664: deterministic undelivered-turn
|
|
6499
|
+
// handling (see the silent-marker path above for the rationale).
|
|
6500
|
+
// - first undelivered turn-end → recordUndeliveredTurnEnd writes the
|
|
6501
|
+
// state file so the Stop hook (silent-end-interrupt-stop.mjs)
|
|
6502
|
+
// blocks the session-end and re-prompts the agent to deliver.
|
|
6448
6503
|
// - the Stop-hook re-prompt is already spent and the agent is
|
|
6449
|
-
// STILL
|
|
6450
|
-
// deliver a user-facing fallback so the turn
|
|
6451
|
-
// vanishes (the user otherwise only sees the card
|
|
6452
|
-
|
|
6453
|
-
|
|
6504
|
+
// STILL undelivered → recordUndeliveredTurnEnd returns
|
|
6505
|
+
// exhausted:true; deliver a user-facing fallback so the turn
|
|
6506
|
+
// never just vanishes (the user otherwise only sees the card
|
|
6507
|
+
// disappear).
|
|
6508
|
+
//
|
|
6509
|
+
// #1664 — the trigger is "no final answer delivered", not "zero
|
|
6510
|
+
// outbound". `outboundCount === 0` is now just the special case
|
|
6511
|
+
// where nothing landed at all. The added case: the model sent an
|
|
6512
|
+
// interim ack via reply/stream_reply (outboundCount > 0,
|
|
6513
|
+
// replyCalled = true) but ended the turn with its real answer as
|
|
6514
|
+
// plain transcript text — rendered into an ephemeral answer-lane
|
|
6515
|
+
// draft and retracted at turn_end, never finalized. finalAnswer-
|
|
6516
|
+
// Delivered stays false there, so the re-prompt engages and the
|
|
6517
|
+
// model re-delivers the answer through the reply tool. NO_REPLY /
|
|
6518
|
+
// HEARTBEAT_OK silent-marker turns return earlier and never reach
|
|
6519
|
+
// this path. The turn-flush 'flush' branch also returns earlier
|
|
6520
|
+
// (and sets finalAnswerDelivered=true defensively).
|
|
6521
|
+
if (turn.finalAnswerDelivered === false) {
|
|
6522
|
+
const silentEnd = recordUndeliveredTurnEnd({
|
|
6454
6523
|
chatId,
|
|
6455
6524
|
threadId: threadId ?? null,
|
|
6456
6525
|
turnKey: tKey,
|
|
@@ -14983,26 +15052,11 @@ void (async () => {
|
|
|
14983
15052
|
// inside the sub-agent. Belt-and-braces with PR #557's
|
|
14984
15053
|
// multi-signal progress gate.
|
|
14985
15054
|
parentStateDir: STATE_DIR,
|
|
14986
|
-
|
|
14987
|
-
|
|
14988
|
-
|
|
14989
|
-
|
|
14990
|
-
|
|
14991
|
-
// gateway. Notifications are best-effort.
|
|
14992
|
-
void swallowingApiCall(
|
|
14993
|
-
() =>
|
|
14994
|
-
lockedBot.api.sendMessage(ownerChatId, text, {
|
|
14995
|
-
parse_mode: 'HTML',
|
|
14996
|
-
link_preview_options: { is_disabled: true },
|
|
14997
|
-
...(TOPIC_ID != null ? { message_thread_id: TOPIC_ID } : {}),
|
|
14998
|
-
}),
|
|
14999
|
-
{
|
|
15000
|
-
chat_id: ownerChatId,
|
|
15001
|
-
verb: 'subagent-watcher-notification',
|
|
15002
|
-
...(TOPIC_ID != null ? { threadId: TOPIC_ID } : {}),
|
|
15003
|
-
},
|
|
15004
|
-
)
|
|
15005
|
-
},
|
|
15055
|
+
// No user-facing notification callback: the card-era
|
|
15056
|
+
// "✓ Worker done" message was retired with the progress
|
|
15057
|
+
// card (#1122). Sub-agent completion reaches the user as
|
|
15058
|
+
// the model's own beat-4 handback reply; the watcher's
|
|
15059
|
+
// role here is registry liveness + the `onFinish` cue.
|
|
15006
15060
|
log: (msg) => process.stderr.write(`telegram gateway: ${msg}\n`),
|
|
15007
15061
|
// Option C (#393): route stall detections into the progress-card
|
|
15008
15062
|
// driver so the pinned card re-renders with a ⚠️ indicator even
|
|
@@ -2,12 +2,20 @@
|
|
|
2
2
|
/**
|
|
3
3
|
* Stop hook — auto-interrupt for silent-end turns.
|
|
4
4
|
*
|
|
5
|
-
* When a Claude Code session ends without the agent
|
|
6
|
-
*
|
|
5
|
+
* When a Claude Code session ends without the agent delivering a final
|
|
6
|
+
* answer to the user, the Telegram gateway writes a state file at
|
|
7
7
|
* $TELEGRAM_STATE_DIR/silent-end-pending.json. This hook reads that file and,
|
|
8
8
|
* if a first-time silent-end is detected (retryCount === 0), returns a
|
|
9
9
|
* decision:block to re-prompt the agent instead of letting the session close.
|
|
10
10
|
*
|
|
11
|
+
* #1664 — "no final answer delivered" covers two cases: (a) the turn ended
|
|
12
|
+
* with zero outbound (the original case), and (b) the model sent only an
|
|
13
|
+
* interim ack via reply/stream_reply but left its real answer as plain
|
|
14
|
+
* transcript text, which the gateway renders into an ephemeral draft and
|
|
15
|
+
* never finalizes. The re-prompt below tells the model to send its answer
|
|
16
|
+
* through the reply tool, or reply NO_REPLY if it genuinely has nothing to
|
|
17
|
+
* add or has already delivered its answer.
|
|
18
|
+
*
|
|
11
19
|
* On the second silent-end (retryCount >= MAX_RETRIES), the hook allows the
|
|
12
20
|
* stop. The gateway's turn-end path (recordSilentTurnEnd in silent-end.ts)
|
|
13
21
|
* detects the exhausted re-prompt and delivers a user-facing fallback
|
|
@@ -104,9 +112,13 @@ function main() {
|
|
|
104
112
|
JSON.stringify({
|
|
105
113
|
decision: 'block',
|
|
106
114
|
reason:
|
|
107
|
-
'
|
|
108
|
-
'
|
|
109
|
-
'
|
|
115
|
+
'This turn is ending without your final answer reaching the user. ' +
|
|
116
|
+
'If you wrote an answer as plain text (not via a tool), the user ' +
|
|
117
|
+
'cannot see it — only text sent through the reply tool is delivered. ' +
|
|
118
|
+
'Send your final answer now by calling mcp__switchroom-telegram__reply ' +
|
|
119
|
+
'(or mcp__switchroom-telegram__stream_reply with done=true). ' +
|
|
120
|
+
'If your final answer has already reached the user, or you ' +
|
|
121
|
+
'intentionally have nothing to add, reply with exactly NO_REPLY.',
|
|
110
122
|
}),
|
|
111
123
|
)
|
|
112
124
|
process.exit(0)
|
|
@@ -182,22 +182,39 @@ export function readSilentEndState(deps?: SilentEndDeps): SilentEndState | null
|
|
|
182
182
|
}
|
|
183
183
|
|
|
184
184
|
/**
|
|
185
|
-
* Record a user-message turn that ended
|
|
186
|
-
* report whether the deterministic re-prompt has been
|
|
187
|
-
* the gateway's single entry point for the main
|
|
185
|
+
* Record a user-message turn that ended WITHOUT the model delivering a
|
|
186
|
+
* final answer, and report whether the deterministic re-prompt has been
|
|
187
|
+
* exhausted. This is the gateway's single entry point for the main
|
|
188
|
+
* turn-end path.
|
|
188
189
|
*
|
|
189
|
-
*
|
|
190
|
+
* #1664 — the trigger generalized from "zero outbound" to "no final
|
|
191
|
+
* answer delivered". Two cases reach here now:
|
|
192
|
+
* 1. Zero outbound — the turn ended with nothing sent at all (the
|
|
193
|
+
* original #1122/#1161 silent-end case).
|
|
194
|
+
* 2. Interim-ack only — the model sent an ack via reply/stream_reply
|
|
195
|
+
* but ended the turn with its real answer as plain transcript text
|
|
196
|
+
* (rendered into an ephemeral answer-lane draft that gets retracted
|
|
197
|
+
* at turn_end, never finalized). The gateway tracks this via
|
|
198
|
+
* `CurrentTurn.finalAnswerDelivered`; case 1 is just the subset
|
|
199
|
+
* where that flag is false because nothing landed.
|
|
200
|
+
* In both cases the model still owes the user an answer, so the same
|
|
201
|
+
* re-prompt safety net applies — the framework re-prompts; the model
|
|
202
|
+
* re-delivers via the reply tool (never the framework materializing a
|
|
203
|
+
* message from the draft — see `reference/principles.md`).
|
|
204
|
+
*
|
|
205
|
+
* - First undelivered turn-end (no prior state, or prior `retryCount`
|
|
190
206
|
* still below `SILENT_END_MAX_RETRIES`) → writes the state file via
|
|
191
207
|
* `writeSilentEndState`, so `silent-end-interrupt-stop.mjs` blocks
|
|
192
208
|
* the stop and re-prompts the agent. Returns `{ exhausted: false }`.
|
|
193
209
|
*
|
|
194
|
-
* -
|
|
195
|
-
* `retryCount >= SILENT_END_MAX_RETRIES` → the Stop
|
|
196
|
-
* spent its re-prompt and the agent is STILL
|
|
197
|
-
* failed. Clears the state file (so the
|
|
198
|
-
* finds nothing pending and allows the
|
|
199
|
-
* `{ exhausted: true }` — the caller MUST
|
|
200
|
-
* fallback so the turn never just
|
|
210
|
+
* - An undelivered turn-end where the prior state for the SAME turn
|
|
211
|
+
* already shows `retryCount >= SILENT_END_MAX_RETRIES` → the Stop
|
|
212
|
+
* hook already spent its re-prompt and the agent is STILL
|
|
213
|
+
* undelivered. Recovery has failed. Clears the state file (so the
|
|
214
|
+
* Stop hook on this final turn finds nothing pending and allows the
|
|
215
|
+
* stop cleanly) and returns `{ exhausted: true }` — the caller MUST
|
|
216
|
+
* then deliver a user-facing fallback so the turn never just
|
|
217
|
+
* vanishes (#1161).
|
|
201
218
|
*
|
|
202
219
|
* Chat-less autonomous wakeup turns never reach here: the gateway only
|
|
203
220
|
* creates a `currentTurn` (and therefore only runs a turn-end handler)
|
|
@@ -228,3 +245,12 @@ export function recordSilentTurnEnd(
|
|
|
228
245
|
writeSilentEndState(args, deps)
|
|
229
246
|
return { exhausted: false }
|
|
230
247
|
}
|
|
248
|
+
|
|
249
|
+
/**
|
|
250
|
+
* #1664 — semantic alias for `recordSilentTurnEnd`. The trigger is now
|
|
251
|
+
* "no final answer delivered", of which "zero outbound" is one case; new
|
|
252
|
+
* callsites should prefer this name so the intent reads correctly. The
|
|
253
|
+
* behaviour, retry semantics, and `{exhausted}` contract are identical —
|
|
254
|
+
* `recordSilentTurnEnd` is kept for the existing callers and tests.
|
|
255
|
+
*/
|
|
256
|
+
export const recordUndeliveredTurnEnd = recordSilentTurnEnd
|
|
@@ -146,11 +146,6 @@ export interface SubagentWatcherConfig {
|
|
|
146
146
|
* an agent's home pollutes the watcher with phantom registrations).
|
|
147
147
|
*/
|
|
148
148
|
agentCwd?: string
|
|
149
|
-
/**
|
|
150
|
-
* Send a fresh (non-edit) Telegram message. For stall / completion
|
|
151
|
-
* state-transition notifications.
|
|
152
|
-
*/
|
|
153
|
-
sendNotification: (text: string) => void
|
|
154
149
|
/**
|
|
155
150
|
* How often to re-scan for new subagent dirs (ms). Default 1000.
|
|
156
151
|
*/
|
|
@@ -862,21 +857,19 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
|
|
|
862
857
|
|
|
863
858
|
if (entry.state === 'done' && !entry.completionNotified) {
|
|
864
859
|
entry.completionNotified = true
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
//
|
|
876
|
-
//
|
|
877
|
-
//
|
|
878
|
-
// boot get their `completionNotified=true` shortcut in registerAgent
|
|
879
|
-
// and skip this path entirely — only post-boot transitions fire.
|
|
860
|
+
// Card retired (#1122): the watcher no longer sends a user-facing
|
|
861
|
+
// "✓ Worker done" message. A framework-authored status line is a
|
|
862
|
+
// conversational-pacing anti-pattern, and the heuristic that drove
|
|
863
|
+
// it (silent-stall synthesis) fired on a worker mid-`Bash` as
|
|
864
|
+
// readily as on a finished one. The user-facing handback is the
|
|
865
|
+
// model's own beat-4 reply, woken by Claude Code's native
|
|
866
|
+
// background-task notification. Completion is surfaced here only
|
|
867
|
+
// via the structured `onFinish` callback — emitted before the
|
|
868
|
+
// deferred cleanup runs so the callback always sees a live
|
|
869
|
+
// registry entry. Historical entries that had already completed at
|
|
870
|
+
// boot get their `completionNotified=true` shortcut in
|
|
871
|
+
// registerAgent and skip this path — only post-boot transitions
|
|
872
|
+
// fire.
|
|
880
873
|
if (config.onFinish) {
|
|
881
874
|
try {
|
|
882
875
|
config.onFinish({
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit coverage for the #1664 final-answer detection predicate.
|
|
3
|
+
*
|
|
4
|
+
* `isFinalAnswerReply` is the finer signal the silent-end re-prompt needs:
|
|
5
|
+
* the gateway's `replyCalled` flag flips on the first reply / stream_reply
|
|
6
|
+
* tool use and cannot tell an interim ack from the real answer. This
|
|
7
|
+
* predicate classifies each reply so a turn whose every reply was "interim"
|
|
8
|
+
* (and whose real answer ended up as plain transcript text) ends with
|
|
9
|
+
* `finalAnswerDelivered === false` and triggers the re-prompt — the #1664
|
|
10
|
+
* bug (streamed answers rendered to a draft, retracted at turn_end, lost).
|
|
11
|
+
*
|
|
12
|
+
* These tests pin the pure predicate. The gateway wires it into
|
|
13
|
+
* executeReply / executeStreamReply (covered by the gateway integration
|
|
14
|
+
* surface); pinning the policy here keeps it auditable without importing
|
|
15
|
+
* the multi-thousand-line gateway module.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { describe, it, expect } from 'vitest'
|
|
19
|
+
import { isFinalAnswerReply, FINAL_ANSWER_MIN_CHARS } from '../final-answer-detect.js'
|
|
20
|
+
|
|
21
|
+
describe('isFinalAnswerReply — #1664 final-answer classification', () => {
|
|
22
|
+
it('classifies a notification-bearing reply as the final answer', () => {
|
|
23
|
+
// disable_notification:false is the pacing contract's "final answer"
|
|
24
|
+
// signal — interim updates pass disable_notification:true.
|
|
25
|
+
expect(
|
|
26
|
+
isFinalAnswerReply({ text: 'short answer', disableNotification: false }),
|
|
27
|
+
).toBe(true)
|
|
28
|
+
})
|
|
29
|
+
|
|
30
|
+
it('classifies a short interim ack (disable_notification:true) as NOT final', () => {
|
|
31
|
+
expect(
|
|
32
|
+
isFinalAnswerReply({ text: 'on it…', disableNotification: true }),
|
|
33
|
+
).toBe(false)
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
it('length backstop: a long reply mis-marked interim still counts as final', () => {
|
|
37
|
+
const longText = 'x'.repeat(FINAL_ANSWER_MIN_CHARS)
|
|
38
|
+
expect(
|
|
39
|
+
isFinalAnswerReply({ text: longText, disableNotification: true }),
|
|
40
|
+
).toBe(true)
|
|
41
|
+
})
|
|
42
|
+
|
|
43
|
+
it('length backstop is inclusive at exactly FINAL_ANSWER_MIN_CHARS', () => {
|
|
44
|
+
expect(
|
|
45
|
+
isFinalAnswerReply({
|
|
46
|
+
text: 'x'.repeat(FINAL_ANSWER_MIN_CHARS),
|
|
47
|
+
disableNotification: true,
|
|
48
|
+
}),
|
|
49
|
+
).toBe(true)
|
|
50
|
+
// One char under the threshold and marked interim → still interim.
|
|
51
|
+
expect(
|
|
52
|
+
isFinalAnswerReply({
|
|
53
|
+
text: 'x'.repeat(FINAL_ANSWER_MIN_CHARS - 1),
|
|
54
|
+
disableNotification: true,
|
|
55
|
+
}),
|
|
56
|
+
).toBe(false)
|
|
57
|
+
})
|
|
58
|
+
|
|
59
|
+
it('stream_reply done=true is always the final answer, even short + interim', () => {
|
|
60
|
+
// A done=true call explicitly closes the stream — it IS the answer,
|
|
61
|
+
// regardless of length or the notification flag.
|
|
62
|
+
expect(
|
|
63
|
+
isFinalAnswerReply({ text: 'ok', disableNotification: true, done: true }),
|
|
64
|
+
).toBe(true)
|
|
65
|
+
})
|
|
66
|
+
|
|
67
|
+
it('a non-terminal stream_reply chunk (done=false) is classified like a plain reply', () => {
|
|
68
|
+
// Short interim chunk → not final.
|
|
69
|
+
expect(
|
|
70
|
+
isFinalAnswerReply({ text: 'thinking…', disableNotification: true, done: false }),
|
|
71
|
+
).toBe(false)
|
|
72
|
+
// Notification-bearing chunk → final.
|
|
73
|
+
expect(
|
|
74
|
+
isFinalAnswerReply({ text: 'here it is', disableNotification: false, done: false }),
|
|
75
|
+
).toBe(true)
|
|
76
|
+
})
|
|
77
|
+
|
|
78
|
+
it('an empty reply marked interim is NOT the final answer', () => {
|
|
79
|
+
expect(
|
|
80
|
+
isFinalAnswerReply({ text: '', disableNotification: true }),
|
|
81
|
+
).toBe(false)
|
|
82
|
+
})
|
|
83
|
+
|
|
84
|
+
it('FINAL_ANSWER_MIN_CHARS is the documented 200-char backstop', () => {
|
|
85
|
+
// Guards the constant against silent drift — the value is referenced
|
|
86
|
+
// in the CurrentTurn doc-comment and the Stop-hook rationale.
|
|
87
|
+
expect(FINAL_ANSWER_MIN_CHARS).toBe(200)
|
|
88
|
+
})
|
|
89
|
+
})
|
|
@@ -50,7 +50,6 @@ describe('subagent-watcher: WorkerEntry.lastTool', () => {
|
|
|
50
50
|
const intervals: Array<{ fn: () => void }> = []
|
|
51
51
|
const w = startSubagentWatcher({
|
|
52
52
|
agentDir,
|
|
53
|
-
sendNotification: () => {},
|
|
54
53
|
stallThresholdMs: 60_000,
|
|
55
54
|
rescanMs: 500,
|
|
56
55
|
now: () => Date.now(),
|
|
@@ -8,8 +8,10 @@ import {
|
|
|
8
8
|
clearSilentEndState,
|
|
9
9
|
readSilentEndState,
|
|
10
10
|
recordSilentTurnEnd,
|
|
11
|
+
recordUndeliveredTurnEnd,
|
|
11
12
|
SILENT_END_MAX_RETRIES,
|
|
12
13
|
} from '../silent-end.js'
|
|
14
|
+
import { isFinalAnswerReply } from '../final-answer-detect.js'
|
|
13
15
|
|
|
14
16
|
let stateDir: string
|
|
15
17
|
const ORIG_ENV = process.env.TELEGRAM_STATE_DIR
|
|
@@ -187,6 +189,118 @@ describe('recordSilentTurnEnd — #1161 exhaustion detection', () => {
|
|
|
187
189
|
})
|
|
188
190
|
})
|
|
189
191
|
|
|
192
|
+
describe('recordUndeliveredTurnEnd — #1664 extended trigger', () => {
|
|
193
|
+
it('is the same function as recordSilentTurnEnd (semantic alias)', () => {
|
|
194
|
+
expect(recordUndeliveredTurnEnd).toBe(recordSilentTurnEnd)
|
|
195
|
+
})
|
|
196
|
+
|
|
197
|
+
// The gateway computes `finalAnswerDelivered` by OR-ing isFinalAnswerReply
|
|
198
|
+
// across every reply landed this turn, then engages the re-prompt iff the
|
|
199
|
+
// flag is still false at turn_end. These tests reproduce that exact
|
|
200
|
+
// decision: classify the turn's replies, then call recordUndeliveredTurnEnd
|
|
201
|
+
// only when no reply qualified.
|
|
202
|
+
function simulateTurnEnd(
|
|
203
|
+
replies: Array<{ text: string; disableNotification: boolean; done?: boolean }>,
|
|
204
|
+
turnKey: string,
|
|
205
|
+
): { finalAnswerDelivered: boolean; rePromptEngaged: boolean } {
|
|
206
|
+
const finalAnswerDelivered = replies.some((r) =>
|
|
207
|
+
isFinalAnswerReply(r),
|
|
208
|
+
)
|
|
209
|
+
let rePromptEngaged = false
|
|
210
|
+
if (finalAnswerDelivered === false) {
|
|
211
|
+
recordUndeliveredTurnEnd({ chatId: 'c', threadId: null, turnKey })
|
|
212
|
+
rePromptEngaged = true
|
|
213
|
+
}
|
|
214
|
+
return { finalAnswerDelivered, rePromptEngaged }
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
it('#1664 regression: ack reply + answer-as-transcript → re-prompt fires', () => {
|
|
218
|
+
// The exact #1664 shape: the model sent a short interim ack via the
|
|
219
|
+
// reply tool (disable_notification:true), then ended the turn with its
|
|
220
|
+
// real answer as plain transcript text — which the gateway renders into
|
|
221
|
+
// an ephemeral draft and retracts at turn_end, never finalized. No
|
|
222
|
+
// reply qualified as the final answer, so the turn is undelivered.
|
|
223
|
+
const r = simulateTurnEnd(
|
|
224
|
+
[{ text: 'On it — give me a moment.', disableNotification: true }],
|
|
225
|
+
'c:1664',
|
|
226
|
+
)
|
|
227
|
+
expect(r.finalAnswerDelivered).toBe(false)
|
|
228
|
+
expect(r.rePromptEngaged).toBe(true)
|
|
229
|
+
// State file written so silent-end-interrupt-stop.mjs blocks the stop.
|
|
230
|
+
expect(readSilentEndState()).toMatchObject({ turnKey: 'c:1664', retryCount: 0 })
|
|
231
|
+
})
|
|
232
|
+
|
|
233
|
+
it('a turn with a final-answer reply (notification-bearing) → re-prompt NOT engaged', () => {
|
|
234
|
+
const r = simulateTurnEnd(
|
|
235
|
+
[{ text: 'Here is the answer.', disableNotification: false }],
|
|
236
|
+
'c:final',
|
|
237
|
+
)
|
|
238
|
+
expect(r.finalAnswerDelivered).toBe(true)
|
|
239
|
+
expect(r.rePromptEngaged).toBe(false)
|
|
240
|
+
expect(readSilentEndState()).toBeNull()
|
|
241
|
+
})
|
|
242
|
+
|
|
243
|
+
it('a long reply mis-marked interim → re-prompt NOT engaged (length backstop)', () => {
|
|
244
|
+
const r = simulateTurnEnd(
|
|
245
|
+
[{ text: 'x'.repeat(500), disableNotification: true }],
|
|
246
|
+
'c:long',
|
|
247
|
+
)
|
|
248
|
+
expect(r.finalAnswerDelivered).toBe(true)
|
|
249
|
+
expect(r.rePromptEngaged).toBe(false)
|
|
250
|
+
expect(readSilentEndState()).toBeNull()
|
|
251
|
+
})
|
|
252
|
+
|
|
253
|
+
it('zero-outbound turn → re-prompt still engaged (regression of the original case)', () => {
|
|
254
|
+
// No replies at all — the original #1122 silent-end case is now just
|
|
255
|
+
// the subset of "no final answer delivered" where nothing landed.
|
|
256
|
+
const r = simulateTurnEnd([], 'c:zero')
|
|
257
|
+
expect(r.finalAnswerDelivered).toBe(false)
|
|
258
|
+
expect(r.rePromptEngaged).toBe(true)
|
|
259
|
+
expect(readSilentEndState()).toMatchObject({ turnKey: 'c:zero', retryCount: 0 })
|
|
260
|
+
})
|
|
261
|
+
|
|
262
|
+
it('interim ack followed by a final-answer reply in the same turn → NOT engaged', () => {
|
|
263
|
+
// The model ack'd first then properly delivered — finalAnswerDelivered
|
|
264
|
+
// latches true on the second reply; the turn is answered.
|
|
265
|
+
const r = simulateTurnEnd(
|
|
266
|
+
[
|
|
267
|
+
{ text: 'Looking into it…', disableNotification: true },
|
|
268
|
+
{ text: 'Done — the result is 42.', disableNotification: false },
|
|
269
|
+
],
|
|
270
|
+
'c:ack-then-final',
|
|
271
|
+
)
|
|
272
|
+
expect(r.finalAnswerDelivered).toBe(true)
|
|
273
|
+
expect(r.rePromptEngaged).toBe(false)
|
|
274
|
+
expect(readSilentEndState()).toBeNull()
|
|
275
|
+
})
|
|
276
|
+
|
|
277
|
+
it('stream_reply done=true counts as the final answer → NOT engaged', () => {
|
|
278
|
+
const r = simulateTurnEnd(
|
|
279
|
+
[{ text: 'ok', disableNotification: true, done: true }],
|
|
280
|
+
'c:stream-done',
|
|
281
|
+
)
|
|
282
|
+
expect(r.finalAnswerDelivered).toBe(true)
|
|
283
|
+
expect(r.rePromptEngaged).toBe(false)
|
|
284
|
+
expect(readSilentEndState()).toBeNull()
|
|
285
|
+
})
|
|
286
|
+
|
|
287
|
+
it('exhaustion still applies on the #1664 path after the Stop-hook re-prompt', () => {
|
|
288
|
+
// First undelivered turn-end writes state.
|
|
289
|
+
expect(simulateTurnEnd(
|
|
290
|
+
[{ text: 'one sec', disableNotification: true }],
|
|
291
|
+
'c:exhaust',
|
|
292
|
+
).rePromptEngaged).toBe(true)
|
|
293
|
+
// Stop hook blocks once and bumps retryCount (simulated).
|
|
294
|
+
const path = join(stateDir, 'silent-end-pending.json')
|
|
295
|
+
const s = readSilentEndState()!
|
|
296
|
+
writeFileSync(path, JSON.stringify({ ...s, retryCount: s.retryCount + 1 }))
|
|
297
|
+
// Re-prompted turn STILL ends with only an interim ack → exhausted.
|
|
298
|
+
const second = recordUndeliveredTurnEnd({ chatId: 'c', threadId: null, turnKey: 'c:exhaust' })
|
|
299
|
+
expect(second.exhausted).toBe(true)
|
|
300
|
+
expect(readSilentEndState()).toBeNull()
|
|
301
|
+
})
|
|
302
|
+
})
|
|
303
|
+
|
|
190
304
|
describe('silent-end-interrupt-stop hook — integration', () => {
|
|
191
305
|
const hookPath = join(__dirname, '..', 'hooks', 'silent-end-interrupt-stop.mjs')
|
|
192
306
|
|
|
@@ -222,6 +336,10 @@ describe('silent-end-interrupt-stop hook — integration', () => {
|
|
|
222
336
|
const out = JSON.parse(r.stdout.trim())
|
|
223
337
|
expect(out.decision).toBe('block')
|
|
224
338
|
expect(out.reason).toContain('reply')
|
|
339
|
+
// #1664 — the re-prompt must offer the NO_REPLY escape hatch so a
|
|
340
|
+
// model that already delivered (or intentionally has nothing to add)
|
|
341
|
+
// can end the turn cleanly instead of being forced to re-send.
|
|
342
|
+
expect(out.reason).toContain('NO_REPLY')
|
|
225
343
|
// retryCount must have been incremented to 1
|
|
226
344
|
expect(readSilentEndState()!.retryCount).toBe(1)
|
|
227
345
|
})
|