switchroom 0.14.56 → 0.14.58
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +68 -13
- package/telegram-plugin/final-answer-detect.ts +34 -0
- package/telegram-plugin/gateway/feed-reopen-gate.ts +162 -0
- package/telegram-plugin/gateway/gateway.ts +134 -10
- package/telegram-plugin/tests/feed-reopen-gate.test.ts +133 -0
- package/telegram-plugin/tests/final-answer-detect.test.ts +67 -1
- package/telegram-plugin/tests/tool-activity-summary.test.ts +26 -0
package/dist/cli/switchroom.js
CHANGED
|
@@ -49463,8 +49463,8 @@ var {
|
|
|
49463
49463
|
} = import__.default;
|
|
49464
49464
|
|
|
49465
49465
|
// src/build-info.ts
|
|
49466
|
-
var VERSION = "0.14.56";
|
|
49467
|
-
var COMMIT_SHA = "
|
|
49466
|
+
var VERSION = "0.14.58";
|
|
49467
|
+
var COMMIT_SHA = "fc4023ed";
|
|
49468
49468
|
|
|
49469
49469
|
// src/cli/agent.ts
|
|
49470
49470
|
init_source();
|
package/package.json
CHANGED
|
package/telegram-plugin/dist/gateway/gateway.js
CHANGED
|
@@ -39372,6 +39372,13 @@ function isFinalAnswerReply(input) {
|
|
|
39372
39372
|
return true;
|
|
39373
39373
|
return false;
|
|
39374
39374
|
}
|
|
39375
|
+
function isSubstantiveFinalReply(input) {
|
|
39376
|
+
if (input.done === true)
|
|
39377
|
+
return true;
|
|
39378
|
+
if (input.text.length >= FINAL_ANSWER_MIN_CHARS)
|
|
39379
|
+
return true;
|
|
39380
|
+
return false;
|
|
39381
|
+
}
|
|
39375
39382
|
|
|
39376
39383
|
// turn-flush-safety.ts
|
|
39377
39384
|
var SILENT_MARKERS = new Set(["NO_REPLY", "HEARTBEAT_OK"]);
|
|
@@ -47366,6 +47373,28 @@ function shouldArmNoReplyDrain(input) {
|
|
|
47366
47373
|
return input.bufferedDepth > 0;
|
|
47367
47374
|
}
|
|
47368
47375
|
|
|
47376
|
+
// gateway/feed-reopen-gate.ts
|
|
47377
|
+
function shouldReopenFeedAfterAck(input) {
|
|
47378
|
+
if (!input.finalAnswerDelivered)
|
|
47379
|
+
return false;
|
|
47380
|
+
if (input.finalAnswerSubstantive)
|
|
47381
|
+
return false;
|
|
47382
|
+
return input.enabled === true;
|
|
47383
|
+
}
|
|
47384
|
+
function decideFeedReopen(input) {
|
|
47385
|
+
if (!shouldReopenFeedAfterAck(input)) {
|
|
47386
|
+
return { dropLabel: true };
|
|
47387
|
+
}
|
|
47388
|
+
return {
|
|
47389
|
+
dropLabel: false,
|
|
47390
|
+
reset: {
|
|
47391
|
+
finalAnswerDelivered: false,
|
|
47392
|
+
activityMessageId: null,
|
|
47393
|
+
activityLastSentRender: null
|
|
47394
|
+
}
|
|
47395
|
+
};
|
|
47396
|
+
}
|
|
47397
|
+
|
|
47369
47398
|
// gateway/answer-thread-resolve.ts
|
|
47370
47399
|
function resolveAnswerThreadId(input) {
|
|
47371
47400
|
if (input.explicitThreadId != null)
|
|
@@ -52195,10 +52224,10 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
|
|
|
52195
52224
|
}
|
|
52196
52225
|
|
|
52197
52226
|
// ../src/build-info.ts
|
|
52198
|
-
var VERSION = "0.14.56";
|
|
52199
|
-
var COMMIT_SHA = "
|
|
52200
|
-
var COMMIT_DATE = "2026-06-
|
|
52201
|
-
var LATEST_PR =
|
|
52227
|
+
var VERSION = "0.14.58";
|
|
52228
|
+
var COMMIT_SHA = "fc4023ed";
|
|
52229
|
+
var COMMIT_DATE = "2026-06-04T02:50:15Z";
|
|
52230
|
+
var LATEST_PR = 2142;
|
|
52202
52231
|
var COMMITS_AHEAD_OF_TAG = 0;
|
|
52203
52232
|
|
|
52204
52233
|
// gateway/boot-version.ts
|
|
@@ -53403,6 +53432,7 @@ var SERIALIZE_NOREPLY_DRAIN_MS = Number.isFinite(_noReplyDrainParsed) && _noRepl
|
|
|
53403
53432
|
var TURN_ORIGIN_ROUTING_ENABLED = process.env.SWITCHROOM_TURN_ORIGIN_ROUTING !== "0";
|
|
53404
53433
|
var TOPIC_FRAMING_ENABLED = process.env.SWITCHROOM_TOPIC_FRAMING !== "0";
|
|
53405
53434
|
var QUEUED_STATUS_UX_ENABLED = process.env.SWITCHROOM_QUEUED_STATUS_UX !== "0";
|
|
53435
|
+
var FEED_REOPEN_AFTER_ACK_ENABLED = process.env.SWITCHROOM_FEED_REOPEN_AFTER_ACK !== "0";
|
|
53406
53436
|
function turnInFlightForGate() {
|
|
53407
53437
|
return isDeliveryCutoverEnabled() ? isMachineInTurn() : claudeBusyKeys.size > 0;
|
|
53408
53438
|
}
|
|
@@ -55690,6 +55720,10 @@ ${url}`;
|
|
|
55690
55720
|
disableNotification
|
|
55691
55721
|
})) {
|
|
55692
55722
|
turn2.finalAnswerDelivered = true;
|
|
55723
|
+
turn2.finalAnswerSubstantive = isSubstantiveFinalReply({
|
|
55724
|
+
text: decision.mergedText,
|
|
55725
|
+
disableNotification
|
|
55726
|
+
});
|
|
55693
55727
|
}
|
|
55694
55728
|
outboundDedup.record(chat_id, threadId, decision.mergedText, Date.now(), turn2?.registryKey ?? null);
|
|
55695
55729
|
silentAnchorEditDone = true;
|
|
@@ -55889,6 +55923,7 @@ ${url}`;
|
|
|
55889
55923
|
noteSignal(statusKey(chat_id, threadId), Date.now());
|
|
55890
55924
|
if (turn != null && isFinalAnswerReply({ text: rawText, disableNotification })) {
|
|
55891
55925
|
turn.finalAnswerDelivered = true;
|
|
55926
|
+
turn.finalAnswerSubstantive = isSubstantiveFinalReply({ text: rawText, disableNotification });
|
|
55892
55927
|
finalizeStatusReaction(chat_id, threadId, "done");
|
|
55893
55928
|
}
|
|
55894
55929
|
releaseTurnBufferGate(statusKey(chat_id, threadId), turn ?? undefined);
|
|
@@ -56055,6 +56090,11 @@ async function executeStreamReply(args) {
|
|
|
56055
56090
|
done: args.done === true
|
|
56056
56091
|
})) {
|
|
56057
56092
|
turn.finalAnswerDelivered = true;
|
|
56093
|
+
turn.finalAnswerSubstantive = isSubstantiveFinalReply({
|
|
56094
|
+
text: args.text ?? "",
|
|
56095
|
+
disableNotification: args.disable_notification === true,
|
|
56096
|
+
done: args.done === true
|
|
56097
|
+
});
|
|
56058
56098
|
const streamThreadIdForClear = args.message_thread_id != null ? Number(args.message_thread_id) : undefined;
|
|
56059
56099
|
clearSilentEndState(statusKey(streamChatId, streamThreadIdForClear));
|
|
56060
56100
|
}
|
|
@@ -57090,7 +57130,7 @@ async function drainActivitySummary(turn) {
|
|
|
57090
57130
|
turn.activityInFlight = null;
|
|
57091
57131
|
}
|
|
57092
57132
|
}
|
|
57093
|
-
function clearActivitySummary(turn) {
|
|
57133
|
+
function clearActivitySummary(turn, finalHtmlOverride) {
|
|
57094
57134
|
const chat = turn.sessionChatId;
|
|
57095
57135
|
const thread = turn.sessionThreadId;
|
|
57096
57136
|
const inFlight = turn.activityInFlight ?? Promise.resolve();
|
|
@@ -57108,7 +57148,7 @@ function clearActivitySummary(turn) {
|
|
|
57108
57148
|
}
|
|
57109
57149
|
return;
|
|
57110
57150
|
}
|
|
57111
|
-
const finalHtml = composeTurnActivity(turn, true);
|
|
57151
|
+
const finalHtml = finalHtmlOverride !== undefined ? finalHtmlOverride : composeTurnActivity(turn, true);
|
|
57112
57152
|
if (finalHtml == null)
|
|
57113
57153
|
return;
|
|
57114
57154
|
try {
|
|
@@ -57148,6 +57188,7 @@ function handleSessionEvent(ev) {
|
|
|
57148
57188
|
gatewayReceiveAt: startedAt,
|
|
57149
57189
|
replyCalled: false,
|
|
57150
57190
|
finalAnswerDelivered: false,
|
|
57191
|
+
finalAnswerSubstantive: false,
|
|
57151
57192
|
firstPingAt: null,
|
|
57152
57193
|
silentAnchorMessageId: null,
|
|
57153
57194
|
silentAnchorText: "",
|
|
@@ -57259,8 +57300,18 @@ function handleSessionEvent(ev) {
|
|
|
57259
57300
|
return;
|
|
57260
57301
|
if (isTelegramSurfaceTool(ev.toolName))
|
|
57261
57302
|
return;
|
|
57262
|
-
if (turn.finalAnswerDelivered)
|
|
57263
|
-
|
|
57303
|
+
if (turn.finalAnswerDelivered) {
|
|
57304
|
+
const reopen = decideFeedReopen({
|
|
57305
|
+
finalAnswerDelivered: turn.finalAnswerDelivered,
|
|
57306
|
+
finalAnswerSubstantive: turn.finalAnswerSubstantive,
|
|
57307
|
+
enabled: FEED_REOPEN_AFTER_ACK_ENABLED
|
|
57308
|
+
});
|
|
57309
|
+
if (reopen.dropLabel)
|
|
57310
|
+
return;
|
|
57311
|
+
turn.finalAnswerDelivered = reopen.reset.finalAnswerDelivered;
|
|
57312
|
+
turn.activityMessageId = reopen.reset.activityMessageId;
|
|
57313
|
+
turn.activityLastSentRender = reopen.reset.activityLastSentRender;
|
|
57314
|
+
}
|
|
57264
57315
|
const rendered = appendActivityLabel(turn.mirrorLines, ev.label);
|
|
57265
57316
|
if (rendered != null) {
|
|
57266
57317
|
turn.activityPendingRender = composeTurnActivity(turn) ?? rendered;
|
|
@@ -57412,6 +57463,7 @@ function handleSessionEvent(ev) {
|
|
|
57412
57463
|
turn.answerStream = null;
|
|
57413
57464
|
streamFinalizedAsAnswer = true;
|
|
57414
57465
|
turn.finalAnswerDelivered = true;
|
|
57466
|
+
turn.finalAnswerSubstantive = true;
|
|
57415
57467
|
const oldStreamedMsgId = streamedMsgId;
|
|
57416
57468
|
(async () => {
|
|
57417
57469
|
let materializedId;
|
|
@@ -57538,6 +57590,7 @@ function handleSessionEvent(ev) {
|
|
|
57538
57590
|
}
|
|
57539
57591
|
}
|
|
57540
57592
|
turn.finalAnswerDelivered = true;
|
|
57593
|
+
turn.finalAnswerSubstantive = true;
|
|
57541
57594
|
const cardTakeover = progressDriver?.takeOverCard({
|
|
57542
57595
|
chatId: backstopChatId,
|
|
57543
57596
|
threadId: backstopThreadId != null ? String(backstopThreadId) : undefined
|
|
@@ -63416,16 +63469,18 @@ var didOneTimeSetup = false;
|
|
|
63416
63469
|
const isBackground = dispatch.isBackground;
|
|
63417
63470
|
if (!isBackground) {
|
|
63418
63471
|
const turn = currentTurn;
|
|
63419
|
-
|
|
63420
|
-
if (turn != null && removed) {
|
|
63472
|
+
if (turn != null && turn.foregroundSubAgents.has(agentId)) {
|
|
63421
63473
|
const action = foregroundFinishAction({
|
|
63422
|
-
removed,
|
|
63474
|
+
removed: true,
|
|
63423
63475
|
replyCalled: turn.replyCalled,
|
|
63424
|
-
remainingForeground: turn.foregroundSubAgents.size
|
|
63476
|
+
remainingForeground: turn.foregroundSubAgents.size - 1
|
|
63425
63477
|
});
|
|
63426
63478
|
if (action === "handoff-clear") {
|
|
63427
|
-
|
|
63479
|
+
const finalHtml = composeTurnActivity(turn, true);
|
|
63480
|
+
turn.foregroundSubAgents.delete(agentId);
|
|
63481
|
+
clearActivitySummary(turn, finalHtml);
|
|
63428
63482
|
} else if (action === "recompose") {
|
|
63483
|
+
turn.foregroundSubAgents.delete(agentId);
|
|
63429
63484
|
const rendered = composeTurnActivity(turn);
|
|
63430
63485
|
if (rendered != null) {
|
|
63431
63486
|
turn.activityPendingRender = rendered;
|
|
package/telegram-plugin/final-answer-detect.ts
CHANGED
|
@@ -81,3 +81,37 @@ export function isFinalAnswerReply(input: FinalAnswerReplyInput): boolean {
|
|
|
81
81
|
if (input.text.length >= FINAL_ANSWER_MIN_CHARS) return true
|
|
82
82
|
return false
|
|
83
83
|
}
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* Pure predicate: was this reply a *substantive* final answer (as opposed
|
|
87
|
+
* to a reply that is only "final" because it pinged)? `true` if EITHER:
|
|
88
|
+
*
|
|
89
|
+
* - `done === true` — a `stream_reply` terminal call closing the stream.
|
|
90
|
+
* - `text.length >= FINAL_ANSWER_MIN_CHARS` — a substantive-length answer.
|
|
91
|
+
*
|
|
92
|
+
* This is `isFinalAnswerReply` MINUS the notification-only path. The
|
|
93
|
+
* distinction matters for the feed-reopen-after-ack gate
|
|
94
|
+
* (`feed-reopen-gate.ts`): a *short pinging* reply ("on it, checking
|
|
95
|
+
* Brevo…") is classified final by `isFinalAnswerReply` (because it pings)
|
|
96
|
+
* yet is NOT substantive — it is an interim ACK. Only such an ack should
|
|
97
|
+
* cause the live activity feed to re-open when post-ack tool work arrives.
|
|
98
|
+
*
|
|
99
|
+
* A genuine final answer (long, or a stream `done: true`) followed by
|
|
100
|
+
* routine post-answer housekeeping (a memory write / TodoWrite / Bash —
|
|
101
|
+
* none of which are surface tools, so they reach the tool_label handler)
|
|
102
|
+
* must NOT re-open the feed and must NOT reset `finalAnswerDelivered`,
|
|
103
|
+
* otherwise the silent-end re-prompt would spuriously fire and the agent
|
|
104
|
+
* would re-deliver a duplicate / garbled answer.
|
|
105
|
+
*
|
|
106
|
+
* Residual: a reply that is genuinely the final answer yet is BOTH short
|
|
107
|
+
* (<200 chars) AND pinging (e.g. "Done!") is indistinguishable here from
|
|
108
|
+
* an ack, so post-answer housekeeping after it still re-opens the feed.
|
|
109
|
+
* That is much rarer than the housekeeping-after-long-answer case this
|
|
110
|
+
* predicate protects, and is kill-switchable via
|
|
111
|
+
* `SWITCHROOM_FEED_REOPEN_AFTER_ACK=0`.
|
|
112
|
+
*/
|
|
113
|
+
export function isSubstantiveFinalReply(input: FinalAnswerReplyInput): boolean {
|
|
114
|
+
if (input.done === true) return true
|
|
115
|
+
if (input.text.length >= FINAL_ANSWER_MIN_CHARS) return true
|
|
116
|
+
return false
|
|
117
|
+
}
|
|
package/telegram-plugin/gateway/feed-reopen-gate.ts
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Feed-reopen-after-ack gate (ack-first live-activity visibility).
|
|
3
|
+
*
|
|
4
|
+
* Pure decision: a `tool_label` arrived (the model is calling a tool, i.e.
|
|
5
|
+
* still WORKING) for a turn that has already been classified as having
|
|
6
|
+
* delivered its final answer. Should the gateway *re-open* the live
|
|
7
|
+
* activity feed for the post-ack work?
|
|
8
|
+
*
|
|
9
|
+
* ## The bug this closes
|
|
10
|
+
*
|
|
11
|
+
* In a forum supergroup one agent owns the whole supergroup — a single
|
|
12
|
+
* sequential `claude` CLI with a singleton `currentTurn`. When the model
|
|
13
|
+
* ACKS FIRST ("on it, checking Brevo…") and then does the actual work,
|
|
14
|
+
* that ack reply is classified as the *final answer* by
|
|
15
|
+
* `isFinalAnswerReply` (final-answer-detect.ts) whenever it pings
|
|
16
|
+
* (`!disable_notification`) OR is ≥200 chars — both common for a natural
|
|
17
|
+
* human-feel ack. That sets `turn.finalAnswerDelivered = true`, and the
|
|
18
|
+
* `tool_label` handler's `if (turn.finalAnswerDelivered) return` then
|
|
19
|
+
* drops EVERY subsequent tool label → the live feed goes dark for the
|
|
20
|
+
* real work. The agent looks silent after "On it".
|
|
21
|
+
*
|
|
22
|
+
* ## The decision
|
|
23
|
+
*
|
|
24
|
+
* A new tool label after `finalAnswerDelivered` means the earlier "final"
|
|
25
|
+
* reply MIGHT have been an interim ACK — the turn has NOT delivered its
|
|
26
|
+
* final answer if it is still doing tool work. So reclassify: re-open the
|
|
27
|
+
* feed. The caller then resets `turn.finalAnswerDelivered = false` and
|
|
28
|
+
* `turn.activityMessageId = null` (so a FRESH feed message opens below the
|
|
29
|
+
* ack) and proceeds with the normal append + drain. When the model later
|
|
30
|
+
* sends its REAL final answer, `executeReply` / `stream_reply` re-set
|
|
31
|
+
* `finalAnswerDelivered = true` via `isFinalAnswerReply` and the feed gates
|
|
32
|
+
* off correctly again.
|
|
33
|
+
*
|
|
34
|
+
* ## ACK-ONLY refinement
|
|
35
|
+
*
|
|
36
|
+
* `finalAnswerDelivered` latches true for BOTH a short pinging ack AND a
|
|
37
|
+
* substantive final answer — `isFinalAnswerReply` treats any pinging reply
|
|
38
|
+
* as "final". So reopening unconditionally is HARMFUL after a *genuine*
|
|
39
|
+
* final answer: routine post-answer housekeeping (a memory write /
|
|
40
|
+
* TodoWrite / Bash — none of these are surface tools, so they reach the
|
|
41
|
+
* tool_label handler) fires a tool label → an unconditional reopen would
|
|
42
|
+
* reset `finalAnswerDelivered=false` → the turn-end silent-end re-prompt
|
|
43
|
+
* (`if (turn.finalAnswerDelivered === false)`, NOT gated on zero-outbound)
|
|
44
|
+
* would FIRE → the agent re-delivers a DUPLICATE / garbled answer. Agents
|
|
45
|
+
* routinely write memory after answering, so this would be frequent.
|
|
46
|
+
*
|
|
47
|
+
* The fix: reopen ONLY when the prior reply that set `finalAnswerDelivered`
|
|
48
|
+
* was a SHORT ACK, not a substantive answer. The caller tracks this on the
|
|
49
|
+
* turn as `finalAnswerSubstantive` (set via `isSubstantiveFinalReply` at
|
|
50
|
+
* every site that sets `finalAnswerDelivered = true`). Reopen iff
|
|
51
|
+
* `finalAnswerDelivered && !finalAnswerSubstantive`. When the prior final
|
|
52
|
+
* was substantive, drop the label (legacy gate) — no reopen, no reset — so
|
|
53
|
+
* the silent-end re-prompt and the #2137 drain both see the genuine final
|
|
54
|
+
* correctly.
|
|
55
|
+
*
|
|
56
|
+
* ## Interactions (the reset is correct for all three consumers)
|
|
57
|
+
*
|
|
58
|
+
* 1. #2137 deliver-before-drain gate (`mayDrainBufferedInbound`): reads the
|
|
59
|
+
* ending turn's `finalAnswerDelivered` at turn-end. With the reset, an
|
|
60
|
+
* ack-first turn that is still working keeps it false → the next topic
|
|
61
|
+
* is correctly HELD (no mid-work cross-topic bleed); the bounded
|
|
62
|
+
* no-reply drain timer (~2.5s) still releases the queue if the turn
|
|
63
|
+
* truly ends without a final answer.
|
|
64
|
+
* 2. silent-end re-prompt: a turn that acks, works, then ends with NO real
|
|
65
|
+
* final answer keeps `finalAnswerDelivered=false` → the re-prompt fires
|
|
66
|
+
* (correct — the user got only an ack, no answer).
|
|
67
|
+
* 3. the feed gate itself — this module.
|
|
68
|
+
*
|
|
69
|
+
* ## Kill switch
|
|
70
|
+
*
|
|
71
|
+
* `SWITCHROOM_FEED_REOPEN_AFTER_ACK=0` reverts to the legacy behaviour: a
|
|
72
|
+
* tool label after `finalAnswerDelivered` is dropped (`return`), and the
|
|
73
|
+
* post-ack feed stays dark. The kill switch is read by the CALLER, which
|
|
74
|
+
* passes `enabled` here.
|
|
75
|
+
*/
|
|
76
|
+
|
|
77
|
+
export interface FeedReopenInput {
|
|
78
|
+
/** Whether the turn has already been classified as having delivered its
|
|
79
|
+
* final answer (`turn.finalAnswerDelivered`). On an ack-first turn this
|
|
80
|
+
* is set true by the ack reply (it pinged or was ≥200 chars), even
|
|
81
|
+
* though the model is still working. */
|
|
82
|
+
finalAnswerDelivered: boolean
|
|
83
|
+
/** Whether the reply that set `finalAnswerDelivered` was a *substantive*
|
|
84
|
+
* final answer (stream `done`, or ≥200 chars) as opposed to a short
|
|
85
|
+
* pinging interim ACK (`turn.finalAnswerSubstantive`, set via
|
|
86
|
+
* `isSubstantiveFinalReply`). Only a short ACK should re-open the feed:
|
|
87
|
+
* reopening after a genuine final answer + post-answer housekeeping
|
|
88
|
+
* would spuriously trip the silent-end re-prompt → duplicate answer. */
|
|
89
|
+
finalAnswerSubstantive: boolean
|
|
90
|
+
/** Kill-switch state. When false the reopen behaviour is OFF and a tool
|
|
91
|
+
* label after `finalAnswerDelivered` is dropped (legacy). */
|
|
92
|
+
enabled: boolean
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
/**
|
|
96
|
+
* Pure. Given a tool label has just arrived (the model is calling a tool,
|
|
97
|
+
* so it is still working), returns true when the live activity feed should
|
|
98
|
+
* be RE-OPENED for the post-ack work.
|
|
99
|
+
*
|
|
100
|
+
* - !finalAnswerDelivered → false: the feed was never gated off; the normal
|
|
101
|
+
* append/drain path applies (no reopen needed).
|
|
102
|
+
* - finalAnswerDelivered && finalAnswerSubstantive → false: the prior final
|
|
103
|
+
* was a genuine answer (not an ack). Post-answer housekeeping tool work
|
|
104
|
+
* must NOT reopen — keep the legacy gate so the silent-end re-prompt and
|
|
105
|
+
* the #2137 drain see the delivered final correctly.
|
|
106
|
+
* - finalAnswerDelivered && !enabled (kill switch off) → false: legacy
|
|
107
|
+
* behaviour, the label is dropped by the caller.
|
|
108
|
+
* - finalAnswerDelivered && !finalAnswerSubstantive && enabled → true: the
|
|
109
|
+
* "final" reply was a short interim ack; re-open the feed.
|
|
110
|
+
*/
|
|
111
|
+
export function shouldReopenFeedAfterAck(input: FeedReopenInput): boolean {
|
|
112
|
+
if (!input.finalAnswerDelivered) return false
|
|
113
|
+
if (input.finalAnswerSubstantive) return false
|
|
114
|
+
return input.enabled === true
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
/** The feed-state fields the caller mutates on reopen. */
|
|
118
|
+
export interface FeedReopenState {
|
|
119
|
+
finalAnswerDelivered: boolean
|
|
120
|
+
activityMessageId: number | null
|
|
121
|
+
activityLastSentRender: string | null
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
/** The branch outcome the tool_label handler takes for a finalAnswer-
|
|
125
|
+
* delivered turn: either drop the label (legacy `return`) or reopen the
|
|
126
|
+
* feed with the given reset state. */
|
|
127
|
+
export interface FeedReopenOutcome {
|
|
128
|
+
/** True → the handler returns early (legacy: label dropped, feed dark). */
|
|
129
|
+
dropLabel: boolean
|
|
130
|
+
/** When dropLabel is false, the new feed-state fields to write on `turn`
|
|
131
|
+
* before the normal append/drain proceeds. */
|
|
132
|
+
reset?: FeedReopenState
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
/**
|
|
136
|
+
* Pure. The complete tool_label decision for a turn already marked
|
|
137
|
+
* finalAnswerDelivered. Mirrors exactly what the gateway handler does:
|
|
138
|
+
* - reopen disabled / substantive final / not applicable → drop the label
|
|
139
|
+
* (legacy `return`); the genuine final answer's gate is preserved.
|
|
140
|
+
* - reopen → reclassify the interim ack: finalAnswerDelivered back to
|
|
141
|
+
* false (the turn has NOT delivered its final answer while still doing
|
|
142
|
+
* tool work), activityMessageId cleared so a FRESH feed message opens
|
|
143
|
+
* below the ack, and activityLastSentRender cleared so the drain loop's
|
|
144
|
+
* `pending !== lastSent` guard never mistakes the fresh render for an
|
|
145
|
+
* already-sent one.
|
|
146
|
+
*
|
|
147
|
+
* Returning the deltas (rather than mutating) keeps the decision unit-
|
|
148
|
+
* testable; the handler applies them to the live `turn` atom.
|
|
149
|
+
*/
|
|
150
|
+
export function decideFeedReopen(input: FeedReopenInput): FeedReopenOutcome {
|
|
151
|
+
if (!shouldReopenFeedAfterAck(input)) {
|
|
152
|
+
return { dropLabel: true }
|
|
153
|
+
}
|
|
154
|
+
return {
|
|
155
|
+
dropLabel: false,
|
|
156
|
+
reset: {
|
|
157
|
+
finalAnswerDelivered: false,
|
|
158
|
+
activityMessageId: null,
|
|
159
|
+
activityLastSentRender: null,
|
|
160
|
+
},
|
|
161
|
+
}
|
|
162
|
+
}
|
|
package/telegram-plugin/gateway/gateway.ts
CHANGED
|
@@ -94,7 +94,7 @@ import { classifyInbound } from '../inbound-classifier.js'
|
|
|
94
94
|
import * as silencePoke from '../silence-poke.js'
|
|
95
95
|
import * as pendingProgress from '../pending-work-progress.js'
|
|
96
96
|
import { writeSilentEndState, clearSilentEndState, recordUndeliveredTurnEnd } from '../silent-end.js'
|
|
97
|
-
import { isFinalAnswerReply } from '../final-answer-detect.js'
|
|
97
|
+
import { isFinalAnswerReply, isSubstantiveFinalReply } from '../final-answer-detect.js'
|
|
98
98
|
import { createAnswerStream, type AnswerStreamHandle } from '../answer-stream.js'
|
|
99
99
|
import { parseVisibleAnswerStreamEnabled } from '../answer-stream-flag.js'
|
|
100
100
|
import { type SessionEvent } from '../session-tail.js'
|
|
@@ -282,6 +282,7 @@ import { createInboundSpool } from './inbound-spool.js'
|
|
|
282
282
|
import { purgeStaleTurnsForChat } from './turn-state-purge.js'
|
|
283
283
|
import { decideInboundDelivery } from './inbound-delivery-gate.js'
|
|
284
284
|
import { mayDrainBufferedInbound, shouldArmNoReplyDrain } from './serialize-drain-gate.js'
|
|
285
|
+
import { decideFeedReopen } from './feed-reopen-gate.js'
|
|
285
286
|
import { resolveAnswerThreadId } from './answer-thread-resolve.js'
|
|
286
287
|
import {
|
|
287
288
|
createDeliveryQueue,
|
|
@@ -1419,6 +1420,16 @@ const TOPIC_FRAMING_ENABLED =
|
|
|
1419
1420
|
// → no placeholder (the 👀 ack reaction still fires). Delete-on-answer.
|
|
1420
1421
|
const QUEUED_STATUS_UX_ENABLED =
|
|
1421
1422
|
process.env.SWITCHROOM_QUEUED_STATUS_UX !== '0'
|
|
1423
|
+
// Feed-reopen-after-ack. When a tool label arrives for a turn already
|
|
1424
|
+
// marked finalAnswerDelivered, the model is still WORKING — so the earlier
|
|
1425
|
+
// "final" reply was an interim ACK (an ack-first reply pings or runs ≥200
|
|
1426
|
+
// chars, both of which isFinalAnswerReply classifies as final). Re-open the
|
|
1427
|
+
// live activity feed for the post-ack work instead of dropping the label.
|
|
1428
|
+
// Kill switch off (=0) → legacy behaviour: the label is dropped and the
|
|
1429
|
+
// post-ack feed stays dark. See `feed-reopen-gate.ts` for the rationale and
|
|
1430
|
+
// the finalAnswerDelivered-consumer interactions.
|
|
1431
|
+
const FEED_REOPEN_AFTER_ACK_ENABLED =
|
|
1432
|
+
process.env.SWITCHROOM_FEED_REOPEN_AFTER_ACK !== '0'
|
|
1422
1433
|
|
|
1423
1434
|
/**
|
|
1424
1435
|
* Authoritative "is a turn in flight?" for every gate that previously
|
|
@@ -1552,6 +1563,20 @@ type CurrentTurn = {
|
|
|
1552
1563
|
// even though `replyCalled` is true — the #1664 case where the real answer
|
|
1553
1564
|
// ended up as plain transcript text rendered into an ephemeral draft.
|
|
1554
1565
|
finalAnswerDelivered: boolean
|
|
1566
|
+
// Feed-reopen-after-ack refinement — whether the reply that set
|
|
1567
|
+
// `finalAnswerDelivered` was a *substantive* final answer (stream
|
|
1568
|
+
// `done`, or ≥200 chars) as opposed to a short pinging interim ACK.
|
|
1569
|
+
// Set via `isSubstantiveFinalReply` at every site that sets
|
|
1570
|
+
// `finalAnswerDelivered = true`. The tool_label handler re-opens the
|
|
1571
|
+
// live activity feed ONLY when `finalAnswerDelivered && !finalAnswer-
|
|
1572
|
+
// Substantive` (the prior "final" was an ack). After a genuine final
|
|
1573
|
+
// answer this stays true, so routine post-answer housekeeping (memory
|
|
1574
|
+
// write / TodoWrite / Bash — non-surface tools that reach the handler)
|
|
1575
|
+
// does NOT reopen and does NOT reset `finalAnswerDelivered`, which would
|
|
1576
|
+
// otherwise spuriously trip the silent-end re-prompt → duplicate answer.
|
|
1577
|
+
// Reset to false on every fresh-turn enqueue alongside
|
|
1578
|
+
// `finalAnswerDelivered`.
|
|
1579
|
+
finalAnswerSubstantive: boolean
|
|
1555
1580
|
// #1675 (over-ping safety net): wall-clock ms of the first reply
|
|
1556
1581
|
// this turn that landed with `disable_notification: false` (a real
|
|
1557
1582
|
// device ping). The conversational-pacing contract
|
|
@@ -6305,6 +6330,12 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
|
|
|
6305
6330
|
})
|
|
6306
6331
|
) {
|
|
6307
6332
|
turn.finalAnswerDelivered = true
|
|
6333
|
+
// Feed-reopen refinement: a substantive merged silent-anchor
|
|
6334
|
+
// answer must NOT re-open the feed on post-answer housekeeping.
|
|
6335
|
+
turn.finalAnswerSubstantive = isSubstantiveFinalReply({
|
|
6336
|
+
text: decision.mergedText,
|
|
6337
|
+
disableNotification,
|
|
6338
|
+
})
|
|
6308
6339
|
}
|
|
6309
6340
|
outboundDedup.record(
|
|
6310
6341
|
chat_id,
|
|
@@ -6644,6 +6675,10 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
|
|
|
6644
6675
|
// end re-prompt from spuriously firing on a delivered final.
|
|
6645
6676
|
if (turn != null && isFinalAnswerReply({ text: rawText, disableNotification })) {
|
|
6646
6677
|
turn.finalAnswerDelivered = true
|
|
6678
|
+
// Feed-reopen refinement: track whether this final was substantive
|
|
6679
|
+
// (≥200 chars or stream-done — not a short pinging ack) so post-answer
|
|
6680
|
+
// housekeeping tool work does NOT re-open the feed / trip silent-end.
|
|
6681
|
+
turn.finalAnswerSubstantive = isSubstantiveFinalReply({ text: rawText, disableNotification })
|
|
6647
6682
|
// #1728: release the buffer gate + emit terminal 👍. Mid-turn
|
|
6648
6683
|
// acks bypass this branch and remain non-events for the
|
|
6649
6684
|
// reaction (preserves #1713). The full turn-state teardown
|
|
@@ -6987,6 +7022,14 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
|
|
|
6987
7022
|
})
|
|
6988
7023
|
) {
|
|
6989
7024
|
turn.finalAnswerDelivered = true
|
|
7025
|
+
// Feed-reopen refinement: a stream_reply done=true (or a ≥200-char
|
|
7026
|
+
// chunk) is substantive; a short pinging non-done chunk is an ack. Only
|
|
7027
|
+
// the latter should re-open the feed on subsequent post-answer work.
|
|
7028
|
+
turn.finalAnswerSubstantive = isSubstantiveFinalReply({
|
|
7029
|
+
text: (args.text as string | undefined) ?? '',
|
|
7030
|
+
disableNotification: args.disable_notification === true,
|
|
7031
|
+
done: args.done === true,
|
|
7032
|
+
})
|
|
6990
7033
|
// #1744 follow-up — stream_reply edge case. The first-emit gate at
|
|
6991
7034
|
// L5178 only clears silent-end state on the FIRST emit of a stream.
|
|
6992
7035
|
// If a stream's first emit was ack-shaped (disable_notification:true,
|
|
@@ -8467,8 +8510,18 @@ async function drainActivitySummary(turn: CurrentTurn): Promise<void> {
|
|
|
8467
8510
|
* Called on the first reply (hand-off) and again at turn_end (no-reply safety
|
|
8468
8511
|
* net); finalize edits are idempotent (a 'message is not modified' on the
|
|
8469
8512
|
* second call is swallowed).
|
|
8513
|
+
*
|
|
8514
|
+
* `finalHtmlOverride` (finalize path only): a render captured by the caller
|
|
8515
|
+
* BEFORE it tore down turn state the finalize render depends on. The
|
|
8516
|
+
* foreground handoff-clear path passes this — it deletes the just-finished
|
|
8517
|
+
* sub-agent's narrative right after this call, so the async
|
|
8518
|
+
* `composeTurnActivity(turn, true)` below would see an emptied feed (and, on
|
|
8519
|
+
* ack-first turns, empty `mirrorLines`), render null, and skip the finalize —
|
|
8520
|
+
* freezing the last live "→ in-progress" line. The captured render keeps the
|
|
8521
|
+
* persisted record reading done (✓). Omitted → compute it here (the common
|
|
8522
|
+
* reply/turn_end callers, where state is stable).
|
|
8470
8523
|
*/
|
|
8471
|
-
function clearActivitySummary(turn: CurrentTurn): void {
|
|
8524
|
+
function clearActivitySummary(turn: CurrentTurn, finalHtmlOverride?: string | null): void {
|
|
8472
8525
|
const chat = turn.sessionChatId
|
|
8473
8526
|
const thread = turn.sessionThreadId
|
|
8474
8527
|
const inFlight = turn.activityInFlight ?? Promise.resolve()
|
|
@@ -8489,7 +8542,8 @@ function clearActivitySummary(turn: CurrentTurn): void {
|
|
|
8489
8542
|
}
|
|
8490
8543
|
// Default: leave the status message as a record, edited to a terminal
|
|
8491
8544
|
// all-done state so it doesn't freeze on a misleading "→ in-progress" line.
|
|
8492
|
-
const finalHtml =
|
|
8545
|
+
const finalHtml =
|
|
8546
|
+
finalHtmlOverride !== undefined ? finalHtmlOverride : composeTurnActivity(turn, true)
|
|
8493
8547
|
if (finalHtml == null) return
|
|
8494
8548
|
try {
|
|
8495
8549
|
await robustApiCall(
|
|
@@ -8566,6 +8620,7 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
8566
8620
|
gatewayReceiveAt: startedAt,
|
|
8567
8621
|
replyCalled: false,
|
|
8568
8622
|
finalAnswerDelivered: false,
|
|
8623
|
+
finalAnswerSubstantive: false,
|
|
8569
8624
|
firstPingAt: null,
|
|
8570
8625
|
silentAnchorMessageId: null,
|
|
8571
8626
|
silentAnchorText: '',
|
|
@@ -8783,7 +8838,53 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
8783
8838
|
// the FINAL answer would re-`sendMessage` a fresh feed below it (flicker).
|
|
8784
8839
|
// Safe ordering: `tool_label` is real-time (PreToolUse, ~250ms) while
|
|
8785
8840
|
// `finalAnswerDelivered` is set from executeReply on the final answer.
|
|
8786
|
-
|
|
8841
|
+
//
|
|
8842
|
+
// Feed-reopen-after-ack: a tool label here means the model is STILL
|
|
8843
|
+
// working. If the turn was already marked finalAnswerDelivered, the
|
|
8844
|
+
// "final" reply MIGHT have been an interim ACK ("on it, checking
|
|
8845
|
+
// Brevo…" pings, classified final by isFinalAnswerReply), so the
|
|
8846
|
+
// post-ack work had no live feed — the gate above dropped every label.
|
|
8847
|
+
//
|
|
8848
|
+
// ACK-ONLY refinement: finalAnswerDelivered latches true for BOTH a
|
|
8849
|
+
// short pinging ack AND a substantive answer. Reopening unconditionally
|
|
8850
|
+
// is harmful after a GENUINE final answer — routine post-answer
|
|
8851
|
+
// housekeeping (memory write / TodoWrite / Bash; non-surface tools that
|
|
8852
|
+
// reach here) would reset finalAnswerDelivered=false and trip the
|
|
8853
|
+
// silent-end re-prompt (NOT zero-outbound gated) → duplicate answer. So
|
|
8854
|
+
// reopen ONLY when the prior final was a short ack
|
|
8855
|
+
// (finalAnswerSubstantive=false). When it was substantive, drop the
|
|
8856
|
+
// label (legacy gate) so the genuine final stays delivered.
|
|
8857
|
+
//
|
|
8858
|
+
// On reopen: reclassify the interim ack — the turn has NOT delivered its
|
|
8859
|
+
// final answer while still doing tool work. Reset the flag and clear
|
|
8860
|
+
// activityMessageId so a FRESH feed message opens below the ack, then
|
|
8861
|
+
// proceed normally. When the model's REAL final answer lands,
|
|
8862
|
+
// executeReply / stream_reply re-set finalAnswerDelivered=true (and
|
|
8863
|
+
// finalAnswerSubstantive) and the feed gates off again. The reset keeps
|
|
8864
|
+
// the #2137 serialize gate HOLDING the next topic mid-work (next-topic
|
|
8865
|
+
// liveness is the bounded no-reply timer's job) and lets the silent-end
|
|
8866
|
+
// re-prompt fire if the turn ends on only an ack.
|
|
8867
|
+
// Kill switch SWITCHROOM_FEED_REOPEN_AFTER_ACK=0 → legacy `return`.
|
|
8868
|
+
if (turn.finalAnswerDelivered) {
|
|
8869
|
+
// decideFeedReopen returns dropLabel (legacy return) or the reset
|
|
8870
|
+
// deltas: finalAnswerDelivered→false (the turn has NOT delivered its
|
|
8871
|
+
// final answer while still doing tool work), activityMessageId→null
|
|
8872
|
+
// (a FRESH feed message opens below the ack), activityLastSentRender
|
|
8873
|
+
// →null (so the drain loop's `pending !== lastSent` guard never
|
|
8874
|
+
// mistakes the fresh render for the ack's finalized one and skips it).
|
|
8875
|
+
const reopen = decideFeedReopen({
|
|
8876
|
+
finalAnswerDelivered: turn.finalAnswerDelivered,
|
|
8877
|
+
// ACK-ONLY: reopen only when the prior final was a short ack, not a
|
|
8878
|
+
// substantive answer — otherwise post-answer housekeeping would
|
|
8879
|
+
// reset finalAnswerDelivered and trip the silent-end re-prompt.
|
|
8880
|
+
finalAnswerSubstantive: turn.finalAnswerSubstantive,
|
|
8881
|
+
enabled: FEED_REOPEN_AFTER_ACK_ENABLED,
|
|
8882
|
+
})
|
|
8883
|
+
if (reopen.dropLabel) return
|
|
8884
|
+
turn.finalAnswerDelivered = reopen.reset!.finalAnswerDelivered
|
|
8885
|
+
turn.activityMessageId = reopen.reset!.activityMessageId
|
|
8886
|
+
turn.activityLastSentRender = reopen.reset!.activityLastSentRender
|
|
8887
|
+
}
|
|
8787
8888
|
const rendered = appendActivityLabel(turn.mirrorLines, ev.label)
|
|
8788
8889
|
if (rendered != null) {
|
|
8789
8890
|
// Recompose so any active foreground sub-agent's nested block (Model A)
|
|
@@ -9137,6 +9238,11 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
9137
9238
|
turn.answerStream = null
|
|
9138
9239
|
streamFinalizedAsAnswer = true
|
|
9139
9240
|
turn.finalAnswerDelivered = true
|
|
9241
|
+
// Feed-reopen refinement: the stream is being finalized as the
|
|
9242
|
+
// turn's answer (the model's terminal text), i.e. done=true by
|
|
9243
|
+
// construction → substantive. Post-answer housekeeping must NOT
|
|
9244
|
+
// re-open the feed.
|
|
9245
|
+
turn.finalAnswerSubstantive = true
|
|
9140
9246
|
// Capture the old streamed message_id BEFORE materialize so
|
|
9141
9247
|
// we can delete it after the fresh ping send. materialize()
|
|
9142
9248
|
// overwrites `streamMsgId` internally with the new send's id;
|
|
@@ -9413,6 +9519,12 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
9413
9519
|
// it keeps the captured `turn` atom internally consistent for any
|
|
9414
9520
|
// future reader.)
|
|
9415
9521
|
turn.finalAnswerDelivered = true
|
|
9522
|
+
// Feed-reopen refinement: turn-flush delivers the model's terminal
|
|
9523
|
+
// transcript text as the genuine answer (not an ack). Default to
|
|
9524
|
+
// substantive so a late tool label does NOT re-open the feed / trip
|
|
9525
|
+
// the silent-end re-prompt. (Belt-and-braces, like the set above —
|
|
9526
|
+
// this branch returns before any further tool_label can arrive.)
|
|
9527
|
+
turn.finalAnswerSubstantive = true
|
|
9416
9528
|
|
|
9417
9529
|
// #654 deterministic double-message fix. Hand off the pinned
|
|
9418
9530
|
// progress card BEFORE state reset so the driver doesn't keep
|
|
@@ -19278,20 +19390,32 @@ void (async () => {
|
|
|
19278
19390
|
// tool result, so there's no handback to deliver. Reaction
|
|
19279
19391
|
// promotion already ran above.
|
|
19280
19392
|
const turn = currentTurn
|
|
19281
|
-
|
|
19282
|
-
|
|
19393
|
+
// has()-then-delete (not delete-up-front): the handoff-clear
|
|
19394
|
+
// branch must render the finished sub-agent's steps as done
|
|
19395
|
+
// WHILE its narrative is still in the map, then remove it.
|
|
19396
|
+
if (turn != null && turn.foregroundSubAgents.has(agentId)) {
|
|
19283
19397
|
const action = foregroundFinishAction({
|
|
19284
|
-
removed,
|
|
19398
|
+
removed: true,
|
|
19285
19399
|
replyCalled: turn.replyCalled,
|
|
19286
|
-
|
|
19400
|
+
// size AFTER this agent's impending removal
|
|
19401
|
+
remainingForeground: turn.foregroundSubAgents.size - 1,
|
|
19287
19402
|
})
|
|
19288
19403
|
if (action === 'handoff-clear') {
|
|
19289
19404
|
// Post-ack: the last foreground sub-agent finished and
|
|
19290
19405
|
// the parent will now produce its answer inline. Hand
|
|
19291
19406
|
// the re-opened feed off to the answer, mirroring the
|
|
19292
|
-
// first-reply clear (turn_end is the safety net).
|
|
19293
|
-
|
|
19407
|
+
// first-reply clear (turn_end is the safety net). Capture
|
|
19408
|
+
// the finalized render (child steps done ✓) BEFORE the
|
|
19409
|
+
// delete, then pass it so the persisted record doesn't
|
|
19410
|
+
// freeze on a stale "→ in-progress" line (the emptied-feed
|
|
19411
|
+
// skip — see clearActivitySummary's finalHtmlOverride doc).
|
|
19412
|
+
const finalHtml = composeTurnActivity(turn, true)
|
|
19413
|
+
turn.foregroundSubAgents.delete(agentId)
|
|
19414
|
+
clearActivitySummary(turn, finalHtml)
|
|
19294
19415
|
} else if (action === 'recompose') {
|
|
19416
|
+
// Collapse the finished sub-agent's block: delete first,
|
|
19417
|
+
// then render WITHOUT it (live feed keeps its → step).
|
|
19418
|
+
turn.foregroundSubAgents.delete(agentId)
|
|
19295
19419
|
const rendered = composeTurnActivity(turn)
|
|
19296
19420
|
if (rendered != null) {
|
|
19297
19421
|
turn.activityPendingRender = rendered
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest'
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
decideFeedReopen,
|
|
5
|
+
shouldReopenFeedAfterAck,
|
|
6
|
+
} from '../gateway/feed-reopen-gate.js'
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Feed-reopen-after-ack — pure decision gate.
|
|
10
|
+
*
|
|
11
|
+
* A supergroup agent that ACKS FIRST ("on it, checking Brevo…") then works
|
|
12
|
+
* had its live activity feed go dark for the real work: the ack reply is
|
|
13
|
+
* classified as the final answer by isFinalAnswerReply (it pings or is ≥200
|
|
14
|
+
* chars), setting turn.finalAnswerDelivered=true, and the tool_label handler
|
|
15
|
+
* then dropped every subsequent label. This predicate decides whether a tool
|
|
16
|
+
* label arriving after finalAnswerDelivered (the model is still working)
|
|
17
|
+
* should RE-OPEN the feed.
|
|
18
|
+
*
|
|
19
|
+
* ACK-ONLY refinement: finalAnswerDelivered latches true for BOTH a short
|
|
20
|
+
* pinging ack AND a substantive answer. Reopening after a GENUINE final
|
|
21
|
+
* answer is harmful — post-answer housekeeping (memory write / TodoWrite /
|
|
22
|
+
* Bash) would reset finalAnswerDelivered=false and trip the silent-end
|
|
23
|
+
* re-prompt → duplicate answer. So the gate reopens ONLY when the prior
|
|
24
|
+
* final was a short ack (finalAnswerSubstantive=false).
|
|
25
|
+
*/
|
|
26
|
+
describe('shouldReopenFeedAfterAck', () => {
|
|
27
|
+
it('reopens when delivered AND NOT substantive AND enabled (the ack-first fix)', () => {
|
|
28
|
+
expect(
|
|
29
|
+
shouldReopenFeedAfterAck({
|
|
30
|
+
finalAnswerDelivered: true,
|
|
31
|
+
finalAnswerSubstantive: false,
|
|
32
|
+
enabled: true,
|
|
33
|
+
}),
|
|
34
|
+
).toBe(true)
|
|
35
|
+
})
|
|
36
|
+
|
|
37
|
+
it('does NOT reopen when the prior final was SUBSTANTIVE (the new guard)', () => {
|
|
38
|
+
// A real final answer followed by post-answer housekeeping tool work:
|
|
39
|
+
// keep the legacy gate (no reopen) so the silent-end re-prompt and the
|
|
40
|
+
// #2137 drain see the delivered final correctly. This is the harmful
|
|
41
|
+
// case the refinement closes.
|
|
42
|
+
expect(
|
|
43
|
+
shouldReopenFeedAfterAck({
|
|
44
|
+
finalAnswerDelivered: true,
|
|
45
|
+
finalAnswerSubstantive: true,
|
|
46
|
+
enabled: true,
|
|
47
|
+
}),
|
|
48
|
+
).toBe(false)
|
|
49
|
+
})
|
|
50
|
+
|
|
51
|
+
it('does NOT reopen when the kill switch is off (legacy: drop the label)', () => {
|
|
52
|
+
expect(
|
|
53
|
+
shouldReopenFeedAfterAck({
|
|
54
|
+
finalAnswerDelivered: true,
|
|
55
|
+
finalAnswerSubstantive: false,
|
|
56
|
+
enabled: false,
|
|
57
|
+
}),
|
|
58
|
+
).toBe(false)
|
|
59
|
+
})
|
|
60
|
+
|
|
61
|
+
it('does NOT reopen when the final answer was never delivered (no reopen needed)', () => {
|
|
62
|
+
// The feed was never gated off — the normal append/drain path applies.
|
|
63
|
+
expect(
|
|
64
|
+
shouldReopenFeedAfterAck({
|
|
65
|
+
finalAnswerDelivered: false,
|
|
66
|
+
finalAnswerSubstantive: false,
|
|
67
|
+
enabled: true,
|
|
68
|
+
}),
|
|
69
|
+
).toBe(false)
|
|
70
|
+
expect(
|
|
71
|
+
shouldReopenFeedAfterAck({
|
|
72
|
+
finalAnswerDelivered: false,
|
|
73
|
+
finalAnswerSubstantive: false,
|
|
74
|
+
enabled: false,
|
|
75
|
+
}),
|
|
76
|
+
).toBe(false)
|
|
77
|
+
})
|
|
78
|
+
})
|
|
79
|
+
|
|
80
|
+
describe('decideFeedReopen — tool_label branch outcome for a delivered turn', () => {
|
|
81
|
+
it('tool_label after a SHORT ACK (not substantive, kill switch ON) → reset + render proceeds', () => {
|
|
82
|
+
// The exact contract the gateway tool_label handler applies: the interim
|
|
83
|
+
// ack is reclassified — finalAnswerDelivered back to false, a FRESH feed
|
|
84
|
+
// message (activityMessageId null), last-sent render cleared so the drain
|
|
85
|
+
// re-sends. dropLabel false → the handler proceeds to append + drain.
|
|
86
|
+
const outcome = decideFeedReopen({
|
|
87
|
+
finalAnswerDelivered: true,
|
|
88
|
+
finalAnswerSubstantive: false,
|
|
89
|
+
enabled: true,
|
|
90
|
+
})
|
|
91
|
+
expect(outcome.dropLabel).toBe(false)
|
|
92
|
+
expect(outcome.reset).toEqual({
|
|
93
|
+
finalAnswerDelivered: false,
|
|
94
|
+
activityMessageId: null,
|
|
95
|
+
activityLastSentRender: null,
|
|
96
|
+
})
|
|
97
|
+
})
|
|
98
|
+
|
|
99
|
+
it('tool_label after a SUBSTANTIVE final → drops the label (the new guard, feed stays gated)', () => {
|
|
100
|
+
// Genuine final answer + post-answer housekeeping: NO reopen, NO reset.
|
|
101
|
+
// finalAnswerDelivered stays true so the silent-end re-prompt does not
|
|
102
|
+
// fire and the #2137 drain proceeds correctly.
|
|
103
|
+
const outcome = decideFeedReopen({
|
|
104
|
+
finalAnswerDelivered: true,
|
|
105
|
+
finalAnswerSubstantive: true,
|
|
106
|
+
enabled: true,
|
|
107
|
+
})
|
|
108
|
+
expect(outcome.dropLabel).toBe(true)
|
|
109
|
+
expect(outcome.reset).toBeUndefined()
|
|
110
|
+
})
|
|
111
|
+
|
|
112
|
+
it('kill switch OFF → drops the label (legacy early return, feed stays dark)', () => {
|
|
113
|
+
const outcome = decideFeedReopen({
|
|
114
|
+
finalAnswerDelivered: true,
|
|
115
|
+
finalAnswerSubstantive: false,
|
|
116
|
+
enabled: false,
|
|
117
|
+
})
|
|
118
|
+
expect(outcome.dropLabel).toBe(true)
|
|
119
|
+
expect(outcome.reset).toBeUndefined()
|
|
120
|
+
})
|
|
121
|
+
|
|
122
|
+
it('finalAnswerDelivered false → no reopen branch (handler never reaches it)', () => {
|
|
123
|
+
// The handler only calls decideFeedReopen inside `if (turn.finalAnswerDelivered)`,
|
|
124
|
+
// but the predicate is total: a false flag yields dropLabel (no reset).
|
|
125
|
+
const outcome = decideFeedReopen({
|
|
126
|
+
finalAnswerDelivered: false,
|
|
127
|
+
finalAnswerSubstantive: false,
|
|
128
|
+
enabled: true,
|
|
129
|
+
})
|
|
130
|
+
expect(outcome.dropLabel).toBe(true)
|
|
131
|
+
expect(outcome.reset).toBeUndefined()
|
|
132
|
+
})
|
|
133
|
+
})
|
|
@@ -16,7 +16,11 @@
|
|
|
16
16
|
*/
|
|
17
17
|
|
|
18
18
|
import { describe, it, expect } from 'vitest'
|
|
19
|
-
import {
|
|
19
|
+
import {
|
|
20
|
+
isFinalAnswerReply,
|
|
21
|
+
isSubstantiveFinalReply,
|
|
22
|
+
FINAL_ANSWER_MIN_CHARS,
|
|
23
|
+
} from '../final-answer-detect.js'
|
|
20
24
|
|
|
21
25
|
describe('isFinalAnswerReply — #1664 final-answer classification', () => {
|
|
22
26
|
it('classifies a notification-bearing reply as the final answer', () => {
|
|
@@ -87,3 +91,65 @@ describe('isFinalAnswerReply — #1664 final-answer classification', () => {
|
|
|
87
91
|
expect(FINAL_ANSWER_MIN_CHARS).toBe(200)
|
|
88
92
|
})
|
|
89
93
|
})
|
|
94
|
+
|
|
95
|
+
describe('isSubstantiveFinalReply — feed-reopen ACK-ONLY distinction', () => {
|
|
96
|
+
// isSubstantiveFinalReply is isFinalAnswerReply MINUS the ping-only path.
|
|
97
|
+
// It tells "genuine final answer" (stream-done or ≥200 chars) apart from
|
|
98
|
+
// "final only because it pinged" (a short interim ack). The feed-reopen
|
|
99
|
+
// gate reopens only when finalAnswerDelivered && !substantive, so a real
|
|
100
|
+
// answer + post-answer housekeeping does NOT spuriously reopen / trip the
|
|
101
|
+
// silent-end re-prompt.
|
|
102
|
+
|
|
103
|
+
it('stream_reply done=true → substantive (closes the stream = the answer)', () => {
|
|
104
|
+
expect(
|
|
105
|
+
isSubstantiveFinalReply({ text: 'ok', disableNotification: true, done: true }),
|
|
106
|
+
).toBe(true)
|
|
107
|
+
})
|
|
108
|
+
|
|
109
|
+
it('a reply at/over the length backstop → substantive', () => {
|
|
110
|
+
expect(
|
|
111
|
+
isSubstantiveFinalReply({
|
|
112
|
+
text: 'x'.repeat(FINAL_ANSWER_MIN_CHARS),
|
|
113
|
+
disableNotification: true,
|
|
114
|
+
}),
|
|
115
|
+
).toBe(true)
|
|
116
|
+
// One under the threshold, silent → not substantive.
|
|
117
|
+
expect(
|
|
118
|
+
isSubstantiveFinalReply({
|
|
119
|
+
text: 'x'.repeat(FINAL_ANSWER_MIN_CHARS - 1),
|
|
120
|
+
disableNotification: true,
|
|
121
|
+
}),
|
|
122
|
+
).toBe(false)
|
|
123
|
+
})
|
|
124
|
+
|
|
125
|
+
it('a short PINGING reply is final but NOT substantive (the ack case)', () => {
|
|
126
|
+
// The crux: isFinalAnswerReply says true (it pings), but this is the
|
|
127
|
+
// ack the feed-reopen gate must treat as reopen-eligible — NOT a real
|
|
128
|
+
// answer. So isSubstantiveFinalReply must say false.
|
|
129
|
+
expect(
|
|
130
|
+
isFinalAnswerReply({ text: 'on it, checking Brevo…', disableNotification: false }),
|
|
131
|
+
).toBe(true)
|
|
132
|
+
expect(
|
|
133
|
+
isSubstantiveFinalReply({ text: 'on it, checking Brevo…', disableNotification: false }),
|
|
134
|
+
).toBe(false)
|
|
135
|
+
})
|
|
136
|
+
|
|
137
|
+
it('a short SILENT interim reply is neither final nor substantive', () => {
|
|
138
|
+
expect(
|
|
139
|
+
isFinalAnswerReply({ text: 'thinking…', disableNotification: true }),
|
|
140
|
+
).toBe(false)
|
|
141
|
+
expect(
|
|
142
|
+
isSubstantiveFinalReply({ text: 'thinking…', disableNotification: true }),
|
|
143
|
+
).toBe(false)
|
|
144
|
+
})
|
|
145
|
+
|
|
146
|
+
it('a long reply is substantive regardless of the ping flag', () => {
|
|
147
|
+
const longText = 'x'.repeat(FINAL_ANSWER_MIN_CHARS)
|
|
148
|
+
expect(
|
|
149
|
+
isSubstantiveFinalReply({ text: longText, disableNotification: false }),
|
|
150
|
+
).toBe(true)
|
|
151
|
+
expect(
|
|
152
|
+
isSubstantiveFinalReply({ text: longText, disableNotification: true }),
|
|
153
|
+
).toBe(true)
|
|
154
|
+
})
|
|
155
|
+
})
|
|
@@ -222,4 +222,30 @@ describe("renderActivityFeedWithNested — foreground sub-agent nesting (Model A
|
|
|
222
222
|
"<i>✓ Reading a.ts</i>",
|
|
223
223
|
);
|
|
224
224
|
});
|
|
225
|
+
|
|
226
|
+
// Pins the invariant the gateway's foreground handoff-clear path relies on:
|
|
227
|
+
// on an ack-first turn the parent feed is empty (mirrorLines=[]) and the only
|
|
228
|
+
// content is the foreground sub-agent's nested narrative. The finalized
|
|
229
|
+
// render MUST be captured WHILE that narrative is present — once the gateway
|
|
230
|
+
// removes the finished sub-agent from the map, the render collapses to null
|
|
231
|
+
// and the finalize would be skipped, freezing the last live "→" line. This is
|
|
232
|
+
// exactly why clearActivitySummary takes a pre-delete finalHtmlOverride.
|
|
233
|
+
describe("foreground handoff-clear: capture-before-delete invariant", () => {
|
|
234
|
+
it("ack-first (empty parent) + child present → non-null all-done render (✓, no →)", () => {
|
|
235
|
+
const out = renderActivityFeedWithNested(
|
|
236
|
+
[],
|
|
237
|
+
["Sleep 2 for step 8", "Step 8 done; final echo", "All eight steps completed"],
|
|
238
|
+
true,
|
|
239
|
+
);
|
|
240
|
+
expect(out).not.toBeNull();
|
|
241
|
+
expect(out).not.toContain("→");
|
|
242
|
+
expect(out).toContain("All eight steps completed");
|
|
243
|
+
});
|
|
244
|
+
|
|
245
|
+
it("ack-first (empty parent) + child REMOVED → null (the emptied-feed skip the gateway must avoid)", () => {
|
|
246
|
+
// After foregroundSubAgents.delete(agentId), the parent has nothing left
|
|
247
|
+
// to render on an ack-first turn → null → finalize would no-op.
|
|
248
|
+
expect(renderActivityFeedWithNested([], [], true)).toBeNull();
|
|
249
|
+
});
|
|
250
|
+
});
|
|
225
251
|
});
|