switchroom 0.13.12 → 0.13.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +60 -5
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +33 -6
- package/telegram-plugin/final-answer-detect.ts +83 -0
- package/telegram-plugin/gateway/gateway.ts +107 -11
- package/telegram-plugin/hooks/silent-end-interrupt-stop.mjs +17 -5
- package/telegram-plugin/silent-end.ts +37 -11
- package/telegram-plugin/tests/final-answer-detect.test.ts +89 -0
- package/telegram-plugin/tests/silent-end.test.ts +118 -0
package/dist/cli/switchroom.js
CHANGED
|
@@ -26509,17 +26509,34 @@ var init_wait = __esm(() => {
|
|
|
26509
26509
|
// src/cli/drive.ts
|
|
26510
26510
|
var exports_drive = {};
|
|
26511
26511
|
__export(exports_drive, {
|
|
26512
|
+
workspaceScopesForTier: () => workspaceScopesForTier,
|
|
26513
|
+
selectGoogleWorkspaceScopes: () => selectGoogleWorkspaceScopes,
|
|
26512
26514
|
selectDriveAccountScopes: () => selectDriveAccountScopes,
|
|
26513
26515
|
runDriveOAuthFlow: () => runDriveOAuthFlow,
|
|
26514
26516
|
registerDriveCommand: () => registerDriveCommand,
|
|
26515
26517
|
__test: () => __test,
|
|
26518
|
+
GOOGLE_SLIDES_SCOPE: () => GOOGLE_SLIDES_SCOPE,
|
|
26519
|
+
GOOGLE_SHEETS_SCOPE: () => GOOGLE_SHEETS_SCOPE,
|
|
26520
|
+
GOOGLE_DOCS_SCOPE: () => GOOGLE_DOCS_SCOPE,
|
|
26516
26521
|
DRIVE_WRITE_SCOPES: () => DRIVE_WRITE_SCOPES,
|
|
26517
26522
|
DRIVE_READONLY_SCOPES: () => DRIVE_READONLY_SCOPES
|
|
26518
26523
|
});
|
|
26519
26524
|
import { createInterface as createInterface2 } from "node:readline";
|
|
26525
|
+
/**
 * List the Google Workspace API scopes requested for a workspace tier.
 *
 * Docs and Sheets are always included; Slides is appended only for the
 * "extended" and "complete" tiers. Any other tier value (including
 * undefined) gets the Docs + Sheets pair.
 *
 * @param {string} tier - Workspace tier name.
 * @returns {string[]} A fresh array of scope URLs (safe for the caller to mutate).
 */
function workspaceScopesForTier(tier) {
  const wantsSlides = tier === "extended" || tier === "complete";
  const scopes = [GOOGLE_DOCS_SCOPE, GOOGLE_SHEETS_SCOPE];
  if (wantsSlides) {
    scopes.push(GOOGLE_SLIDES_SCOPE);
  }
  return scopes;
}
|
|
26520
26532
|
/**
 * Pick the Drive scope list for an account.
 *
 * @param {boolean} write - Truthy to get the write scope list; otherwise the
 *   read-only list is returned.
 * @returns {string[]} DRIVE_WRITE_SCOPES or DRIVE_READONLY_SCOPES (the shared
 *   module-level array itself, not a copy).
 */
function selectDriveAccountScopes(write) {
  if (write) {
    return DRIVE_WRITE_SCOPES;
  }
  return DRIVE_READONLY_SCOPES;
}
|
|
26535
|
+
/**
 * Build the full OAuth scope list for a Google account: the Drive scopes
 * selected by `opts.write`, followed by the Workspace scopes for
 * `opts.tier` (defaulting to "core" when the tier is null/undefined).
 *
 * Duplicates are dropped; first-seen order is kept, so Drive scopes come
 * before Workspace scopes.
 *
 * @param {{ write?: boolean, tier?: string }} opts
 * @returns {string[]} De-duplicated scope URLs.
 */
function selectGoogleWorkspaceScopes(opts) {
  const merged = new Set(selectDriveAccountScopes(opts.write));
  for (const scope of workspaceScopesForTier(opts.tier ?? "core")) {
    merged.add(scope);
  }
  return Array.from(merged);
}
|
|
26523
26540
|
function getVaultPath(configPath) {
|
|
26524
26541
|
try {
|
|
26525
26542
|
const config = loadConfig(configPath);
|
|
@@ -26932,7 +26949,7 @@ function registerDriveCommand(program3, deps = {}) {
|
|
|
26932
26949
|
await runDisconnect({ agentName: agent }, deps);
|
|
26933
26950
|
});
|
|
26934
26951
|
}
|
|
26935
|
-
var EXIT_OK = 0, EXIT_DENIED = 1, EXIT_TIMEOUT = 2, EXIT_RATE_LIMITED = 3, EXIT_ERROR = 4, EXIT_ABORTED = 130, DRIVE_READONLY_SCOPES, DRIVE_WRITE_SCOPES, DEFAULT_SCOPES, __test;
|
|
26952
|
+
var EXIT_OK = 0, EXIT_DENIED = 1, EXIT_TIMEOUT = 2, EXIT_RATE_LIMITED = 3, EXIT_ERROR = 4, EXIT_ABORTED = 130, DRIVE_READONLY_SCOPES, DRIVE_WRITE_SCOPES, DEFAULT_SCOPES, GOOGLE_DOCS_SCOPE = "https://www.googleapis.com/auth/documents", GOOGLE_SHEETS_SCOPE = "https://www.googleapis.com/auth/spreadsheets", GOOGLE_SLIDES_SCOPE = "https://www.googleapis.com/auth/presentations", __test;
|
|
26936
26953
|
var init_drive = __esm(() => {
|
|
26937
26954
|
init_source();
|
|
26938
26955
|
init_loader();
|
|
@@ -47314,8 +47331,8 @@ var {
|
|
|
47314
47331
|
} = import__.default;
|
|
47315
47332
|
|
|
47316
47333
|
// src/build-info.ts
|
|
47317
|
-
var VERSION = "0.13.
|
|
47318
|
-
var COMMIT_SHA = "
|
|
47334
|
+
var VERSION = "0.13.13";
|
|
47335
|
+
var COMMIT_SHA = "dc583d57";
|
|
47319
47336
|
|
|
47320
47337
|
// src/cli/agent.ts
|
|
47321
47338
|
init_source();
|
|
@@ -54578,7 +54595,7 @@ function registerAccountAdd(accountParent) {
|
|
|
54578
54595
|
accountParent.command("add <account>").description("Mint a Google OAuth refresh token for <account> and register it with the auth-broker. For Drive scopes the effective flow is desktop-loopback (device-code returns invalid_scope for Drive; OOB is retired) \u2014 use a Desktop OAuth client; on a headless host complete the browser step over an SSH port-forward. Add --write for create/edit (drive.file); default is read-only.").option("--replace", "Overwrite existing credentials for <account> (default refuses if account already registered)", false).option("--write", "Request Drive WRITE scope (drive.file: create + edit app-created files) in addition to read. Default is read-only \u2014 a read grant never silently becomes a write grant. Re-consent an existing account with `--replace --write`.", false).action(withConfigError(async (account, opts) => {
|
|
54579
54596
|
const normalizedAccount = validateAndNormalizeAccountEmail(account);
|
|
54580
54597
|
const [
|
|
54581
|
-
{ runDriveOAuthFlow: runDriveOAuthFlow2,
|
|
54598
|
+
{ runDriveOAuthFlow: runDriveOAuthFlow2, selectGoogleWorkspaceScopes: selectGoogleWorkspaceScopes2 },
|
|
54582
54599
|
{ selectInitialTier: selectInitialTier2 },
|
|
54583
54600
|
{ brokerCall: brokerCall2 },
|
|
54584
54601
|
{ loadConfig: loadConfig2, resolvePath: resolvePath2 },
|
|
@@ -54647,7 +54664,11 @@ function registerAccountAdd(accountParent) {
|
|
|
54647
54664
|
clientIdRaw = await resolveRef(clientIdRaw, "google_client_id");
|
|
54648
54665
|
clientSecretRaw = await resolveRef(clientSecretRaw, "google_client_secret");
|
|
54649
54666
|
}
|
|
54650
|
-
const
|
|
54667
|
+
const tier = gw.tier ?? "core";
|
|
54668
|
+
const accountScopes = selectGoogleWorkspaceScopes2({
|
|
54669
|
+
write: opts.write ?? false,
|
|
54670
|
+
tier
|
|
54671
|
+
});
|
|
54651
54672
|
const oauthCfg = {
|
|
54652
54673
|
client_id: clientIdRaw,
|
|
54653
54674
|
client_secret: clientSecretRaw,
|
|
@@ -54656,6 +54677,9 @@ function registerAccountAdd(accountParent) {
|
|
|
54656
54677
|
if (opts.write) {
|
|
54657
54678
|
console.log(source_default.yellow(" Requesting Drive WRITE scope (drive.file \u2014 create/edit app-created files)."));
|
|
54658
54679
|
}
|
|
54680
|
+
console.log(source_default.gray(` Workspace tier: ${tier} \u2014 requesting Docs + Sheets` + (tier === "extended" || tier === "complete" ? " + Slides" : "") + " API scopes so the tier's tools can authenticate."));
|
|
54681
|
+
console.log(source_default.gray(` Changing the tier later requires re-running this command
|
|
54682
|
+
` + " (`--replace`) \u2014 OAuth scopes are fixed at consent time."));
|
|
54659
54683
|
const oauthEnv = {
|
|
54660
54684
|
DISPLAY: process.env.DISPLAY,
|
|
54661
54685
|
WAYLAND_DISPLAY: process.env.WAYLAND_DISPLAY,
|
|
@@ -73048,6 +73072,29 @@ function buildSeedCredentials(input) {
|
|
|
73048
73072
|
}
|
|
73049
73073
|
var AIOFILE_PIN = "aiofile==3.10.2";
|
|
73050
73074
|
var AIOFILE_PKG = AIOFILE_PIN.split("==")[0];
|
|
73075
|
+
/**
 * Workspace API scopes the MCP tools of a tier need in order to authenticate.
 *
 * Docs and Sheets are always required; Slides is added for the "extended"
 * and "complete" tiers.
 *
 * @param {string | undefined} tier - Workspace tier name.
 * @returns {string[]} Required scope URLs.
 */
function requiredWorkspaceScopesForTier(tier) {
  const docs = [
    "https://www.googleapis.com/auth/documents",
    "https://www.googleapis.com/auth/spreadsheets"
  ];
  if (tier === "extended" || tier === "complete") {
    return [...docs, "https://www.googleapis.com/auth/presentations"];
  }
  return docs;
}
/**
 * Report which scopes required for `tier` are absent from a granted scope
 * string.
 *
 * @param {string | null | undefined} seedScope - Whitespace-separated scope
 *   URLs. A missing or non-string value is treated as "no scopes granted"
 *   rather than throwing, because this check only feeds a warning.
 * @param {string | undefined} tier - Workspace tier name.
 * @returns {string[]} Required scopes not present in `seedScope`, in the
 *   order `requiredWorkspaceScopesForTier` lists them.
 */
function findMissingWorkspaceScopes(seedScope, tier) {
  const scopeText = typeof seedScope === "string" ? seedScope : "";
  // Splitting on /\s+/ already strips all whitespace from the tokens; only the
  // empty strings produced by leading/trailing whitespace need filtering out.
  const granted = new Set(scopeText.split(/\s+/).filter((s) => s.length > 0));
  return requiredWorkspaceScopesForTier(tier).filter((s) => !granted.has(s));
}
|
|
73089
|
+
var DRIVE_FILE_SCOPE = "https://www.googleapis.com/auth/drive.file";
|
|
73090
|
+
/**
 * Compose the multi-line stderr warning shown when a Google account's token
 * lacks scopes its workspace tier needs.
 *
 * @param {string[]} missing - Full scope URLs that are absent; the common
 *   googleapis auth prefix is stripped for display.
 * @param {string | undefined} tier - Workspace tier; shown as "core" when
 *   null/undefined.
 * @param {string} accountEmail - Account named in the message and in the
 *   suggested re-consent command.
 * @param {boolean} hasWriteScope - When truthy, the suggested command gains
 *   ` --write` and a note about preserving Drive write capability.
 * @returns {string} The warning text, ending with a newline.
 */
function buildMissingScopeWarning(missing, tier, accountEmail, hasWriteScope) {
  const shortNames = [];
  for (const scope of missing) {
    shortNames.push(scope.replace(/^https:\/\/www\.googleapis\.com\/auth\//, ""));
  }
  const writeFlag = hasWriteScope ? " --write" : "";
  const writeNote = hasWriteScope ? "; --write preserves the existing Drive write capability" : "";
  const lines = [
    `drive-mcp-launcher: WARNING \u2014 the Google account '${accountEmail}' was consented WITHOUT the scope(s) needed for tier '${tier ?? "core"}': ${shortNames.join(", ")}.`,
    " The matching MCP tools (Docs / Sheets / Slides create+edit) will FAIL to authenticate. OAuth scopes are fixed at consent time \u2014 re-run on the host to re-mint the token with the correct scopes:",
    ` switchroom auth google account add ${accountEmail} --replace${writeFlag}`,
    ` (scopes are derived from \`google_workspace.tier\` \u2014 set the tier before re-running${writeNote}). Drive read/file tools are unaffected.`,
    // Trailing empty entry makes the joined text end with a newline.
    ""
  ];
  return lines.join("\n");
}
|
|
73051
73098
|
function buildUvxArgs(tier) {
|
|
73052
73099
|
const args = [
|
|
73053
73100
|
"--from",
|
|
@@ -73072,6 +73119,9 @@ function buildChildEnv(baseEnv, credentialsDir, accountEmail) {
|
|
|
73072
73119
|
delete env2.WORKSPACE_MCP_STATELESS_MODE;
|
|
73073
73120
|
delete env2.GOOGLE_APPLICATION_CREDENTIALS;
|
|
73074
73121
|
delete env2.WORKSPACE_MCP_SERVICE_ACCOUNT_FILE;
|
|
73122
|
+
if (!env2.WORKSPACE_MCP_PORT) {
|
|
73123
|
+
env2.WORKSPACE_MCP_PORT = env2.SWITCHROOM_GDRIVE_MCP_PORT ?? "8631";
|
|
73124
|
+
}
|
|
73075
73125
|
return env2;
|
|
73076
73126
|
}
|
|
73077
73127
|
function classifyRootSchema(schema) {
|
|
@@ -73288,6 +73338,11 @@ async function runDriveMcpLauncher(opts) {
|
|
|
73288
73338
|
process.exit(1);
|
|
73289
73339
|
}
|
|
73290
73340
|
const tier = opts.tier ?? configSecrets.tier;
|
|
73341
|
+
const missingScopes = findMissingWorkspaceScopes(brokerCreds.scope, tier);
|
|
73342
|
+
if (missingScopes.length > 0) {
|
|
73343
|
+
const hasWriteScope = brokerCreds.scope.split(/\s+/).map((s) => s.trim()).includes(DRIVE_FILE_SCOPE);
|
|
73344
|
+
process.stderr.write(buildMissingScopeWarning(missingScopes, tier, brokerCreds.accountEmail, hasWriteScope));
|
|
73345
|
+
}
|
|
73291
73346
|
const args = buildUvxArgs(tier);
|
|
73292
73347
|
const env2 = buildChildEnv(process.env, credentialsDir, brokerCreds.accountEmail);
|
|
73293
73348
|
const { spawn: spawn5 } = await import("node:child_process");
|
package/package.json
CHANGED
|
@@ -37383,6 +37383,19 @@ function recordSilentTurnEnd(args, deps) {
|
|
|
37383
37383
|
writeSilentEndState(args, deps);
|
|
37384
37384
|
return { exhausted: false };
|
|
37385
37385
|
}
|
|
37386
|
+
var recordUndeliveredTurnEnd = recordSilentTurnEnd;
|
|
37387
|
+
|
|
37388
|
+
// final-answer-detect.ts
|
|
37389
|
+
// Replies at or above this many characters count as a final answer even when
// they were sent with notifications disabled. (`var` kept to match the
// bundle's top-level declaration style.)
var FINAL_ANSWER_MIN_CHARS = 200;
/**
 * Decide whether a single reply counts as the turn's final answer rather
 * than an interim acknowledgement. Returns true when ANY of these hold:
 *
 *   - `input.done === true`
 *   - `input.disableNotification` is falsy
 *   - `input.text` is a string of at least FINAL_ANSWER_MIN_CHARS characters
 *
 * A missing or non-string `text` is treated as length 0 instead of throwing,
 * so a reply sent with notifications disabled and no text is classified as
 * interim.
 *
 * @param {{ text?: string, disableNotification?: boolean, done?: boolean }} input
 * @returns {boolean} True when the reply should be treated as the final answer.
 */
function isFinalAnswerReply(input) {
  if (input.done === true)
    return true;
  if (!input.disableNotification)
    return true;
  const textLength = typeof input.text === "string" ? input.text.length : 0;
  return textLength >= FINAL_ANSWER_MIN_CHARS;
}
|
|
37386
37399
|
|
|
37387
37400
|
// turn-flush-safety.ts
|
|
37388
37401
|
var SILENT_MARKERS = new Set(["NO_REPLY", "HEARTBEAT_OK"]);
|
|
@@ -48003,11 +48016,11 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
|
|
|
48003
48016
|
}
|
|
48004
48017
|
|
|
48005
48018
|
// ../src/build-info.ts
|
|
48006
|
-
var VERSION = "0.13.
|
|
48007
|
-
var COMMIT_SHA = "
|
|
48008
|
-
var COMMIT_DATE = "2026-05-
|
|
48019
|
+
var VERSION = "0.13.13";
|
|
48020
|
+
var COMMIT_SHA = "dc583d57";
|
|
48021
|
+
var COMMIT_DATE = "2026-05-22T22:02:14+10:00";
|
|
48009
48022
|
var LATEST_PR = null;
|
|
48010
|
-
var COMMITS_AHEAD_OF_TAG =
|
|
48023
|
+
var COMMITS_AHEAD_OF_TAG = 3;
|
|
48011
48024
|
|
|
48012
48025
|
// gateway/boot-version.ts
|
|
48013
48026
|
function formatRelativeAgo(iso) {
|
|
@@ -50422,6 +50435,7 @@ async function executeUpdateChecklist(args) {
|
|
|
50422
50435
|
return { content: [{ type: "text", text: `checklist updated (id: ${message_id})` }] };
|
|
50423
50436
|
}
|
|
50424
50437
|
async function executeReply(args) {
|
|
50438
|
+
const turn = currentTurn;
|
|
50425
50439
|
const chat_id = args.chat_id;
|
|
50426
50440
|
if (!chat_id)
|
|
50427
50441
|
throw new Error("reply: chat_id is required");
|
|
@@ -50707,6 +50721,9 @@ ${url}`;
|
|
|
50707
50721
|
process.stderr.write(`telegram gateway: reply: endStatusReaction hook threw: ${err}
|
|
50708
50722
|
`);
|
|
50709
50723
|
}
|
|
50724
|
+
if (turn != null && isFinalAnswerReply({ text: rawText, disableNotification })) {
|
|
50725
|
+
turn.finalAnswerDelivered = true;
|
|
50726
|
+
}
|
|
50710
50727
|
}
|
|
50711
50728
|
process.stderr.write(`telegram channel: reply: finalized chatId=${chat_id} messageIds=[${sentIds.join(",")}] chunks=${chunks.length}
|
|
50712
50729
|
`);
|
|
@@ -50716,6 +50733,7 @@ ${url}`;
|
|
|
50716
50733
|
return { content: [{ type: "text", text: result }] };
|
|
50717
50734
|
}
|
|
50718
50735
|
async function executeStreamReply(args) {
|
|
50736
|
+
const turn = currentTurn;
|
|
50719
50737
|
if (!args.chat_id)
|
|
50720
50738
|
throw new Error("stream_reply: chat_id is required");
|
|
50721
50739
|
if (args.text == null || args.text === "")
|
|
@@ -50818,6 +50836,13 @@ async function executeStreamReply(args) {
|
|
|
50818
50836
|
const sThreadId = args.message_thread_id != null ? Number(args.message_thread_id) : undefined;
|
|
50819
50837
|
outboundDedup.record(sChatId, sThreadId, args.text, Date.now());
|
|
50820
50838
|
}
|
|
50839
|
+
if (turn != null && isFinalAnswerReply({
|
|
50840
|
+
text: args.text ?? "",
|
|
50841
|
+
disableNotification: args.disable_notification === true,
|
|
50842
|
+
done: args.done === true
|
|
50843
|
+
})) {
|
|
50844
|
+
turn.finalAnswerDelivered = true;
|
|
50845
|
+
}
|
|
50821
50846
|
return { content: [{ type: "text", text: `${result.status} (id: ${result.messageId ?? "pending"})` }] };
|
|
50822
50847
|
}
|
|
50823
50848
|
async function executeProgressUpdate(args) {
|
|
@@ -51569,6 +51594,7 @@ function handleSessionEvent(ev) {
|
|
|
51569
51594
|
startedAt,
|
|
51570
51595
|
gatewayReceiveAt: startedAt,
|
|
51571
51596
|
replyCalled: false,
|
|
51597
|
+
finalAnswerDelivered: false,
|
|
51572
51598
|
capturedText: [],
|
|
51573
51599
|
orphanedReplyTimeoutId: null,
|
|
51574
51600
|
registryKey: null,
|
|
@@ -51868,6 +51894,7 @@ function handleSessionEvent(ev) {
|
|
|
51868
51894
|
const backstopChatId = chatId;
|
|
51869
51895
|
const backstopThreadId = threadId;
|
|
51870
51896
|
const backstopCtrl = ctrl;
|
|
51897
|
+
turn.finalAnswerDelivered = true;
|
|
51871
51898
|
const cardTakeover = progressDriver?.takeOverCard({
|
|
51872
51899
|
chatId: backstopChatId,
|
|
51873
51900
|
threadId: backstopThreadId != null ? String(backstopThreadId) : undefined
|
|
@@ -52006,8 +52033,8 @@ function handleSessionEvent(ev) {
|
|
|
52006
52033
|
longest_silent_gap_ms: outboundMetrics.longestOutboundGapMs,
|
|
52007
52034
|
ended_via: outboundMetrics.outboundCount > 0 ? "reply" : "silent"
|
|
52008
52035
|
});
|
|
52009
|
-
if (
|
|
52010
|
-
const silentEnd =
|
|
52036
|
+
if (turn.finalAnswerDelivered === false) {
|
|
52037
|
+
const silentEnd = recordUndeliveredTurnEnd({
|
|
52011
52038
|
chatId,
|
|
52012
52039
|
threadId: threadId ?? null,
|
|
52013
52040
|
turnKey: tKey
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* final-answer-detect.ts — #1664 "did this reply deliver the final answer?"
|
|
3
|
+
*
|
|
4
|
+
* Background. An agent often ends a turn with its real answer as plain
|
|
5
|
+
* assistant transcript text instead of a `reply` / `stream_reply` tool
|
|
6
|
+
* call. The gateway renders that transcript as a live Telegram draft
|
|
7
|
+
* (`sendMessageDraft`) and, at turn_end, retracts the draft — so the
|
|
8
|
+
* answer is never finalized and the user watches it vanish (#1664).
|
|
9
|
+
*
|
|
10
|
+
* The gateway's `replyCalled` flag flips on the FIRST reply / stream_reply
|
|
11
|
+
* tool use and stays true for the rest of the turn. It cannot distinguish
|
|
12
|
+
* "the model sent an interim ack" from "the model sent its real answer" —
|
|
13
|
+
* both set `replyCalled`. The silent-end re-prompt safety net needs a
|
|
14
|
+
* finer signal: it must engage when a turn ended with only an interim
|
|
15
|
+
* ack and the real answer left as transcript text.
|
|
16
|
+
*
|
|
17
|
+
* This module is that finer signal — a pure predicate the gateway calls
|
|
18
|
+
* for each reply that lands. A turn whose every reply was classified
|
|
19
|
+
* "interim" ends with `CurrentTurn.finalAnswerDelivered === false`, which
|
|
20
|
+
* triggers the re-prompt; a turn with at least one "final" reply does not.
|
|
21
|
+
*
|
|
22
|
+
* Keeping the policy in one unit-testable function is the point — the
|
|
23
|
+
* gateway is a multi-thousand-line module that's expensive to import in a
|
|
24
|
+
* test. See `telegram-plugin/tests/final-answer-detect.test.ts`.
|
|
25
|
+
*
|
|
26
|
+
* The fix re-prompts the model; it never materializes the draft into a
|
|
27
|
+
* message (`reference/principles.md`: the model communicates, the
|
|
28
|
+
* framework is the safety net). So a false "interim" classification is
|
|
29
|
+
* cheap (one extra re-prompt) and a false "final" classification is the
|
|
30
|
+
* dangerous one (a real answer left undelivered) — the length backstop
|
|
31
|
+
* exists to make the dangerous miss rare.
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Length backstop for the final-answer classification. The pacing
|
|
36
|
+
* contract (`docs/telegram-style.md`) says interim updates pass
|
|
37
|
+
* `disable_notification: true` and the final answer omits it — so a
|
|
38
|
+
* notification-bearing reply is the primary "final answer" signal. But a
|
|
39
|
+
* model that mis-marks a genuinely substantive reply as interim
|
|
40
|
+
* (`disable_notification: true` on what is really the answer) would
|
|
41
|
+
* otherwise leave the turn looking undelivered. Any reply at or above
|
|
42
|
+
* this many characters therefore ALSO counts as the final answer,
|
|
43
|
+
* regardless of the notification flag. 200 chars is comfortably longer
|
|
44
|
+
* than a typical interim ack ("on it", "looking into that…", "give me a
|
|
45
|
+
* sec") and short enough that a real answer almost always clears it.
|
|
46
|
+
*/
|
|
47
|
+
export const FINAL_ANSWER_MIN_CHARS = 200
|
|
48
|
+
|
|
49
|
+
export interface FinalAnswerReplyInput {
|
|
50
|
+
/** The reply text the model sent (the model's own answer text, before
|
|
51
|
+
* any HTML conversion or Telegraph-link substitution). */
|
|
52
|
+
text: string
|
|
53
|
+
/** The `disable_notification` argument the reply tool was called with.
|
|
54
|
+
* `true` is the pacing contract's "interim update" marker; the final
|
|
55
|
+
* answer omits it (effectively `false`). */
|
|
56
|
+
disableNotification: boolean
|
|
57
|
+
/** For `stream_reply` only: whether this call carried `done: true`. A
|
|
58
|
+
* `done: true` call explicitly closes the stream and IS the final
|
|
59
|
+
* answer by definition. Pass `false` for the plain `reply` tool. */
|
|
60
|
+
done?: boolean
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Pure predicate: did this reply deliver the turn's final answer (as
|
|
65
|
+
* opposed to an interim ack)? `true` if ANY of:
|
|
66
|
+
*
|
|
67
|
+
* - `done === true` — a `stream_reply` terminal call; the model
|
|
68
|
+
* explicitly closed the stream, so this is the final answer.
|
|
69
|
+
* - `disableNotification === false` — the pacing contract's explicit
|
|
70
|
+
* "final answer" signal (interim updates set it `true`).
|
|
71
|
+
* - `text.length >= FINAL_ANSWER_MIN_CHARS` — the length backstop for
|
|
72
|
+
* a substantive answer mis-marked as interim.
|
|
73
|
+
*
|
|
74
|
+
* The gateway ORs this across every reply in a turn; once one reply
|
|
75
|
+
* qualifies, `CurrentTurn.finalAnswerDelivered` latches true and the
|
|
76
|
+
* silent-end re-prompt will not engage for that turn.
|
|
77
|
+
*/
|
|
78
|
+
export function isFinalAnswerReply(input: FinalAnswerReplyInput): boolean {
  // A terminal stream_reply call is a final answer regardless of the other fields.
  if (input.done === true) return true
  // A notification-bearing reply is the primary "final answer" signal.
  if (!input.disableNotification) return true
  // Length backstop. Callers that pass an unchecked tool argument can hand in
  // an undefined text at runtime despite the declared type; count that as
  // length 0 (interim) instead of throwing on `.length`.
  const textLength = typeof input.text === 'string' ? input.text.length : 0
  return textLength >= FINAL_ANSWER_MIN_CHARS
}
|
|
@@ -76,7 +76,8 @@ import {
|
|
|
76
76
|
import { emitRuntimeMetric } from '../runtime-metrics.js'
|
|
77
77
|
import { classifyInbound } from '../inbound-classifier.js'
|
|
78
78
|
import * as silencePoke from '../silence-poke.js'
|
|
79
|
-
import { writeSilentEndState, clearSilentEndState,
|
|
79
|
+
import { writeSilentEndState, clearSilentEndState, recordUndeliveredTurnEnd } from '../silent-end.js'
|
|
80
|
+
import { isFinalAnswerReply } from '../final-answer-detect.js'
|
|
80
81
|
import { createAnswerStream, type AnswerStreamHandle } from '../answer-stream.js'
|
|
81
82
|
import { type SessionEvent } from '../session-tail.js'
|
|
82
83
|
import {
|
|
@@ -1191,6 +1192,19 @@ type CurrentTurn = {
|
|
|
1191
1192
|
startedAt: number
|
|
1192
1193
|
gatewayReceiveAt: number
|
|
1193
1194
|
replyCalled: boolean
|
|
1195
|
+
// #1664 — whether the model has delivered its *final answer* this turn
|
|
1196
|
+
// (as opposed to only an interim ack). `replyCalled` flips on the first
|
|
1197
|
+
// reply / stream_reply tool_use and stays true for the rest of the turn,
|
|
1198
|
+
// so it cannot tell "ack only" from "ack + real answer". This flag is the
|
|
1199
|
+
// finer signal the silent-end re-prompt needs: it is set only when a reply
|
|
1200
|
+
// actually lands AND `isFinalAnswerReply` (final-answer-detect.ts)
|
|
1201
|
+
// classifies it as the final answer — notification-bearing, or long
|
|
1202
|
+
// enough to be substantive, or a stream_reply done=true — OR when the
|
|
1203
|
+
// turn-flush safety net legitimately emits the model's terminal text. A
|
|
1204
|
+
// turn that ends with this still `false` triggers the silent-end re-prompt
|
|
1205
|
+
// even though `replyCalled` is true — the #1664 case where the real answer
|
|
1206
|
+
// ended up as plain transcript text rendered into an ephemeral draft.
|
|
1207
|
+
finalAnswerDelivered: boolean
|
|
1194
1208
|
capturedText: string[]
|
|
1195
1209
|
orphanedReplyTimeoutId: ReturnType<typeof setTimeout> | null
|
|
1196
1210
|
registryKey: string | null
|
|
@@ -4066,6 +4080,13 @@ async function executeUpdateChecklist(args: Record<string, unknown>): Promise<{
|
|
|
4066
4080
|
}
|
|
4067
4081
|
|
|
4068
4082
|
async function executeReply(args: Record<string, unknown>): Promise<{ content: Array<{ type: string; text: string }> }> {
|
|
4083
|
+
// #1664 — pin the turn this reply belongs to at entry. The
|
|
4084
|
+
// finalAnswerDelivered write near the end of this function runs after
|
|
4085
|
+
// several awaits; turn-pinning (the #1067 pattern used across the
|
|
4086
|
+
// gateway) keeps the write attributed to THIS turn rather than reading
|
|
4087
|
+
// module-scope currentTurn, which a future refactor could let roll over
|
|
4088
|
+
// mid-call.
|
|
4089
|
+
const turn = currentTurn
|
|
4069
4090
|
const chat_id = args.chat_id as string
|
|
4070
4091
|
if (!chat_id) throw new Error('reply: chat_id is required')
|
|
4071
4092
|
const rawText = args.text as string | undefined
|
|
@@ -4488,6 +4509,19 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
|
|
|
4488
4509
|
} catch (err) {
|
|
4489
4510
|
process.stderr.write(`telegram gateway: reply: endStatusReaction hook threw: ${err}\n`)
|
|
4490
4511
|
}
|
|
4512
|
+
// #1664 — mark the turn's final answer as delivered when this reply
|
|
4513
|
+
// looks like the real answer rather than an interim ack. The
|
|
4514
|
+
// classification (notification-bearing OR substantive length) lives
|
|
4515
|
+
// in `isFinalAnswerReply`. Without this, a turn that ack'd then ended
|
|
4516
|
+
// with the real answer as plain transcript text (#1664) would look
|
|
4517
|
+
// "delivered" because replyCalled is true — and the silent-end
|
|
4518
|
+
// re-prompt would never engage. `rawText` is the model's own answer
|
|
4519
|
+
// text, measured before HTML conversion / Telegraph-link
|
|
4520
|
+
// substitution. Writes `turn` (pinned at executeReply entry) so the
|
|
4521
|
+
// flag always lands on the turn this reply belongs to.
|
|
4522
|
+
if (turn != null && isFinalAnswerReply({ text: rawText, disableNotification })) {
|
|
4523
|
+
turn.finalAnswerDelivered = true
|
|
4524
|
+
}
|
|
4491
4525
|
}
|
|
4492
4526
|
|
|
4493
4527
|
process.stderr.write(`telegram channel: reply: finalized chatId=${chat_id} messageIds=[${sentIds.join(',')}] chunks=${chunks.length}\n`)
|
|
@@ -4501,6 +4535,8 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
|
|
|
4501
4535
|
}
|
|
4502
4536
|
|
|
4503
4537
|
async function executeStreamReply(args: Record<string, unknown>): Promise<unknown> {
|
|
4538
|
+
// #1664 — pin the turn at entry; see executeReply for the rationale.
|
|
4539
|
+
const turn = currentTurn
|
|
4504
4540
|
if (!args.chat_id) throw new Error('stream_reply: chat_id is required')
|
|
4505
4541
|
if (args.text == null || args.text === '') throw new Error('stream_reply: text is required and cannot be empty')
|
|
4506
4542
|
|
|
@@ -4680,6 +4716,23 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
|
|
|
4680
4716
|
const sThreadId = args.message_thread_id != null ? Number(args.message_thread_id) : undefined
|
|
4681
4717
|
outboundDedup.record(sChatId, sThreadId, args.text as string, Date.now())
|
|
4682
4718
|
}
|
|
4719
|
+
// #1664 — mark the turn's final answer as delivered. For stream_reply a
|
|
4720
|
+
// call with done=true IS the final answer by definition (the model
|
|
4721
|
+
// explicitly closed the stream). A non-terminal stream_reply chunk also
|
|
4722
|
+
// counts when it carries the final-answer signals — notification-bearing
|
|
4723
|
+
// OR substantive length — via the same `isFinalAnswerReply` predicate
|
|
4724
|
+
// executeReply uses. See the CurrentTurn.finalAnswerDelivered doc-comment
|
|
4725
|
+
// for why replyCalled is not a sufficient signal here.
|
|
4726
|
+
if (
|
|
4727
|
+
turn != null &&
|
|
4728
|
+
isFinalAnswerReply({
|
|
4729
|
+
text: (args.text as string | undefined) ?? '',
|
|
4730
|
+
disableNotification: args.disable_notification === true,
|
|
4731
|
+
done: args.done === true,
|
|
4732
|
+
})
|
|
4733
|
+
) {
|
|
4734
|
+
turn.finalAnswerDelivered = true
|
|
4735
|
+
}
|
|
4683
4736
|
return { content: [{ type: 'text', text: `${result.status} (id: ${result.messageId ?? 'pending'})` }] }
|
|
4684
4737
|
}
|
|
4685
4738
|
|
|
@@ -5697,6 +5750,7 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
5697
5750
|
startedAt,
|
|
5698
5751
|
gatewayReceiveAt: startedAt,
|
|
5699
5752
|
replyCalled: false,
|
|
5753
|
+
finalAnswerDelivered: false,
|
|
5700
5754
|
capturedText: [],
|
|
5701
5755
|
orphanedReplyTimeoutId: null,
|
|
5702
5756
|
registryKey: null,
|
|
@@ -5815,6 +5869,22 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
5815
5869
|
// #1067: snapshot at entry. The answer-stream creation closures
|
|
5816
5870
|
// below also read `turn` instead of currentTurn so they pin to
|
|
5817
5871
|
// this turn's chat for the stream's lifetime.
|
|
5872
|
+
//
|
|
5873
|
+
// #1664 ordering note: a `text` event can arrive AFTER turn_end has
|
|
5874
|
+
// nulled currentTurn (the issue observed `answer_lane_update
|
|
5875
|
+
// transport:"draft"` firing post-turn_end). Such a late event is
|
|
5876
|
+
// dropped here by the `turn != null` guard — it is NOT folded back
|
|
5877
|
+
// into the just-ended turn. That is deliberate and safe: by the
|
|
5878
|
+
// time this fires, the turn atom has been handed to
|
|
5879
|
+
// endCurrentTurnAtomic and turn_end has already run its flush /
|
|
5880
|
+
// silent-end decision; re-opening a closed turn (re-creating an
|
|
5881
|
+
// answer stream, re-evaluating decideTurnFlush) would be a large,
|
|
5882
|
+
// race-prone change. The #1664 safety net does not depend on
|
|
5883
|
+
// catching the late text: a turn whose real answer lost the race
|
|
5884
|
+
// ends with finalAnswerDelivered=false, so recordUndeliveredTurnEnd
|
|
5885
|
+
// engages the Stop-hook re-prompt and the model re-delivers the
|
|
5886
|
+
// answer through the reply tool. The dropped draft text is
|
|
5887
|
+
// recovered by re-prompt, not by post-hoc materialization.
|
|
5818
5888
|
const turn = currentTurn
|
|
5819
5889
|
if (turn != null) {
|
|
5820
5890
|
turn.capturedText.push(ev.text)
|
|
@@ -6181,6 +6251,18 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
6181
6251
|
const backstopThreadId = threadId
|
|
6182
6252
|
const backstopCtrl = ctrl
|
|
6183
6253
|
|
|
6254
|
+
// #1664 — turn-flush only fires when !replyCalled (decideTurnFlush
|
|
6255
|
+
// returns 'reply-called' otherwise). It legitimately delivers the
|
|
6256
|
+
// model's terminal text as the answer, so the turn IS answered.
|
|
6257
|
+
// Mark it now so the early-return below skips the silent-end
|
|
6258
|
+
// re-prompt for a turn whose answer is genuinely on its way out.
|
|
6259
|
+
// (The IIFE that actually sends runs after this branch's `return`;
|
|
6260
|
+
// since the silent-end block is on the sibling reply-called path
|
|
6261
|
+
// that this branch never reaches, this set is belt-and-braces —
|
|
6262
|
+
// it keeps the captured `turn` atom internally consistent for any
|
|
6263
|
+
// future reader.)
|
|
6264
|
+
turn.finalAnswerDelivered = true
|
|
6265
|
+
|
|
6184
6266
|
// #654 deterministic double-message fix. Hand off the pinned
|
|
6185
6267
|
// progress card BEFORE state reset so the driver doesn't keep
|
|
6186
6268
|
// editing it while turn-flush is rewriting it with the answer.
|
|
@@ -6413,17 +6495,31 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
6413
6495
|
longest_silent_gap_ms: outboundMetrics.longestOutboundGapMs,
|
|
6414
6496
|
ended_via: outboundMetrics.outboundCount > 0 ? 'reply' : 'silent',
|
|
6415
6497
|
})
|
|
6416
|
-
// #1122 PR4 / #1161: deterministic
|
|
6417
|
-
// silent-marker path above for the rationale).
|
|
6418
|
-
// - first
|
|
6419
|
-
// file so the Stop hook (silent-end-interrupt-stop.mjs)
|
|
6420
|
-
// blocks the session-end and re-prompts the agent to
|
|
6498
|
+
// #1122 PR4 / #1161 / #1664: deterministic undelivered-turn
|
|
6499
|
+
// handling (see the silent-marker path above for the rationale).
|
|
6500
|
+
// - first undelivered turn-end → recordSilentTurnEnd writes the
|
|
6501
|
+
// state file so the Stop hook (silent-end-interrupt-stop.mjs)
|
|
6502
|
+
// blocks the session-end and re-prompts the agent to deliver.
|
|
6421
6503
|
// - the Stop-hook re-prompt is already spent and the agent is
|
|
6422
|
-
// STILL
|
|
6423
|
-
// deliver a user-facing fallback so the turn
|
|
6424
|
-
// vanishes (the user otherwise only sees the card
|
|
6425
|
-
|
|
6426
|
-
|
|
6504
|
+
// STILL undelivered → recordSilentTurnEnd returns
|
|
6505
|
+
// exhausted:true; deliver a user-facing fallback so the turn
|
|
6506
|
+
// never just vanishes (the user otherwise only sees the card
|
|
6507
|
+
// disappear).
|
|
6508
|
+
//
|
|
6509
|
+
// #1664 — the trigger is "no final answer delivered", not "zero
|
|
6510
|
+
// outbound". `outboundCount === 0` is now just the special case
|
|
6511
|
+
// where nothing landed at all. The added case: the model sent an
|
|
6512
|
+
// interim ack via reply/stream_reply (outboundCount > 0,
|
|
6513
|
+
// replyCalled = true) but ended the turn with its real answer as
|
|
6514
|
+
// plain transcript text — rendered into an ephemeral answer-lane
|
|
6515
|
+
// draft and retracted at turn_end, never finalized.
|
|
6516
|
+
// finalAnswerDelivered stays false there, so the re-prompt engages and the
|
|
6517
|
+
// model re-delivers the answer through the reply tool. NO_REPLY /
|
|
6518
|
+
// HEARTBEAT_OK silent-marker turns return earlier and never reach
|
|
6519
|
+
// this path. The turn-flush 'flush' branch also returns earlier
|
|
6520
|
+
// (and sets finalAnswerDelivered=true defensively).
|
|
6521
|
+
if (turn.finalAnswerDelivered === false) {
|
|
6522
|
+
const silentEnd = recordUndeliveredTurnEnd({
|
|
6427
6523
|
chatId,
|
|
6428
6524
|
threadId: threadId ?? null,
|
|
6429
6525
|
turnKey: tKey,
|
|
@@ -2,12 +2,20 @@
|
|
|
2
2
|
/**
|
|
3
3
|
* Stop hook — auto-interrupt for silent-end turns.
|
|
4
4
|
*
|
|
5
|
-
* When a Claude Code session ends without the agent
|
|
6
|
-
*
|
|
5
|
+
* When a Claude Code session ends without the agent delivering a final
|
|
6
|
+
* answer to the user, the Telegram gateway writes a state file at
|
|
7
7
|
* $TELEGRAM_STATE_DIR/silent-end-pending.json. This hook reads that file and,
|
|
8
8
|
* if a first-time silent-end is detected (retryCount === 0), returns a
|
|
9
9
|
* decision:block to re-prompt the agent instead of letting the session close.
|
|
10
10
|
*
|
|
11
|
+
* #1664 — "no final answer delivered" covers two cases: (a) the turn ended
|
|
12
|
+
* with zero outbound (the original case), and (b) the model sent only an
|
|
13
|
+
* interim ack via reply/stream_reply but left its real answer as plain
|
|
14
|
+
* transcript text, which the gateway renders into an ephemeral draft and
|
|
15
|
+
* never finalizes. The re-prompt below tells the model to send its answer
|
|
16
|
+
* through the reply tool, or reply NO_REPLY if it genuinely has nothing to
|
|
17
|
+
* add or has already delivered its answer.
|
|
18
|
+
*
|
|
11
19
|
* On the second silent-end (retryCount >= MAX_RETRIES), the hook allows the
|
|
12
20
|
* stop. The gateway's turn-end path (recordSilentTurnEnd in silent-end.ts)
|
|
13
21
|
* detects the exhausted re-prompt and delivers a user-facing fallback
|
|
@@ -104,9 +112,13 @@ function main() {
|
|
|
104
112
|
JSON.stringify({
|
|
105
113
|
decision: 'block',
|
|
106
114
|
reason:
|
|
107
|
-
'
|
|
108
|
-
'
|
|
109
|
-
'
|
|
115
|
+
'This turn is ending without your final answer reaching the user. ' +
|
|
116
|
+
'If you wrote an answer as plain text (not via a tool), the user ' +
|
|
117
|
+
'cannot see it — only text sent through the reply tool is delivered. ' +
|
|
118
|
+
'Send your final answer now by calling mcp__switchroom-telegram__reply ' +
|
|
119
|
+
'(or mcp__switchroom-telegram__stream_reply with done=true). ' +
|
|
120
|
+
'If your final answer has already reached the user, or you ' +
|
|
121
|
+
'intentionally have nothing to add, reply with exactly NO_REPLY.',
|
|
110
122
|
}),
|
|
111
123
|
)
|
|
112
124
|
process.exit(0)
|
|
@@ -182,22 +182,39 @@ export function readSilentEndState(deps?: SilentEndDeps): SilentEndState | null
|
|
|
182
182
|
}
|
|
183
183
|
|
|
184
184
|
/**
|
|
185
|
-
* Record a user-message turn that ended
|
|
186
|
-
* report whether the deterministic re-prompt has been
|
|
187
|
-
* the gateway's single entry point for the main
|
|
185
|
+
* Record a user-message turn that ended WITHOUT the model delivering a
|
|
186
|
+
* final answer, and report whether the deterministic re-prompt has been
|
|
187
|
+
* exhausted. This is the gateway's single entry point for the main
|
|
188
|
+
* turn-end path.
|
|
188
189
|
*
|
|
189
|
-
*
|
|
190
|
+
* #1664 — the trigger generalized from "zero outbound" to "no final
|
|
191
|
+
* answer delivered". Two cases reach here now:
|
|
192
|
+
* 1. Zero outbound — the turn ended with nothing sent at all (the
|
|
193
|
+
* original #1122/#1161 silent-end case).
|
|
194
|
+
* 2. Interim-ack only — the model sent an ack via reply/stream_reply
|
|
195
|
+
* but ended the turn with its real answer as plain transcript text
|
|
196
|
+
* (rendered into an ephemeral answer-lane draft that gets retracted
|
|
197
|
+
* at turn_end, never finalized). The gateway tracks this via
|
|
198
|
+
* `CurrentTurn.finalAnswerDelivered`; case 1 is just the subset
|
|
199
|
+
* where that flag is false because nothing landed.
|
|
200
|
+
* In both cases the model still owes the user an answer, so the same
|
|
201
|
+
* re-prompt safety net applies — the framework re-prompts; the model
|
|
202
|
+
* re-delivers via the reply tool (the framework never materializes a
|
|
203
|
+
* message from the draft — see `reference/principles.md`).
|
|
204
|
+
*
|
|
205
|
+
* - First undelivered turn-end (no prior state, or prior `retryCount`
|
|
190
206
|
* still below `SILENT_END_MAX_RETRIES`) → writes the state file via
|
|
191
207
|
* `writeSilentEndState`, so `silent-end-interrupt-stop.mjs` blocks
|
|
192
208
|
* the stop and re-prompts the agent. Returns `{ exhausted: false }`.
|
|
193
209
|
*
|
|
194
|
-
* -
|
|
195
|
-
* `retryCount >= SILENT_END_MAX_RETRIES` → the Stop
|
|
196
|
-
* spent its re-prompt and the agent is STILL
|
|
197
|
-
* failed. Clears the state file (so the
|
|
198
|
-
* finds nothing pending and allows the
|
|
199
|
-
* `{ exhausted: true }` — the caller MUST
|
|
200
|
-
* fallback so the turn never just
|
|
210
|
+
* - An undelivered turn-end where the prior state for the SAME turn
|
|
211
|
+
* already shows `retryCount >= SILENT_END_MAX_RETRIES` → the Stop
|
|
212
|
+
* hook already spent its re-prompt and the agent is STILL
|
|
213
|
+
* undelivered. Recovery has failed. Clears the state file (so the
|
|
214
|
+
* Stop hook on this final turn finds nothing pending and allows the
|
|
215
|
+
* stop cleanly) and returns `{ exhausted: true }` — the caller MUST
|
|
216
|
+
* then deliver a user-facing fallback so the turn never just
|
|
217
|
+
* vanishes (#1161).
|
|
201
218
|
*
|
|
202
219
|
* Chat-less autonomous wakeup turns never reach here: the gateway only
|
|
203
220
|
* creates a `currentTurn` (and therefore only runs a turn-end handler)
|
|
@@ -228,3 +245,12 @@ export function recordSilentTurnEnd(
|
|
|
228
245
|
writeSilentEndState(args, deps)
|
|
229
246
|
return { exhausted: false }
|
|
230
247
|
}
|
|
248
|
+
|
|
249
|
+
/**
|
|
250
|
+
* #1664 — semantic alias for `recordSilentTurnEnd`. The trigger is now
|
|
251
|
+
* "no final answer delivered", of which "zero outbound" is one case; new
|
|
252
|
+
* callsites should prefer this name so the intent reads correctly. The
|
|
253
|
+
* behaviour, retry semantics, and `{exhausted}` contract are identical —
|
|
254
|
+
* `recordSilentTurnEnd` is kept for the existing callers and tests.
|
|
255
|
+
*/
|
|
256
|
+
export const recordUndeliveredTurnEnd = recordSilentTurnEnd
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit coverage for the #1664 final-answer detection predicate.
|
|
3
|
+
*
|
|
4
|
+
* `isFinalAnswerReply` is the finer signal the silent-end re-prompt needs:
|
|
5
|
+
* the gateway's `replyCalled` flag flips on the first reply / stream_reply
|
|
6
|
+
* tool use and cannot tell an interim ack from the real answer. This
|
|
7
|
+
* predicate classifies each reply so a turn whose every reply was "interim"
|
|
8
|
+
* (and whose real answer ended up as plain transcript text) ends with
|
|
9
|
+
* `finalAnswerDelivered === false` and triggers the re-prompt — the #1664
|
|
10
|
+
* bug (streamed answers rendered to a draft, retracted at turn_end, lost).
|
|
11
|
+
*
|
|
12
|
+
* These tests pin the pure predicate. The gateway wires it into
|
|
13
|
+
* executeReply / executeStreamReply (covered by the gateway integration
|
|
14
|
+
* surface); pinning the policy here keeps it auditable without importing
|
|
15
|
+
* the multi-thousand-line gateway module.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { describe, it, expect } from 'vitest'
|
|
19
|
+
import { isFinalAnswerReply, FINAL_ANSWER_MIN_CHARS } from '../final-answer-detect.js'
|
|
20
|
+
|
|
21
|
+
describe('isFinalAnswerReply — #1664 final-answer classification', () => {
|
|
22
|
+
it('classifies a notification-bearing reply as the final answer', () => {
|
|
23
|
+
// disable_notification:false is the pacing contract's "final answer"
|
|
24
|
+
// signal — interim updates pass disable_notification:true.
|
|
25
|
+
expect(
|
|
26
|
+
isFinalAnswerReply({ text: 'short answer', disableNotification: false }),
|
|
27
|
+
).toBe(true)
|
|
28
|
+
})
|
|
29
|
+
|
|
30
|
+
it('classifies a short interim ack (disable_notification:true) as NOT final', () => {
|
|
31
|
+
expect(
|
|
32
|
+
isFinalAnswerReply({ text: 'on it…', disableNotification: true }),
|
|
33
|
+
).toBe(false)
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
it('length backstop: a long reply mis-marked interim still counts as final', () => {
|
|
37
|
+
const longText = 'x'.repeat(FINAL_ANSWER_MIN_CHARS)
|
|
38
|
+
expect(
|
|
39
|
+
isFinalAnswerReply({ text: longText, disableNotification: true }),
|
|
40
|
+
).toBe(true)
|
|
41
|
+
})
|
|
42
|
+
|
|
43
|
+
it('length backstop is inclusive at exactly FINAL_ANSWER_MIN_CHARS', () => {
|
|
44
|
+
expect(
|
|
45
|
+
isFinalAnswerReply({
|
|
46
|
+
text: 'x'.repeat(FINAL_ANSWER_MIN_CHARS),
|
|
47
|
+
disableNotification: true,
|
|
48
|
+
}),
|
|
49
|
+
).toBe(true)
|
|
50
|
+
// One char under the threshold and marked interim → still interim.
|
|
51
|
+
expect(
|
|
52
|
+
isFinalAnswerReply({
|
|
53
|
+
text: 'x'.repeat(FINAL_ANSWER_MIN_CHARS - 1),
|
|
54
|
+
disableNotification: true,
|
|
55
|
+
}),
|
|
56
|
+
).toBe(false)
|
|
57
|
+
})
|
|
58
|
+
|
|
59
|
+
it('stream_reply done=true is always the final answer, even short + interim', () => {
|
|
60
|
+
// A done=true call explicitly closes the stream — it IS the answer,
|
|
61
|
+
// regardless of length or the notification flag.
|
|
62
|
+
expect(
|
|
63
|
+
isFinalAnswerReply({ text: 'ok', disableNotification: true, done: true }),
|
|
64
|
+
).toBe(true)
|
|
65
|
+
})
|
|
66
|
+
|
|
67
|
+
it('a non-terminal stream_reply chunk (done=false) is classified like a plain reply', () => {
|
|
68
|
+
// Short interim chunk → not final.
|
|
69
|
+
expect(
|
|
70
|
+
isFinalAnswerReply({ text: 'thinking…', disableNotification: true, done: false }),
|
|
71
|
+
).toBe(false)
|
|
72
|
+
// Notification-bearing chunk → final.
|
|
73
|
+
expect(
|
|
74
|
+
isFinalAnswerReply({ text: 'here it is', disableNotification: false, done: false }),
|
|
75
|
+
).toBe(true)
|
|
76
|
+
})
|
|
77
|
+
|
|
78
|
+
it('an empty reply marked interim is NOT the final answer', () => {
|
|
79
|
+
expect(
|
|
80
|
+
isFinalAnswerReply({ text: '', disableNotification: true }),
|
|
81
|
+
).toBe(false)
|
|
82
|
+
})
|
|
83
|
+
|
|
84
|
+
it('FINAL_ANSWER_MIN_CHARS is the documented 200-char backstop', () => {
|
|
85
|
+
// Guards the constant against silent drift — the value is referenced
|
|
86
|
+
// in the CurrentTurn doc-comment and the Stop-hook rationale.
|
|
87
|
+
expect(FINAL_ANSWER_MIN_CHARS).toBe(200)
|
|
88
|
+
})
|
|
89
|
+
})
|
|
@@ -8,8 +8,10 @@ import {
|
|
|
8
8
|
clearSilentEndState,
|
|
9
9
|
readSilentEndState,
|
|
10
10
|
recordSilentTurnEnd,
|
|
11
|
+
recordUndeliveredTurnEnd,
|
|
11
12
|
SILENT_END_MAX_RETRIES,
|
|
12
13
|
} from '../silent-end.js'
|
|
14
|
+
import { isFinalAnswerReply } from '../final-answer-detect.js'
|
|
13
15
|
|
|
14
16
|
let stateDir: string
|
|
15
17
|
const ORIG_ENV = process.env.TELEGRAM_STATE_DIR
|
|
@@ -187,6 +189,118 @@ describe('recordSilentTurnEnd — #1161 exhaustion detection', () => {
|
|
|
187
189
|
})
|
|
188
190
|
})
|
|
189
191
|
|
|
192
|
+
describe('recordUndeliveredTurnEnd — #1664 extended trigger', () => {
|
|
193
|
+
it('is the same function as recordSilentTurnEnd (semantic alias)', () => {
|
|
194
|
+
expect(recordUndeliveredTurnEnd).toBe(recordSilentTurnEnd)
|
|
195
|
+
})
|
|
196
|
+
|
|
197
|
+
// The gateway computes `finalAnswerDelivered` by OR-ing isFinalAnswerReply
|
|
198
|
+
// across every reply landed this turn, then engages the re-prompt iff the
|
|
199
|
+
// flag is still false at turn_end. These tests reproduce that exact
|
|
200
|
+
// decision: classify the turn's replies, then call recordUndeliveredTurnEnd
|
|
201
|
+
// only when no reply qualified.
|
|
202
|
+
function simulateTurnEnd(
|
|
203
|
+
replies: Array<{ text: string; disableNotification: boolean; done?: boolean }>,
|
|
204
|
+
turnKey: string,
|
|
205
|
+
): { finalAnswerDelivered: boolean; rePromptEngaged: boolean } {
|
|
206
|
+
const finalAnswerDelivered = replies.some((r) =>
|
|
207
|
+
isFinalAnswerReply(r),
|
|
208
|
+
)
|
|
209
|
+
let rePromptEngaged = false
|
|
210
|
+
if (finalAnswerDelivered === false) {
|
|
211
|
+
recordUndeliveredTurnEnd({ chatId: 'c', threadId: null, turnKey })
|
|
212
|
+
rePromptEngaged = true
|
|
213
|
+
}
|
|
214
|
+
return { finalAnswerDelivered, rePromptEngaged }
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
it('#1664 regression: ack reply + answer-as-transcript → re-prompt fires', () => {
|
|
218
|
+
// The exact #1664 shape: the model sent a short interim ack via the
|
|
219
|
+
// reply tool (disable_notification:true), then ended the turn with its
|
|
220
|
+
// real answer as plain transcript text — which the gateway renders into
|
|
221
|
+
// an ephemeral draft and retracts at turn_end, never finalized. No
|
|
222
|
+
// reply qualified as the final answer, so the turn is undelivered.
|
|
223
|
+
const r = simulateTurnEnd(
|
|
224
|
+
[{ text: 'On it — give me a moment.', disableNotification: true }],
|
|
225
|
+
'c:1664',
|
|
226
|
+
)
|
|
227
|
+
expect(r.finalAnswerDelivered).toBe(false)
|
|
228
|
+
expect(r.rePromptEngaged).toBe(true)
|
|
229
|
+
// State file written so silent-end-interrupt-stop.mjs blocks the stop.
|
|
230
|
+
expect(readSilentEndState()).toMatchObject({ turnKey: 'c:1664', retryCount: 0 })
|
|
231
|
+
})
|
|
232
|
+
|
|
233
|
+
it('a turn with a final-answer reply (notification-bearing) → re-prompt NOT engaged', () => {
|
|
234
|
+
const r = simulateTurnEnd(
|
|
235
|
+
[{ text: 'Here is the answer.', disableNotification: false }],
|
|
236
|
+
'c:final',
|
|
237
|
+
)
|
|
238
|
+
expect(r.finalAnswerDelivered).toBe(true)
|
|
239
|
+
expect(r.rePromptEngaged).toBe(false)
|
|
240
|
+
expect(readSilentEndState()).toBeNull()
|
|
241
|
+
})
|
|
242
|
+
|
|
243
|
+
it('a long reply mis-marked interim → re-prompt NOT engaged (length backstop)', () => {
|
|
244
|
+
const r = simulateTurnEnd(
|
|
245
|
+
[{ text: 'x'.repeat(500), disableNotification: true }],
|
|
246
|
+
'c:long',
|
|
247
|
+
)
|
|
248
|
+
expect(r.finalAnswerDelivered).toBe(true)
|
|
249
|
+
expect(r.rePromptEngaged).toBe(false)
|
|
250
|
+
expect(readSilentEndState()).toBeNull()
|
|
251
|
+
})
|
|
252
|
+
|
|
253
|
+
it('zero-outbound turn → re-prompt still engaged (regression of the original case)', () => {
|
|
254
|
+
// No replies at all — the original #1122 silent-end case is now just
|
|
255
|
+
// the subset of "no final answer delivered" where nothing landed.
|
|
256
|
+
const r = simulateTurnEnd([], 'c:zero')
|
|
257
|
+
expect(r.finalAnswerDelivered).toBe(false)
|
|
258
|
+
expect(r.rePromptEngaged).toBe(true)
|
|
259
|
+
expect(readSilentEndState()).toMatchObject({ turnKey: 'c:zero', retryCount: 0 })
|
|
260
|
+
})
|
|
261
|
+
|
|
262
|
+
it('interim ack followed by a final-answer reply in the same turn → NOT engaged', () => {
|
|
263
|
+
// The model ack'd first then properly delivered — finalAnswerDelivered
|
|
264
|
+
// latches true on the second reply; the turn is answered.
|
|
265
|
+
const r = simulateTurnEnd(
|
|
266
|
+
[
|
|
267
|
+
{ text: 'Looking into it…', disableNotification: true },
|
|
268
|
+
{ text: 'Done — the result is 42.', disableNotification: false },
|
|
269
|
+
],
|
|
270
|
+
'c:ack-then-final',
|
|
271
|
+
)
|
|
272
|
+
expect(r.finalAnswerDelivered).toBe(true)
|
|
273
|
+
expect(r.rePromptEngaged).toBe(false)
|
|
274
|
+
expect(readSilentEndState()).toBeNull()
|
|
275
|
+
})
|
|
276
|
+
|
|
277
|
+
it('stream_reply done=true counts as the final answer → NOT engaged', () => {
|
|
278
|
+
const r = simulateTurnEnd(
|
|
279
|
+
[{ text: 'ok', disableNotification: true, done: true }],
|
|
280
|
+
'c:stream-done',
|
|
281
|
+
)
|
|
282
|
+
expect(r.finalAnswerDelivered).toBe(true)
|
|
283
|
+
expect(r.rePromptEngaged).toBe(false)
|
|
284
|
+
expect(readSilentEndState()).toBeNull()
|
|
285
|
+
})
|
|
286
|
+
|
|
287
|
+
it('exhaustion still applies on the #1664 path after the Stop-hook re-prompt', () => {
|
|
288
|
+
// First undelivered turn-end writes state.
|
|
289
|
+
expect(simulateTurnEnd(
|
|
290
|
+
[{ text: 'one sec', disableNotification: true }],
|
|
291
|
+
'c:exhaust',
|
|
292
|
+
).rePromptEngaged).toBe(true)
|
|
293
|
+
// Stop hook blocks once and bumps retryCount (simulated).
|
|
294
|
+
const path = join(stateDir, 'silent-end-pending.json')
|
|
295
|
+
const s = readSilentEndState()!
|
|
296
|
+
writeFileSync(path, JSON.stringify({ ...s, retryCount: s.retryCount + 1 }))
|
|
297
|
+
// Re-prompted turn STILL ends with only an interim ack → exhausted.
|
|
298
|
+
const second = recordUndeliveredTurnEnd({ chatId: 'c', threadId: null, turnKey: 'c:exhaust' })
|
|
299
|
+
expect(second.exhausted).toBe(true)
|
|
300
|
+
expect(readSilentEndState()).toBeNull()
|
|
301
|
+
})
|
|
302
|
+
})
|
|
303
|
+
|
|
190
304
|
describe('silent-end-interrupt-stop hook — integration', () => {
|
|
191
305
|
const hookPath = join(__dirname, '..', 'hooks', 'silent-end-interrupt-stop.mjs')
|
|
192
306
|
|
|
@@ -222,6 +336,10 @@ describe('silent-end-interrupt-stop hook — integration', () => {
|
|
|
222
336
|
const out = JSON.parse(r.stdout.trim())
|
|
223
337
|
expect(out.decision).toBe('block')
|
|
224
338
|
expect(out.reason).toContain('reply')
|
|
339
|
+
// #1664 — the re-prompt must offer the NO_REPLY escape hatch so a
|
|
340
|
+
// model that already delivered (or intentionally has nothing to add)
|
|
341
|
+
// can end the turn cleanly instead of being forced to re-send.
|
|
342
|
+
expect(out.reason).toContain('NO_REPLY')
|
|
225
343
|
// retryCount must have been incremented to 1
|
|
226
344
|
expect(readSilentEndState()!.retryCount).toBe(1)
|
|
227
345
|
})
|