switchroom 0.13.11 → 0.13.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +6 -51
- package/telegram-plugin/gateway/gateway.ts +5 -47
- package/telegram-plugin/subagent-watcher.ts +13 -20
- package/telegram-plugin/tests/fleet-state-watcher.test.ts +0 -1
- package/telegram-plugin/tests/subagent-registry-bugs.test.ts +1 -3
- package/telegram-plugin/tests/subagent-watcher-env-thresholds.test.ts +0 -1
- package/telegram-plugin/tests/subagent-watcher-parent-marker.test.ts +0 -1
- package/telegram-plugin/tests/subagent-watcher-stall-notification.test.ts +1 -4
- package/telegram-plugin/tests/subagent-watcher-stall-terminal.test.ts +0 -1
- package/telegram-plugin/tests/subagent-watcher.test.ts +15 -5
- package/telegram-plugin/tests/turn-flush-safety.test.ts +29 -81
- package/telegram-plugin/turn-flush-safety.ts +23 -53
package/dist/cli/switchroom.js
CHANGED
|
@@ -47314,8 +47314,8 @@ var {
|
|
|
47314
47314
|
} = import__.default;
|
|
47315
47315
|
|
|
47316
47316
|
// src/build-info.ts
|
|
47317
|
-
var VERSION = "0.13.11";
|
|
47318
|
-
var COMMIT_SHA = "
|
|
47317
|
+
var VERSION = "0.13.12";
|
|
47318
|
+
var COMMIT_SHA = "18363dfb";
|
|
47319
47319
|
|
|
47320
47320
|
// src/cli/agent.ts
|
|
47321
47321
|
init_source();
|
package/package.json
CHANGED
package/telegram-plugin/dist/gateway/gateway.js
CHANGED
|
@@ -40593,28 +40593,12 @@ function isSilentFlushMarker2(text) {
|
|
|
40593
40593
|
}
|
|
40594
40594
|
return SILENT_MARKERS2.has(trimmed.toUpperCase());
|
|
40595
40595
|
}
|
|
40596
|
-
var REPLY_CALLED_TAIL_MIN_CHARS = 40;
|
|
40597
40596
|
function decideTurnFlush(input) {
|
|
40598
40597
|
const flushEnabled = input.flushEnabled !== false;
|
|
40599
40598
|
if (!flushEnabled)
|
|
40600
40599
|
return { kind: "skip", reason: "flag-disabled" };
|
|
40601
|
-
if (input.replyCalled)
|
|
40602
|
-
|
|
40603
|
-
const tail = input.capturedText.slice(tailIdx).join(`
|
|
40604
|
-
`).trim();
|
|
40605
|
-
const minChars = input.replyCalledTailMinChars ?? REPLY_CALLED_TAIL_MIN_CHARS;
|
|
40606
|
-
if (tail.length === 0) {
|
|
40607
|
-
return { kind: "skip", reason: "reply-called" };
|
|
40608
|
-
}
|
|
40609
|
-
if (tail.length < minChars) {
|
|
40610
|
-
return { kind: "skip", reason: "reply-called-no-new-text" };
|
|
40611
|
-
}
|
|
40612
|
-
if (input.chatId == null)
|
|
40613
|
-
return { kind: "skip", reason: "no-inbound-chat" };
|
|
40614
|
-
if (isSilentFlushMarker2(tail))
|
|
40615
|
-
return { kind: "skip", reason: "silent-marker" };
|
|
40616
|
-
return { kind: "flush", text: tail };
|
|
40617
|
-
}
|
|
40600
|
+
if (input.replyCalled)
|
|
40601
|
+
return { kind: "skip", reason: "reply-called" };
|
|
40618
40602
|
if (input.chatId == null)
|
|
40619
40603
|
return { kind: "skip", reason: "no-inbound-chat" };
|
|
40620
40604
|
const joined = input.capturedText.join(`
|
|
@@ -46654,14 +46638,6 @@ function startSubagentWatcher(config) {
|
|
|
46654
46638
|
return;
|
|
46655
46639
|
if (entry.state === "done" && !entry.completionNotified) {
|
|
46656
46640
|
entry.completionNotified = true;
|
|
46657
|
-
const desc = escapeHtml8(truncate3(entry.description, 80));
|
|
46658
|
-
const summary = entry.lastSummaryLine ? ` \u2014 ${escapeHtml8(truncate3(entry.lastSummaryLine, 120))}` : "";
|
|
46659
|
-
const tools = entry.toolCount > 0 ? ` (${entry.toolCount} tools)` : "";
|
|
46660
|
-
try {
|
|
46661
|
-
config.sendNotification(`\u2713 Worker done: ${desc}${tools}${summary}`);
|
|
46662
|
-
} catch (err) {
|
|
46663
|
-
log?.(`subagent-watcher: completion notification error: ${err.message}`);
|
|
46664
|
-
}
|
|
46665
46641
|
if (config.onFinish) {
|
|
46666
46642
|
try {
|
|
46667
46643
|
config.onFinish({
|
|
@@ -48027,11 +48003,11 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
|
|
|
48027
48003
|
}
|
|
48028
48004
|
|
|
48029
48005
|
// ../src/build-info.ts
|
|
48030
|
-
var VERSION = "0.13.11";
|
|
48031
|
-
var COMMIT_SHA = "
|
|
48032
|
-
var COMMIT_DATE = "2026-05-
|
|
48006
|
+
var VERSION = "0.13.12";
|
|
48007
|
+
var COMMIT_SHA = "18363dfb";
|
|
48008
|
+
var COMMIT_DATE = "2026-05-22T19:32:19+10:00";
|
|
48033
48009
|
var LATEST_PR = null;
|
|
48034
|
-
var COMMITS_AHEAD_OF_TAG =
|
|
48010
|
+
var COMMITS_AHEAD_OF_TAG = 5;
|
|
48035
48011
|
|
|
48036
48012
|
// gateway/boot-version.ts
|
|
48037
48013
|
function formatRelativeAgo(iso) {
|
|
@@ -51594,7 +51570,6 @@ function handleSessionEvent(ev) {
|
|
|
51594
51570
|
gatewayReceiveAt: startedAt,
|
|
51595
51571
|
replyCalled: false,
|
|
51596
51572
|
capturedText: [],
|
|
51597
|
-
capturedTextLenAtLastReply: 0,
|
|
51598
51573
|
orphanedReplyTimeoutId: null,
|
|
51599
51574
|
registryKey: null,
|
|
51600
51575
|
lastAssistantMsgId: null,
|
|
@@ -51659,7 +51634,6 @@ function handleSessionEvent(ev) {
|
|
|
51659
51634
|
const name = ev.toolName;
|
|
51660
51635
|
if (isTelegramReplyTool(name)) {
|
|
51661
51636
|
turn.replyCalled = true;
|
|
51662
|
-
turn.capturedTextLenAtLastReply = turn.capturedText.length;
|
|
51663
51637
|
if (turn.orphanedReplyTimeoutId != null) {
|
|
51664
51638
|
clearTimeout(turn.orphanedReplyTimeoutId);
|
|
51665
51639
|
turn.orphanedReplyTimeoutId = null;
|
|
@@ -51822,13 +51796,8 @@ function handleSessionEvent(ev) {
|
|
|
51822
51796
|
chatId: turn.sessionChatId,
|
|
51823
51797
|
replyCalled: turn.replyCalled,
|
|
51824
51798
|
capturedText: turn.capturedText,
|
|
51825
|
-
capturedTextLenAtLastReply: turn.capturedTextLenAtLastReply,
|
|
51826
51799
|
flushEnabled: TURN_FLUSH_SAFETY_ENABLED
|
|
51827
51800
|
});
|
|
51828
|
-
if (flushDecision.kind === "flush" && turn.replyCalled) {
|
|
51829
|
-
process.stderr.write(`telegram gateway: WARN post-reply-tail flush (#1291) \u2014 model emitted ${flushDecision.text.length} chars after a prior reply call without a follow-up reply tool chat=${chatId} turnStartedAt=${turn.startedAt}
|
|
51830
|
-
`);
|
|
51831
|
-
}
|
|
51832
51801
|
if (flushDecision.kind === "skip" && flushDecision.reason !== "reply-called") {
|
|
51833
51802
|
process.stderr.write(`telegram gateway: turn-flush skipped \u2014 reason=${flushDecision.reason}
|
|
51834
51803
|
`);
|
|
@@ -57308,20 +57277,6 @@ var didOneTimeSetup = false;
|
|
|
57308
57277
|
agentCwd: watcherAgentDir,
|
|
57309
57278
|
db: turnsDb,
|
|
57310
57279
|
parentStateDir: STATE_DIR,
|
|
57311
|
-
sendNotification: (text) => {
|
|
57312
|
-
const ownerChatId = loadAccess().allowFrom[0];
|
|
57313
|
-
if (!ownerChatId)
|
|
57314
|
-
return;
|
|
57315
|
-
swallowingApiCall(() => lockedBot.api.sendMessage(ownerChatId, text, {
|
|
57316
|
-
parse_mode: "HTML",
|
|
57317
|
-
link_preview_options: { is_disabled: true },
|
|
57318
|
-
...TOPIC_ID != null ? { message_thread_id: TOPIC_ID } : {}
|
|
57319
|
-
}), {
|
|
57320
|
-
chat_id: ownerChatId,
|
|
57321
|
-
verb: "subagent-watcher-notification",
|
|
57322
|
-
...TOPIC_ID != null ? { threadId: TOPIC_ID } : {}
|
|
57323
|
-
});
|
|
57324
|
-
},
|
|
57325
57280
|
log: (msg) => process.stderr.write(`telegram gateway: ${msg}
|
|
57326
57281
|
`),
|
|
57327
57282
|
onStall: (agentId, idleMs, description) => {
|
|
package/telegram-plugin/gateway/gateway.ts
CHANGED
|
@@ -1192,14 +1192,6 @@ type CurrentTurn = {
|
|
|
1192
1192
|
gatewayReceiveAt: number
|
|
1193
1193
|
replyCalled: boolean
|
|
1194
1194
|
capturedText: string[]
|
|
1195
|
-
// #1291: snapshot of capturedText.length at the moment of the most
|
|
1196
|
-
// recent reply / stream_reply tool call. Used by decideTurnFlush to
|
|
1197
|
-
// isolate the post-reply tail (e.g. a soft-commit reply followed by
|
|
1198
|
-
// the real substantive answer in terminal text only) and flush it as
|
|
1199
|
-
// a follow-up message. Pre-#1291 the existence of ANY reply call
|
|
1200
|
-
// suppressed flush entirely — that lost long terminal-only answers
|
|
1201
|
-
// after a "let me check" interim reply.
|
|
1202
|
-
capturedTextLenAtLastReply: number
|
|
1203
1195
|
orphanedReplyTimeoutId: ReturnType<typeof setTimeout> | null
|
|
1204
1196
|
registryKey: string | null
|
|
1205
1197
|
// Last assistant outbound message id for the current turn — populated
|
|
@@ -5706,7 +5698,6 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
5706
5698
|
gatewayReceiveAt: startedAt,
|
|
5707
5699
|
replyCalled: false,
|
|
5708
5700
|
capturedText: [],
|
|
5709
|
-
capturedTextLenAtLastReply: 0,
|
|
5710
5701
|
orphanedReplyTimeoutId: null,
|
|
5711
5702
|
registryKey: null,
|
|
5712
5703
|
lastAssistantMsgId: null,
|
|
@@ -5807,12 +5798,6 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
5807
5798
|
// placeholder-heartbeat label, which has been retired.
|
|
5808
5799
|
if (isTelegramReplyTool(name)) {
|
|
5809
5800
|
turn.replyCalled = true
|
|
5810
|
-
// #1291: pin the captured-text index at the moment of this reply
|
|
5811
|
-
// tool call. Anything pushed into capturedText after this point
|
|
5812
|
-
// is the post-reply tail (e.g. the substantive answer composed
|
|
5813
|
-
// in terminal text after a soft-commit "on it, back in a few").
|
|
5814
|
-
// decideTurnFlush slices from this index to flush the tail.
|
|
5815
|
-
turn.capturedTextLenAtLastReply = turn.capturedText.length
|
|
5816
5801
|
if (turn.orphanedReplyTimeoutId != null) {
|
|
5817
5802
|
clearTimeout(turn.orphanedReplyTimeoutId)
|
|
5818
5803
|
turn.orphanedReplyTimeoutId = null
|
|
@@ -6072,20 +6057,8 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
6072
6057
|
chatId: turn.sessionChatId,
|
|
6073
6058
|
replyCalled: turn.replyCalled,
|
|
6074
6059
|
capturedText: turn.capturedText,
|
|
6075
|
-
capturedTextLenAtLastReply: turn.capturedTextLenAtLastReply,
|
|
6076
6060
|
flushEnabled: TURN_FLUSH_SAFETY_ENABLED,
|
|
6077
6061
|
})
|
|
6078
|
-
// #1291: when the model emitted a soft-commit reply followed by a
|
|
6079
|
-
// substantive terminal-only answer, decideTurnFlush returns
|
|
6080
|
-
// kind:'flush' with the post-reply tail. Log WARN so this case is
|
|
6081
|
-
// auditable — the model SHOULD have called reply for the tail, but
|
|
6082
|
-
// didn't, and the framework is covering for it.
|
|
6083
|
-
if (flushDecision.kind === 'flush' && turn.replyCalled) {
|
|
6084
|
-
process.stderr.write(
|
|
6085
|
-
`telegram gateway: WARN post-reply-tail flush (#1291) — model emitted ${flushDecision.text.length} chars after a prior reply call without a follow-up reply tool` +
|
|
6086
|
-
` chat=${chatId} turnStartedAt=${turn.startedAt}\n`,
|
|
6087
|
-
)
|
|
6088
|
-
}
|
|
6089
6062
|
if (flushDecision.kind === 'skip' && flushDecision.reason !== 'reply-called') {
|
|
6090
6063
|
process.stderr.write(
|
|
6091
6064
|
`telegram gateway: turn-flush skipped — reason=${flushDecision.reason}\n`,
|
|
@@ -14983,26 +14956,11 @@ void (async () => {
|
|
|
14983
14956
|
// inside the sub-agent. Belt-and-braces with PR #557's
|
|
14984
14957
|
// multi-signal progress gate.
|
|
14985
14958
|
parentStateDir: STATE_DIR,
|
|
14986
|
-
|
|
14987
|
-
|
|
14988
|
-
|
|
14989
|
-
|
|
14990
|
-
|
|
14991
|
-
// gateway. Notifications are best-effort.
|
|
14992
|
-
void swallowingApiCall(
|
|
14993
|
-
() =>
|
|
14994
|
-
lockedBot.api.sendMessage(ownerChatId, text, {
|
|
14995
|
-
parse_mode: 'HTML',
|
|
14996
|
-
link_preview_options: { is_disabled: true },
|
|
14997
|
-
...(TOPIC_ID != null ? { message_thread_id: TOPIC_ID } : {}),
|
|
14998
|
-
}),
|
|
14999
|
-
{
|
|
15000
|
-
chat_id: ownerChatId,
|
|
15001
|
-
verb: 'subagent-watcher-notification',
|
|
15002
|
-
...(TOPIC_ID != null ? { threadId: TOPIC_ID } : {}),
|
|
15003
|
-
},
|
|
15004
|
-
)
|
|
15005
|
-
},
|
|
14959
|
+
// No user-facing notification callback: the card-era
|
|
14960
|
+
// "✓ Worker done" message was retired with the progress
|
|
14961
|
+
// card (#1122). Sub-agent completion reaches the user as
|
|
14962
|
+
// the model's own beat-4 handback reply; the watcher's
|
|
14963
|
+
// role here is registry liveness + the `onFinish` cue.
|
|
15006
14964
|
log: (msg) => process.stderr.write(`telegram gateway: ${msg}\n`),
|
|
15007
14965
|
// Option C (#393): route stall detections into the progress-card
|
|
15008
14966
|
// driver so the pinned card re-renders with a ⚠️ indicator even
|
|
package/telegram-plugin/subagent-watcher.ts
CHANGED
|
@@ -146,11 +146,6 @@ export interface SubagentWatcherConfig {
|
|
|
146
146
|
* an agent's home pollutes the watcher with phantom registrations).
|
|
147
147
|
*/
|
|
148
148
|
agentCwd?: string
|
|
149
|
-
/**
|
|
150
|
-
* Send a fresh (non-edit) Telegram message. For stall / completion
|
|
151
|
-
* state-transition notifications.
|
|
152
|
-
*/
|
|
153
|
-
sendNotification: (text: string) => void
|
|
154
149
|
/**
|
|
155
150
|
* How often to re-scan for new subagent dirs (ms). Default 1000.
|
|
156
151
|
*/
|
|
@@ -862,21 +857,19 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
|
|
|
862
857
|
|
|
863
858
|
if (entry.state === 'done' && !entry.completionNotified) {
|
|
864
859
|
entry.completionNotified = true
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
//
|
|
876
|
-
//
|
|
877
|
-
//
|
|
878
|
-
// boot get their `completionNotified=true` shortcut in registerAgent
|
|
879
|
-
// and skip this path entirely — only post-boot transitions fire.
|
|
860
|
+
// Card retired (#1122): the watcher no longer sends a user-facing
|
|
861
|
+
// "✓ Worker done" message. A framework-authored status line is a
|
|
862
|
+
// conversational-pacing anti-pattern, and the heuristic that drove
|
|
863
|
+
// it (silent-stall synthesis) fired on a worker mid-`Bash` as
|
|
864
|
+
// readily as on a finished one. The user-facing handback is the
|
|
865
|
+
// model's own beat-4 reply, woken by Claude Code's native
|
|
866
|
+
// background-task notification. Completion is surfaced here only
|
|
867
|
+
// via the structured `onFinish` callback — emitted before the
|
|
868
|
+
// deferred cleanup runs so the callback always sees a live
|
|
869
|
+
// registry entry. Historical entries that already-completed at
|
|
870
|
+
// boot get their `completionNotified=true` shortcut in
|
|
871
|
+
// registerAgent and skip this path — only post-boot transitions
|
|
872
|
+
// fire.
|
|
880
873
|
if (config.onFinish) {
|
|
881
874
|
try {
|
|
882
875
|
config.onFinish({
|
|
package/telegram-plugin/tests/fleet-state-watcher.test.ts
CHANGED
|
@@ -50,7 +50,6 @@ describe('subagent-watcher: WorkerEntry.lastTool', () => {
|
|
|
50
50
|
const intervals: Array<{ fn: () => void }> = []
|
|
51
51
|
const w = startSubagentWatcher({
|
|
52
52
|
agentDir,
|
|
53
|
-
sendNotification: () => {},
|
|
54
53
|
stallThresholdMs: 60_000,
|
|
55
54
|
rescanMs: 500,
|
|
56
55
|
now: () => Date.now(),
|
|
package/telegram-plugin/tests/subagent-registry-bugs.test.ts
CHANGED
|
@@ -148,7 +148,6 @@ function makeHarnessWithDb(opts: {
|
|
|
148
148
|
} = opts
|
|
149
149
|
|
|
150
150
|
let currentTime = 10_000
|
|
151
|
-
const notifications: string[] = []
|
|
152
151
|
const logs: string[] = []
|
|
153
152
|
|
|
154
153
|
const fileContents: Map<string, Buffer> = new Map()
|
|
@@ -217,7 +216,6 @@ function makeHarnessWithDb(opts: {
|
|
|
217
216
|
|
|
218
217
|
const watcher = startSubagentWatcher({
|
|
219
218
|
agentDir,
|
|
220
|
-
sendNotification: (text) => notifications.push(text),
|
|
221
219
|
stallThresholdMs,
|
|
222
220
|
// Mirror the active-loop threshold for fixtures with toolCount=0;
|
|
223
221
|
// tests that need the silent-synthesis vs active-loop distinction
|
|
@@ -257,7 +255,7 @@ function makeHarnessWithDb(opts: {
|
|
|
257
255
|
if (pollInterval) pollInterval.fn()
|
|
258
256
|
}
|
|
259
257
|
|
|
260
|
-
return {
|
|
258
|
+
return { logs, advance, poll, watcher, now: () => currentTime, mockFs, fileContents }
|
|
261
259
|
}
|
|
262
260
|
|
|
263
261
|
// ─── Bug 1 — ID mismatch: watcher never bumps last_activity_at ───────────────
|
|
package/telegram-plugin/tests/subagent-watcher-env-thresholds.test.ts
CHANGED
|
@@ -84,7 +84,6 @@ function makeHarness(opts: {
|
|
|
84
84
|
silentSynthesisStallThresholdMs: configStallThresholdMs,
|
|
85
85
|
silentStallTerminalMs: configSilentStallTerminalMs,
|
|
86
86
|
rescanMs: 500,
|
|
87
|
-
sendNotification: () => {},
|
|
88
87
|
onStall: (_id, idleMs) => stallCalls.push({ idleMs }),
|
|
89
88
|
onStallTerminal: (id) => stallTerminalCalls.push({ agentId: id }),
|
|
90
89
|
onFinish: ({ outcome }) => finishCalls.push({ outcome }),
|
|
package/telegram-plugin/tests/subagent-watcher-parent-marker.test.ts
CHANGED
|
@@ -86,7 +86,6 @@ describe('subagent-watcher: parent turn-active marker refresh (#501)', () => {
|
|
|
86
86
|
let nextRef = 1
|
|
87
87
|
const watcher = startSubagentWatcher({
|
|
88
88
|
agentDir: opts.agentDir,
|
|
89
|
-
sendNotification: () => { /* noop */ },
|
|
90
89
|
stallThresholdMs: 60_000,
|
|
91
90
|
rescanMs: 500,
|
|
92
91
|
now: () => Date.now(),
|
|
package/telegram-plugin/tests/subagent-watcher-stall-notification.test.ts
CHANGED
|
@@ -26,7 +26,6 @@ function subAgentUserMsg(promptText: string) {
|
|
|
26
26
|
// ─── Harness (mirrors subagent-watcher.test.ts pattern) ──────────────────────
|
|
27
27
|
|
|
28
28
|
interface StallHarness {
|
|
29
|
-
notifications: string[]
|
|
30
29
|
stallCalls: Array<{ agentId: string; idleMs: number; description: string }>
|
|
31
30
|
unstallCalls: Array<{ agentId: string; description: string }>
|
|
32
31
|
logs: string[]
|
|
@@ -55,7 +54,6 @@ function makeStallHarness(opts: {
|
|
|
55
54
|
} = opts
|
|
56
55
|
|
|
57
56
|
let currentTime = 1000
|
|
58
|
-
const notifications: string[] = []
|
|
59
57
|
const stallCalls: Array<{ agentId: string; idleMs: number; description: string }> = []
|
|
60
58
|
const unstallCalls: Array<{ agentId: string; description: string }> = []
|
|
61
59
|
const logs: string[] = []
|
|
@@ -139,7 +137,6 @@ function makeStallHarness(opts: {
|
|
|
139
137
|
// silent-synthesis vs active-loop split.
|
|
140
138
|
silentSynthesisStallThresholdMs: silentSynthesisStallThresholdMs ?? stallThresholdMs,
|
|
141
139
|
rescanMs,
|
|
142
|
-
sendNotification: (text) => notifications.push(text),
|
|
143
140
|
onStall: (id, idle, desc) => stallCalls.push({ agentId: id, idleMs: idle, description: desc }),
|
|
144
141
|
onUnstall: (id, desc) => unstallCalls.push({ agentId: id, description: desc }),
|
|
145
142
|
now: () => currentTime,
|
|
@@ -168,7 +165,7 @@ function makeStallHarness(opts: {
|
|
|
168
165
|
}
|
|
169
166
|
}
|
|
170
167
|
|
|
171
|
-
return {
|
|
168
|
+
return { stallCalls, unstallCalls, logs, advance, watcher, now: () => currentTime, fileContents, jsonlPath }
|
|
172
169
|
}
|
|
173
170
|
|
|
174
171
|
// ─── Tests ────────────────────────────────────────────────────────────────────
|
|
package/telegram-plugin/tests/subagent-watcher-stall-terminal.test.ts
CHANGED
|
@@ -127,7 +127,6 @@ function makeHarness(opts: {
|
|
|
127
127
|
silentSynthesisStallThresholdMs: stallThresholdMs,
|
|
128
128
|
silentStallTerminalMs,
|
|
129
129
|
rescanMs,
|
|
130
|
-
sendNotification: () => {},
|
|
131
130
|
onStall: (id, idleMs) => stallCalls.push({ agentId: id, idleMs }),
|
|
132
131
|
onUnstall: (id) => unstallCalls.push({ agentId: id }),
|
|
133
132
|
onStallTerminal: (id, desc) => stallTerminalCalls.push({ agentId: id, description: desc }),
|
|
package/telegram-plugin/tests/subagent-watcher.test.ts
CHANGED
|
@@ -198,7 +198,10 @@ function makeHarness(opts: {
|
|
|
198
198
|
|
|
199
199
|
const watcher = startSubagentWatcher({
|
|
200
200
|
agentDir,
|
|
201
|
-
|
|
201
|
+
// Card retired (#1122): completion surfaces via onFinish, not a
|
|
202
|
+
// user-facing message. Capture it so the completion assertions still
|
|
203
|
+
// verify the terminal-transition + de-dup behaviour.
|
|
204
|
+
onFinish: (info) => notifications.push(`✓ Worker done: ${info.description}`),
|
|
202
205
|
stallThresholdMs,
|
|
203
206
|
// Mirror the active-loop threshold so existing fixtures (which have
|
|
204
207
|
// toolCount=0 and use the simple "advance past N" model) keep
|
|
@@ -382,8 +385,13 @@ describe('startSubagentWatcher', () => {
|
|
|
382
385
|
let nextRef = 1
|
|
383
386
|
const watcher = startSubagentWatcher({
|
|
384
387
|
agentDir: opts.agentDir,
|
|
385
|
-
|
|
386
|
-
|
|
388
|
+
// Card retired (#1122): completion surfaces via onFinish. Capture
|
|
389
|
+
// it for the completion assertions and still delegate to any
|
|
390
|
+
// test-supplied onFinish.
|
|
391
|
+
onFinish: (info) => {
|
|
392
|
+
notifications.push(`✓ Worker done: ${info.description}`)
|
|
393
|
+
opts.onFinish?.(info)
|
|
394
|
+
},
|
|
387
395
|
stallThresholdMs: 60_000,
|
|
388
396
|
rescanMs: 500,
|
|
389
397
|
now: () => Date.now(),
|
|
@@ -994,7 +1002,8 @@ describe('startSubagentWatcher', () => {
|
|
|
994
1002
|
const watcher = startSubagentWatcher({
|
|
995
1003
|
agentDir: opts.agentDir,
|
|
996
1004
|
...(opts.agentCwd !== undefined ? { agentCwd: opts.agentCwd } : {}),
|
|
997
|
-
|
|
1005
|
+
// Card retired (#1122): completion surfaces via onFinish.
|
|
1006
|
+
onFinish: (info) => notifications.push(`✓ Worker done: ${info.description}`),
|
|
998
1007
|
stallThresholdMs: 60_000,
|
|
999
1008
|
rescanMs: 500,
|
|
1000
1009
|
now: () => Date.now(),
|
|
@@ -1133,7 +1142,8 @@ describe('startSubagentWatcher', () => {
|
|
|
1133
1142
|
let nextRef = 1
|
|
1134
1143
|
const watcher = startSubagentWatcher({
|
|
1135
1144
|
agentDir,
|
|
1136
|
-
|
|
1145
|
+
// Card retired (#1122): completion surfaces via onFinish.
|
|
1146
|
+
onFinish: (info) => notifications.push(`✓ Worker done: ${info.description}`),
|
|
1137
1147
|
stallThresholdMs: 60_000,
|
|
1138
1148
|
rescanMs: 500,
|
|
1139
1149
|
now: () => Date.now(),
|
|
package/telegram-plugin/tests/turn-flush-safety.test.ts
CHANGED
|
@@ -138,112 +138,60 @@ describe('decideTurnFlush', () => {
|
|
|
138
138
|
).toEqual({ kind: 'skip', reason: 'reply-called' })
|
|
139
139
|
})
|
|
140
140
|
|
|
141
|
-
//
|
|
142
|
-
//
|
|
143
|
-
//
|
|
144
|
-
//
|
|
145
|
-
//
|
|
146
|
-
|
|
147
|
-
|
|
141
|
+
// The turn-flush safety net covers exactly one failure mode: a turn that
|
|
142
|
+
// ended with the model never having said anything. Once the model has
|
|
143
|
+
// called reply / stream_reply the turn is served — any assistant text it
|
|
144
|
+
// emits afterwards is its own end-of-turn wrap-up (a closing summary,
|
|
145
|
+
// narration to itself), NOT a message it chose to send. The framework
|
|
146
|
+
// must never promote that terminal text into a second Telegram bubble.
|
|
147
|
+
//
|
|
148
|
+
// Regression guard for the redundant-follow-up-message fix: this reverts
|
|
149
|
+
// the #1291 post-reply-tail flush, which posted a duplicate recap on
|
|
150
|
+
// essentially every turn because the model habitually writes a closing
|
|
151
|
+
// summary after its final reply. See reference/conversational-pacing.md
|
|
152
|
+
// — "the framework owns the beat; the model authors the words".
|
|
153
|
+
describe('reply-called turns never flush trailing terminal text', () => {
|
|
154
|
+
it('skips even when a long substantive tail follows the reply', () => {
|
|
148
155
|
const decision = decideTurnFlush({
|
|
149
156
|
chatId: '700',
|
|
150
157
|
replyCalled: true,
|
|
151
|
-
// Index 0 = the captured text BEFORE the reply tool was called
|
|
152
|
-
// (some thinking-as-text). Indices 1..2 are post-reply.
|
|
153
158
|
capturedText: [
|
|
154
159
|
'thinking out loud before the reply',
|
|
155
|
-
'
|
|
156
|
-
|
|
160
|
+
'Answered the Playwright question and acked the calendar ' +
|
|
161
|
+
'diagnosis is still in flight. Will surface the root cause ' +
|
|
162
|
+
'when the worker returns.',
|
|
157
163
|
],
|
|
158
|
-
capturedTextLenAtLastReply: 1,
|
|
159
|
-
})
|
|
160
|
-
expect(decision).toEqual({
|
|
161
|
-
kind: 'flush',
|
|
162
|
-
text:
|
|
163
|
-
'Now here is the actual substantive answer the model composed ' +
|
|
164
|
-
'\nin terminal text only after the interim reply call.',
|
|
165
|
-
})
|
|
166
|
-
})
|
|
167
|
-
|
|
168
|
-
it('skips with reply-called-no-new-text when post-reply tail is below threshold', () => {
|
|
169
|
-
const decision = decideTurnFlush({
|
|
170
|
-
chatId: '701',
|
|
171
|
-
replyCalled: true,
|
|
172
|
-
capturedText: ['the pre-reply scratch', 'ok.'], // tail = "ok." (3 chars)
|
|
173
|
-
capturedTextLenAtLastReply: 1,
|
|
174
|
-
})
|
|
175
|
-
expect(decision).toEqual({
|
|
176
|
-
kind: 'skip',
|
|
177
|
-
reason: 'reply-called-no-new-text',
|
|
178
|
-
})
|
|
179
|
-
})
|
|
180
|
-
|
|
181
|
-
it('skips with reply-called when there is no post-reply text at all', () => {
|
|
182
|
-
const decision = decideTurnFlush({
|
|
183
|
-
chatId: '702',
|
|
184
|
-
replyCalled: true,
|
|
185
|
-
capturedText: ['everything-was-before-the-reply'],
|
|
186
|
-
capturedTextLenAtLastReply: 1, // tail slice is empty
|
|
187
164
|
})
|
|
188
165
|
expect(decision).toEqual({ kind: 'skip', reason: 'reply-called' })
|
|
189
166
|
})
|
|
190
167
|
|
|
191
|
-
it('
|
|
168
|
+
it('skips regardless of how many text blocks trail the reply', () => {
|
|
192
169
|
const decision = decideTurnFlush({
|
|
193
|
-
chatId: '
|
|
194
|
-
replyCalled: true,
|
|
195
|
-
capturedText: ['real answer pre-reply', 'NO_REPLY'],
|
|
196
|
-
capturedTextLenAtLastReply: 1,
|
|
197
|
-
replyCalledTailMinChars: 1, // force the marker check
|
|
198
|
-
})
|
|
199
|
-
expect(decision).toEqual({ kind: 'skip', reason: 'silent-marker' })
|
|
200
|
-
})
|
|
201
|
-
|
|
202
|
-
it('post-reply tail with null chatId still skips (no-inbound-chat)', () => {
|
|
203
|
-
const decision = decideTurnFlush({
|
|
204
|
-
chatId: null,
|
|
170
|
+
chatId: '701',
|
|
205
171
|
replyCalled: true,
|
|
206
172
|
capturedText: [
|
|
207
|
-
'
|
|
208
|
-
'
|
|
173
|
+
'a substantive paragraph the model wrote as terminal text',
|
|
174
|
+
'and another one, each well over any old length threshold',
|
|
175
|
+
'and a third closing summary block for good measure',
|
|
209
176
|
],
|
|
210
|
-
capturedTextLenAtLastReply: 1,
|
|
211
|
-
})
|
|
212
|
-
expect(decision).toEqual({ kind: 'skip', reason: 'no-inbound-chat' })
|
|
213
|
-
})
|
|
214
|
-
|
|
215
|
-
it('preserves pre-#1291 behaviour when capturedTextLenAtLastReply is omitted', () => {
|
|
216
|
-
// Legacy caller doesn't track the marker — defaults to
|
|
217
|
-
// capturedText.length, so the tail slice is empty and we skip
|
|
218
|
-
// with reason 'reply-called' (the original behaviour).
|
|
219
|
-
const decision = decideTurnFlush({
|
|
220
|
-
chatId: '704',
|
|
221
|
-
replyCalled: true,
|
|
222
|
-
capturedText: ['some answer the model emitted'],
|
|
223
177
|
})
|
|
224
178
|
expect(decision).toEqual({ kind: 'skip', reason: 'reply-called' })
|
|
225
179
|
})
|
|
226
180
|
|
|
227
|
-
it('
|
|
181
|
+
it('skips with reply-called when capturedText is empty', () => {
|
|
228
182
|
const decision = decideTurnFlush({
|
|
229
|
-
chatId: '
|
|
183
|
+
chatId: '702',
|
|
230
184
|
replyCalled: true,
|
|
231
|
-
capturedText: [
|
|
232
|
-
capturedTextLenAtLastReply: 1,
|
|
233
|
-
replyCalledTailMinChars: 10,
|
|
185
|
+
capturedText: [],
|
|
234
186
|
})
|
|
235
|
-
expect(decision
|
|
187
|
+
expect(decision).toEqual({ kind: 'skip', reason: 'reply-called' })
|
|
236
188
|
})
|
|
237
189
|
|
|
238
|
-
it('feature flag off still wins over
|
|
190
|
+
it('feature flag off still wins over a reply-called turn', () => {
|
|
239
191
|
const decision = decideTurnFlush({
|
|
240
|
-
chatId: '
|
|
192
|
+
chatId: '703',
|
|
241
193
|
replyCalled: true,
|
|
242
|
-
capturedText: [
|
|
243
|
-
'pre',
|
|
244
|
-
'a long substantive post-reply tail that would otherwise flush',
|
|
245
|
-
],
|
|
246
|
-
capturedTextLenAtLastReply: 1,
|
|
194
|
+
capturedText: ['a long substantive tail that pre-fix would flush'],
|
|
247
195
|
flushEnabled: false,
|
|
248
196
|
})
|
|
249
197
|
expect(decision).toEqual({ kind: 'skip', reason: 'flag-disabled' })
|
|
package/telegram-plugin/turn-flush-safety.ts
CHANGED
|
@@ -57,7 +57,6 @@ export type FlushDecision =
|
|
|
57
57
|
export type FlushSkipReason =
|
|
58
58
|
| 'flag-disabled'
|
|
59
59
|
| 'reply-called'
|
|
60
|
-
| 'reply-called-no-new-text'
|
|
61
60
|
| 'no-inbound-chat'
|
|
62
61
|
| 'empty-text'
|
|
63
62
|
| 'silent-marker'
|
|
@@ -70,35 +69,14 @@ export interface FlushDecisionInput {
|
|
|
70
69
|
* this turn. */
|
|
71
70
|
replyCalled: boolean
|
|
72
71
|
/** Raw text content blocks accumulated from assistant events across the
|
|
73
|
-
* turn. Joined + trimmed internally.
|
|
72
|
+
* turn. Joined + trimmed internally. Only consulted when `replyCalled`
|
|
73
|
+
* is false — once the model has called reply / stream_reply the turn is
|
|
74
|
+
* served and trailing terminal text is dropped (see `decideTurnFlush`). */
|
|
74
75
|
capturedText: string[]
|
|
75
|
-
/** Snapshot of `capturedText.length` at the moment of the most recent
|
|
76
|
-
* reply / stream_reply tool call in this turn. Indices `[capturedText
|
|
77
|
-
* length-at-last-reply, capturedText.length)` are the post-reply tail
|
|
78
|
-
* — substantive content the model emitted AFTER the reply (e.g. soft
|
|
79
|
-
* commit "on it, back in a few" followed by the real answer in
|
|
80
|
-
* terminal text only, the #1291 repro). When the tail meets
|
|
81
|
-
* `replyCalledTailMinChars` we flush it; otherwise we skip.
|
|
82
|
-
*
|
|
83
|
-
* Defaults to `capturedText.length` (treat all captured text as
|
|
84
|
-
* pre-reply, preserve the pre-#1291 behaviour where any reply tool
|
|
85
|
-
* call suppressed flush entirely) so callers that don't track the
|
|
86
|
-
* marker keep the old contract. */
|
|
87
|
-
capturedTextLenAtLastReply?: number
|
|
88
|
-
/** Minimum trimmed-tail length to qualify a post-reply tail flush.
|
|
89
|
-
* Defaults to `REPLY_CALLED_TAIL_MIN_CHARS` (40). Below this we skip
|
|
90
|
-
* with `reply-called-no-new-text` — typical for trailing markdown
|
|
91
|
-
* artifacts or a one-word afterthought. */
|
|
92
|
-
replyCalledTailMinChars?: number
|
|
93
76
|
/** Feature flag — defaults to true. Pass `false` to force skip everywhere. */
|
|
94
77
|
flushEnabled?: boolean
|
|
95
78
|
}
|
|
96
79
|
|
|
97
|
-
/** Default minimum trimmed length for the post-reply tail to be flushed
|
|
98
|
-
* as a follow-up message. Below this we treat the tail as noise / artifact
|
|
99
|
-
* and skip silently. */
|
|
100
|
-
export const REPLY_CALLED_TAIL_MIN_CHARS = 40
|
|
101
|
-
|
|
102
80
|
/**
|
|
103
81
|
* Pure decision: should the gateway deterministically send the model's
|
|
104
82
|
* captured assistant text at turn_end? Returns `{kind: 'flush', text}` with
|
|
@@ -107,39 +85,31 @@ export const REPLY_CALLED_TAIL_MIN_CHARS = 40
|
|
|
107
85
|
* Ordering of checks is deliberate: cheapest/strongest first so logs
|
|
108
86
|
* attribute a skip to the most specific cause.
|
|
109
87
|
*
|
|
110
|
-
*
|
|
111
|
-
*
|
|
112
|
-
*
|
|
113
|
-
* `
|
|
114
|
-
*
|
|
115
|
-
*
|
|
88
|
+
* The safety net has exactly one job: a turn that ended with the model
|
|
89
|
+
* having said *nothing* to the user. Once `replyCalled` is true the model
|
|
90
|
+
* has communicated through the proper channel and the decision is always
|
|
91
|
+
* `skip` — assistant text emitted after a reply is the model's own
|
|
92
|
+
* end-of-turn wrap-up (a closing summary, narration to itself), not a
|
|
93
|
+
* message it chose to send. Promoting that terminal text into a Telegram
|
|
94
|
+
* message second-guesses an explicit reply and posts a redundant duplicate
|
|
95
|
+
* on essentially every turn, because the model habitually writes a closing
|
|
96
|
+
* summary. The framework owns the *beat*; the model authors the *words*
|
|
97
|
+
* and emits them via reply (`reference/conversational-pacing.md`).
|
|
98
|
+
*
|
|
99
|
+
* (This reverts the #1291 post-reply-tail flush. Its intent — catch a
|
|
100
|
+
* soft-commit reply followed by the real answer in terminal text only —
|
|
101
|
+
* could not be told apart from the habitual wrap-up by length, so it
|
|
102
|
+
* misfired constantly. A model that soft-commits and never delivers is a
|
|
103
|
+
* pacing failure caught by the silence-poke ladder, not papered over here.)
|
|
116
104
|
*/
|
|
117
105
|
export function decideTurnFlush(input: FlushDecisionInput): FlushDecision {
|
|
118
106
|
const flushEnabled = input.flushEnabled !== false
|
|
119
107
|
if (!flushEnabled) return { kind: 'skip', reason: 'flag-disabled' }
|
|
120
108
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
if (tail.length === 0) {
|
|
126
|
-
// The reply tool was called and nothing of substance came after —
|
|
127
|
-
// the turn is fully served by the reply. Skip silently (the gateway
|
|
128
|
-
// WARN gate excludes this reason from logs).
|
|
129
|
-
return { kind: 'skip', reason: 'reply-called' }
|
|
130
|
-
}
|
|
131
|
-
if (tail.length < minChars) {
|
|
132
|
-
// Post-reply tail exists but is below the substantive-content
|
|
133
|
-
// threshold — typically trailing markdown artifacts or a one-word
|
|
134
|
-
// afterthought. Skip but with a distinct reason so this case IS
|
|
135
|
-
// logged (auditable for #1291 regressions, vs the silent
|
|
136
|
-
// 'reply-called' which is the expected steady state).
|
|
137
|
-
return { kind: 'skip', reason: 'reply-called-no-new-text' }
|
|
138
|
-
}
|
|
139
|
-
if (input.chatId == null) return { kind: 'skip', reason: 'no-inbound-chat' }
|
|
140
|
-
if (isSilentFlushMarker(tail)) return { kind: 'skip', reason: 'silent-marker' }
|
|
141
|
-
return { kind: 'flush', text: tail }
|
|
142
|
-
}
|
|
109
|
+
// The model communicated through the proper channel — trust it. Any
|
|
110
|
+
// assistant text it emitted as terminal text afterwards is its own
|
|
111
|
+
// end-of-turn wrap-up, never a second Telegram message.
|
|
112
|
+
if (input.replyCalled) return { kind: 'skip', reason: 'reply-called' }
|
|
143
113
|
|
|
144
114
|
if (input.chatId == null) return { kind: 'skip', reason: 'no-inbound-chat' }
|
|
145
115
|
const joined = input.capturedText.join('\n').trim()
|