switchroom 0.13.13 → 0.13.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +293 -92
- package/telegram-plugin/gateway/gateway.ts +223 -17
- package/telegram-plugin/pending-work-progress.ts +377 -0
- package/telegram-plugin/runtime-metrics.ts +20 -0
- package/telegram-plugin/tests/pending-work-progress.test.ts +354 -0
- package/telegram-plugin/uat/scenarios/cross-turn-pending-progress-dm.test.ts +239 -0
- package/telegram-plugin/uat/scenarios/visible-answer-stream-dm.test.ts +219 -0
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-turn pending-async progress — issue #1445.
|
|
3
|
+
*
|
|
4
|
+
* When a turn ends with pending background async work (the model
|
|
5
|
+
* dispatched `Agent` / `Task` and ended its turn before the worker
|
|
6
|
+
* returned), keep editing the model's last reply *in place* at
|
|
7
|
+
* intervals so the user sees ambient liveness during the wait — without
|
|
8
|
+
* any new pinged messages and without re-introducing the retired
|
|
9
|
+
* progress card.
|
|
10
|
+
*
|
|
11
|
+
* Background data justifying this module (2026-05-23 forensic + UAT):
|
|
12
|
+
*
|
|
13
|
+
* - silence-poke success rate is 0–7% across hundreds of fires
|
|
14
|
+
* (finn: 0/78, clerk: 6/91, klanker: 5/158) — the polite levels
|
|
15
|
+
* reach the model as `<system-reminder>`s piggybacked on the next
|
|
16
|
+
* tool result, so they (a) only land if the model is actively
|
|
17
|
+
* cycling tools, (b) compete with hundreds of other tokens, and (c)
|
|
18
|
+
* only ever exist while the turn is open. The 300s framework
|
|
19
|
+
* fallback is the only user-visible silence-poke output, and its
|
|
20
|
+
* first job is to *kill the wedged turn*.
|
|
21
|
+
*
|
|
22
|
+
* - The dominant user-visible failure mode (issue #1445) is in fact
|
|
23
|
+
* cross-turn: the model calls `Agent` (or `Bash` with
|
|
24
|
+
* `run_in_background:true`), sends one ack reply that pings, then
|
|
25
|
+
* ends the turn. The silence-poke ladder is *gone* the moment
|
|
26
|
+
* endTurn() fires. The user then sees nothing for 10–30+ minutes
|
|
27
|
+
* until the worker returns. A live UAT confirmed: a deliberate
|
|
28
|
+
* `sleep 350` prompt produced one `[PING] Background sleep running;
|
|
29
|
+
* awaiting completion notification.` at +19s and the turn ended.
|
|
30
|
+
*
|
|
31
|
+
* Mechanism:
|
|
32
|
+
*
|
|
33
|
+
* tool_use(Agent|Task) → mark chat key `pending=true`
|
|
34
|
+
* outbound reply → capture anchor (messageId, text)
|
|
35
|
+
* turn_end with pending+anchor → activate the timer for the key
|
|
36
|
+
* tick (every 5s, edit every → editMessageText against the anchor
|
|
37
|
+
* EDIT_INTERVAL_MS) appending/refreshing the suffix
|
|
38
|
+
* " — still working (Nm)"
|
|
39
|
+
* inbound user message → clear (user re-engaged or moved on)
|
|
40
|
+
* subagent_handback inject → clear (model about to re-engage)
|
|
41
|
+
* MAX_LIFETIME_MS budget cap → clear (give up; 30 min default)
|
|
42
|
+
*
|
|
43
|
+
* Single shared timer for the whole gateway — like silence-poke's
|
|
44
|
+
* `tick()`, the per-key cost is O(map size) per poll. The poll
|
|
45
|
+
* interval is short (5s) but edits are spaced at EDIT_INTERVAL_MS so
|
|
46
|
+
* the Telegram bot.api editMessageText rate stays well under limits.
|
|
47
|
+
*
|
|
48
|
+
* Edits are plain text (no parseMode). The suffix is appended to the
|
|
49
|
+
* model's authored text; on subsequent edits the prior suffix is
|
|
50
|
+
* stripped before re-appending so the message never accumulates
|
|
51
|
+
* duplicate suffixes.
|
|
52
|
+
*
|
|
53
|
+
* Kill switch: `SWITCHROOM_DISABLE_PENDING_PROGRESS=1` disables the
|
|
54
|
+
* whole subsystem. The conversational-pacing prompt is unaffected.
|
|
55
|
+
*/
|
|
56
|
+
|
|
57
|
+
/** Minimum gap (ms) the tick loop enforces between two in-place edits of the same anchor. */
export const EDIT_INTERVAL_MS = 60_000
|
|
58
|
+
/** Default period (ms) of the shared poll timer; overridable via `PendingProgressDeps.pollIntervalMs`. */
export const POLL_INTERVAL_MS = 5_000
|
|
59
|
+
/** Budget cap (ms): an activated entry at least this old is cleared with reason `timeout`. */
export const MAX_LIFETIME_MS = 30 * 60_000
|
|
60
|
+
/** Telegram message length limit is 4096; budget headroom for the
|
|
61
|
+
* suffix and any escape expansion. If the anchor text plus suffix
|
|
62
|
+
* would exceed this, we skip the edit (the user still sees the
|
|
63
|
+
* original) rather than truncate the model's authored prose. */
|
|
64
|
+
export const TELEGRAM_MSG_CAP = 4000
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Regex matching the suffix we append. Used to strip a prior suffix
|
|
68
|
+
* before appending the next one. The (\d+) covers "1m" / "12m" / etc.
|
|
69
|
+
* Kept anchored to end-of-string so it only matches OUR suffix, not
|
|
70
|
+
* something the model happened to write.
|
|
71
|
+
*/
|
|
72
|
+
const SUFFIX_RE = /\n\n— still working \(\d+m\)$/
|
|
73
|
+
|
|
74
|
+
/** Everything the injected `editMessage` callback receives for one in-place edit. */
export interface PendingProgressEditCtx {
  /** Chat portion of the chat key (text before the first `:`). */
  chatId: string
  /** Thread portion of the chat key, or null when absent / not numeric. */
  threadId: number | null
  /** Id of the anchor message being edited. */
  messageId: number
  /** Full replacement text: the anchor's original text plus the progress suffix. */
  newText: string
}
|
|
80
|
+
|
|
81
|
+
/**
 * Metric events emitted by this module, as a discriminated union on `kind`.
 *
 * Deliberately kept structurally identical to the `pending_progress_*`
 * variants of `RuntimeMetricEvent` in `runtime-metrics.ts`, so the
 * gateway can wire `emitMetric: emitRuntimeMetric` without a cast.
 *
 *   - `started` — chat key only.
 *   - `edited`  — chat key plus cumulative elapsed time.
 *   - `cleared` — chat key, optional elapsed time, and the reason
 *                 (`inbound` | `handback` | `timeout` | `manual`).
 */
export type PendingProgressMetric =
  | { kind: 'pending_progress_started'; chatKey: string }
  | { kind: 'pending_progress_edited'; chatKey: string; elapsedMs: number }
  | { kind: 'pending_progress_cleared'; chatKey: string; elapsedMs?: number; reason?: string }
|
|
99
|
+
|
|
100
|
+
/** Collaborators injected into the pending-progress subsystem. */
export interface PendingProgressDeps {
  /** Performs the in-place edit; a rejection is logged by the tick loop and never rethrown. */
  editMessage: (ctx: PendingProgressEditCtx) => Promise<void>
  /** Optional sink for `pending_progress_*` lifecycle events. */
  emitMetric?: (event: PendingProgressMetric) => void
  /** Optional clock override for tests; `Date.now()` is used when omitted. */
  nowMs?: () => number
  /** Optional poll interval override for tests; defaults to POLL_INTERVAL_MS. */
  pollIntervalMs?: number
}
|
|
108
|
+
|
|
109
|
+
/** Per-chat-key bookkeeping for the pending-progress subsystem. */
interface State {
  /**
   * Set once a `tool_use(Agent|Task)` has been seen for this key during
   * the current turn; reset when the next turn starts.
   */
  pending: boolean
  /** message_id of the most recent outbound reply for this key (the anchor). */
  anchorMessageId: number | null
  /**
   * The anchor's text as the model wrote it, with any earlier
   * pending-progress suffix removed. Every edit is built from this base.
   */
  anchorOriginalText: string
  /**
   * Wall-clock ms at which cross-turn ambient state was activated
   * (turn_end with pending + anchor); null until then.
   */
  activatedAt: number | null
  /**
   * Wall-clock ms of the most recent edit slot; drives the
   * EDIT_INTERVAL_MS cadence. Null on a freshly created entry.
   */
  lastEditAt: number | null
}
|
|
126
|
+
|
|
127
|
+
/** Per-chat state, keyed by the `<chatId>:<threadIdOrEmpty>` chat key. */
const stateByKey = new Map<string, State>()
|
|
128
|
+
/** Handle of the single shared poll interval; null while the timer is stopped. */
let timer: ReturnType<typeof setInterval> | null = null
|
|
129
|
+
/** Deps installed by `startTimer` (or the test hook); reset to null by `stopTimer`. */
let activeDeps: PendingProgressDeps | null = null
|
|
130
|
+
|
|
131
|
+
/**
 * Kill-switch probe. The subsystem is on unless the environment variable
 * `SWITCHROOM_DISABLE_PENDING_PROGRESS` is exactly `1` or `true`.
 */
function enabled(): boolean {
  const flag = process.env.SWITCHROOM_DISABLE_PENDING_PROGRESS
  if (flag === '1') return false
  if (flag === 'true') return false
  return true
}
|
|
135
|
+
|
|
136
|
+
/** Current time in ms: the injected `nowMs` clock when the deps supply one, else `Date.now()`. */
function nowMs(): number {
  if (activeDeps?.nowMs) {
    return activeDeps.nowMs()
  }
  return Date.now()
}
|
|
139
|
+
|
|
140
|
+
/** Return the state record for `key`, creating and registering a blank one on first use. */
function ensure(key: string): State {
  const existing = stateByKey.get(key)
  if (existing) return existing

  const fresh: State = {
    pending: false,
    anchorMessageId: null,
    anchorOriginalText: '',
    activatedAt: null,
    lastEditAt: null,
  }
  stateByKey.set(key, fresh)
  return fresh
}
|
|
154
|
+
|
|
155
|
+
/**
 * Begin a fresh turn for `key`: drop the per-turn `pending` flag and the
 * per-turn anchor (message id + text).
 *
 * Cross-turn activation (`activatedAt` / `lastEditAt`) is deliberately
 * left alone — it belongs to the PRIOR turn and is torn down only
 * through `clearPending` (`inbound` / `handback` / `timeout`). The
 * gateway issues those clears from two sites for full coverage:
 *
 *   1. `handleInbound` (a real user message) → `clearPending('inbound')`
 *      — the fast path, fired before the new turn atom is built.
 *   2. The `handleSessionEvent` `enqueue` case (every fresh turn atom)
 *      → `clearPending('handback')` — the backstop for synthesised
 *      wakes (subagent-handback, cron, vault grant, restart marker)
 *      that push straight to `pendingInboundBuffer` and bypass
 *      `handleInbound`. Harmless if the first clear already ran.
 *
 * Since those clears delete the map entry, this function normally finds
 * nothing to reset; it is retained for test ergonomics and as
 * defence-in-depth. No-op when the kill switch is set.
 */
export function startTurn(key: string): void {
  if (!enabled()) return

  const entry = stateByKey.get(key)
  if (entry == null) return

  // Per-turn fields only; activation timestamps are cleared elsewhere.
  entry.anchorOriginalText = ''
  entry.anchorMessageId = null
  entry.pending = false
}
|
|
185
|
+
|
|
186
|
+
/**
 * Record that the current turn for this chat dispatched background async
 * work. The gateway calls this on a `tool_use` for `Agent` or `Task`.
 * Safe to call repeatedly; no-op when the kill switch is set.
 */
export function noteAsyncDispatch(key: string): void {
  if (enabled()) {
    const entry = ensure(key)
    entry.pending = true
  }
}
|
|
195
|
+
|
|
196
|
+
/**
 * Remember an outbound reply as the candidate anchor for cross-turn
 * editing. Invoked for every successful bot reply send; the latest call
 * wins. Should the text already end in a pending-progress suffix (rare —
 * only if we sent something to ourselves), the suffix is removed before
 * storing so later edits never double-suffix.
 *
 * No-op when the kill switch is set.
 */
export function noteOutbound(
  key: string,
  opts: { messageId: number; text: string },
): void {
  if (!enabled()) return

  const entry = ensure(key)
  const baseText = opts.text.replace(SUFFIX_RE, '')
  entry.anchorMessageId = opts.messageId
  entry.anchorOriginalText = baseText
}
|
|
212
|
+
|
|
213
|
+
/**
 * Turn-end hook. When the turn both dispatched async work AND captured
 * an anchor, the cross-turn ambient state is activated so the poll
 * timer begins editing, and a `pending_progress_started` metric is
 * emitted. In every other case the entry for `key` is removed — there
 * is nothing to do for it.
 *
 * No-op when the kill switch is set or no entry exists for `key`.
 */
export function noteTurnEnd(key: string): void {
  if (!enabled()) return

  const entry = stateByKey.get(key)
  if (entry == null) return

  const shouldActivate = entry.pending && entry.anchorMessageId != null
  if (!shouldActivate) {
    stateByKey.delete(key)
    return
  }

  const activationTime = nowMs()
  entry.activatedAt = activationTime
  // Seed lastEditAt with the activation time so the first edit fires one
  // full EDIT_INTERVAL_MS after activation — not immediately.
  entry.lastEditAt = activationTime
  activeDeps?.emitMetric?.({
    kind: 'pending_progress_started',
    chatKey: key,
  })
}
|
|
238
|
+
|
|
239
|
+
/**
 * Drop all pending-progress state for a chat and emit a
 * `pending_progress_cleared` metric. Does nothing when no entry exists.
 *
 * Reasons:
 *   'inbound'  — the user sent a new message (they're re-engaged)
 *   'handback' — switchroom injected a subagent_handback channel turn
 *   'timeout'  — the entry exceeded MAX_LIFETIME_MS
 *   'manual'   — test / debug
 *
 * `elapsedMs` in the metric is time since activation, or 0 when the
 * entry was never activated.
 * NOTE(review): the metric is emitted for never-activated entries too —
 * confirm metric consumers expect that.
 */
export function clearPending(
  key: string,
  reason: 'inbound' | 'handback' | 'timeout' | 'manual',
): void {
  const entry = stateByKey.get(key)
  if (entry === undefined) return

  let elapsedMs = 0
  if (entry.activatedAt != null) {
    elapsedMs = nowMs() - entry.activatedAt
  }

  stateByKey.delete(key)
  activeDeps?.emitMetric?.({
    kind: 'pending_progress_cleared',
    chatKey: key,
    elapsedMs,
    reason,
  })
}
|
|
261
|
+
|
|
262
|
+
/**
 * Install `deps` and start the shared poll interval. A second call while
 * the timer is running is ignored (the earlier deps stay in place), and
 * the whole call is a no-op when the kill switch is set.
 *
 * The interval is unref'd where supported so it does not by itself keep
 * the process alive.
 */
export function startTimer(deps: PendingProgressDeps): void {
  if (!enabled() || timer != null) return

  activeDeps = deps
  const periodMs = deps.pollIntervalMs ?? POLL_INTERVAL_MS
  const handle = setInterval(() => tick(nowMs()), periodMs)
  timer = handle
  if (typeof handle.unref === 'function') handle.unref()
}
|
|
274
|
+
|
|
275
|
+
/** Cancel the shared poll interval (if running) and uninstall the deps. Safe to call repeatedly. */
export function stopTimer(): void {
  const handle = timer
  timer = null
  activeDeps = null
  if (handle != null) clearInterval(handle)
}
|
|
283
|
+
|
|
284
|
+
/**
 * Split a `<chatId>:<threadIdOrEmpty>` chat key (the `statusKey` shape
 * used throughout the gateway) into its parts.
 *
 * Only the FIRST `:` separates the two halves. `threadId` is null when
 * there is no separator, when the tail is empty or the literal string
 * `undefined`, or when the tail does not convert to a finite number.
 */
function parseKey(key: string): { chatId: string; threadId: number | null } {
  const sep = key.indexOf(':')
  if (sep === -1) return { chatId: key, threadId: null }

  const chatId = key.substring(0, sep)
  const rawThread = key.substring(sep + 1)

  let threadId: number | null = null
  if (rawThread !== '' && rawThread !== 'undefined') {
    const parsed = Number(rawThread)
    if (Number.isFinite(parsed)) threadId = parsed
  }
  return { chatId, threadId }
}
|
|
297
|
+
|
|
298
|
+
/**
 * One poll pass over every tracked chat key.
 *
 * For each entry that has been activated (both `activatedAt` and an
 * anchor message id are set):
 *   - once it has been active for MAX_LIFETIME_MS or longer it is
 *     cleared with reason `timeout`;
 *   - otherwise, when at least EDIT_INTERVAL_MS has passed since the
 *     last edit slot, `anchorOriginalText + suffix` is rebuilt and a
 *     fire-and-forget `editMessage` is dispatched. A text that would
 *     exceed TELEGRAM_MSG_CAP consumes the slot without editing.
 *
 * Does nothing when no deps are installed. Edit failures are written to
 * stderr and never propagate.
 *
 * @param now Current time in ms; passed in so tests can drive the clock.
 */
function tick(now: number): void {
  // Snapshot the deps once. The promise continuations below run after
  // this function returns, by which time stopTimer() may have nulled the
  // module-level `activeDeps`; dereferencing it there would throw a
  // TypeError that gets misreported as an edit failure.
  const deps = activeDeps
  if (deps == null) return

  // clearPending() deletes from the map mid-iteration; Map iteration
  // tolerates deletion of the current entry.
  for (const [key, s] of stateByKey.entries()) {
    const anchorId = s.anchorMessageId
    if (s.activatedAt == null || anchorId == null) continue

    const elapsed = now - s.activatedAt
    if (elapsed >= MAX_LIFETIME_MS) {
      clearPending(key, 'timeout')
      continue
    }

    // Fall back to the activation time when no edit slot has been
    // recorded, so an entry with a null lastEditAt still gets its first
    // edit one interval after activation instead of never.
    const sinceEdit = now - (s.lastEditAt ?? s.activatedAt)
    if (sinceEdit < EDIT_INTERVAL_MS) continue

    // Build suffix from elapsed wall-clock. Always at least 1m so the
    // user-visible counter reads honestly (we only edit at intervals
    // ≥ EDIT_INTERVAL_MS = 60s).
    const minutes = Math.max(1, Math.round(elapsed / 60_000))
    const suffix = `\n\n— still working (${minutes}m)`
    const newText = s.anchorOriginalText + suffix

    // Consume the slot whether or not an edit is actually sent.
    s.lastEditAt = now

    if (newText.length > TELEGRAM_MSG_CAP) {
      // Don't truncate the model's prose — just skip this edit.
      // The previous edit (or the original) is still visible.
      continue
    }

    const { chatId, threadId } = parseKey(key)
    const editCtx: PendingProgressEditCtx = {
      chatId,
      threadId,
      messageId: anchorId,
      newText,
    }

    // Fire-and-forget so a slow edit doesn't block the tick loop.
    // Starting from Promise.resolve() also turns a synchronous throw in
    // editMessage into a rejection. Errors are logged but never bubble
    // (a 429 / "message not modified" / chat-deleted is a soft failure).
    void Promise.resolve()
      .then(() => deps.editMessage(editCtx))
      .then(() => {
        deps.emitMetric?.({
          kind: 'pending_progress_edited',
          chatKey: key,
          elapsedMs: elapsed,
        })
      })
      .catch((err: unknown) => {
        // Rejections are not guaranteed to be Error instances.
        const detail = err instanceof Error ? err.message : String(err)
        process.stderr.write(
          `pending-work-progress: edit failed key=${key} ` +
            `msg=${editCtx.messageId}: ${detail}\n`,
        )
      })
  }
}
|
|
355
|
+
|
|
356
|
+
// ─── Test helpers ─────────────────────────────────────────────────────────
|
|
357
|
+
|
|
358
|
+
/** Test-only: run exactly one poll pass as if the clock read `now`. */
export function __tickForTests(now: number): void {
  const simulatedNow = now
  tick(simulatedNow)
}
|
|
362
|
+
|
|
363
|
+
/** Test-only: swap in `deps` (or null to uninstall) without touching the real timer. */
export function __setDepsForTests(deps: PendingProgressDeps | null): void {
  const replacement = deps
  activeDeps = replacement
}
|
|
367
|
+
|
|
368
|
+
/** Test-only: return the live state record for `key`, or undefined when none exists. */
export function __getStateForTests(key: string): State | undefined {
  const entry = stateByKey.get(key)
  return entry
}
|
|
372
|
+
|
|
373
|
+
/** Test-only: stop the timer, uninstall the deps and forget every chat's state. */
export function __resetAllForTests(): void {
  stopTimer()
  stateByKey.clear()
}
|
|
@@ -104,6 +104,26 @@ export type RuntimeMetricEvent =
|
|
|
104
104
|
fallback_kind: 'working' | 'thinking'
|
|
105
105
|
silence_ms: number
|
|
106
106
|
}
|
|
107
|
+
/**
|
|
108
|
+
* #1445 cross-turn pending-async ambient lifecycle. `started` fires
|
|
109
|
+
* when a turn ends with a captured anchor AND a pending Agent/Task/
|
|
110
|
+
* Bash-background dispatch — i.e. the framework will now edit the
|
|
111
|
+
* model's last reply in place every ~60s until cleared. `edited`
|
|
112
|
+
* fires on each successful in-place edit; `elapsedMs` is how long
|
|
113
|
+
* ambient has been running for this chat. `cleared` fires when
|
|
114
|
+
* ambient stops — `reason` says why (inbound / handback / timeout).
|
|
115
|
+
* Targets: edited/started ratio is the "still alive minutes per
|
|
116
|
+
* activation" health proxy; cleared.reason='inbound' should
|
|
117
|
+
* dominate (model + user resolving naturally).
|
|
118
|
+
*/
|
|
119
|
+
| { kind: 'pending_progress_started'; chatKey: string }
|
|
120
|
+
| { kind: 'pending_progress_edited'; chatKey: string; elapsedMs: number }
|
|
121
|
+
| {
|
|
122
|
+
kind: 'pending_progress_cleared'
|
|
123
|
+
chatKey: string
|
|
124
|
+
elapsedMs?: number
|
|
125
|
+
reason?: string
|
|
126
|
+
}
|
|
107
127
|
|
|
108
128
|
/**
|
|
109
129
|
* The JSONL sink lives under the runtime state dir so it's per-agent
|