switchroom 0.14.62 → 0.14.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +148 -35
- package/telegram-plugin/gateway/auto-classify-mid-turn.ts +119 -0
- package/telegram-plugin/gateway/escalation-drive.ts +79 -0
- package/telegram-plugin/gateway/gateway.ts +146 -52
- package/telegram-plugin/gateway/obligation-ledger.ts +45 -3
- package/telegram-plugin/hooks/tool-label-pretool.mjs +32 -12
- package/telegram-plugin/tests/auto-classify-mid-turn.test.ts +87 -0
- package/telegram-plugin/tests/escalation-drive.test.ts +123 -0
- package/telegram-plugin/tests/obligation-determinism.test.ts +63 -3
- package/telegram-plugin/tests/obligation-ledger.test.ts +92 -0
|
@@ -142,6 +142,7 @@ import {
|
|
|
142
142
|
resolveRetentionDays as resolveRegistryRetentionDays,
|
|
143
143
|
} from '../registry/reaper.js'
|
|
144
144
|
import { parseQueuePrefix, parseSteerPrefix, formatPriorAssistantPreview, formatReplyToText } from '../steering.js'
|
|
145
|
+
import { autoClassifyMidTurnInbound } from './auto-classify-mid-turn.js'
|
|
145
146
|
import {
|
|
146
147
|
renderOperatorEvent,
|
|
147
148
|
shouldEmitOperatorEvent,
|
|
@@ -289,7 +290,7 @@ import {
|
|
|
289
290
|
obligationEscalationText,
|
|
290
291
|
} from './obligation-ledger.js'
|
|
291
292
|
import { loadObligations, persistObligations } from './obligation-store.js'
|
|
292
|
-
import {
|
|
293
|
+
import { driveEscalation } from './escalation-drive.js'
|
|
293
294
|
import { createInboundSpool } from './inbound-spool.js'
|
|
294
295
|
import { purgeStaleTurnsForChat } from './turn-state-purge.js'
|
|
295
296
|
import { decideInboundDelivery } from './inbound-delivery-gate.js'
|
|
@@ -1423,6 +1424,47 @@ const OBLIGATION_ESCALATE_MAX = 3
|
|
|
1423
1424
|
// bounded escalate ladder to a terminal. 45s comfortably exceeds robustApiCall's
|
|
1424
1425
|
// 3-attempt network backoff so a legitimate slow send isn't cut short.
|
|
1425
1426
|
const OBLIGATION_ESCALATE_SEND_DEADLINE_MS = 45_000
|
|
1427
|
+
// Escalate-grace window. A slow / background-worker / multi-segment turn ends
// (the in-flight gate clears) BEFORE its trailing answer's reply lands, and the
// 5s sweep would re-present/escalate in that gap — a false "⚠️ I may have missed
// this" on a message that's actively being answered (fuzz-confirmed on v0.14.62:
// ~14% of marko's no-reply turn-ends had the answer in flight). An obligation
// whose handling turn ended < this ago is skipped by decideAtIdle, giving the
// trailing answer's close a beat to fire. Bounded: each re-present is itself a
// turn that re-stamps once, representCount is capped → the ladder still
// terminates. The 45s default covers the usual case of the observed gap ("answer
// lands within ~60s, usually <40s"); it does not cover the ~60s tail.
// Kill switch: SWITCHROOM_OBLIGATION_ESCALATE_GRACE_MS=0 → pre-grace behaviour.
const OBLIGATION_ESCALATE_GRACE_DEFAULT_MS = 45_000

/**
 * Parse a raw SWITCHROOM_OBLIGATION_ESCALATE_GRACE_MS value into milliseconds.
 *
 * @param raw The environment variable's value, or undefined when it is unset.
 * @returns The parsed window when `raw` is a finite number >= 0 (so "0" keeps
 *   working as the kill switch); otherwise the 45s default.
 */
function parseObligationEscalateGraceMs(raw: string | undefined): number {
  // Number('   ') is 0, so a blank-but-set variable would silently trip the
  // kill switch. Treat whitespace-only exactly like unset/empty instead.
  const text = raw?.trim() ?? ''
  if (text === '') return OBLIGATION_ESCALATE_GRACE_DEFAULT_MS
  const n = Number(text)
  // Rejects NaN (non-numeric text), ±Infinity and negative values.
  return Number.isFinite(n) && n >= 0 ? n : OBLIGATION_ESCALATE_GRACE_DEFAULT_MS
}

const OBLIGATION_ESCALATE_GRACE_MS = parseObligationEscalateGraceMs(
  process.env.SWITCHROOM_OBLIGATION_ESCALATE_GRACE_MS,
)
|
|
1443
|
+
|
|
1444
|
+
// ─── Mid-turn auto-classify (steer-vs-queue), SHADOW mode ─────────────────────
|
|
1445
|
+
// Today a no-prefix mid-turn message always QUEUES. autoClassifyMidTurnInbound
|
|
1446
|
+
// (auto-classify-mid-turn.ts) is the basis for a smarter default using
|
|
1447
|
+
// topic-vs-active-turn + reply-recency. Phase 1 ships SHADOW-ONLY: when this
|
|
1448
|
+
// flag is on we COMPUTE + LOG what we'd decide (decision/reason/same_topic/
|
|
1449
|
+
// ms_since_out) but the behaviour is UNCHANGED (still queue) — to gather the
|
|
1450
|
+
// real-world distribution (how often mid-turn messages are same-topic
|
|
1451
|
+
// continuations vs cross-topic, and the recency spread) before any action flips
|
|
1452
|
+
// on. Default OFF → zero overhead. In shadow the windows only shape the logged would-be decision.
|
|
1453
|
+
const AUTOCLASSIFY_MIDTURN_SHADOW = process.env.SWITCHROOM_AUTOCLASSIFY_MIDTURN_SHADOW === '1'
|
|
1454
|
+
// Recency clock for the mid-turn classifier: per-(chat,thread) wall-clock ms of
// the agent's LAST visible output. Deliberately NOT turn age — a long,
// actively-narrating worker turn must not read as "stale". Stamped beside
// signalTracker.noteOutbound. Capped at LAST_OUTPUT_MAX_KEYS entries so a
// long-lived gateway with many topics can't grow this map without bound.
const lastAgentOutputAt = new Map<string, number>()
const LAST_OUTPUT_MAX_KEYS = 512

/**
 * Record `ts` as the latest visible-output time for `key`, evicting the
 * least-recently-stamped key once the map exceeds LAST_OUTPUT_MAX_KEYS.
 *
 * @param key Per-(chat,thread) key identifying where the output was sent.
 * @param ts  Wall-clock milliseconds of the output.
 */
function noteAgentOutputAt(key: string, ts: number): void {
  // A Map iterates in insertion order, so delete-then-set moves `key` to the
  // tail (most recently stamped) even when it was already present.
  lastAgentOutputAt.delete(key)
  lastAgentOutputAt.set(key, ts)

  if (lastAgentOutputAt.size <= LAST_OUTPUT_MAX_KEYS) return

  // Over the cap: the head of the iteration order is the stalest key.
  const [stalest] = lastAgentOutputAt.keys()
  if (stalest !== undefined) lastAgentOutputAt.delete(stalest)
}
|
|
1426
1468
|
// Durable snapshot of the open obligation set on the persistent per-agent
|
|
1427
1469
|
// volume (STATE_DIR = /state/agent/telegram in prod). Closes the restart hole:
|
|
1428
1470
|
// the in-memory ledger alone empties on restart and the spool's boot-replay
|
|
@@ -1999,6 +2041,29 @@ let pendingDeferredInterrupt: PendingDeferredInterrupt | null = null
|
|
|
1999
2041
|
* Idempotent: nulls the slot and clears the timer before doing any work so a
|
|
2000
2042
|
* boundary event and the timeout can't double-fire.
|
|
2001
2043
|
*/
|
|
2044
|
+
// NOTE(review): this definition sits between the doc comment just above it
// (ending "…a boundary event and the timeout can't double-fire"), which appears
// to document fireDeferredInterrupt, and fireDeferredInterrupt itself. Consider
// moving this function so that comment stays attached to its function.
/**
 * Close the obligation of the turn that an `!` interrupt just SIGINT-killed.
 *
 * The killed turn was handling a user message with an open obligation, and it
 * does NOT reliably emit turn_end (so endCurrentTurnAtomic never closes it).
 * Left alone, the obligation would survive and the idle sweep would later
 * re-present/escalate "you have an earlier message you never answered" for a
 * question the user EXPLICITLY cancelled. An interrupt is a deliberate
 * redirect, so closing is the correct terminal — the user can re-ask.
 *
 * Only the interrupted turn's OWN obligation is closed; other open obligations
 * (queued siblings) are untouched. Does nothing when the ledger flag is off,
 * when no turn is in flight, or when the ledger reports nothing was closed for
 * this turn id (synthetic / already-closed turn).
 */
function cancelInterruptedObligation(): void {
  // Flag off → never look at the turn at all.
  const interrupted = OBLIGATION_LEDGER_ENABLED ? currentTurn : null
  if (interrupted == null) return

  const closed = obligationLedger.close(interrupted.turnId)
  if (!closed) return // not a tracked obligation — nothing to report

  process.stderr.write(
    `telegram gateway: obligation cancelled by interrupt origin=${interrupted.turnId}\n`,
  )
}
|
|
2066
|
+
|
|
2002
2067
|
async function fireDeferredInterrupt(reason: 'boundary' | 'timeout'): Promise<void> {
|
|
2003
2068
|
const pending = pendingDeferredInterrupt
|
|
2004
2069
|
if (pending == null) return
|
|
@@ -2027,6 +2092,10 @@ async function fireDeferredInterrupt(reason: 'boundary' | 'timeout'): Promise<vo
|
|
|
2027
2092
|
process.stderr.write(`telegram gateway: deferred-interrupt SIGINT failed: ${(err as Error).message}\n`)
|
|
2028
2093
|
}
|
|
2029
2094
|
|
|
2095
|
+
// The SIGINT just killed the in-flight turn — cancel its obligation so the
|
|
2096
|
+
// interrupted (user-redirected) question isn't re-presented/escalated later.
|
|
2097
|
+
cancelInterruptedObligation()
|
|
2098
|
+
|
|
2030
2099
|
// Deliver the replacement body as a fresh turn to the freshly-killed
|
|
2031
2100
|
// bridge — same sendToAgent + buffer-on-miss primitive the synchronous
|
|
2032
2101
|
// interrupt carve-out uses at the handleInbound delivery site.
|
|
@@ -2426,8 +2495,17 @@ function endCurrentTurnAtomic(turn: CurrentTurn): void {
|
|
|
2426
2495
|
// finalAnswerDelivered===false → stays open → re-presented (the intended
|
|
2427
2496
|
// catch). close() is a no-op for synthetic turns (turnId not in the ledger).
|
|
2428
2497
|
// No-op when the flag is off.
|
|
2429
|
-
if (OBLIGATION_LEDGER_ENABLED
|
|
2430
|
-
|
|
2498
|
+
if (OBLIGATION_LEDGER_ENABLED) {
|
|
2499
|
+
if (turn.finalAnswerDelivered) {
|
|
2500
|
+
obligationLedger.close(turn.turnId)
|
|
2501
|
+
} else {
|
|
2502
|
+
// Turn ended WITHOUT a final answer. If this turn was handling an open
|
|
2503
|
+
// obligation, stamp its grace clock so the idle sweep waits before
|
|
2504
|
+
// re-presenting/escalating — a slow/worker answer may still be in flight
|
|
2505
|
+
// (the over-escalation fix). No-op when turn.turnId isn't an open
|
|
2506
|
+
// obligation (synthetic / already-closed turn).
|
|
2507
|
+
obligationLedger.noteTurnEnded(turn.turnId, Date.now())
|
|
2508
|
+
}
|
|
2431
2509
|
}
|
|
2432
2510
|
// Component 2 — clear any prior no-reply drain timer for this turn; a
|
|
2433
2511
|
// fresh end re-evaluates below. (Idempotent — null when never armed.)
|
|
@@ -4925,7 +5003,13 @@ function obligationSweep(): void {
|
|
|
4925
5003
|
if (!obligationLedger.hasOpen()) return
|
|
4926
5004
|
if (turnInFlightForGate()) return // a turn is running — let it finish/answer
|
|
4927
5005
|
const agent = process.env.SWITCHROOM_AGENT_NAME ?? ''
|
|
4928
|
-
|
|
5006
|
+
// Grace window: skip an obligation whose handling turn ended < grace ago — its
|
|
5007
|
+
// trailing slow/worker answer may still be landing (over-escalation fix).
|
|
5008
|
+
const decision = obligationLedger.decideAtIdle(
|
|
5009
|
+
OBLIGATION_ESCALATE_GRACE_MS > 0
|
|
5010
|
+
? { now: Date.now(), graceMs: OBLIGATION_ESCALATE_GRACE_MS }
|
|
5011
|
+
: undefined,
|
|
5012
|
+
)
|
|
4929
5013
|
const o = decision.obligation
|
|
4930
5014
|
if (decision.action === 'none' || o == null) return
|
|
4931
5015
|
if (decision.action === 'represent') {
|
|
@@ -4950,54 +5034,30 @@ function obligationSweep(): void {
|
|
|
4950
5034
|
// (dead topic even after thread-fallback, blocked bot) is bounded by
|
|
4951
5035
|
// OBLIGATION_ESCALATE_MAX → close best-effort (the user is unreachable, so a
|
|
4952
5036
|
// bounded give-up beats an infinite loop / a boot-surviving poison record).
|
|
4953
|
-
|
|
4954
|
-
|
|
4955
|
-
|
|
4956
|
-
|
|
4957
|
-
|
|
4958
|
-
|
|
4959
|
-
|
|
4960
|
-
//
|
|
4961
|
-
|
|
4962
|
-
|
|
4963
|
-
|
|
4964
|
-
|
|
4965
|
-
|
|
4966
|
-
|
|
4967
|
-
|
|
4968
|
-
|
|
4969
|
-
|
|
4970
|
-
|
|
4971
|
-
|
|
4972
|
-
|
|
4973
|
-
|
|
4974
|
-
|
|
4975
|
-
|
|
4976
|
-
|
|
4977
|
-
OBLIGATION_ESCALATE_SEND_DEADLINE_MS,
|
|
4978
|
-
'obligation escalation send timed out',
|
|
4979
|
-
)
|
|
4980
|
-
.then(() => {
|
|
4981
|
-
obligationLedger.close(escId)
|
|
4982
|
-
process.stderr.write(
|
|
4983
|
-
`telegram gateway: obligation escalation delivered + closed origin=${escId}\n`,
|
|
4984
|
-
)
|
|
4985
|
-
})
|
|
4986
|
-
.catch((err) => {
|
|
4987
|
-
if (attempt >= OBLIGATION_ESCALATE_MAX) {
|
|
4988
|
-
obligationLedger.close(escId)
|
|
4989
|
-
process.stderr.write(
|
|
4990
|
-
`telegram gateway: obligation escalation PERMANENTLY undeliverable after ${attempt} attempts — closing best-effort origin=${escId}: ${err}\n`,
|
|
4991
|
-
)
|
|
4992
|
-
} else {
|
|
4993
|
-
process.stderr.write(
|
|
4994
|
-
`telegram gateway: obligation escalation send failed (attempt ${attempt}/${OBLIGATION_ESCALATE_MAX}), retrying next sweep origin=${escId}: ${err}\n`,
|
|
4995
|
-
)
|
|
4996
|
-
}
|
|
4997
|
-
})
|
|
4998
|
-
.finally(() => {
|
|
4999
|
-
obligationEscalateInFlight.delete(escId)
|
|
5000
|
-
})
|
|
5037
|
+
// Drive one escalation attempt. The send is a direct Telegram nudge
|
|
5038
|
+
// (retryWithThreadFallback: a stale/renumbered topic → THREAD_NOT_FOUND retries
|
|
5039
|
+
// thread-less, the #2096 pattern). driveEscalation guards against concurrent
|
|
5040
|
+
// sends, bounds the send with withDeadline (so a hung send can't leak the
|
|
5041
|
+
// in-flight flag and wedge the obligation OPEN), closes only after a successful
|
|
5042
|
+
// send, and bounds permanent failures to a best-effort close. Extracted so the
|
|
5043
|
+
// hang → bounded → terminal path is executable in escalation-drive.test.ts —
|
|
5044
|
+
// the path neither mtcute (can't hang Telegram) nor a synchronous test reaches.
|
|
5045
|
+
void driveEscalation({
|
|
5046
|
+
escId: o.originTurnId,
|
|
5047
|
+
inFlight: obligationEscalateInFlight,
|
|
5048
|
+
ledger: obligationLedger,
|
|
5049
|
+
send: () =>
|
|
5050
|
+
retryWithThreadFallback(
|
|
5051
|
+
robustApiCall,
|
|
5052
|
+
(tid) =>
|
|
5053
|
+
bot.api.sendMessage(o.chatId, obligationEscalationText(o), {
|
|
5054
|
+
...(tid != null ? { message_thread_id: tid } : {}),
|
|
5055
|
+
}),
|
|
5056
|
+
{ threadId: o.threadId, chat_id: o.chatId, verb: 'obligation.escalate' },
|
|
5057
|
+
),
|
|
5058
|
+
maxAttempts: OBLIGATION_ESCALATE_MAX,
|
|
5059
|
+
deadlineMs: OBLIGATION_ESCALATE_SEND_DEADLINE_MS,
|
|
5060
|
+
})
|
|
5001
5061
|
}
|
|
5002
5062
|
if (!STATIC && OBLIGATION_LEDGER_ENABLED) {
|
|
5003
5063
|
setInterval(obligationSweep, OBLIGATION_SWEEP_MS).unref()
|
|
@@ -6503,6 +6563,10 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
|
|
|
6503
6563
|
// silence-poke clock so the next poke is measured from this send.
|
|
6504
6564
|
signalTracker.noteOutbound(statusKey(chat_id, threadId), Date.now())
|
|
6505
6565
|
silencePoke.noteOutbound(statusKey(chat_id, threadId), Date.now())
|
|
6566
|
+
// Mid-turn auto-classify recency clock: the agent just produced visible output
|
|
6567
|
+
// in this chat/thread (cross-turn, unlike silencePoke's per-turn lastOutboundAt).
|
|
6568
|
+
// Only maintained when the shadow flag is on → truly zero overhead by default.
|
|
6569
|
+
if (AUTOCLASSIFY_MIDTURN_SHADOW) noteAgentOutputAt(statusKey(chat_id, threadId), Date.now())
|
|
6506
6570
|
// PR3b-cutover: feed lastOutboundAt to the delivery machine so its
|
|
6507
6571
|
// TTL `tick` suppresses the fallback for a long-but-active turn
|
|
6508
6572
|
// (model streaming past 5 min) — parity with silencePoke's own
|
|
@@ -10808,6 +10872,9 @@ async function handleInbound(
|
|
|
10808
10872
|
} catch (err) {
|
|
10809
10873
|
process.stderr.write(`telegram gateway: interrupt-marker SIGINT failed: ${(err as Error).message}\n`)
|
|
10810
10874
|
}
|
|
10875
|
+
// The SIGINT just killed the in-flight turn — cancel its obligation so the
|
|
10876
|
+
// interrupted (user-redirected) question isn't re-presented/escalated later.
|
|
10877
|
+
cancelInterruptedObligation()
|
|
10811
10878
|
}
|
|
10812
10879
|
if (interrupt.emptyBody) {
|
|
10813
10880
|
// #1075: thread-id-bearing — route through swallowingApiCall so
|
|
@@ -11414,6 +11481,33 @@ async function handleInbound(
|
|
|
11414
11481
|
isSteering = priorTurnInFlight && isSteerPrefix
|
|
11415
11482
|
if (priorTurnInFlight) priorTurnStartedAt = activeTurnStartedAt.get(key)
|
|
11416
11483
|
|
|
11484
|
+
// Mid-turn auto-classify SHADOW: compute what a topic+recency classifier
|
|
11485
|
+
// WOULD decide and log it — behaviour is UNCHANGED (isSteering above is
|
|
11486
|
+
// untouched). Gathers the real-world distribution (same-topic continuation
|
|
11487
|
+
// vs cross-topic, recency spread) to tune auto-steer before it ever acts.
|
|
11488
|
+
// No-op unless the shadow flag is on AND a turn is in flight (the only case
|
|
11489
|
+
// a steer-vs-queue decision is meaningful).
|
|
11490
|
+
if (AUTOCLASSIFY_MIDTURN_SHADOW && priorTurnInFlight) {
|
|
11491
|
+
const lastOut = lastAgentOutputAt.get(key)
|
|
11492
|
+
const msSinceOut = lastOut != null ? Date.now() - lastOut : null
|
|
11493
|
+
const shadow = autoClassifyMidTurnInbound({
|
|
11494
|
+
isSteerPrefix,
|
|
11495
|
+
isQueuePrefix: isQueuedPrefix,
|
|
11496
|
+
priorTurnInFlight,
|
|
11497
|
+
isDm: isDmChatId(chat_id),
|
|
11498
|
+
incomingThreadId: messageThreadId ?? null,
|
|
11499
|
+
activeTurnThreadId: currentTurn?.sessionThreadId ?? null,
|
|
11500
|
+
msSinceLastAgentOutput: msSinceOut,
|
|
11501
|
+
dmSteerWindowMs: 0, // DM auto-steer stays off (the April regime)
|
|
11502
|
+
topicSteerWindowMs: 8_000, // candidate window — what we're tuning
|
|
11503
|
+
})
|
|
11504
|
+
process.stderr.write(
|
|
11505
|
+
`telegram gateway: autoclassify-shadow chat_id=${chat_id} ` +
|
|
11506
|
+
`would=${shadow.decision} reason=${shadow.reason} same_topic=${shadow.sameTopic ?? '-'} ` +
|
|
11507
|
+
`ms_since_out=${msSinceOut ?? '-'} actual=${isSteering ? 'steer' : 'queue'}\n`,
|
|
11508
|
+
)
|
|
11509
|
+
}
|
|
11510
|
+
|
|
11417
11511
|
if (access.statusReactions !== false) {
|
|
11418
11512
|
if (isSteering) {
|
|
11419
11513
|
// Explicit steer: mark with 🤝 on the inbound message; leave the
|
|
@@ -44,6 +44,17 @@ export interface Obligation {
|
|
|
44
44
|
* can't loop forever — and, because it is part of the durable snapshot,
|
|
45
45
|
* can't become a boot-surviving poison record either. */
|
|
46
46
|
escalateAttempts?: number
|
|
47
|
+
/** Wall-clock ms the most recent turn handling THIS obligation ended (stamped
|
|
48
|
+
* at turn_end via noteTurnEnded). Drives the escalate-grace window: a slow /
|
|
49
|
+
* background-worker / multi-segment turn ends (the in-flight gate clears)
|
|
50
|
+
* before its trailing answer's reply lands, and the sweep would otherwise
|
|
51
|
+
* re-present/escalate in that gap — a false "I may have missed this" on a
|
|
52
|
+
* message that's actively being answered (fuzz-confirmed on v0.14.62). The
|
|
53
|
+
* decision waits `graceMs` after this stamp before acting, so the trailing
|
|
54
|
+
* answer's close has a beat to fire. Bounded: each re-present is itself a turn
|
|
55
|
+
* that re-stamps this once, and representCount is capped, so the ladder still
|
|
56
|
+
* terminates. Durable (part of the snapshot) so the grace survives restart. */
|
|
57
|
+
lastTurnEndedAt?: number
|
|
47
58
|
}
|
|
48
59
|
|
|
49
60
|
/** What the gateway should do for the oldest open obligation at an idle boundary. */
|
|
@@ -162,19 +173,50 @@ export class ObligationLedger {
|
|
|
162
173
|
* does not mutate. The caller performs the side effect then calls
|
|
163
174
|
* markRepresented / close accordingly.
|
|
164
175
|
*
|
|
165
|
-
* - 'none' → no open obligation
|
|
176
|
+
* - 'none' → no open obligation (or all open ones are within their
|
|
177
|
+
* escalate-grace window); the agent may idle.
|
|
166
178
|
* - 'represent' → re-present `obligation` as a fresh must-answer turn.
|
|
167
179
|
* - 'escalate' → it has already been re-presented maxRepresents times; send
|
|
168
180
|
* ONE operator-visible "did I miss this?" and close it
|
|
169
181
|
* (caller calls close) rather than loop forever.
|
|
182
|
+
*
|
|
183
|
+
* GRACE WINDOW (opts.graceMs > 0): an obligation whose handling turn ended less
|
|
184
|
+
* than `graceMs` ago is SKIPPED — its trailing answer may still be in flight
|
|
185
|
+
* (a worker / long-think / multi-segment turn ends the in-flight gate before
|
|
186
|
+
* the reply lands). We pick the oldest obligation that is OUT of grace, so a
|
|
187
|
+
* genuinely-stale one is still acted on while a freshly-ended one waits. Pure
|
|
188
|
+
* (clock injected via opts.now, mirroring the builder convention). With no opts
|
|
189
|
+
* (or graceMs<=0) this is the pre-grace behaviour exactly.
|
|
170
190
|
*/
|
|
171
|
-
decideAtIdle(): LedgerDecision {
|
|
172
|
-
const o =
|
|
191
|
+
decideAtIdle(opts?: { now: number; graceMs: number }): LedgerDecision {
  // Grace filtering applies only when the caller supplies a positive window;
  // with no opts (or graceMs <= 0) the plain oldest obligation is used.
  let candidate: Obligation | undefined
  if (opts != null && opts.graceMs > 0) {
    candidate = this.oldestEligible(opts.now, opts.graceMs)
  } else {
    candidate = this.oldest()
  }

  // Nothing open, or everything open is still inside its grace window.
  if (candidate === undefined) return { action: 'none' }

  // Re-present budget exhausted → escalate; otherwise re-present once more.
  if (candidate.representCount >= this.maxRepresents) {
    return { action: 'escalate', obligation: candidate }
  }
  return { action: 'represent', obligation: candidate }
}
|
|
177
198
|
|
|
199
|
+
/**
 * The oldest open obligation (smallest `openedAt`) that is NOT inside its
 * escalate-grace window.
 *
 * An obligation is inside the window only when its handling turn ended between
 * 0 and `graceMs` milliseconds before `now`. It is eligible when:
 *  - it has no `lastTurnEndedAt` (a still-queued obligation — it can't have a
 *    trailing answer in flight), or
 *  - its turn ended at least `graceMs` ago, or
 *  - its stamp is AHEAD of `now` (see the comment in the loop).
 *
 * @param now     Current wall-clock ms, injected by the caller.
 * @param graceMs Length of the grace window in ms.
 * @returns The oldest eligible obligation, or undefined when none is eligible.
 */
private oldestEligible(now: number, graceMs: number): Obligation | undefined {
  let best: Obligation | undefined
  for (const o of this.open.values()) {
    if (o.lastTurnEndedAt != null) {
      const sinceEnd = now - o.lastTurnEndedAt
      // A negative gap means the wall clock stepped backwards after the stamp
      // was written (the stamp is durable, so this can span a restart). Reading
      // that as "within grace" would hide the obligation until the clock caught
      // up — an unbounded wait — so only a gap in [0, graceMs) is skipped.
      if (sinceEnd >= 0 && sinceEnd < graceMs) continue
    }
    if (best === undefined || o.openedAt < best.openedAt) best = o
  }
  return best
}
|
|
210
|
+
|
|
211
|
+
/**
 * Record that the most recent turn handling `originTurnId` ended at `ts`; the
 * stamp is what the escalate-grace window is measured from.
 *
 * @param originTurnId Key of the obligation in the open set.
 * @param ts           Wall-clock ms at which the turn ended.
 *
 * Does nothing (and does not persist) when no open obligation has that id;
 * otherwise stores the stamp and calls persist().
 */
noteTurnEnded(originTurnId: string, ts: number): void {
  const tracked = this.open.get(originTurnId)
  if (tracked !== undefined) {
    tracked.lastTurnEndedAt = ts
    this.persist()
  }
}
|
|
219
|
+
|
|
178
220
|
/**
|
|
179
221
|
* Decide which obligation a substantive reply discharges — DETERMINISTICALLY,
|
|
180
222
|
* holding for any model behavior:
|
|
@@ -35,6 +35,26 @@ function readStdin() {
|
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
/**
 * Turn a tool-input field into display text while avoiding the
 * `[object Object]` trap.
 *
 * Only primitives make a meaningful label:
 *  - strings are returned unchanged;
 *  - numbers and booleans are returned via `String(v)`;
 *  - everything else (objects, arrays, null, undefined, …) becomes ''.
 *
 * Returning '' lets the caller fall through to its next fallback — a sibling
 * field or a fixed/humanized label — instead of surfacing the literal text
 * "[object Object]". This matters most on the MCP-tool path, where an
 * operator-configured server may pass a filter/query OBJECT in `query` /
 * `description` / `title`; a plain `String(i.query ?? '')` coercion would
 * render that object as "[object Object]".
 *
 * @param v Any tool-input value.
 * @returns The display text, or '' when `v` is not a string/number/boolean.
 */
function asText(v) {
  switch (typeof v) {
    case 'string':
      return v
    case 'number':
    case 'boolean':
      return String(v)
    default:
      return ''
  }
}
|
|
57
|
+
|
|
38
58
|
/**
|
|
39
59
|
* One-line, length-bounded escape of a value for inclusion in a label.
|
|
40
60
|
* Newlines collapsed, very long strings truncated with an ellipsis.
|
|
@@ -82,10 +102,10 @@ export function computeLabel(toolName, input) {
|
|
|
82
102
|
// for Bash/Task, matching the gateway's describeToolUse rendering.
|
|
83
103
|
switch (toolName) {
|
|
84
104
|
case 'Bash':
|
|
85
|
-
return clip(
|
|
105
|
+
return clip(asText(i.description), 70).trim() || 'Running a command'
|
|
86
106
|
case 'Task':
|
|
87
107
|
case 'Agent': {
|
|
88
|
-
const d = clip(
|
|
108
|
+
const d = clip(asText(i.description), 60).trim()
|
|
89
109
|
return d ? `Delegating: ${d}` : 'Delegating to a sub-agent'
|
|
90
110
|
}
|
|
91
111
|
case 'TodoWrite':
|
|
@@ -103,16 +123,16 @@ export function computeLabel(toolName, input) {
|
|
|
103
123
|
case 'Write':
|
|
104
124
|
return `Writing ${clip(safeBasename(i.file_path))}`.trim()
|
|
105
125
|
case 'Grep': {
|
|
106
|
-
const path = i.path ? clip(
|
|
107
|
-
const pat = clip(
|
|
126
|
+
const path = i.path ? clip(asText(i.path), 40) : '.'
|
|
127
|
+
const pat = clip(asText(i.pattern), 40)
|
|
108
128
|
return `Searching ${path} for ${pat}`
|
|
109
129
|
}
|
|
110
130
|
case 'Glob':
|
|
111
|
-
return `Finding files matching ${clip(
|
|
131
|
+
return `Finding files matching ${clip(asText(i.pattern), 60)}`
|
|
112
132
|
case 'WebFetch':
|
|
113
133
|
return `Fetching ${clip(urlHostPath(i.url), 60)}`
|
|
114
134
|
case 'WebSearch':
|
|
115
|
-
return `Searching the web for ${clip(
|
|
135
|
+
return `Searching the web for ${clip(asText(i.query), 60)}`
|
|
116
136
|
case 'NotebookEdit':
|
|
117
137
|
return `Editing notebook ${clip(safeBasename(i.notebook_path))}`
|
|
118
138
|
case 'BashOutput':
|
|
@@ -128,7 +148,7 @@ export function computeLabel(toolName, input) {
|
|
|
128
148
|
// sidecar JSONL and recover which skill fired per turn —
|
|
129
149
|
// the progress card path that used to surface this was retired
|
|
130
150
|
// when `progressDriver` was nulled out in #1122 PR3.
|
|
131
|
-
const slug = clip(
|
|
151
|
+
const slug = clip(asText(i.skill), 64)
|
|
132
152
|
return slug ? `Running skill ${slug}` : null
|
|
133
153
|
}
|
|
134
154
|
}
|
|
@@ -141,7 +161,7 @@ export function computeLabel(toolName, input) {
|
|
|
141
161
|
case 'mcp__switchroom-telegram__stream_reply':
|
|
142
162
|
return 'Replying'
|
|
143
163
|
case 'mcp__switchroom-telegram__react': {
|
|
144
|
-
const emoji = clip(
|
|
164
|
+
const emoji = clip(asText(i.emoji), 8)
|
|
145
165
|
return emoji ? `Reacting ${emoji}` : 'Reacting'
|
|
146
166
|
}
|
|
147
167
|
case 'mcp__switchroom-telegram__get_recent_messages':
|
|
@@ -177,7 +197,7 @@ export function computeLabel(toolName, input) {
|
|
|
177
197
|
return 'Looking through your files'
|
|
178
198
|
if (server === 'notion' || server === 'claude_ai_notion') return 'Checking your notes'
|
|
179
199
|
if (server === 'perplexity') {
|
|
180
|
-
const q = clip(
|
|
200
|
+
const q = clip(asText(i.query) || asText(i.description), 60).trim()
|
|
181
201
|
return q ? `Searching the web for ${q}` : 'Searching the web'
|
|
182
202
|
}
|
|
183
203
|
if (server === 'webkite') {
|
|
@@ -186,9 +206,9 @@ export function computeLabel(toolName, input) {
|
|
|
186
206
|
}
|
|
187
207
|
// Unknown MCP server: prefer a model-authored field, else humanized tool.
|
|
188
208
|
const desc =
|
|
189
|
-
clip(
|
|
190
|
-
clip(
|
|
191
|
-
clip(
|
|
209
|
+
clip(asText(i.description), 60).trim() ||
|
|
210
|
+
clip(asText(i.query), 50).trim() ||
|
|
211
|
+
clip(asText(i.title), 50).trim()
|
|
192
212
|
if (desc) return desc
|
|
193
213
|
return `Using ${tool.replace(/[-_]+/g, ' ')}`
|
|
194
214
|
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { autoClassifyMidTurnInbound, type AutoClassifyInput } from "../gateway/auto-classify-mid-turn.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Build a classifier input for the tests: a no-prefix message arriving mid-turn
 * in a non-DM chat, on the same thread (3) as the active turn, 2s after the
 * agent's last output, with DM auto-steer off and an 8s topic window.
 * Fields in `over` replace the defaults.
 */
function base(over: Partial<AutoClassifyInput> = {}): AutoClassifyInput {
  const defaults: AutoClassifyInput = {
    isSteerPrefix: false,
    isQueuePrefix: false,
    priorTurnInFlight: true,
    isDm: false,
    incomingThreadId: 3,
    activeTurnThreadId: 3,
    msSinceLastAgentOutput: 2000,
    dmSteerWindowMs: 0, // DM auto-steer off by default
    topicSteerWindowMs: 8000,
  };
  return { ...defaults, ...over };
}
|
|
18
|
+
|
|
19
|
+
describe("autoClassifyMidTurnInbound", () => {
  // Run the classifier on the default fixture with the given overrides.
  const classify = (over: Partial<AutoClassifyInput> = {}) => autoClassifyMidTurnInbound(base(over));
  // Overrides shared by every DM case: DM chat, no thread on either side.
  const dm: Partial<AutoClassifyInput> = { isDm: true, incomingThreadId: null, activeTurnThreadId: null };

  // ── Explicit prefixes and the not-mid-turn guard ──
  it("explicit /steer prefix always wins", () => {
    // Threads deliberately differ: the prefix must win even across topics.
    const result = classify({ isSteerPrefix: true, incomingThreadId: 9, activeTurnThreadId: 3 });
    expect(result.decision).toBe("steer");
    expect(result.reason).toBe("steer_prefix");
  });

  it("explicit /queue prefix always wins", () => {
    const result = classify({ isQueuePrefix: true });
    expect(result.decision).toBe("queue");
  });

  it("no turn in flight → queue (fresh turn, not our decision)", () => {
    const result = classify({ priorTurnInFlight: false });
    expect(result.decision).toBe("queue");
    expect(result.reason).toBe("not_mid_turn");
  });

  // ── Supergroup: topic is the strong signal ──
  it("supergroup, DIFFERENT topic than the active turn → queue (cross_topic), regardless of recency", () => {
    const result = classify({ incomingThreadId: 5, activeTurnThreadId: 3, msSinceLastAgentOutput: 100 });
    expect(result.decision).toBe("queue");
    expect(result.reason).toBe("cross_topic");
    expect(result.sameTopic).toBe(false);
  });

  it("supergroup, SAME topic + recent → steer", () => {
    const result = classify({ msSinceLastAgentOutput: 3000, topicSteerWindowMs: 8000 });
    expect(result.decision).toBe("steer");
    expect(result.reason).toBe("same_topic_recent");
    expect(result.sameTopic).toBe(true);
  });

  it("supergroup, SAME topic but STALE (older than window) → queue", () => {
    const result = classify({ msSinceLastAgentOutput: 20000, topicSteerWindowMs: 8000 });
    expect(result.decision).toBe("queue");
    expect(result.reason).toBe("same_topic_stale");
  });

  it("supergroup, no recency recorded (null) → queue (not treated as recent)", () => {
    const result = classify({ msSinceLastAgentOutput: null });
    expect(result.decision).toBe("queue");
    expect(result.reason).toBe("same_topic_stale");
  });

  it("topicSteerWindowMs=0 (auto-steer off) → queue, still reports sameTopic", () => {
    const result = classify({ topicSteerWindowMs: 0, incomingThreadId: 3, activeTurnThreadId: 3 });
    expect(result.decision).toBe("queue");
    expect(result.reason).toBe("topic_disabled");
    expect(result.sameTopic).toBe(true);
  });

  it("canonical thread compare: null/undefined/0 collapse to the same no-thread bucket", () => {
    const zeroVsNull = classify({ incomingThreadId: 0, activeTurnThreadId: null });
    expect(zeroVsNull.sameTopic).toBe(true);

    const undefinedVsZero = classify({ incomingThreadId: undefined, activeTurnThreadId: 0 });
    expect(undefinedVsZero.sameTopic).toBe(true);

    const oneVsZero = classify({ incomingThreadId: 1, activeTurnThreadId: 0 });
    expect(oneVsZero.sameTopic).toBe(false);
  });

  // ── DM: timing-only, off by default ──
  it("DM with dmSteerWindowMs=0 (default) → queue even if recent (DM auto-steer off)", () => {
    const result = classify({ ...dm, msSinceLastAgentOutput: 500, dmSteerWindowMs: 0 });
    expect(result.decision).toBe("queue");
    expect(result.reason).toBe("dm_disabled");
  });

  it("DM with dmSteerWindowMs>0 + recent → steer; stale → queue", () => {
    const recent = classify({ ...dm, msSinceLastAgentOutput: 5000, dmSteerWindowMs: 10000 });
    expect(recent.decision).toBe("steer");

    const stale = classify({ ...dm, msSinceLastAgentOutput: 15000, dmSteerWindowMs: 10000 });
    expect(stale.decision).toBe("queue");
  });
});
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { driveEscalation } from "../gateway/escalation-drive.js";
|
|
3
|
+
import { ObligationLedger } from "../gateway/obligation-ledger.js";
|
|
4
|
+
|
|
5
|
+
// Drives the REAL escalation step (the code obligationSweep calls) with the REAL
|
|
6
|
+
// ObligationLedger and the REAL withDeadline — including a fake hanging send,
|
|
7
|
+
// the exact path the total proof flagged and that mtcute / a synchronous test
|
|
8
|
+
// cannot reach. This is the executable verification of the hang-wedge fix.
|
|
9
|
+
|
|
10
|
+
/**
 * Test fixture: opens one obligation on `L` keyed by `id` (used as the
 * origin turn id), with fixed placeholder chat/thread/message values.
 *
 * NOTE(review): `openedAt: 0` presumably makes the obligation old enough to be
 * escalated immediately — confirm against ObligationLedger.
 */
function openEscalatable(L: ObligationLedger, id: string) {
  L.openIfAbsent({
    originTurnId: id,
    chatId: "-100",
    threadId: 3,
    messageId: 1,
    text: "x",
    openedAt: 0,
  });
}
|
|
13
|
+
|
|
14
|
+
const MAX = 3; // attempt cap handed to driveEscalation as maxAttempts in every test below
|
|
15
|
+
const DEADLINE = 15; // ms — short so the hang case settles fast and deterministically
|
|
16
|
+
|
|
17
|
+
describe("driveEscalation — the obligation escalation step is bounded and always reaches a terminal", () => {
|
|
18
|
+
it("a successful send closes the obligation and clears the in-flight flag", async () => {
  // Arrange: one open obligation, nothing in flight yet.
  const ledger = new ObligationLedger(2);
  openEscalatable(ledger, "c#1");
  const pending = new Set<string>();

  // Act: drive a single escalation whose send resolves immediately.
  await driveEscalation({
    escId: "c#1",
    inFlight: pending,
    ledger,
    send: () => Promise.resolve("sent"),
    maxAttempts: MAX,
    deadlineMs: DEADLINE,
    log: () => {},
  });

  // Assert: the obligation is closed and the in-flight marker is gone.
  expect(ledger.isOpen("c#1")).toBe(false);
  expect(pending.has("c#1")).toBe(false);
});
|
|
34
|
+
|
|
35
|
+
it("a transient failure below the cap stays OPEN and clears the flag (retried next sweep)", async () => {
  const ledger = new ObligationLedger(2);
  openEscalatable(ledger, "c#1");
  const pending = new Set<string>();

  // One attempt whose send rejects; MAX is 3, so this is still below the cap.
  await driveEscalation({
    escId: "c#1",
    inFlight: pending,
    ledger,
    send: () => Promise.reject(new Error("network blip")),
    maxAttempts: MAX,
    deadlineMs: DEADLINE,
    log: () => {},
  });

  // Obligation remains open for a later retry...
  expect(ledger.isOpen("c#1")).toBe(true);
  // ...and the in-flight marker is released so a later drive is not blocked.
  expect(pending.has("c#1")).toBe(false);
});
|
|
51
|
+
|
|
52
|
+
it("THE FIX: a send that NEVER settles still clears the flag (bounded by the deadline)", async () => {
  const ledger = new ObligationLedger(2);
  openEscalatable(ledger, "c#1");
  const pending = new Set<string>();
  const startedAt = Date.now();
  let sendCalls = 0;

  // The send returns a promise that neither resolves nor rejects — the stalled
  // send that, pre-fix, left the in-flight flag set forever and wedged the
  // obligation OPEN.
  await driveEscalation({
    escId: "c#1",
    inFlight: pending,
    ledger,
    send: () => {
      sendCalls += 1;
      return new Promise(() => {});
    },
    maxAttempts: MAX,
    deadlineMs: DEADLINE,
    log: () => {},
  });

  expect(sendCalls).toBe(1);
  // Cleared despite the hang.
  expect(pending.has("c#1")).toBe(false);
  // The drive came back around the deadline rather than hanging forever.
  const elapsedMs = Date.now() - startedAt;
  expect(elapsedMs).toBeLessThan(DEADLINE + 500);
});
|
|
76
|
+
|
|
77
|
+
it("repeated hung sends reach a bounded terminal (close best-effort), never an infinite loop", async () => {
  const ledger = new ObligationLedger(2);
  openEscalatable(ledger, "c#1");
  const pending = new Set<string>();
  let sendCalls = 0;
  let driveCount = 0;

  // Stand-in for the periodic sweep: keep driving while the obligation is
  // open, with every send hanging. The cap of 20 only guards the test itself
  // against looping forever if the code under test regresses.
  for (; ledger.isOpen("c#1") && driveCount < 20; driveCount++) {
    const attempt = driveEscalation({
      escId: "c#1",
      inFlight: pending,
      ledger,
      send: () => {
        sendCalls += 1;
        return new Promise(() => {});
      },
      maxAttempts: MAX,
      deadlineMs: DEADLINE,
      log: () => {},
    });
    // A guarded (skipped) drive returns nothing to wait on.
    if (attempt) await attempt;
  }

  // The obligation ended up closed, with nothing left in flight.
  expect(ledger.isOpen("c#1")).toBe(false);
  expect(pending.has("c#1")).toBe(false);
  // Exactly MAX sends were issued before the close.
  expect(sendCalls).toBe(MAX);
  expect(driveCount).toBeLessThanOrEqual(MAX + 1);
});
|
|
105
|
+
|
|
106
|
+
it("the in-flight guard prevents a concurrent second send for the same obligation", async () => {
  const ledger = new ObligationLedger(2);
  openEscalatable(ledger, "c#1");
  const pending = new Set<string>();
  let sendCalls = 0;

  // A send that counts its invocations and then never settles.
  const hang = () => {
    sendCalls += 1;
    return new Promise<void>(() => {});
  };
  // Each call builds a fresh options object for the same obligation.
  const drive = () =>
    driveEscalation({
      escId: "c#1",
      inFlight: pending,
      ledger,
      send: hang,
      maxAttempts: MAX,
      deadlineMs: 60,
      log: () => {},
    });

  const first = drive();
  // Issued while the first drive is still pending, so it must do nothing.
  const second = drive();

  expect(second).toBeUndefined();
  expect(sendCalls).toBe(1);
  expect(ledger.list()[0].escalateAttempts).toBe(1);

  // Wait for the first drive so its deadline timer does not outlive the test.
  await first;
});
|
|
123
|
+
});
|