switchroom 0.14.62 → 0.14.64
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +149 -36
- package/telegram-plugin/gateway/auto-classify-mid-turn.ts +119 -0
- package/telegram-plugin/gateway/escalation-drive.ts +79 -0
- package/telegram-plugin/gateway/gateway.ts +154 -55
- package/telegram-plugin/gateway/obligation-ledger.ts +45 -3
- package/telegram-plugin/hooks/tool-label-pretool.mjs +32 -12
- package/telegram-plugin/tests/auto-classify-mid-turn.test.ts +87 -0
- package/telegram-plugin/tests/escalation-drive.test.ts +123 -0
- package/telegram-plugin/tests/obligation-determinism.test.ts +63 -3
- package/telegram-plugin/tests/obligation-ledger.test.ts +92 -0
|
@@ -142,6 +142,7 @@ import {
|
|
|
142
142
|
resolveRetentionDays as resolveRegistryRetentionDays,
|
|
143
143
|
} from '../registry/reaper.js'
|
|
144
144
|
import { parseQueuePrefix, parseSteerPrefix, formatPriorAssistantPreview, formatReplyToText } from '../steering.js'
|
|
145
|
+
import { autoClassifyMidTurnInbound } from './auto-classify-mid-turn.js'
|
|
145
146
|
import {
|
|
146
147
|
renderOperatorEvent,
|
|
147
148
|
shouldEmitOperatorEvent,
|
|
@@ -289,7 +290,7 @@ import {
|
|
|
289
290
|
obligationEscalationText,
|
|
290
291
|
} from './obligation-ledger.js'
|
|
291
292
|
import { loadObligations, persistObligations } from './obligation-store.js'
|
|
292
|
-
import {
|
|
293
|
+
import { driveEscalation } from './escalation-drive.js'
|
|
293
294
|
import { createInboundSpool } from './inbound-spool.js'
|
|
294
295
|
import { purgeStaleTurnsForChat } from './turn-state-purge.js'
|
|
295
296
|
import { decideInboundDelivery } from './inbound-delivery-gate.js'
|
|
@@ -1399,9 +1400,12 @@ const deliveryQueue = createDeliveryQueue<InboundMessage>()
|
|
|
1399
1400
|
// re-presented (bounded) until it closes, so a message the model read but never
|
|
1400
1401
|
// answered (the marko 715 drop) cannot be silently lost. ADDITIVE + flagged: it
|
|
1401
1402
|
// runs ALONGSIDE the existing acks/spool/buffer (PR3 retires the redundant
|
|
1402
|
-
// pieces).
|
|
1403
|
-
//
|
|
1404
|
-
|
|
1403
|
+
// pieces). DEFAULT ON (graduated from canary 2026-06-04 after the hang-fix
|
|
1404
|
+
// (#2152, total-proof), the escalate-grace (#2156, kills the fuzz-found
|
|
1405
|
+
// over-escalation), and interrupt-cancel (#2157) — proven on marko (supergroup)
|
|
1406
|
+
// + test-harness for days with 0 false cards). Kill switch:
|
|
1407
|
+
// SWITCHROOM_OBLIGATION_LEDGER=0 → every hook below is a no-op → zero change.
|
|
1408
|
+
const OBLIGATION_LEDGER_ENABLED = process.env.SWITCHROOM_OBLIGATION_LEDGER !== '0'
|
|
1405
1409
|
const OBLIGATION_REPRESENT_MAX = 2
|
|
1406
1410
|
const OBLIGATION_SWEEP_MS = 5_000
|
|
1407
1411
|
// Bound on escalation SEND attempts. The escalation now closes only AFTER a
|
|
@@ -1423,6 +1427,49 @@ const OBLIGATION_ESCALATE_MAX = 3
|
|
|
1423
1427
|
// bounded escalate ladder to a terminal. 45s comfortably exceeds robustApiCall's
|
|
1424
1428
|
// 3-attempt network backoff so a legitimate slow send isn't cut short.
|
|
1425
1429
|
const OBLIGATION_ESCALATE_SEND_DEADLINE_MS = 45_000
|
|
1430
|
+
// Escalate-grace window. A slow / background-worker / multi-segment turn ends
|
|
1431
|
+
// (the in-flight gate clears) BEFORE its trailing answer's reply lands, and the
|
|
1432
|
+
// 5s sweep would re-present/escalate in that gap — a false "⚠️ I may have missed
|
|
1433
|
+
// this" on a message that's actively being answered (fuzz-confirmed on v0.14.62:
|
|
1434
|
+
// ~14% of marko's no-reply turn-ends had the answer in flight). An obligation
|
|
1435
|
+
// whose handling turn ended < this ago is skipped by decideAtIdle, giving the
|
|
1436
|
+
// trailing answer's close a beat to fire. Bounded: each re-present is itself a
|
|
1437
|
+
// turn that re-stamps once, representCount is capped → the ladder still
|
|
1438
|
+
// terminates. 45s covers the usual case in the observed "answer lands within ~60s, usually <40s" gap (not the ~60s tail).
|
|
1439
|
+
// Kill switch: SWITCHROOM_OBLIGATION_ESCALATE_GRACE_MS=0 → pre-grace behaviour.
|
|
1440
|
+
const OBLIGATION_ESCALATE_GRACE_MS = (() => {
|
|
1441
|
+
const raw = process.env.SWITCHROOM_OBLIGATION_ESCALATE_GRACE_MS
|
|
1442
|
+
if (raw == null || raw === '') return 45_000
|
|
1443
|
+
const n = Number(raw)
|
|
1444
|
+
return Number.isFinite(n) && n >= 0 ? n : 45_000
|
|
1445
|
+
})()
|
|
1446
|
+
|
|
1447
|
+
// ─── Mid-turn auto-classify (steer-vs-queue), SHADOW mode ─────────────────────
|
|
1448
|
+
// Today a no-prefix mid-turn message always QUEUES. autoClassifyMidTurnInbound
|
|
1449
|
+
// (auto-classify-mid-turn.ts) is the basis for a smarter default using
|
|
1450
|
+
// topic-vs-active-turn + reply-recency. Phase 1 ships SHADOW-ONLY: when this
|
|
1451
|
+
// flag is on we COMPUTE + LOG what we'd decide (decision/reason/same_topic/
|
|
1452
|
+
// ms_since_out) but the behaviour is UNCHANGED (still queue) — to gather the
|
|
1453
|
+
// real-world distribution (how often mid-turn messages are same-topic
|
|
1454
|
+
// continuations vs cross-topic, and the recency spread) before any action flips
|
|
1455
|
+
// on. DEFAULT ON fleet-wide (data-gathering: zero behaviour change — only logs +
|
|
1456
|
+
// a bounded recency map). This is a TEMPORARY default; when auto-steer ships it
|
|
1457
|
+
// supersedes shadow. Kill switch: SWITCHROOM_AUTOCLASSIFY_MIDTURN_SHADOW=0.
|
|
1458
|
+
const AUTOCLASSIFY_MIDTURN_SHADOW = process.env.SWITCHROOM_AUTOCLASSIFY_MIDTURN_SHADOW !== '0'
|
|
1459
|
+
// Per-(chat,thread) wall-clock ms of the agent's LAST visible output — the
|
|
1460
|
+
// recency clock the classifier uses (NOT turn age: a long actively-narrating
|
|
1461
|
+
// worker turn must not read "stale"). Stamped beside signalTracker.noteOutbound.
|
|
1462
|
+
// LRU-bounded so a long-lived gateway with many topics can't grow unboundedly.
|
|
1463
|
+
const lastAgentOutputAt = new Map<string, number>()
|
|
1464
|
+
const LAST_OUTPUT_MAX_KEYS = 512
|
|
1465
|
+
function noteAgentOutputAt(key: string, ts: number): void {
|
|
1466
|
+
lastAgentOutputAt.delete(key) // re-insert → most-recently-used at the tail
|
|
1467
|
+
lastAgentOutputAt.set(key, ts)
|
|
1468
|
+
if (lastAgentOutputAt.size > LAST_OUTPUT_MAX_KEYS) {
|
|
1469
|
+
const oldest = lastAgentOutputAt.keys().next().value
|
|
1470
|
+
if (oldest !== undefined) lastAgentOutputAt.delete(oldest)
|
|
1471
|
+
}
|
|
1472
|
+
}
|
|
1426
1473
|
// Durable snapshot of the open obligation set on the persistent per-agent
|
|
1427
1474
|
// volume (STATE_DIR = /state/agent/telegram in prod). Closes the restart hole:
|
|
1428
1475
|
// the in-memory ledger alone empties on restart and the spool's boot-replay
|
|
@@ -1999,6 +2046,29 @@ let pendingDeferredInterrupt: PendingDeferredInterrupt | null = null
|
|
|
1999
2046
|
* Idempotent: nulls the slot and clears the timer before doing any work so a
|
|
2000
2047
|
* boundary event and the timeout can't double-fire.
|
|
2001
2048
|
*/
|
|
2049
|
+
/**
|
|
2050
|
+
* An `!` interrupt SIGINT-kills the in-flight turn. That turn was handling a
|
|
2051
|
+
* user message with an open obligation, and the killed turn does NOT reliably
|
|
2052
|
+
* emit turn_end (so endCurrentTurnAtomic never closes it) — so without this the
|
|
2053
|
+
* obligation survives and the idle sweep later re-presents/escalates "you have
|
|
2054
|
+
* an earlier message you never answered" for a question the user EXPLICITLY
|
|
2055
|
+
* cancelled. An interrupt is a deliberate redirect, so closing that obligation
|
|
2056
|
+
* is the correct terminal (the user chose to interrupt; they can re-ask). Only
|
|
2057
|
+
* the interrupted turn's OWN obligation is closed — queued siblings (other open
|
|
2058
|
+
* obligations) are untouched. No-op when the flag is off, no turn is in flight,
|
|
2059
|
+
* or the turn isn't a tracked obligation (synthetic / already closed).
|
|
2060
|
+
*/
|
|
2061
|
+
function cancelInterruptedObligation(): void {
|
|
2062
|
+
if (!OBLIGATION_LEDGER_ENABLED) return
|
|
2063
|
+
const turn = currentTurn
|
|
2064
|
+
if (turn == null) return
|
|
2065
|
+
if (obligationLedger.close(turn.turnId)) {
|
|
2066
|
+
process.stderr.write(
|
|
2067
|
+
`telegram gateway: obligation cancelled by interrupt origin=${turn.turnId}\n`,
|
|
2068
|
+
)
|
|
2069
|
+
}
|
|
2070
|
+
}
|
|
2071
|
+
|
|
2002
2072
|
async function fireDeferredInterrupt(reason: 'boundary' | 'timeout'): Promise<void> {
|
|
2003
2073
|
const pending = pendingDeferredInterrupt
|
|
2004
2074
|
if (pending == null) return
|
|
@@ -2027,6 +2097,10 @@ async function fireDeferredInterrupt(reason: 'boundary' | 'timeout'): Promise<vo
|
|
|
2027
2097
|
process.stderr.write(`telegram gateway: deferred-interrupt SIGINT failed: ${(err as Error).message}\n`)
|
|
2028
2098
|
}
|
|
2029
2099
|
|
|
2100
|
+
// A SIGINT was just attempted on the in-flight turn (this runs even if it failed above) — cancel its obligation so the
|
|
2101
|
+
// interrupted (user-redirected) question isn't re-presented/escalated later.
|
|
2102
|
+
cancelInterruptedObligation()
|
|
2103
|
+
|
|
2030
2104
|
// Deliver the replacement body as a fresh turn to the freshly-killed
|
|
2031
2105
|
// bridge — same sendToAgent + buffer-on-miss primitive the synchronous
|
|
2032
2106
|
// interrupt carve-out uses at the handleInbound delivery site.
|
|
@@ -2426,8 +2500,17 @@ function endCurrentTurnAtomic(turn: CurrentTurn): void {
|
|
|
2426
2500
|
// finalAnswerDelivered===false → stays open → re-presented (the intended
|
|
2427
2501
|
// catch). close() is a no-op for synthetic turns (turnId not in the ledger).
|
|
2428
2502
|
// No-op when the flag is off.
|
|
2429
|
-
if (OBLIGATION_LEDGER_ENABLED
|
|
2430
|
-
|
|
2503
|
+
if (OBLIGATION_LEDGER_ENABLED) {
|
|
2504
|
+
if (turn.finalAnswerDelivered) {
|
|
2505
|
+
obligationLedger.close(turn.turnId)
|
|
2506
|
+
} else {
|
|
2507
|
+
// Turn ended WITHOUT a final answer. If this turn was handling an open
|
|
2508
|
+
// obligation, stamp its grace clock so the idle sweep waits before
|
|
2509
|
+
// re-presenting/escalating — a slow/worker answer may still be in flight
|
|
2510
|
+
// (the over-escalation fix). No-op when turn.turnId isn't an open
|
|
2511
|
+
// obligation (synthetic / already-closed turn).
|
|
2512
|
+
obligationLedger.noteTurnEnded(turn.turnId, Date.now())
|
|
2513
|
+
}
|
|
2431
2514
|
}
|
|
2432
2515
|
// Component 2 — clear any prior no-reply drain timer for this turn; a
|
|
2433
2516
|
// fresh end re-evaluates below. (Idempotent — null when never armed.)
|
|
@@ -4925,7 +5008,13 @@ function obligationSweep(): void {
|
|
|
4925
5008
|
if (!obligationLedger.hasOpen()) return
|
|
4926
5009
|
if (turnInFlightForGate()) return // a turn is running — let it finish/answer
|
|
4927
5010
|
const agent = process.env.SWITCHROOM_AGENT_NAME ?? ''
|
|
4928
|
-
|
|
5011
|
+
// Grace window: skip an obligation whose handling turn ended < grace ago — its
|
|
5012
|
+
// trailing slow/worker answer may still be landing (over-escalation fix).
|
|
5013
|
+
const decision = obligationLedger.decideAtIdle(
|
|
5014
|
+
OBLIGATION_ESCALATE_GRACE_MS > 0
|
|
5015
|
+
? { now: Date.now(), graceMs: OBLIGATION_ESCALATE_GRACE_MS }
|
|
5016
|
+
: undefined,
|
|
5017
|
+
)
|
|
4929
5018
|
const o = decision.obligation
|
|
4930
5019
|
if (decision.action === 'none' || o == null) return
|
|
4931
5020
|
if (decision.action === 'represent') {
|
|
@@ -4950,54 +5039,30 @@ function obligationSweep(): void {
|
|
|
4950
5039
|
// (dead topic even after thread-fallback, blocked bot) is bounded by
|
|
4951
5040
|
// OBLIGATION_ESCALATE_MAX → close best-effort (the user is unreachable, so a
|
|
4952
5041
|
// bounded give-up beats an infinite loop / a boot-surviving poison record).
|
|
4953
|
-
|
|
4954
|
-
|
|
4955
|
-
|
|
4956
|
-
|
|
4957
|
-
|
|
4958
|
-
|
|
4959
|
-
|
|
4960
|
-
//
|
|
4961
|
-
|
|
4962
|
-
|
|
4963
|
-
|
|
4964
|
-
|
|
4965
|
-
|
|
4966
|
-
|
|
4967
|
-
|
|
4968
|
-
|
|
4969
|
-
|
|
4970
|
-
|
|
4971
|
-
|
|
4972
|
-
|
|
4973
|
-
|
|
4974
|
-
|
|
4975
|
-
|
|
4976
|
-
|
|
4977
|
-
OBLIGATION_ESCALATE_SEND_DEADLINE_MS,
|
|
4978
|
-
'obligation escalation send timed out',
|
|
4979
|
-
)
|
|
4980
|
-
.then(() => {
|
|
4981
|
-
obligationLedger.close(escId)
|
|
4982
|
-
process.stderr.write(
|
|
4983
|
-
`telegram gateway: obligation escalation delivered + closed origin=${escId}\n`,
|
|
4984
|
-
)
|
|
4985
|
-
})
|
|
4986
|
-
.catch((err) => {
|
|
4987
|
-
if (attempt >= OBLIGATION_ESCALATE_MAX) {
|
|
4988
|
-
obligationLedger.close(escId)
|
|
4989
|
-
process.stderr.write(
|
|
4990
|
-
`telegram gateway: obligation escalation PERMANENTLY undeliverable after ${attempt} attempts — closing best-effort origin=${escId}: ${err}\n`,
|
|
4991
|
-
)
|
|
4992
|
-
} else {
|
|
4993
|
-
process.stderr.write(
|
|
4994
|
-
`telegram gateway: obligation escalation send failed (attempt ${attempt}/${OBLIGATION_ESCALATE_MAX}), retrying next sweep origin=${escId}: ${err}\n`,
|
|
4995
|
-
)
|
|
4996
|
-
}
|
|
4997
|
-
})
|
|
4998
|
-
.finally(() => {
|
|
4999
|
-
obligationEscalateInFlight.delete(escId)
|
|
5000
|
-
})
|
|
5042
|
+
// Drive one escalation attempt. The send is a direct Telegram nudge
|
|
5043
|
+
// (retryWithThreadFallback: a stale/renumbered topic → THREAD_NOT_FOUND retries
|
|
5044
|
+
// thread-less, the #2096 pattern). driveEscalation guards against concurrent
|
|
5045
|
+
// sends, bounds the send with withDeadline (so a hung send can't leak the
|
|
5046
|
+
// in-flight flag and wedge the obligation OPEN), closes only after a successful
|
|
5047
|
+
// send, and bounds permanent failures to a best-effort close. Extracted so the
|
|
5048
|
+
// hang → bounded → terminal path is executable in escalation-drive.test.ts —
|
|
5049
|
+
// the path neither mtcute (can't hang Telegram) nor a synchronous test reaches.
|
|
5050
|
+
void driveEscalation({
|
|
5051
|
+
escId: o.originTurnId,
|
|
5052
|
+
inFlight: obligationEscalateInFlight,
|
|
5053
|
+
ledger: obligationLedger,
|
|
5054
|
+
send: () =>
|
|
5055
|
+
retryWithThreadFallback(
|
|
5056
|
+
robustApiCall,
|
|
5057
|
+
(tid) =>
|
|
5058
|
+
bot.api.sendMessage(o.chatId, obligationEscalationText(o), {
|
|
5059
|
+
...(tid != null ? { message_thread_id: tid } : {}),
|
|
5060
|
+
}),
|
|
5061
|
+
{ threadId: o.threadId, chat_id: o.chatId, verb: 'obligation.escalate' },
|
|
5062
|
+
),
|
|
5063
|
+
maxAttempts: OBLIGATION_ESCALATE_MAX,
|
|
5064
|
+
deadlineMs: OBLIGATION_ESCALATE_SEND_DEADLINE_MS,
|
|
5065
|
+
})
|
|
5001
5066
|
}
|
|
5002
5067
|
if (!STATIC && OBLIGATION_LEDGER_ENABLED) {
|
|
5003
5068
|
setInterval(obligationSweep, OBLIGATION_SWEEP_MS).unref()
|
|
@@ -6503,6 +6568,10 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
|
|
|
6503
6568
|
// silence-poke clock so the next poke is measured from this send.
|
|
6504
6569
|
signalTracker.noteOutbound(statusKey(chat_id, threadId), Date.now())
|
|
6505
6570
|
silencePoke.noteOutbound(statusKey(chat_id, threadId), Date.now())
|
|
6571
|
+
// Mid-turn auto-classify recency clock: the agent just produced visible output
|
|
6572
|
+
// in this chat/thread (cross-turn, unlike silencePoke's per-turn lastOutboundAt).
|
|
6573
|
+
// Only maintained when the shadow flag is on (the default); SWITCHROOM_AUTOCLASSIFY_MIDTURN_SHADOW=0 skips it.
|
|
6574
|
+
if (AUTOCLASSIFY_MIDTURN_SHADOW) noteAgentOutputAt(statusKey(chat_id, threadId), Date.now())
|
|
6506
6575
|
// PR3b-cutover: feed lastOutboundAt to the delivery machine so its
|
|
6507
6576
|
// TTL `tick` suppresses the fallback for a long-but-active turn
|
|
6508
6577
|
// (model streaming past 5 min) — parity with silencePoke's own
|
|
@@ -10808,6 +10877,9 @@ async function handleInbound(
|
|
|
10808
10877
|
} catch (err) {
|
|
10809
10878
|
process.stderr.write(`telegram gateway: interrupt-marker SIGINT failed: ${(err as Error).message}\n`)
|
|
10810
10879
|
}
|
|
10880
|
+
// A SIGINT was just attempted on the in-flight turn (this runs even if it failed above) — cancel its obligation so the
|
|
10881
|
+
// interrupted (user-redirected) question isn't re-presented/escalated later.
|
|
10882
|
+
cancelInterruptedObligation()
|
|
10811
10883
|
}
|
|
10812
10884
|
if (interrupt.emptyBody) {
|
|
10813
10885
|
// #1075: thread-id-bearing — route through swallowingApiCall so
|
|
@@ -11414,6 +11486,33 @@ async function handleInbound(
|
|
|
11414
11486
|
isSteering = priorTurnInFlight && isSteerPrefix
|
|
11415
11487
|
if (priorTurnInFlight) priorTurnStartedAt = activeTurnStartedAt.get(key)
|
|
11416
11488
|
|
|
11489
|
+
// Mid-turn auto-classify SHADOW: compute what a topic+recency classifier
|
|
11490
|
+
// WOULD decide and log it — behaviour is UNCHANGED (isSteering above is
|
|
11491
|
+
// untouched). Gathers the real-world distribution (same-topic continuation
|
|
11492
|
+
// vs cross-topic, recency spread) to tune auto-steer before it ever acts.
|
|
11493
|
+
// No-op unless the shadow flag is on AND a turn is in flight (the only case
|
|
11494
|
+
// a steer-vs-queue decision is meaningful).
|
|
11495
|
+
if (AUTOCLASSIFY_MIDTURN_SHADOW && priorTurnInFlight) {
|
|
11496
|
+
const lastOut = lastAgentOutputAt.get(key)
|
|
11497
|
+
const msSinceOut = lastOut != null ? Date.now() - lastOut : null
|
|
11498
|
+
const shadow = autoClassifyMidTurnInbound({
|
|
11499
|
+
isSteerPrefix,
|
|
11500
|
+
isQueuePrefix: isQueuedPrefix,
|
|
11501
|
+
priorTurnInFlight,
|
|
11502
|
+
isDm: isDmChatId(chat_id),
|
|
11503
|
+
incomingThreadId: messageThreadId ?? null,
|
|
11504
|
+
activeTurnThreadId: currentTurn?.sessionThreadId ?? null,
|
|
11505
|
+
msSinceLastAgentOutput: msSinceOut,
|
|
11506
|
+
dmSteerWindowMs: 0, // DM auto-steer stays off (the April regime)
|
|
11507
|
+
topicSteerWindowMs: 8_000, // candidate window — what we're tuning
|
|
11508
|
+
})
|
|
11509
|
+
process.stderr.write(
|
|
11510
|
+
`telegram gateway: autoclassify-shadow chat_id=${chat_id} ` +
|
|
11511
|
+
`would=${shadow.decision} reason=${shadow.reason} same_topic=${shadow.sameTopic ?? '-'} ` +
|
|
11512
|
+
`ms_since_out=${msSinceOut ?? '-'} actual=${isSteering ? 'steer' : 'queue'}\n`,
|
|
11513
|
+
)
|
|
11514
|
+
}
|
|
11515
|
+
|
|
11417
11516
|
if (access.statusReactions !== false) {
|
|
11418
11517
|
if (isSteering) {
|
|
11419
11518
|
// Explicit steer: mark with 🤝 on the inbound message; leave the
|
|
@@ -44,6 +44,17 @@ export interface Obligation {
|
|
|
44
44
|
* can't loop forever — and, because it is part of the durable snapshot,
|
|
45
45
|
* can't become a boot-surviving poison record either. */
|
|
46
46
|
escalateAttempts?: number
|
|
47
|
+
/** Wall-clock ms the most recent turn handling THIS obligation ended (stamped
|
|
48
|
+
* at turn_end via noteTurnEnded). Drives the escalate-grace window: a slow /
|
|
49
|
+
* background-worker / multi-segment turn ends (the in-flight gate clears)
|
|
50
|
+
* before its trailing answer's reply lands, and the sweep would otherwise
|
|
51
|
+
* re-present/escalate in that gap — a false "I may have missed this" on a
|
|
52
|
+
* message that's actively being answered (fuzz-confirmed on v0.14.62). The
|
|
53
|
+
* decision waits `graceMs` after this stamp before acting, so the trailing
|
|
54
|
+
* answer's close has a beat to fire. Bounded: each re-present is itself a turn
|
|
55
|
+
* that re-stamps this once, and representCount is capped, so the ladder still
|
|
56
|
+
* terminates. Durable (part of the snapshot) so the grace survives restart. */
|
|
57
|
+
lastTurnEndedAt?: number
|
|
47
58
|
}
|
|
48
59
|
|
|
49
60
|
/** What the gateway should do for the oldest open obligation at an idle boundary. */
|
|
@@ -162,19 +173,50 @@ export class ObligationLedger {
|
|
|
162
173
|
* does not mutate. The caller performs the side effect then calls
|
|
163
174
|
* markRepresented / close accordingly.
|
|
164
175
|
*
|
|
165
|
-
* - 'none' → no open obligation
|
|
176
|
+
* - 'none' → no open obligation (or all open ones are within their
|
|
177
|
+
* escalate-grace window); the agent may idle.
|
|
166
178
|
* - 'represent' → re-present `obligation` as a fresh must-answer turn.
|
|
167
179
|
* - 'escalate' → it has already been re-presented maxRepresents times; send
|
|
168
180
|
* ONE operator-visible "did I miss this?" and close it
|
|
169
181
|
* (caller calls close) rather than loop forever.
|
|
182
|
+
*
|
|
183
|
+
* GRACE WINDOW (opts.graceMs > 0): an obligation whose handling turn ended less
|
|
184
|
+
* than `graceMs` ago is SKIPPED — its trailing answer may still be in flight
|
|
185
|
+
* (a worker / long-think / multi-segment turn ends the in-flight gate before
|
|
186
|
+
* the reply lands). We pick the oldest obligation that is OUT of grace, so a
|
|
187
|
+
* genuinely-stale one is still acted on while a freshly-ended one waits. Pure
|
|
188
|
+
* (clock injected via opts.now, mirroring the builder convention). With no opts
|
|
189
|
+
* (or graceMs<=0) this is the pre-grace behaviour exactly.
|
|
170
190
|
*/
|
|
171
|
-
decideAtIdle(): LedgerDecision {
|
|
172
|
-
const o =
|
|
191
|
+
decideAtIdle(opts?: { now: number; graceMs: number }): LedgerDecision {
|
|
192
|
+
const o =
|
|
193
|
+
opts != null && opts.graceMs > 0 ? this.oldestEligible(opts.now, opts.graceMs) : this.oldest()
|
|
173
194
|
if (o === undefined) return { action: 'none' }
|
|
174
195
|
if (o.representCount >= this.maxRepresents) return { action: 'escalate', obligation: o }
|
|
175
196
|
return { action: 'represent', obligation: o }
|
|
176
197
|
}
|
|
177
198
|
|
|
199
|
+
/** The oldest open obligation whose handling turn ended at least `graceMs` ago
|
|
200
|
+
* (or never ended — a still-queued obligation has no lastTurnEndedAt and is
|
|
201
|
+
* always eligible; it can't have a trailing answer in flight). */
|
|
202
|
+
private oldestEligible(now: number, graceMs: number): Obligation | undefined {
|
|
203
|
+
let best: Obligation | undefined
|
|
204
|
+
for (const o of this.open.values()) {
|
|
205
|
+
if (o.lastTurnEndedAt != null && now - o.lastTurnEndedAt < graceMs) continue // within grace
|
|
206
|
+
if (best === undefined || o.openedAt < best.openedAt) best = o
|
|
207
|
+
}
|
|
208
|
+
return best
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
/** Stamp that the most recent turn handling `originTurnId` just ended (drives
|
|
212
|
+
* the escalate-grace window). No-op if the obligation isn't open. Persists. */
|
|
213
|
+
noteTurnEnded(originTurnId: string, ts: number): void {
|
|
214
|
+
const o = this.open.get(originTurnId)
|
|
215
|
+
if (o === undefined) return
|
|
216
|
+
o.lastTurnEndedAt = ts
|
|
217
|
+
this.persist()
|
|
218
|
+
}
|
|
219
|
+
|
|
178
220
|
/**
|
|
179
221
|
* Decide which obligation a substantive reply discharges — DETERMINISTICALLY,
|
|
180
222
|
* holding for any model behavior:
|
|
@@ -35,6 +35,26 @@ function readStdin() {
|
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
/**
|
|
39
|
+
* Coerce a tool-input field to display text WITHOUT the `[object Object]`
|
|
40
|
+
* trap. Only primitives carry a meaningful label: strings pass through,
|
|
41
|
+
* numbers/booleans stringify cleanly. Objects and arrays return '' so the
|
|
42
|
+
* caller falls through to its next fallback (a sibling field, or the
|
|
43
|
+
* humanized tool name) instead of surfacing literal "[object Object]".
|
|
44
|
+
*
|
|
45
|
+
* This guards the MCP-tool path in particular: an operator-configured
|
|
46
|
+
* server (e.g. Brevo CRM) may pass a filter/query OBJECT in `query` /
|
|
47
|
+
* `description` / `title`, and the old `String(i.query ?? '')` coercion
|
|
48
|
+
* rendered that as "[object Object]" on the live activity feed. The
|
|
49
|
+
* renderer's own `clip()` already rejects non-strings; this mirrors that
|
|
50
|
+
* contract at the hook so the bad value never reaches the sidecar JSONL.
|
|
51
|
+
*/
|
|
52
|
+
function asText(v) {
|
|
53
|
+
if (typeof v === 'string') return v
|
|
54
|
+
if (typeof v === 'number' || typeof v === 'boolean') return String(v)
|
|
55
|
+
return ''
|
|
56
|
+
}
|
|
57
|
+
|
|
38
58
|
/**
|
|
39
59
|
* One-line, length-bounded escape of a value for inclusion in a label.
|
|
40
60
|
* Newlines collapsed, very long strings truncated with an ellipsis.
|
|
@@ -82,10 +102,10 @@ export function computeLabel(toolName, input) {
|
|
|
82
102
|
// for Bash/Task, matching the gateway's describeToolUse rendering.
|
|
83
103
|
switch (toolName) {
|
|
84
104
|
case 'Bash':
|
|
85
|
-
return clip(
|
|
105
|
+
return clip(asText(i.description), 70).trim() || 'Running a command'
|
|
86
106
|
case 'Task':
|
|
87
107
|
case 'Agent': {
|
|
88
|
-
const d = clip(
|
|
108
|
+
const d = clip(asText(i.description), 60).trim()
|
|
89
109
|
return d ? `Delegating: ${d}` : 'Delegating to a sub-agent'
|
|
90
110
|
}
|
|
91
111
|
case 'TodoWrite':
|
|
@@ -103,16 +123,16 @@ export function computeLabel(toolName, input) {
|
|
|
103
123
|
case 'Write':
|
|
104
124
|
return `Writing ${clip(safeBasename(i.file_path))}`.trim()
|
|
105
125
|
case 'Grep': {
|
|
106
|
-
const path = i.path ? clip(
|
|
107
|
-
const pat = clip(
|
|
126
|
+
const path = i.path ? clip(asText(i.path), 40) : '.'
|
|
127
|
+
const pat = clip(asText(i.pattern), 40)
|
|
108
128
|
return `Searching ${path} for ${pat}`
|
|
109
129
|
}
|
|
110
130
|
case 'Glob':
|
|
111
|
-
return `Finding files matching ${clip(
|
|
131
|
+
return `Finding files matching ${clip(asText(i.pattern), 60)}`
|
|
112
132
|
case 'WebFetch':
|
|
113
133
|
return `Fetching ${clip(urlHostPath(i.url), 60)}`
|
|
114
134
|
case 'WebSearch':
|
|
115
|
-
return `Searching the web for ${clip(
|
|
135
|
+
return `Searching the web for ${clip(asText(i.query), 60)}`
|
|
116
136
|
case 'NotebookEdit':
|
|
117
137
|
return `Editing notebook ${clip(safeBasename(i.notebook_path))}`
|
|
118
138
|
case 'BashOutput':
|
|
@@ -128,7 +148,7 @@ export function computeLabel(toolName, input) {
|
|
|
128
148
|
// sidecar JSONL and recover which skill fired per turn —
|
|
129
149
|
// the progress card path that used to surface this was retired
|
|
130
150
|
// when `progressDriver` was nulled out in #1122 PR3.
|
|
131
|
-
const slug = clip(
|
|
151
|
+
const slug = clip(asText(i.skill), 64)
|
|
132
152
|
return slug ? `Running skill ${slug}` : null
|
|
133
153
|
}
|
|
134
154
|
}
|
|
@@ -141,7 +161,7 @@ export function computeLabel(toolName, input) {
|
|
|
141
161
|
case 'mcp__switchroom-telegram__stream_reply':
|
|
142
162
|
return 'Replying'
|
|
143
163
|
case 'mcp__switchroom-telegram__react': {
|
|
144
|
-
const emoji = clip(
|
|
164
|
+
const emoji = clip(asText(i.emoji), 8)
|
|
145
165
|
return emoji ? `Reacting ${emoji}` : 'Reacting'
|
|
146
166
|
}
|
|
147
167
|
case 'mcp__switchroom-telegram__get_recent_messages':
|
|
@@ -177,7 +197,7 @@ export function computeLabel(toolName, input) {
|
|
|
177
197
|
return 'Looking through your files'
|
|
178
198
|
if (server === 'notion' || server === 'claude_ai_notion') return 'Checking your notes'
|
|
179
199
|
if (server === 'perplexity') {
|
|
180
|
-
const q = clip(
|
|
200
|
+
const q = clip(asText(i.query) || asText(i.description), 60).trim()
|
|
181
201
|
return q ? `Searching the web for ${q}` : 'Searching the web'
|
|
182
202
|
}
|
|
183
203
|
if (server === 'webkite') {
|
|
@@ -186,9 +206,9 @@ export function computeLabel(toolName, input) {
|
|
|
186
206
|
}
|
|
187
207
|
// Unknown MCP server: prefer a model-authored field, else humanized tool.
|
|
188
208
|
const desc =
|
|
189
|
-
clip(
|
|
190
|
-
clip(
|
|
191
|
-
clip(
|
|
209
|
+
clip(asText(i.description), 60).trim() ||
|
|
210
|
+
clip(asText(i.query), 50).trim() ||
|
|
211
|
+
clip(asText(i.title), 50).trim()
|
|
192
212
|
if (desc) return desc
|
|
193
213
|
return `Using ${tool.replace(/[-_]+/g, ' ')}`
|
|
194
214
|
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { autoClassifyMidTurnInbound, type AutoClassifyInput } from "../gateway/auto-classify-mid-turn.js";
|
|
3
|
+
|
|
4
|
+
function base(over: Partial<AutoClassifyInput> = {}): AutoClassifyInput {
|
|
5
|
+
return {
|
|
6
|
+
isSteerPrefix: false,
|
|
7
|
+
isQueuePrefix: false,
|
|
8
|
+
priorTurnInFlight: true,
|
|
9
|
+
isDm: false,
|
|
10
|
+
incomingThreadId: 3,
|
|
11
|
+
activeTurnThreadId: 3,
|
|
12
|
+
msSinceLastAgentOutput: 2000,
|
|
13
|
+
dmSteerWindowMs: 0, // DM auto-steer off by default
|
|
14
|
+
topicSteerWindowMs: 8000,
|
|
15
|
+
...over,
|
|
16
|
+
};
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
describe("autoClassifyMidTurnInbound", () => {
|
|
20
|
+
it("explicit /steer prefix always wins", () => {
|
|
21
|
+
const r = autoClassifyMidTurnInbound(base({ isSteerPrefix: true, incomingThreadId: 9, activeTurnThreadId: 3 }));
|
|
22
|
+
expect(r.decision).toBe("steer");
|
|
23
|
+
expect(r.reason).toBe("steer_prefix");
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
it("explicit /queue prefix always wins", () => {
|
|
27
|
+
expect(autoClassifyMidTurnInbound(base({ isQueuePrefix: true })).decision).toBe("queue");
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
it("no turn in flight → queue (fresh turn, not our decision)", () => {
|
|
31
|
+
const r = autoClassifyMidTurnInbound(base({ priorTurnInFlight: false }));
|
|
32
|
+
expect(r.decision).toBe("queue");
|
|
33
|
+
expect(r.reason).toBe("not_mid_turn");
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
// ── Supergroup: topic is the strong signal ──
|
|
37
|
+
it("supergroup, DIFFERENT topic than the active turn → queue (cross_topic), regardless of recency", () => {
|
|
38
|
+
const r = autoClassifyMidTurnInbound(base({ incomingThreadId: 5, activeTurnThreadId: 3, msSinceLastAgentOutput: 100 }));
|
|
39
|
+
expect(r.decision).toBe("queue");
|
|
40
|
+
expect(r.reason).toBe("cross_topic");
|
|
41
|
+
expect(r.sameTopic).toBe(false);
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
it("supergroup, SAME topic + recent → steer", () => {
|
|
45
|
+
const r = autoClassifyMidTurnInbound(base({ msSinceLastAgentOutput: 3000, topicSteerWindowMs: 8000 }));
|
|
46
|
+
expect(r.decision).toBe("steer");
|
|
47
|
+
expect(r.reason).toBe("same_topic_recent");
|
|
48
|
+
expect(r.sameTopic).toBe(true);
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
it("supergroup, SAME topic but STALE (older than window) → queue", () => {
|
|
52
|
+
const r = autoClassifyMidTurnInbound(base({ msSinceLastAgentOutput: 20000, topicSteerWindowMs: 8000 }));
|
|
53
|
+
expect(r.decision).toBe("queue");
|
|
54
|
+
expect(r.reason).toBe("same_topic_stale");
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
it("supergroup, no recency recorded (null) → queue (not treated as recent)", () => {
|
|
58
|
+
const r = autoClassifyMidTurnInbound(base({ msSinceLastAgentOutput: null }));
|
|
59
|
+
expect(r.decision).toBe("queue");
|
|
60
|
+
expect(r.reason).toBe("same_topic_stale");
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
it("topicSteerWindowMs=0 (auto-steer off) → queue, still reports sameTopic", () => {
|
|
64
|
+
const r = autoClassifyMidTurnInbound(base({ topicSteerWindowMs: 0, incomingThreadId: 3, activeTurnThreadId: 3 }));
|
|
65
|
+
expect(r.decision).toBe("queue");
|
|
66
|
+
expect(r.reason).toBe("topic_disabled");
|
|
67
|
+
expect(r.sameTopic).toBe(true);
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
it("canonical thread compare: null/undefined/0 collapse to the same no-thread bucket", () => {
|
|
71
|
+
expect(autoClassifyMidTurnInbound(base({ incomingThreadId: 0, activeTurnThreadId: null })).sameTopic).toBe(true);
|
|
72
|
+
expect(autoClassifyMidTurnInbound(base({ incomingThreadId: undefined, activeTurnThreadId: 0 })).sameTopic).toBe(true);
|
|
73
|
+
expect(autoClassifyMidTurnInbound(base({ incomingThreadId: 1, activeTurnThreadId: 0 })).sameTopic).toBe(false);
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
// ── DM: timing-only, off by default ──
|
|
77
|
+
it("DM with dmSteerWindowMs=0 (default) → queue even if recent (DM auto-steer off)", () => {
|
|
78
|
+
const r = autoClassifyMidTurnInbound(base({ isDm: true, incomingThreadId: null, activeTurnThreadId: null, msSinceLastAgentOutput: 500, dmSteerWindowMs: 0 }));
|
|
79
|
+
expect(r.decision).toBe("queue");
|
|
80
|
+
expect(r.reason).toBe("dm_disabled");
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
it("DM with dmSteerWindowMs>0 + recent → steer; stale → queue", () => {
|
|
84
|
+
expect(autoClassifyMidTurnInbound(base({ isDm: true, incomingThreadId: null, activeTurnThreadId: null, msSinceLastAgentOutput: 5000, dmSteerWindowMs: 10000 })).decision).toBe("steer");
|
|
85
|
+
expect(autoClassifyMidTurnInbound(base({ isDm: true, incomingThreadId: null, activeTurnThreadId: null, msSinceLastAgentOutput: 15000, dmSteerWindowMs: 10000 })).decision).toBe("queue");
|
|
86
|
+
});
|
|
87
|
+
});
|