switchroom 0.14.62 → 0.14.63

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -142,6 +142,7 @@ import {
142
142
  resolveRetentionDays as resolveRegistryRetentionDays,
143
143
  } from '../registry/reaper.js'
144
144
  import { parseQueuePrefix, parseSteerPrefix, formatPriorAssistantPreview, formatReplyToText } from '../steering.js'
145
+ import { autoClassifyMidTurnInbound } from './auto-classify-mid-turn.js'
145
146
  import {
146
147
  renderOperatorEvent,
147
148
  shouldEmitOperatorEvent,
@@ -289,7 +290,7 @@ import {
289
290
  obligationEscalationText,
290
291
  } from './obligation-ledger.js'
291
292
  import { loadObligations, persistObligations } from './obligation-store.js'
292
- import { withDeadline } from './with-deadline.js'
293
+ import { driveEscalation } from './escalation-drive.js'
293
294
  import { createInboundSpool } from './inbound-spool.js'
294
295
  import { purgeStaleTurnsForChat } from './turn-state-purge.js'
295
296
  import { decideInboundDelivery } from './inbound-delivery-gate.js'
@@ -1423,6 +1424,47 @@ const OBLIGATION_ESCALATE_MAX = 3
1423
1424
  // bounded escalate ladder to a terminal. 45s comfortably exceeds robustApiCall's
1424
1425
  // 3-attempt network backoff so a legitimate slow send isn't cut short.
1425
1426
  const OBLIGATION_ESCALATE_SEND_DEADLINE_MS = 45_000
1427
+ // Escalate-grace window. A slow / background-worker / multi-segment turn ends
1428
+ // (the in-flight gate clears) BEFORE its trailing answer's reply lands, and the
1429
+ // 5s sweep would re-present/escalate in that gap — a false "⚠️ I may have missed
1430
+ // this" on a message that's actively being answered (fuzz-confirmed on v0.14.62:
1431
+ // ~14% of marko's no-reply turn-ends had the answer in flight). An obligation
1432
+ // whose handling turn ended < this ago is skipped by decideAtIdle, giving the
1433
+ // trailing answer's close a beat to fire. Bounded: each re-present is itself a
1434
+ // turn that re-stamps once, representCount is capped → the ladder still
1435
+ // terminates. 45s > the observed "answer lands within ~60s, usually <40s" gap.
1436
+ // Kill switch: SWITCHROOM_OBLIGATION_ESCALATE_GRACE_MS=0 → pre-grace behaviour.
1437
/**
 * Parse the SWITCHROOM_OBLIGATION_ESCALATE_GRACE_MS override.
 *
 * @param raw        The raw environment value (undefined when unset).
 * @param fallbackMs Value used when `raw` is unset, blank, or not a finite
 *                   non-negative number. Defaults to 45s.
 * @returns The grace window in milliseconds. An explicit `0` is honoured — it
 *          is the documented kill switch that restores pre-grace behaviour.
 */
function parseObligationEscalateGraceMs(raw: string | undefined, fallbackMs = 45_000): number {
  const text = raw?.trim() ?? ''
  // Number('') and Number('   ') are both 0. Without the trim, a whitespace-only
  // value (e.g. a stray space in an env file) would slip past the empty check
  // and silently trip the kill switch instead of falling back to the default.
  if (text === '') return fallbackMs
  const parsed = Number(text)
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallbackMs
}

const OBLIGATION_ESCALATE_GRACE_MS = parseObligationEscalateGraceMs(
  process.env.SWITCHROOM_OBLIGATION_ESCALATE_GRACE_MS,
)
1443
+
1444
+ // ─── Mid-turn auto-classify (steer-vs-queue), SHADOW mode ─────────────────────
1445
+ // Today a no-prefix mid-turn message always QUEUES. autoClassifyMidTurnInbound
1446
+ // (auto-classify-mid-turn.ts) is the basis for a smarter default using
1447
+ // topic-vs-active-turn + reply-recency. Phase 1 ships SHADOW-ONLY: when this
1448
+ // flag is on we COMPUTE + LOG what we'd decide (decision/reason/same_topic/
1449
+ // ms_since_out) but the behaviour is UNCHANGED (still queue) — to gather the
1450
+ // real-world distribution (how often mid-turn messages are same-topic
1451
+ // continuations vs cross-topic, and the recency spread) before any action flips
1452
+ // on. Default OFF → zero overhead. The action windows below stay 0 in shadow.
1453
+ const AUTOCLASSIFY_MIDTURN_SHADOW = process.env.SWITCHROOM_AUTOCLASSIFY_MIDTURN_SHADOW === '1'
1454
+ // Per-(chat,thread) wall-clock ms of the agent's LAST visible output — the
1455
+ // recency clock the classifier uses (NOT turn age: a long actively-narrating
1456
+ // worker turn must not read "stale"). Stamped beside signalTracker.noteOutbound.
1457
+ // LRU-bounded so a long-lived gateway with many topics can't grow unboundedly.
1458
+ const lastAgentOutputAt = new Map<string, number>()
1459
+ const LAST_OUTPUT_MAX_KEYS = 512
1460
/**
 * Record `ts` as the latest visible-output time for `key`, keeping
 * `lastAgentOutputAt` ordered by recency and capped at LAST_OUTPUT_MAX_KEYS.
 *
 * @param key Map key identifying the chat/thread the output belongs to.
 * @param ts  Timestamp (ms) to store for that key.
 */
function noteAgentOutputAt(key: string, ts: number): void {
  // A Map iterates in insertion order, so removing and re-adding the key moves
  // it to the tail (most recently used).
  lastAgentOutputAt.delete(key)
  lastAgentOutputAt.set(key, ts)
  if (lastAgentOutputAt.size <= LAST_OUTPUT_MAX_KEYS) return
  // Over capacity: the head of the iteration order is the least recently used.
  const head = lastAgentOutputAt.keys().next()
  if (!head.done) lastAgentOutputAt.delete(head.value)
}
1426
1468
  // Durable snapshot of the open obligation set on the persistent per-agent
1427
1469
  // volume (STATE_DIR = /state/agent/telegram in prod). Closes the restart hole:
1428
1470
  // the in-memory ledger alone empties on restart and the spool's boot-replay
@@ -1999,6 +2041,29 @@ let pendingDeferredInterrupt: PendingDeferredInterrupt | null = null
1999
2041
  * Idempotent: nulls the slot and clears the timer before doing any work so a
2000
2042
  * boundary event and the timeout can't double-fire.
2001
2043
  */
2044
/**
 * Close the obligation owned by the turn that an `!` interrupt just
 * SIGINT-killed.
 *
 * The killed turn does not reliably emit turn_end, so endCurrentTurnAtomic may
 * never close its obligation; left open, the idle sweep would later
 * re-present/escalate a question the user explicitly cancelled. An interrupt is
 * a deliberate redirect, so closing is the correct terminal (the user can
 * re-ask).
 *
 * Only the obligation keyed by the current turn's id is closed — other open
 * obligations (queued siblings) are untouched. Does nothing when the ledger
 * flag is off, when no turn is in flight, or when the ledger reports nothing
 * was closed (synthetic / already-closed turn).
 */
function cancelInterruptedObligation(): void {
  const interrupted = OBLIGATION_LEDGER_ENABLED ? currentTurn : null
  if (interrupted == null) return
  const closed = obligationLedger.close(interrupted.turnId)
  if (!closed) return
  process.stderr.write(
    `telegram gateway: obligation cancelled by interrupt origin=${interrupted.turnId}\n`,
  )
}
2066
+
2002
2067
  async function fireDeferredInterrupt(reason: 'boundary' | 'timeout'): Promise<void> {
2003
2068
  const pending = pendingDeferredInterrupt
2004
2069
  if (pending == null) return
@@ -2027,6 +2092,10 @@ async function fireDeferredInterrupt(reason: 'boundary' | 'timeout'): Promise<vo
2027
2092
  process.stderr.write(`telegram gateway: deferred-interrupt SIGINT failed: ${(err as Error).message}\n`)
2028
2093
  }
2029
2094
 
2095
+ // The SIGINT just killed the in-flight turn — cancel its obligation so the
2096
+ // interrupted (user-redirected) question isn't re-presented/escalated later.
2097
+ cancelInterruptedObligation()
2098
+
2030
2099
  // Deliver the replacement body as a fresh turn to the freshly-killed
2031
2100
  // bridge — same sendToAgent + buffer-on-miss primitive the synchronous
2032
2101
  // interrupt carve-out uses at the handleInbound delivery site.
@@ -2426,8 +2495,17 @@ function endCurrentTurnAtomic(turn: CurrentTurn): void {
2426
2495
  // finalAnswerDelivered===false → stays open → re-presented (the intended
2427
2496
  // catch). close() is a no-op for synthetic turns (turnId not in the ledger).
2428
2497
  // No-op when the flag is off.
2429
- if (OBLIGATION_LEDGER_ENABLED && turn.finalAnswerDelivered) {
2430
- obligationLedger.close(turn.turnId)
2498
+ if (OBLIGATION_LEDGER_ENABLED) {
2499
+ if (turn.finalAnswerDelivered) {
2500
+ obligationLedger.close(turn.turnId)
2501
+ } else {
2502
+ // Turn ended WITHOUT a final answer. If this turn was handling an open
2503
+ // obligation, stamp its grace clock so the idle sweep waits before
2504
+ // re-presenting/escalating — a slow/worker answer may still be in flight
2505
+ // (the over-escalation fix). No-op when turn.turnId isn't an open
2506
+ // obligation (synthetic / already-closed turn).
2507
+ obligationLedger.noteTurnEnded(turn.turnId, Date.now())
2508
+ }
2431
2509
  }
2432
2510
  // Component 2 — clear any prior no-reply drain timer for this turn; a
2433
2511
  // fresh end re-evaluates below. (Idempotent — null when never armed.)
@@ -4925,7 +5003,13 @@ function obligationSweep(): void {
4925
5003
  if (!obligationLedger.hasOpen()) return
4926
5004
  if (turnInFlightForGate()) return // a turn is running — let it finish/answer
4927
5005
  const agent = process.env.SWITCHROOM_AGENT_NAME ?? ''
4928
- const decision = obligationLedger.decideAtIdle()
5006
+ // Grace window: skip an obligation whose handling turn ended < grace ago — its
5007
+ // trailing slow/worker answer may still be landing (over-escalation fix).
5008
+ const decision = obligationLedger.decideAtIdle(
5009
+ OBLIGATION_ESCALATE_GRACE_MS > 0
5010
+ ? { now: Date.now(), graceMs: OBLIGATION_ESCALATE_GRACE_MS }
5011
+ : undefined,
5012
+ )
4929
5013
  const o = decision.obligation
4930
5014
  if (decision.action === 'none' || o == null) return
4931
5015
  if (decision.action === 'represent') {
@@ -4950,54 +5034,30 @@ function obligationSweep(): void {
4950
5034
  // (dead topic even after thread-fallback, blocked bot) is bounded by
4951
5035
  // OBLIGATION_ESCALATE_MAX → close best-effort (the user is unreachable, so a
4952
5036
  // bounded give-up beats an infinite loop / a boot-surviving poison record).
4953
- if (obligationEscalateInFlight.has(o.originTurnId)) return // a send is already awaiting
4954
- const escId = o.originTurnId
4955
- const attempt = obligationLedger.markEscalateAttempt(escId)
4956
- obligationEscalateInFlight.add(escId)
4957
- process.stderr.write(
4958
- `telegram gateway: obligation escalating (exhausted ${OBLIGATION_REPRESENT_MAX} re-presents) origin=${escId} attempt=${attempt}/${OBLIGATION_ESCALATE_MAX}\n`,
4959
- )
4960
- // retryWithThreadFallback: a stale/renumbered topic returns THREAD_NOT_FOUND;
4961
- // retry WITHOUT the thread so the nudge still lands in the chat (the #2096
4962
- // pattern) instead of being permanently undeliverable to a dead topic.
4963
- // withDeadline: grammy/fetch impose no request timeout and `.finally` (which
4964
- // clears the in-flight flag) only runs on settle — so a hung send would leak
4965
- // the flag forever and wedge this obligation OPEN. Racing against a deadline
4966
- // guarantees the chain settles, the flag always clears, and a hang becomes a
4967
- // bounded reject handled exactly like any other failed attempt.
4968
- void withDeadline(
4969
- retryWithThreadFallback(
4970
- robustApiCall,
4971
- (tid) =>
4972
- bot.api.sendMessage(o.chatId, obligationEscalationText(o), {
4973
- ...(tid != null ? { message_thread_id: tid } : {}),
4974
- }),
4975
- { threadId: o.threadId, chat_id: o.chatId, verb: 'obligation.escalate' },
4976
- ),
4977
- OBLIGATION_ESCALATE_SEND_DEADLINE_MS,
4978
- 'obligation escalation send timed out',
4979
- )
4980
- .then(() => {
4981
- obligationLedger.close(escId)
4982
- process.stderr.write(
4983
- `telegram gateway: obligation escalation delivered + closed origin=${escId}\n`,
4984
- )
4985
- })
4986
- .catch((err) => {
4987
- if (attempt >= OBLIGATION_ESCALATE_MAX) {
4988
- obligationLedger.close(escId)
4989
- process.stderr.write(
4990
- `telegram gateway: obligation escalation PERMANENTLY undeliverable after ${attempt} attempts — closing best-effort origin=${escId}: ${err}\n`,
4991
- )
4992
- } else {
4993
- process.stderr.write(
4994
- `telegram gateway: obligation escalation send failed (attempt ${attempt}/${OBLIGATION_ESCALATE_MAX}), retrying next sweep origin=${escId}: ${err}\n`,
4995
- )
4996
- }
4997
- })
4998
- .finally(() => {
4999
- obligationEscalateInFlight.delete(escId)
5000
- })
5037
+ // Drive one escalation attempt. The send is a direct Telegram nudge
5038
+ // (retryWithThreadFallback: a stale/renumbered topic → THREAD_NOT_FOUND retries
5039
+ // thread-less, the #2096 pattern). driveEscalation guards against concurrent
5040
+ // sends, bounds the send with withDeadline (so a hung send can't leak the
5041
+ // in-flight flag and wedge the obligation OPEN), closes only after a successful
5042
+ // send, and bounds permanent failures to a best-effort close. Extracted so the
5043
+ // hang → bounded → terminal path is executable in escalation-drive.test.ts —
5044
+ // the path neither mtcute (can't hang Telegram) nor a synchronous test reaches.
5045
+ void driveEscalation({
5046
+ escId: o.originTurnId,
5047
+ inFlight: obligationEscalateInFlight,
5048
+ ledger: obligationLedger,
5049
+ send: () =>
5050
+ retryWithThreadFallback(
5051
+ robustApiCall,
5052
+ (tid) =>
5053
+ bot.api.sendMessage(o.chatId, obligationEscalationText(o), {
5054
+ ...(tid != null ? { message_thread_id: tid } : {}),
5055
+ }),
5056
+ { threadId: o.threadId, chat_id: o.chatId, verb: 'obligation.escalate' },
5057
+ ),
5058
+ maxAttempts: OBLIGATION_ESCALATE_MAX,
5059
+ deadlineMs: OBLIGATION_ESCALATE_SEND_DEADLINE_MS,
5060
+ })
5001
5061
  }
5002
5062
  if (!STATIC && OBLIGATION_LEDGER_ENABLED) {
5003
5063
  setInterval(obligationSweep, OBLIGATION_SWEEP_MS).unref()
@@ -6503,6 +6563,10 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
6503
6563
  // silence-poke clock so the next poke is measured from this send.
6504
6564
  signalTracker.noteOutbound(statusKey(chat_id, threadId), Date.now())
6505
6565
  silencePoke.noteOutbound(statusKey(chat_id, threadId), Date.now())
6566
+ // Mid-turn auto-classify recency clock: the agent just produced visible output
6567
+ // in this chat/thread (cross-turn, unlike silencePoke's per-turn lastOutboundAt).
6568
+ // Only maintained when the shadow flag is on → truly zero overhead by default.
6569
+ if (AUTOCLASSIFY_MIDTURN_SHADOW) noteAgentOutputAt(statusKey(chat_id, threadId), Date.now())
6506
6570
  // PR3b-cutover: feed lastOutboundAt to the delivery machine so its
6507
6571
  // TTL `tick` suppresses the fallback for a long-but-active turn
6508
6572
  // (model streaming past 5 min) — parity with silencePoke's own
@@ -10808,6 +10872,9 @@ async function handleInbound(
10808
10872
  } catch (err) {
10809
10873
  process.stderr.write(`telegram gateway: interrupt-marker SIGINT failed: ${(err as Error).message}\n`)
10810
10874
  }
10875
+ // The SIGINT just killed the in-flight turn — cancel its obligation so the
10876
+ // interrupted (user-redirected) question isn't re-presented/escalated later.
10877
+ cancelInterruptedObligation()
10811
10878
  }
10812
10879
  if (interrupt.emptyBody) {
10813
10880
  // #1075: thread-id-bearing — route through swallowingApiCall so
@@ -11414,6 +11481,33 @@ async function handleInbound(
11414
11481
  isSteering = priorTurnInFlight && isSteerPrefix
11415
11482
  if (priorTurnInFlight) priorTurnStartedAt = activeTurnStartedAt.get(key)
11416
11483
 
11484
+ // Mid-turn auto-classify SHADOW: compute what a topic+recency classifier
11485
+ // WOULD decide and log it — behaviour is UNCHANGED (isSteering above is
11486
+ // untouched). Gathers the real-world distribution (same-topic continuation
11487
+ // vs cross-topic, recency spread) to tune auto-steer before it ever acts.
11488
+ // No-op unless the shadow flag is on AND a turn is in flight (the only case
11489
+ // a steer-vs-queue decision is meaningful).
11490
+ if (AUTOCLASSIFY_MIDTURN_SHADOW && priorTurnInFlight) {
11491
+ const lastOut = lastAgentOutputAt.get(key)
11492
+ const msSinceOut = lastOut != null ? Date.now() - lastOut : null
11493
+ const shadow = autoClassifyMidTurnInbound({
11494
+ isSteerPrefix,
11495
+ isQueuePrefix: isQueuedPrefix,
11496
+ priorTurnInFlight,
11497
+ isDm: isDmChatId(chat_id),
11498
+ incomingThreadId: messageThreadId ?? null,
11499
+ activeTurnThreadId: currentTurn?.sessionThreadId ?? null,
11500
+ msSinceLastAgentOutput: msSinceOut,
11501
+ dmSteerWindowMs: 0, // DM auto-steer stays off (the April regime)
11502
+ topicSteerWindowMs: 8_000, // candidate window — what we're tuning
11503
+ })
11504
+ process.stderr.write(
11505
+ `telegram gateway: autoclassify-shadow chat_id=${chat_id} ` +
11506
+ `would=${shadow.decision} reason=${shadow.reason} same_topic=${shadow.sameTopic ?? '-'} ` +
11507
+ `ms_since_out=${msSinceOut ?? '-'} actual=${isSteering ? 'steer' : 'queue'}\n`,
11508
+ )
11509
+ }
11510
+
11417
11511
  if (access.statusReactions !== false) {
11418
11512
  if (isSteering) {
11419
11513
  // Explicit steer: mark with 🤝 on the inbound message; leave the
@@ -44,6 +44,17 @@ export interface Obligation {
44
44
  * can't loop forever — and, because it is part of the durable snapshot,
45
45
  * can't become a boot-surviving poison record either. */
46
46
  escalateAttempts?: number
47
+ /** Wall-clock ms the most recent turn handling THIS obligation ended (stamped
48
+ * at turn_end via noteTurnEnded). Drives the escalate-grace window: a slow /
49
+ * background-worker / multi-segment turn ends (the in-flight gate clears)
50
+ * before its trailing answer's reply lands, and the sweep would otherwise
51
+ * re-present/escalate in that gap — a false "I may have missed this" on a
52
+ * message that's actively being answered (fuzz-confirmed on v0.14.62). The
53
+ * decision waits `graceMs` after this stamp before acting, so the trailing
54
+ * answer's close has a beat to fire. Bounded: each re-present is itself a turn
55
+ * that re-stamps this once, and representCount is capped, so the ladder still
56
+ * terminates. Durable (part of the snapshot) so the grace survives restart. */
57
+ lastTurnEndedAt?: number
47
58
  }
48
59
 
49
60
  /** What the gateway should do for the oldest open obligation at an idle boundary. */
@@ -162,19 +173,50 @@ export class ObligationLedger {
162
173
  * does not mutate. The caller performs the side effect then calls
163
174
  * markRepresented / close accordingly.
164
175
  *
165
- * - 'none' → no open obligation; the agent may idle.
176
+ * - 'none' → no open obligation (or all open ones are within their
177
+ * escalate-grace window); the agent may idle.
166
178
  * - 'represent' → re-present `obligation` as a fresh must-answer turn.
167
179
  * - 'escalate' → it has already been re-presented maxRepresents times; send
168
180
  * ONE operator-visible "did I miss this?" and close it
169
181
  * (caller calls close) rather than loop forever.
182
+ *
183
+ * GRACE WINDOW (opts.graceMs > 0): an obligation whose handling turn ended less
184
+ * than `graceMs` ago is SKIPPED — its trailing answer may still be in flight
185
+ * (a worker / long-think / multi-segment turn ends the in-flight gate before
186
+ * the reply lands). We pick the oldest obligation that is OUT of grace, so a
187
+ * genuinely-stale one is still acted on while a freshly-ended one waits. Pure
188
+ * (clock injected via opts.now, mirroring the builder convention). With no opts
189
+ * (or graceMs<=0) this is the pre-grace behaviour exactly.
170
190
  */
171
- decideAtIdle(): LedgerDecision {
172
- const o = this.oldest()
191
decideAtIdle(opts?: { now: number; graceMs: number }): LedgerDecision {
  // Grace filtering applies only when a positive window was supplied; otherwise
  // fall back to the plain oldest-open lookup (pre-grace behaviour).
  let candidate: Obligation | undefined
  if (opts != null && opts.graceMs > 0) {
    candidate = this.oldestEligible(opts.now, opts.graceMs)
  } else {
    candidate = this.oldest()
  }
  if (candidate === undefined) return { action: 'none' }
  // Re-present until the cap is reached, then hand off to escalation.
  const exhausted = candidate.representCount >= this.maxRepresents
  return exhausted
    ? { action: 'escalate', obligation: candidate }
    : { action: 'represent', obligation: candidate }
}
177
198
 
199
/**
 * The oldest open obligation (smallest `openedAt`) that is outside its
 * escalate-grace window.
 *
 * An obligation with no `lastTurnEndedAt` (never handled by a turn that ended)
 * is always eligible. One with a stamp is skipped while the stamp is within
 * `graceMs` of `now`.
 *
 * @param now     Current wall-clock time in ms (injected by the caller).
 * @param graceMs Grace window in ms; callers only reach this with graceMs > 0.
 * @returns The oldest eligible obligation, or undefined when every open
 *          obligation is still within grace (or none is open).
 */
private oldestEligible(now: number, graceMs: number): Obligation | undefined {
  let best: Obligation | undefined
  for (const o of this.open.values()) {
    const endedAt = o.lastTurnEndedAt
    // Compare the absolute distance, not the signed elapsed time. The stamp is
    // a durable wall-clock value, so after a backwards clock step (or a
    // snapshot written under a wrong clock) it can lie in the future. A signed
    // `now - endedAt < graceMs` would then keep the obligation "within grace"
    // until the clock catches up — potentially far longer than graceMs. With
    // |now - endedAt| the extra wait is bounded by graceMs on either side.
    if (endedAt != null && Math.abs(now - endedAt) < graceMs) continue // within grace
    if (best === undefined || o.openedAt < best.openedAt) best = o
  }
  return best
}
210
+
211
/**
 * Record that the latest turn handling `originTurnId` ended at `ts`; this stamp
 * is what the escalate-grace window is measured from.
 *
 * Does nothing when no open obligation has that id. Otherwise updates the
 * obligation in place and persists the ledger.
 */
noteTurnEnded(originTurnId: string, ts: number): void {
  const obligation = this.open.get(originTurnId)
  if (obligation !== undefined) {
    obligation.lastTurnEndedAt = ts
    this.persist()
  }
}
219
+
178
220
  /**
179
221
  * Decide which obligation a substantive reply discharges — DETERMINISTICALLY,
180
222
  * holding for any model behavior:
@@ -35,6 +35,26 @@ function readStdin() {
35
35
  }
36
36
  }
37
37
 
38
/**
 * Turn a tool-input field into label text, never producing "[object Object]".
 *
 * Strings are returned unchanged; numbers and booleans are stringified. Every
 * other value (objects, arrays, null, undefined, ...) yields '' so the caller's
 * `||` chain moves on to its next fallback — a sibling field or the humanized
 * tool name — instead of rendering a meaningless coercion.
 *
 * This matters for MCP tools in particular: a server may pass a filter/query
 * OBJECT in `query` / `description` / `title`, which a blanket `String(...)`
 * coercion would surface as literal "[object Object]" in the label.
 */
function asText(v) {
  switch (typeof v) {
    case 'string':
      return v
    case 'number':
    case 'boolean':
      return String(v)
    default:
      return ''
  }
}
57
+
38
58
  /**
39
59
  * One-line, length-bounded escape of a value for inclusion in a label.
40
60
  * Newlines collapsed, very long strings truncated with an ellipsis.
@@ -82,10 +102,10 @@ export function computeLabel(toolName, input) {
82
102
  // for Bash/Task, matching the gateway's describeToolUse rendering.
83
103
  switch (toolName) {
84
104
  case 'Bash':
85
- return clip(String(i.description ?? ''), 70).trim() || 'Running a command'
105
+ return clip(asText(i.description), 70).trim() || 'Running a command'
86
106
  case 'Task':
87
107
  case 'Agent': {
88
- const d = clip(String(i.description ?? ''), 60).trim()
108
+ const d = clip(asText(i.description), 60).trim()
89
109
  return d ? `Delegating: ${d}` : 'Delegating to a sub-agent'
90
110
  }
91
111
  case 'TodoWrite':
@@ -103,16 +123,16 @@ export function computeLabel(toolName, input) {
103
123
  case 'Write':
104
124
  return `Writing ${clip(safeBasename(i.file_path))}`.trim()
105
125
  case 'Grep': {
106
- const path = i.path ? clip(String(i.path), 40) : '.'
107
- const pat = clip(String(i.pattern ?? ''), 40)
126
+ const path = i.path ? clip(asText(i.path), 40) : '.'
127
+ const pat = clip(asText(i.pattern), 40)
108
128
  return `Searching ${path} for ${pat}`
109
129
  }
110
130
  case 'Glob':
111
- return `Finding files matching ${clip(String(i.pattern ?? ''), 60)}`
131
+ return `Finding files matching ${clip(asText(i.pattern), 60)}`
112
132
  case 'WebFetch':
113
133
  return `Fetching ${clip(urlHostPath(i.url), 60)}`
114
134
  case 'WebSearch':
115
- return `Searching the web for ${clip(String(i.query ?? ''), 60)}`
135
+ return `Searching the web for ${clip(asText(i.query), 60)}`
116
136
  case 'NotebookEdit':
117
137
  return `Editing notebook ${clip(safeBasename(i.notebook_path))}`
118
138
  case 'BashOutput':
@@ -128,7 +148,7 @@ export function computeLabel(toolName, input) {
128
148
  // sidecar JSONL and recover which skill fired per turn —
129
149
  // the progress card path that used to surface this was retired
130
150
  // when `progressDriver` was nulled out in #1122 PR3.
131
- const slug = clip(String(i.skill ?? ''), 64)
151
+ const slug = clip(asText(i.skill), 64)
132
152
  return slug ? `Running skill ${slug}` : null
133
153
  }
134
154
  }
@@ -141,7 +161,7 @@ export function computeLabel(toolName, input) {
141
161
  case 'mcp__switchroom-telegram__stream_reply':
142
162
  return 'Replying'
143
163
  case 'mcp__switchroom-telegram__react': {
144
- const emoji = clip(String(i.emoji ?? ''), 8)
164
+ const emoji = clip(asText(i.emoji), 8)
145
165
  return emoji ? `Reacting ${emoji}` : 'Reacting'
146
166
  }
147
167
  case 'mcp__switchroom-telegram__get_recent_messages':
@@ -177,7 +197,7 @@ export function computeLabel(toolName, input) {
177
197
  return 'Looking through your files'
178
198
  if (server === 'notion' || server === 'claude_ai_notion') return 'Checking your notes'
179
199
  if (server === 'perplexity') {
180
- const q = clip(String(i.query ?? i.description ?? ''), 60).trim()
200
+ const q = clip(asText(i.query) || asText(i.description), 60).trim()
181
201
  return q ? `Searching the web for ${q}` : 'Searching the web'
182
202
  }
183
203
  if (server === 'webkite') {
@@ -186,9 +206,9 @@ export function computeLabel(toolName, input) {
186
206
  }
187
207
  // Unknown MCP server: prefer a model-authored field, else humanized tool.
188
208
  const desc =
189
- clip(String(i.description ?? ''), 60).trim() ||
190
- clip(String(i.query ?? ''), 50).trim() ||
191
- clip(String(i.title ?? ''), 50).trim()
209
+ clip(asText(i.description), 60).trim() ||
210
+ clip(asText(i.query), 50).trim() ||
211
+ clip(asText(i.title), 50).trim()
192
212
  if (desc) return desc
193
213
  return `Using ${tool.replace(/[-_]+/g, ' ')}`
194
214
  }
@@ -0,0 +1,87 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { autoClassifyMidTurnInbound, type AutoClassifyInput } from "../gateway/auto-classify-mid-turn.js";
3
+
4
/**
 * Build a classifier input for the tests: a same-topic, mid-turn, non-DM
 * message with recent agent output, with any field replaceable via `over`.
 */
function base(over: Partial<AutoClassifyInput> = {}): AutoClassifyInput {
  const defaults: AutoClassifyInput = {
    isSteerPrefix: false,
    isQueuePrefix: false,
    priorTurnInFlight: true,
    isDm: false,
    incomingThreadId: 3,
    activeTurnThreadId: 3,
    msSinceLastAgentOutput: 2000,
    dmSteerWindowMs: 0, // DM auto-steer off by default
    topicSteerWindowMs: 8000,
  };
  return { ...defaults, ...over };
}
18
+
19
describe("autoClassifyMidTurnInbound", () => {
  // Classify the default input with the given overrides applied.
  const classify = (over: Partial<AutoClassifyInput> = {}) => autoClassifyMidTurnInbound(base(over));
  // Overrides that turn the default input into a thread-less DM message.
  const dm = { isDm: true, incomingThreadId: null, activeTurnThreadId: null };

  it("explicit /steer prefix always wins", () => {
    const { decision, reason } = classify({ isSteerPrefix: true, incomingThreadId: 9, activeTurnThreadId: 3 });
    expect(decision).toBe("steer");
    expect(reason).toBe("steer_prefix");
  });

  it("explicit /queue prefix always wins", () => {
    const { decision } = classify({ isQueuePrefix: true });
    expect(decision).toBe("queue");
  });

  it("no turn in flight → queue (fresh turn, not our decision)", () => {
    const { decision, reason } = classify({ priorTurnInFlight: false });
    expect(decision).toBe("queue");
    expect(reason).toBe("not_mid_turn");
  });

  // ── Supergroup: topic is the strong signal ──
  it("supergroup, DIFFERENT topic than the active turn → queue (cross_topic), regardless of recency", () => {
    const outcome = classify({ incomingThreadId: 5, activeTurnThreadId: 3, msSinceLastAgentOutput: 100 });
    expect(outcome.decision).toBe("queue");
    expect(outcome.reason).toBe("cross_topic");
    expect(outcome.sameTopic).toBe(false);
  });

  it("supergroup, SAME topic + recent → steer", () => {
    const outcome = classify({ msSinceLastAgentOutput: 3000, topicSteerWindowMs: 8000 });
    expect(outcome.decision).toBe("steer");
    expect(outcome.reason).toBe("same_topic_recent");
    expect(outcome.sameTopic).toBe(true);
  });

  it("supergroup, SAME topic but STALE (older than window) → queue", () => {
    const { decision, reason } = classify({ msSinceLastAgentOutput: 20000, topicSteerWindowMs: 8000 });
    expect(decision).toBe("queue");
    expect(reason).toBe("same_topic_stale");
  });

  it("supergroup, no recency recorded (null) → queue (not treated as recent)", () => {
    const { decision, reason } = classify({ msSinceLastAgentOutput: null });
    expect(decision).toBe("queue");
    expect(reason).toBe("same_topic_stale");
  });

  it("topicSteerWindowMs=0 (auto-steer off) → queue, still reports sameTopic", () => {
    const outcome = classify({ topicSteerWindowMs: 0, incomingThreadId: 3, activeTurnThreadId: 3 });
    expect(outcome.decision).toBe("queue");
    expect(outcome.reason).toBe("topic_disabled");
    expect(outcome.sameTopic).toBe(true);
  });

  it("canonical thread compare: null/undefined/0 collapse to the same no-thread bucket", () => {
    const zeroVsNull = classify({ incomingThreadId: 0, activeTurnThreadId: null });
    expect(zeroVsNull.sameTopic).toBe(true);
    const undefinedVsZero = classify({ incomingThreadId: undefined, activeTurnThreadId: 0 });
    expect(undefinedVsZero.sameTopic).toBe(true);
    const oneVsZero = classify({ incomingThreadId: 1, activeTurnThreadId: 0 });
    expect(oneVsZero.sameTopic).toBe(false);
  });

  // ── DM: timing-only, off by default ──
  it("DM with dmSteerWindowMs=0 (default) → queue even if recent (DM auto-steer off)", () => {
    const { decision, reason } = classify({ ...dm, msSinceLastAgentOutput: 500, dmSteerWindowMs: 0 });
    expect(decision).toBe("queue");
    expect(reason).toBe("dm_disabled");
  });

  it("DM with dmSteerWindowMs>0 + recent → steer; stale → queue", () => {
    const recent = classify({ ...dm, msSinceLastAgentOutput: 5000, dmSteerWindowMs: 10000 });
    expect(recent.decision).toBe("steer");
    const stale = classify({ ...dm, msSinceLastAgentOutput: 15000, dmSteerWindowMs: 10000 });
    expect(stale.decision).toBe("queue");
  });
});
@@ -0,0 +1,123 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { driveEscalation } from "../gateway/escalation-drive.js";
3
+ import { ObligationLedger } from "../gateway/obligation-ledger.js";
4
+
5
+ // Drives the REAL escalation step (the code obligationSweep calls) with the REAL
6
+ // ObligationLedger and the REAL withDeadline — including a fake hanging send,
7
+ // the exact path the total proof flagged and that mtcute / a synchronous test
8
+ // cannot reach. This is the executable verification of the hang-wedge fix.
9
+
10
/** Open one fixed-content obligation under `id` in ledger `L` (no-op if already present, per openIfAbsent). */
function openEscalatable(L: ObligationLedger, id: string) {
  L.openIfAbsent({
    originTurnId: id,
    chatId: "-100",
    threadId: 3,
    messageId: 1,
    text: "x",
    openedAt: 0,
  });
}
13
+
14
+ const MAX = 3;
15
+ const DEADLINE = 15; // ms — short so the hang case settles fast and deterministically
16
+
17
describe("driveEscalation — the obligation escalation step is bounded and always reaches a terminal", () => {
  const ESC_ID = "c#1";
  const silent = () => {};

  // Fresh ledger holding a single open obligation, plus an empty in-flight set.
  function setup() {
    const ledger = new ObligationLedger(2);
    openEscalatable(ledger, ESC_ID);
    const inFlight = new Set<string>();
    return { ledger, inFlight };
  }

  it("a successful send closes the obligation and clears the in-flight flag", async () => {
    const { ledger, inFlight } = setup();
    await driveEscalation({
      escId: ESC_ID,
      inFlight,
      ledger,
      send: () => Promise.resolve("sent"),
      maxAttempts: MAX,
      deadlineMs: DEADLINE,
      log: silent,
    });
    expect(ledger.isOpen(ESC_ID)).toBe(false); // closed
    expect(inFlight.has(ESC_ID)).toBe(false); // flag cleared
  });

  it("a transient failure below the cap stays OPEN and clears the flag (retried next sweep)", async () => {
    const { ledger, inFlight } = setup();
    await driveEscalation({
      escId: ESC_ID,
      inFlight,
      ledger,
      send: () => Promise.reject(new Error("network blip")),
      maxAttempts: MAX,
      deadlineMs: DEADLINE,
      log: silent,
    });
    expect(ledger.isOpen(ESC_ID)).toBe(true); // still open — will retry
    expect(inFlight.has(ESC_ID)).toBe(false); // flag cleared, so the next sweep can re-enter
  });

  it("THE FIX: a send that NEVER settles still clears the flag (bounded by the deadline)", async () => {
    const { ledger, inFlight } = setup();
    let sendInvoked = 0;
    const startedAt = Date.now();
    // The send returns a promise that never resolves or rejects — the stalled
    // send that, pre-fix, left the in-flight flag set forever and wedged the
    // obligation OPEN.
    const stalledSend = () => {
      sendInvoked++;
      return new Promise(() => {});
    };
    await driveEscalation({
      escId: ESC_ID,
      inFlight,
      ledger,
      send: stalledSend,
      maxAttempts: MAX,
      deadlineMs: DEADLINE,
      log: silent,
    });
    expect(sendInvoked).toBe(1);
    expect(inFlight.has(ESC_ID)).toBe(false); // cleared despite the hang — the wedge is gone
    expect(Date.now() - startedAt).toBeLessThan(DEADLINE + 500); // settled at the deadline, not "never"
  });

  it("repeated hung sends reach a bounded terminal (close best-effort), never an infinite loop", async () => {
    const { ledger, inFlight } = setup();
    let sends = 0;
    let drives = 0;
    // Simulate the 5s sweep firing repeatedly while every send hangs.
    for (; ledger.isOpen(ESC_ID) && drives < 20; drives++) {
      const pending = driveEscalation({
        escId: ESC_ID,
        inFlight,
        ledger,
        send: () => {
          sends++;
          return new Promise(() => {});
        },
        maxAttempts: MAX,
        deadlineMs: DEADLINE,
        log: silent,
      });
      if (pending) await pending; // each attempt settles within the deadline
    }
    expect(ledger.isOpen(ESC_ID)).toBe(false); // reached a terminal (closed best-effort)
    expect(inFlight.has(ESC_ID)).toBe(false);
    expect(sends).toBe(MAX); // exactly maxAttempts sends, then close — bounded
    expect(drives).toBeLessThanOrEqual(MAX + 1);
  });

  it("the in-flight guard prevents a concurrent second send for the same obligation", async () => {
    const { ledger, inFlight } = setup();
    let sends = 0;
    const hang = () => {
      sends++;
      return new Promise<void>(() => {});
    };
    const first = driveEscalation({
      escId: ESC_ID,
      inFlight,
      ledger,
      send: hang,
      maxAttempts: MAX,
      deadlineMs: 60,
      log: silent,
    });
    // Second call while the first is still awaiting → must be a no-op.
    const second = driveEscalation({
      escId: ESC_ID,
      inFlight,
      ledger,
      send: hang,
      maxAttempts: MAX,
      deadlineMs: 60,
      log: silent,
    });
    expect(second).toBeUndefined(); // guarded
    expect(sends).toBe(1); // only one send fired
    expect(ledger.list()[0].escalateAttempts).toBe(1); // only one attempt recorded
    await first; // let the first settle so we don't leak a pending timer
  });
});