switchroom 0.14.62 → 0.14.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -142,6 +142,7 @@ import {
142
142
  resolveRetentionDays as resolveRegistryRetentionDays,
143
143
  } from '../registry/reaper.js'
144
144
  import { parseQueuePrefix, parseSteerPrefix, formatPriorAssistantPreview, formatReplyToText } from '../steering.js'
145
+ import { autoClassifyMidTurnInbound } from './auto-classify-mid-turn.js'
145
146
  import {
146
147
  renderOperatorEvent,
147
148
  shouldEmitOperatorEvent,
@@ -289,7 +290,7 @@ import {
289
290
  obligationEscalationText,
290
291
  } from './obligation-ledger.js'
291
292
  import { loadObligations, persistObligations } from './obligation-store.js'
292
- import { withDeadline } from './with-deadline.js'
293
+ import { driveEscalation } from './escalation-drive.js'
293
294
  import { createInboundSpool } from './inbound-spool.js'
294
295
  import { purgeStaleTurnsForChat } from './turn-state-purge.js'
295
296
  import { decideInboundDelivery } from './inbound-delivery-gate.js'
@@ -1399,9 +1400,12 @@ const deliveryQueue = createDeliveryQueue<InboundMessage>()
1399
1400
  // re-presented (bounded) until it closes, so a message the model read but never
1400
1401
  // answered (the marko 715 drop) cannot be silently lost. ADDITIVE + flagged: it
1401
1402
  // runs ALONGSIDE the existing acks/spool/buffer (PR3 retires the redundant
1402
- // pieces). Default OFF the canary turns it on (713/715 interleave UAT) before
1403
- // any fleet activation. When off, every hook below is a no-op → zero change.
1404
- const OBLIGATION_LEDGER_ENABLED = process.env.SWITCHROOM_OBLIGATION_LEDGER === '1'
1403
+ // pieces). DEFAULT ON (graduated from canary 2026-06-04 after the hang-fix
1404
+ // (#2152, total-proof), the escalate-grace (#2156, kills the fuzz-found
1405
+ // over-escalation), and interrupt-cancel (#2157) — proven on marko (supergroup)
1406
+ // + test-harness for days with 0 false cards). Kill switch:
1407
+ // SWITCHROOM_OBLIGATION_LEDGER=0 → every hook below is a no-op → zero change.
1408
+ const OBLIGATION_LEDGER_ENABLED = process.env.SWITCHROOM_OBLIGATION_LEDGER !== '0'
1405
1409
  const OBLIGATION_REPRESENT_MAX = 2
1406
1410
  const OBLIGATION_SWEEP_MS = 5_000
1407
1411
  // Bound on escalation SEND attempts. The escalation now closes only AFTER a
@@ -1423,6 +1427,49 @@ const OBLIGATION_ESCALATE_MAX = 3
1423
1427
  // bounded escalate ladder to a terminal. 45s comfortably exceeds robustApiCall's
1424
1428
  // 3-attempt network backoff so a legitimate slow send isn't cut short.
1425
1429
  const OBLIGATION_ESCALATE_SEND_DEADLINE_MS = 45_000
1430
+ // Escalate-grace window. A slow / background-worker / multi-segment turn ends
1431
+ // (the in-flight gate clears) BEFORE its trailing answer's reply lands, and the
1432
+ // 5s sweep would re-present/escalate in that gap — a false "⚠️ I may have missed
1433
+ // this" on a message that's actively being answered (fuzz-confirmed on v0.14.62:
1434
+ // ~14% of marko's no-reply turn-ends had the answer in flight). An obligation
1435
+ // whose handling turn ended < this ago is skipped by decideAtIdle, giving the
1436
+ // trailing answer's close a beat to fire. Bounded: each re-present is itself a
1437
+ // turn that re-stamps once, representCount is capped → the ladder still
1438
+ // terminates. 45s covers the usual (<40s) case of the observed "answer lands within ~60s" gap, not its full tail.
1439
+ // Kill switch: SWITCHROOM_OBLIGATION_ESCALATE_GRACE_MS=0 → pre-grace behaviour.
1440
+ const OBLIGATION_ESCALATE_GRACE_MS = (() => {
1441
+ const raw = process.env.SWITCHROOM_OBLIGATION_ESCALATE_GRACE_MS
1442
+ if (raw == null || raw === '') return 45_000
1443
+ const n = Number(raw)
1444
+ return Number.isFinite(n) && n >= 0 ? n : 45_000
1445
+ })()
1446
+
1447
+ // ─── Mid-turn auto-classify (steer-vs-queue), SHADOW mode ─────────────────────
1448
+ // Today a no-prefix mid-turn message always QUEUES. autoClassifyMidTurnInbound
1449
+ // (auto-classify-mid-turn.ts) is the basis for a smarter default using
1450
+ // topic-vs-active-turn + reply-recency. Phase 1 ships SHADOW-ONLY: when this
1451
+ // flag is on we COMPUTE + LOG what we'd decide (decision/reason/same_topic/
1452
+ // ms_since_out) but the behaviour is UNCHANGED (still queue) — to gather the
1453
+ // real-world distribution (how often mid-turn messages are same-topic
1454
+ // continuations vs cross-topic, and the recency spread) before any action flips
1455
+ // on. DEFAULT ON fleet-wide (data-gathering: zero behaviour change — only logs +
1456
+ // a bounded recency map). This is a TEMPORARY default; when auto-steer ships it
1457
+ // supersedes shadow. Kill switch: SWITCHROOM_AUTOCLASSIFY_MIDTURN_SHADOW=0.
1458
+ const AUTOCLASSIFY_MIDTURN_SHADOW = process.env.SWITCHROOM_AUTOCLASSIFY_MIDTURN_SHADOW !== '0'
1459
+ // Per-(chat,thread) wall-clock ms of the agent's LAST visible output — the
1460
+ // recency clock the classifier uses (NOT turn age: a long actively-narrating
1461
+ // worker turn must not read "stale"). Stamped beside signalTracker.noteOutbound.
1462
+ // LRU-bounded so a long-lived gateway with many topics can't grow unboundedly.
1463
+ const lastAgentOutputAt = new Map<string, number>()
1464
+ const LAST_OUTPUT_MAX_KEYS = 512
1465
+ function noteAgentOutputAt(key: string, ts: number): void {
1466
+ lastAgentOutputAt.delete(key) // re-insert → most-recently-used at the tail
1467
+ lastAgentOutputAt.set(key, ts)
1468
+ if (lastAgentOutputAt.size > LAST_OUTPUT_MAX_KEYS) {
1469
+ const oldest = lastAgentOutputAt.keys().next().value
1470
+ if (oldest !== undefined) lastAgentOutputAt.delete(oldest)
1471
+ }
1472
+ }
1426
1473
  // Durable snapshot of the open obligation set on the persistent per-agent
1427
1474
  // volume (STATE_DIR = /state/agent/telegram in prod). Closes the restart hole:
1428
1475
  // the in-memory ledger alone empties on restart and the spool's boot-replay
@@ -1999,6 +2046,29 @@ let pendingDeferredInterrupt: PendingDeferredInterrupt | null = null
1999
2046
  * Idempotent: nulls the slot and clears the timer before doing any work so a
2000
2047
  * boundary event and the timeout can't double-fire.
2001
2048
  */
2049
+ /**
2050
+ * An `!` interrupt SIGINT-kills the in-flight turn. That turn was handling a
2051
+ * user message with an open obligation, and the killed turn does NOT reliably
2052
+ * emit turn_end (so endCurrentTurnAtomic never closes it) — so without this the
2053
+ * obligation survives and the idle sweep later re-presents/escalates "you have
2054
+ * an earlier message you never answered" for a question the user EXPLICITLY
2055
+ * cancelled. An interrupt is a deliberate redirect, so closing that obligation
2056
+ * is the correct terminal (the user chose to interrupt; they can re-ask). Only
2057
+ * the interrupted turn's OWN obligation is closed — queued siblings (other open
2058
+ * obligations) are untouched. No-op when the flag is off, no turn is in flight,
2059
+ * or the turn isn't a tracked obligation (synthetic / already closed).
2060
+ */
2061
+ function cancelInterruptedObligation(): void {
2062
+ if (!OBLIGATION_LEDGER_ENABLED) return
2063
+ const turn = currentTurn
2064
+ if (turn == null) return
2065
+ if (obligationLedger.close(turn.turnId)) {
2066
+ process.stderr.write(
2067
+ `telegram gateway: obligation cancelled by interrupt origin=${turn.turnId}\n`,
2068
+ )
2069
+ }
2070
+ }
2071
+
2002
2072
  async function fireDeferredInterrupt(reason: 'boundary' | 'timeout'): Promise<void> {
2003
2073
  const pending = pendingDeferredInterrupt
2004
2074
  if (pending == null) return
@@ -2027,6 +2097,10 @@ async function fireDeferredInterrupt(reason: 'boundary' | 'timeout'): Promise<vo
2027
2097
  process.stderr.write(`telegram gateway: deferred-interrupt SIGINT failed: ${(err as Error).message}\n`)
2028
2098
  }
2029
2099
 
2100
+ // A SIGINT was just sent to the in-flight turn (a failed send is only logged
2101
+ // above) — cancel its obligation so the interrupted (user-redirected) question isn't re-presented/escalated later.
2102
+ cancelInterruptedObligation()
2103
+
2030
2104
  // Deliver the replacement body as a fresh turn to the freshly-killed
2031
2105
  // bridge — same sendToAgent + buffer-on-miss primitive the synchronous
2032
2106
  // interrupt carve-out uses at the handleInbound delivery site.
@@ -2426,8 +2500,17 @@ function endCurrentTurnAtomic(turn: CurrentTurn): void {
2426
2500
  // finalAnswerDelivered===false → stays open → re-presented (the intended
2427
2501
  // catch). close() is a no-op for synthetic turns (turnId not in the ledger).
2428
2502
  // No-op when the flag is off.
2429
- if (OBLIGATION_LEDGER_ENABLED && turn.finalAnswerDelivered) {
2430
- obligationLedger.close(turn.turnId)
2503
+ if (OBLIGATION_LEDGER_ENABLED) {
2504
+ if (turn.finalAnswerDelivered) {
2505
+ obligationLedger.close(turn.turnId)
2506
+ } else {
2507
+ // Turn ended WITHOUT a final answer. If this turn was handling an open
2508
+ // obligation, stamp its grace clock so the idle sweep waits before
2509
+ // re-presenting/escalating — a slow/worker answer may still be in flight
2510
+ // (the over-escalation fix). No-op when turn.turnId isn't an open
2511
+ // obligation (synthetic / already-closed turn).
2512
+ obligationLedger.noteTurnEnded(turn.turnId, Date.now())
2513
+ }
2431
2514
  }
2432
2515
  // Component 2 — clear any prior no-reply drain timer for this turn; a
2433
2516
  // fresh end re-evaluates below. (Idempotent — null when never armed.)
@@ -4925,7 +5008,13 @@ function obligationSweep(): void {
4925
5008
  if (!obligationLedger.hasOpen()) return
4926
5009
  if (turnInFlightForGate()) return // a turn is running — let it finish/answer
4927
5010
  const agent = process.env.SWITCHROOM_AGENT_NAME ?? ''
4928
- const decision = obligationLedger.decideAtIdle()
5011
+ // Grace window: skip an obligation whose handling turn ended < grace ago — its
5012
+ // trailing slow/worker answer may still be landing (over-escalation fix).
5013
+ const decision = obligationLedger.decideAtIdle(
5014
+ OBLIGATION_ESCALATE_GRACE_MS > 0
5015
+ ? { now: Date.now(), graceMs: OBLIGATION_ESCALATE_GRACE_MS }
5016
+ : undefined,
5017
+ )
4929
5018
  const o = decision.obligation
4930
5019
  if (decision.action === 'none' || o == null) return
4931
5020
  if (decision.action === 'represent') {
@@ -4950,54 +5039,30 @@ function obligationSweep(): void {
4950
5039
  // (dead topic even after thread-fallback, blocked bot) is bounded by
4951
5040
  // OBLIGATION_ESCALATE_MAX → close best-effort (the user is unreachable, so a
4952
5041
  // bounded give-up beats an infinite loop / a boot-surviving poison record).
4953
- if (obligationEscalateInFlight.has(o.originTurnId)) return // a send is already awaiting
4954
- const escId = o.originTurnId
4955
- const attempt = obligationLedger.markEscalateAttempt(escId)
4956
- obligationEscalateInFlight.add(escId)
4957
- process.stderr.write(
4958
- `telegram gateway: obligation escalating (exhausted ${OBLIGATION_REPRESENT_MAX} re-presents) origin=${escId} attempt=${attempt}/${OBLIGATION_ESCALATE_MAX}\n`,
4959
- )
4960
- // retryWithThreadFallback: a stale/renumbered topic returns THREAD_NOT_FOUND;
4961
- // retry WITHOUT the thread so the nudge still lands in the chat (the #2096
4962
- // pattern) instead of being permanently undeliverable to a dead topic.
4963
- // withDeadline: grammy/fetch impose no request timeout and `.finally` (which
4964
- // clears the in-flight flag) only runs on settle — so a hung send would leak
4965
- // the flag forever and wedge this obligation OPEN. Racing against a deadline
4966
- // guarantees the chain settles, the flag always clears, and a hang becomes a
4967
- // bounded reject handled exactly like any other failed attempt.
4968
- void withDeadline(
4969
- retryWithThreadFallback(
4970
- robustApiCall,
4971
- (tid) =>
4972
- bot.api.sendMessage(o.chatId, obligationEscalationText(o), {
4973
- ...(tid != null ? { message_thread_id: tid } : {}),
4974
- }),
4975
- { threadId: o.threadId, chat_id: o.chatId, verb: 'obligation.escalate' },
4976
- ),
4977
- OBLIGATION_ESCALATE_SEND_DEADLINE_MS,
4978
- 'obligation escalation send timed out',
4979
- )
4980
- .then(() => {
4981
- obligationLedger.close(escId)
4982
- process.stderr.write(
4983
- `telegram gateway: obligation escalation delivered + closed origin=${escId}\n`,
4984
- )
4985
- })
4986
- .catch((err) => {
4987
- if (attempt >= OBLIGATION_ESCALATE_MAX) {
4988
- obligationLedger.close(escId)
4989
- process.stderr.write(
4990
- `telegram gateway: obligation escalation PERMANENTLY undeliverable after ${attempt} attempts — closing best-effort origin=${escId}: ${err}\n`,
4991
- )
4992
- } else {
4993
- process.stderr.write(
4994
- `telegram gateway: obligation escalation send failed (attempt ${attempt}/${OBLIGATION_ESCALATE_MAX}), retrying next sweep origin=${escId}: ${err}\n`,
4995
- )
4996
- }
4997
- })
4998
- .finally(() => {
4999
- obligationEscalateInFlight.delete(escId)
5000
- })
5042
+ // Drive one escalation attempt. The send is a direct Telegram nudge
5043
+ // (retryWithThreadFallback: a stale/renumbered topic → THREAD_NOT_FOUND retries
5044
+ // thread-less, the #2096 pattern). driveEscalation guards against concurrent
5045
+ // sends, bounds the send with withDeadline (so a hung send can't leak the
5046
+ // in-flight flag and wedge the obligation OPEN), closes only after a successful
5047
+ // send, and bounds permanent failures to a best-effort close. Extracted so the
5048
+ // hang → bounded → terminal path is executable in escalation-drive.test.ts —
5049
+ // the path neither mtcute (can't hang Telegram) nor a synchronous test reaches.
5050
+ void driveEscalation({
5051
+ escId: o.originTurnId,
5052
+ inFlight: obligationEscalateInFlight,
5053
+ ledger: obligationLedger,
5054
+ send: () =>
5055
+ retryWithThreadFallback(
5056
+ robustApiCall,
5057
+ (tid) =>
5058
+ bot.api.sendMessage(o.chatId, obligationEscalationText(o), {
5059
+ ...(tid != null ? { message_thread_id: tid } : {}),
5060
+ }),
5061
+ { threadId: o.threadId, chat_id: o.chatId, verb: 'obligation.escalate' },
5062
+ ),
5063
+ maxAttempts: OBLIGATION_ESCALATE_MAX,
5064
+ deadlineMs: OBLIGATION_ESCALATE_SEND_DEADLINE_MS,
5065
+ })
5001
5066
  }
5002
5067
  if (!STATIC && OBLIGATION_LEDGER_ENABLED) {
5003
5068
  setInterval(obligationSweep, OBLIGATION_SWEEP_MS).unref()
@@ -6503,6 +6568,10 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
6503
6568
  // silence-poke clock so the next poke is measured from this send.
6504
6569
  signalTracker.noteOutbound(statusKey(chat_id, threadId), Date.now())
6505
6570
  silencePoke.noteOutbound(statusKey(chat_id, threadId), Date.now())
6571
+ // Mid-turn auto-classify recency clock: the agent just produced visible output
6572
+ // in this chat/thread (cross-turn, unlike silencePoke's per-turn lastOutboundAt).
6573
+ // Only maintained when the shadow flag is on (the default); zero overhead only with SWITCHROOM_AUTOCLASSIFY_MIDTURN_SHADOW=0.
6574
+ if (AUTOCLASSIFY_MIDTURN_SHADOW) noteAgentOutputAt(statusKey(chat_id, threadId), Date.now())
6506
6575
  // PR3b-cutover: feed lastOutboundAt to the delivery machine so its
6507
6576
  // TTL `tick` suppresses the fallback for a long-but-active turn
6508
6577
  // (model streaming past 5 min) — parity with silencePoke's own
@@ -10808,6 +10877,9 @@ async function handleInbound(
10808
10877
  } catch (err) {
10809
10878
  process.stderr.write(`telegram gateway: interrupt-marker SIGINT failed: ${(err as Error).message}\n`)
10810
10879
  }
10880
+ // A SIGINT was just sent to the in-flight turn (a failed send is only logged
10881
+ // above) — cancel its obligation so the interrupted (user-redirected) question isn't re-presented/escalated later.
10882
+ cancelInterruptedObligation()
10811
10883
  }
10812
10884
  if (interrupt.emptyBody) {
10813
10885
  // #1075: thread-id-bearing — route through swallowingApiCall so
@@ -11414,6 +11486,33 @@ async function handleInbound(
11414
11486
  isSteering = priorTurnInFlight && isSteerPrefix
11415
11487
  if (priorTurnInFlight) priorTurnStartedAt = activeTurnStartedAt.get(key)
11416
11488
 
11489
+ // Mid-turn auto-classify SHADOW: compute what a topic+recency classifier
11490
+ // WOULD decide and log it — behaviour is UNCHANGED (isSteering above is
11491
+ // untouched). Gathers the real-world distribution (same-topic continuation
11492
+ // vs cross-topic, recency spread) to tune auto-steer before it ever acts.
11493
+ // No-op unless the shadow flag is on AND a turn is in flight (the only case
11494
+ // a steer-vs-queue decision is meaningful).
11495
+ if (AUTOCLASSIFY_MIDTURN_SHADOW && priorTurnInFlight) {
11496
+ const lastOut = lastAgentOutputAt.get(key)
11497
+ const msSinceOut = lastOut != null ? Date.now() - lastOut : null
11498
+ const shadow = autoClassifyMidTurnInbound({
11499
+ isSteerPrefix,
11500
+ isQueuePrefix: isQueuedPrefix,
11501
+ priorTurnInFlight,
11502
+ isDm: isDmChatId(chat_id),
11503
+ incomingThreadId: messageThreadId ?? null,
11504
+ activeTurnThreadId: currentTurn?.sessionThreadId ?? null,
11505
+ msSinceLastAgentOutput: msSinceOut,
11506
+ dmSteerWindowMs: 0, // DM auto-steer stays off (the April regime)
11507
+ topicSteerWindowMs: 8_000, // candidate window — what we're tuning
11508
+ })
11509
+ process.stderr.write(
11510
+ `telegram gateway: autoclassify-shadow chat_id=${chat_id} ` +
11511
+ `would=${shadow.decision} reason=${shadow.reason} same_topic=${shadow.sameTopic ?? '-'} ` +
11512
+ `ms_since_out=${msSinceOut ?? '-'} actual=${isSteering ? 'steer' : 'queue'}\n`,
11513
+ )
11514
+ }
11515
+
11417
11516
  if (access.statusReactions !== false) {
11418
11517
  if (isSteering) {
11419
11518
  // Explicit steer: mark with 🤝 on the inbound message; leave the
@@ -44,6 +44,17 @@ export interface Obligation {
44
44
  * can't loop forever — and, because it is part of the durable snapshot,
45
45
  * can't become a boot-surviving poison record either. */
46
46
  escalateAttempts?: number
47
+ /** Wall-clock ms the most recent turn handling THIS obligation ended (stamped
48
+ * at turn_end via noteTurnEnded). Drives the escalate-grace window: a slow /
49
+ * background-worker / multi-segment turn ends (the in-flight gate clears)
50
+ * before its trailing answer's reply lands, and the sweep would otherwise
51
+ * re-present/escalate in that gap — a false "I may have missed this" on a
52
+ * message that's actively being answered (fuzz-confirmed on v0.14.62). The
53
+ * decision waits `graceMs` after this stamp before acting, so the trailing
54
+ * answer's close has a beat to fire. Bounded: each re-present is itself a turn
55
+ * that re-stamps this once, and representCount is capped, so the ladder still
56
+ * terminates. Durable (part of the snapshot) so the grace survives restart. */
57
+ lastTurnEndedAt?: number
47
58
  }
48
59
 
49
60
  /** What the gateway should do for the oldest open obligation at an idle boundary. */
@@ -162,19 +173,50 @@ export class ObligationLedger {
162
173
  * does not mutate. The caller performs the side effect then calls
163
174
  * markRepresented / close accordingly.
164
175
  *
165
- * - 'none' → no open obligation; the agent may idle.
176
+ * - 'none' → no open obligation (or all open ones are within their
177
+ * escalate-grace window); the agent may idle.
166
178
  * - 'represent' → re-present `obligation` as a fresh must-answer turn.
167
179
  * - 'escalate' → it has already been re-presented maxRepresents times; send
168
180
  * ONE operator-visible "did I miss this?" and close it
169
181
  * (caller calls close) rather than loop forever.
182
+ *
183
+ * GRACE WINDOW (opts.graceMs > 0): an obligation whose handling turn ended less
184
+ * than `graceMs` ago is SKIPPED — its trailing answer may still be in flight
185
+ * (a worker / long-think / multi-segment turn ends the in-flight gate before
186
+ * the reply lands). We pick the oldest obligation that is OUT of grace, so a
187
+ * genuinely-stale one is still acted on while a freshly-ended one waits. Pure
188
+ * (clock injected via opts.now, mirroring the builder convention). With no opts
189
+ * (or graceMs<=0) this is the pre-grace behaviour exactly.
170
190
  */
171
- decideAtIdle(): LedgerDecision {
172
- const o = this.oldest()
191
+ decideAtIdle(opts?: { now: number; graceMs: number }): LedgerDecision {
192
+ const o =
193
+ opts != null && opts.graceMs > 0 ? this.oldestEligible(opts.now, opts.graceMs) : this.oldest()
173
194
  if (o === undefined) return { action: 'none' }
174
195
  if (o.representCount >= this.maxRepresents) return { action: 'escalate', obligation: o }
175
196
  return { action: 'represent', obligation: o }
176
197
  }
177
198
 
199
+ /** The oldest open obligation whose handling turn ended at least `graceMs` ago
200
+ * (or never ended — a still-queued obligation has no lastTurnEndedAt and is
201
+ * always eligible; it can't have a trailing answer in flight). */
202
+ private oldestEligible(now: number, graceMs: number): Obligation | undefined {
203
+ let best: Obligation | undefined
204
+ for (const o of this.open.values()) {
205
+ if (o.lastTurnEndedAt != null && now - o.lastTurnEndedAt < graceMs) continue // within grace
206
+ if (best === undefined || o.openedAt < best.openedAt) best = o
207
+ }
208
+ return best
209
+ }
210
+
211
+ /** Stamp that the most recent turn handling `originTurnId` just ended (drives
212
+ * the escalate-grace window). No-op if the obligation isn't open. Persists. */
213
+ noteTurnEnded(originTurnId: string, ts: number): void {
214
+ const o = this.open.get(originTurnId)
215
+ if (o === undefined) return
216
+ o.lastTurnEndedAt = ts
217
+ this.persist()
218
+ }
219
+
178
220
  /**
179
221
  * Decide which obligation a substantive reply discharges — DETERMINISTICALLY,
180
222
  * holding for any model behavior:
@@ -35,6 +35,26 @@ function readStdin() {
35
35
  }
36
36
  }
37
37
 
38
+ /**
39
+ * Coerce a tool-input field to display text WITHOUT the `[object Object]`
40
+ * trap. Only primitives carry a meaningful label: strings pass through,
41
+ * numbers/booleans stringify cleanly. Objects and arrays return '' so the
42
+ * caller falls through to its next fallback (a sibling field, or the
43
+ * humanized tool name) instead of surfacing literal "[object Object]".
44
+ *
45
+ * This guards the MCP-tool path in particular: an operator-configured
46
+ * server (e.g. Brevo CRM) may pass a filter/query OBJECT in `query` /
47
+ * `description` / `title`, and the old `String(i.query ?? '')` coercion
48
+ * rendered that as "[object Object]" on the live activity feed. The
49
+ * renderer's own `clip()` already rejects non-strings; this mirrors that
50
+ * contract at the hook so the bad value never reaches the sidecar JSONL.
51
+ */
52
+ function asText(v) {
53
+ if (typeof v === 'string') return v
54
+ if (typeof v === 'number' || typeof v === 'boolean') return String(v)
55
+ return ''
56
+ }
57
+
38
58
  /**
39
59
  * One-line, length-bounded escape of a value for inclusion in a label.
40
60
  * Newlines collapsed, very long strings truncated with an ellipsis.
@@ -82,10 +102,10 @@ export function computeLabel(toolName, input) {
82
102
  // for Bash/Task, matching the gateway's describeToolUse rendering.
83
103
  switch (toolName) {
84
104
  case 'Bash':
85
- return clip(String(i.description ?? ''), 70).trim() || 'Running a command'
105
+ return clip(asText(i.description), 70).trim() || 'Running a command'
86
106
  case 'Task':
87
107
  case 'Agent': {
88
- const d = clip(String(i.description ?? ''), 60).trim()
108
+ const d = clip(asText(i.description), 60).trim()
89
109
  return d ? `Delegating: ${d}` : 'Delegating to a sub-agent'
90
110
  }
91
111
  case 'TodoWrite':
@@ -103,16 +123,16 @@ export function computeLabel(toolName, input) {
103
123
  case 'Write':
104
124
  return `Writing ${clip(safeBasename(i.file_path))}`.trim()
105
125
  case 'Grep': {
106
- const path = i.path ? clip(String(i.path), 40) : '.'
107
- const pat = clip(String(i.pattern ?? ''), 40)
126
+ const path = i.path ? clip(asText(i.path), 40) : '.'
127
+ const pat = clip(asText(i.pattern), 40)
108
128
  return `Searching ${path} for ${pat}`
109
129
  }
110
130
  case 'Glob':
111
- return `Finding files matching ${clip(String(i.pattern ?? ''), 60)}`
131
+ return `Finding files matching ${clip(asText(i.pattern), 60)}`
112
132
  case 'WebFetch':
113
133
  return `Fetching ${clip(urlHostPath(i.url), 60)}`
114
134
  case 'WebSearch':
115
- return `Searching the web for ${clip(String(i.query ?? ''), 60)}`
135
+ return `Searching the web for ${clip(asText(i.query), 60)}`
116
136
  case 'NotebookEdit':
117
137
  return `Editing notebook ${clip(safeBasename(i.notebook_path))}`
118
138
  case 'BashOutput':
@@ -128,7 +148,7 @@ export function computeLabel(toolName, input) {
128
148
  // sidecar JSONL and recover which skill fired per turn —
129
149
  // the progress card path that used to surface this was retired
130
150
  // when `progressDriver` was nulled out in #1122 PR3.
131
- const slug = clip(String(i.skill ?? ''), 64)
151
+ const slug = clip(asText(i.skill), 64)
132
152
  return slug ? `Running skill ${slug}` : null
133
153
  }
134
154
  }
@@ -141,7 +161,7 @@ export function computeLabel(toolName, input) {
141
161
  case 'mcp__switchroom-telegram__stream_reply':
142
162
  return 'Replying'
143
163
  case 'mcp__switchroom-telegram__react': {
144
- const emoji = clip(String(i.emoji ?? ''), 8)
164
+ const emoji = clip(asText(i.emoji), 8)
145
165
  return emoji ? `Reacting ${emoji}` : 'Reacting'
146
166
  }
147
167
  case 'mcp__switchroom-telegram__get_recent_messages':
@@ -177,7 +197,7 @@ export function computeLabel(toolName, input) {
177
197
  return 'Looking through your files'
178
198
  if (server === 'notion' || server === 'claude_ai_notion') return 'Checking your notes'
179
199
  if (server === 'perplexity') {
180
- const q = clip(String(i.query ?? i.description ?? ''), 60).trim()
200
+ const q = clip(asText(i.query) || asText(i.description), 60).trim()
181
201
  return q ? `Searching the web for ${q}` : 'Searching the web'
182
202
  }
183
203
  if (server === 'webkite') {
@@ -186,9 +206,9 @@ export function computeLabel(toolName, input) {
186
206
  }
187
207
  // Unknown MCP server: prefer a model-authored field, else humanized tool.
188
208
  const desc =
189
- clip(String(i.description ?? ''), 60).trim() ||
190
- clip(String(i.query ?? ''), 50).trim() ||
191
- clip(String(i.title ?? ''), 50).trim()
209
+ clip(asText(i.description), 60).trim() ||
210
+ clip(asText(i.query), 50).trim() ||
211
+ clip(asText(i.title), 50).trim()
192
212
  if (desc) return desc
193
213
  return `Using ${tool.replace(/[-_]+/g, ' ')}`
194
214
  }
@@ -0,0 +1,87 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { autoClassifyMidTurnInbound, type AutoClassifyInput } from "../gateway/auto-classify-mid-turn.js";
3
+
4
+ function base(over: Partial<AutoClassifyInput> = {}): AutoClassifyInput {
5
+ return {
6
+ isSteerPrefix: false,
7
+ isQueuePrefix: false,
8
+ priorTurnInFlight: true,
9
+ isDm: false,
10
+ incomingThreadId: 3,
11
+ activeTurnThreadId: 3,
12
+ msSinceLastAgentOutput: 2000,
13
+ dmSteerWindowMs: 0, // DM auto-steer off by default
14
+ topicSteerWindowMs: 8000,
15
+ ...over,
16
+ };
17
+ }
18
+
19
+ describe("autoClassifyMidTurnInbound", () => {
20
+ it("explicit /steer prefix always wins", () => {
21
+ const r = autoClassifyMidTurnInbound(base({ isSteerPrefix: true, incomingThreadId: 9, activeTurnThreadId: 3 }));
22
+ expect(r.decision).toBe("steer");
23
+ expect(r.reason).toBe("steer_prefix");
24
+ });
25
+
26
+ it("explicit /queue prefix always wins", () => {
27
+ expect(autoClassifyMidTurnInbound(base({ isQueuePrefix: true })).decision).toBe("queue");
28
+ });
29
+
30
+ it("no turn in flight → queue (fresh turn, not our decision)", () => {
31
+ const r = autoClassifyMidTurnInbound(base({ priorTurnInFlight: false }));
32
+ expect(r.decision).toBe("queue");
33
+ expect(r.reason).toBe("not_mid_turn");
34
+ });
35
+
36
+ // ── Supergroup: topic is the strong signal ──
37
+ it("supergroup, DIFFERENT topic than the active turn → queue (cross_topic), regardless of recency", () => {
38
+ const r = autoClassifyMidTurnInbound(base({ incomingThreadId: 5, activeTurnThreadId: 3, msSinceLastAgentOutput: 100 }));
39
+ expect(r.decision).toBe("queue");
40
+ expect(r.reason).toBe("cross_topic");
41
+ expect(r.sameTopic).toBe(false);
42
+ });
43
+
44
+ it("supergroup, SAME topic + recent → steer", () => {
45
+ const r = autoClassifyMidTurnInbound(base({ msSinceLastAgentOutput: 3000, topicSteerWindowMs: 8000 }));
46
+ expect(r.decision).toBe("steer");
47
+ expect(r.reason).toBe("same_topic_recent");
48
+ expect(r.sameTopic).toBe(true);
49
+ });
50
+
51
+ it("supergroup, SAME topic but STALE (older than window) → queue", () => {
52
+ const r = autoClassifyMidTurnInbound(base({ msSinceLastAgentOutput: 20000, topicSteerWindowMs: 8000 }));
53
+ expect(r.decision).toBe("queue");
54
+ expect(r.reason).toBe("same_topic_stale");
55
+ });
56
+
57
+ it("supergroup, no recency recorded (null) → queue (not treated as recent)", () => {
58
+ const r = autoClassifyMidTurnInbound(base({ msSinceLastAgentOutput: null }));
59
+ expect(r.decision).toBe("queue");
60
+ expect(r.reason).toBe("same_topic_stale");
61
+ });
62
+
63
+ it("topicSteerWindowMs=0 (auto-steer off) → queue, still reports sameTopic", () => {
64
+ const r = autoClassifyMidTurnInbound(base({ topicSteerWindowMs: 0, incomingThreadId: 3, activeTurnThreadId: 3 }));
65
+ expect(r.decision).toBe("queue");
66
+ expect(r.reason).toBe("topic_disabled");
67
+ expect(r.sameTopic).toBe(true);
68
+ });
69
+
70
+ it("canonical thread compare: null/undefined/0 collapse to the same no-thread bucket", () => {
71
+ expect(autoClassifyMidTurnInbound(base({ incomingThreadId: 0, activeTurnThreadId: null })).sameTopic).toBe(true);
72
+ expect(autoClassifyMidTurnInbound(base({ incomingThreadId: undefined, activeTurnThreadId: 0 })).sameTopic).toBe(true);
73
+ expect(autoClassifyMidTurnInbound(base({ incomingThreadId: 1, activeTurnThreadId: 0 })).sameTopic).toBe(false);
74
+ });
75
+
76
+ // ── DM: timing-only, off by default ──
77
+ it("DM with dmSteerWindowMs=0 (default) → queue even if recent (DM auto-steer off)", () => {
78
+ const r = autoClassifyMidTurnInbound(base({ isDm: true, incomingThreadId: null, activeTurnThreadId: null, msSinceLastAgentOutput: 500, dmSteerWindowMs: 0 }));
79
+ expect(r.decision).toBe("queue");
80
+ expect(r.reason).toBe("dm_disabled");
81
+ });
82
+
83
+ it("DM with dmSteerWindowMs>0 + recent → steer; stale → queue", () => {
84
+ expect(autoClassifyMidTurnInbound(base({ isDm: true, incomingThreadId: null, activeTurnThreadId: null, msSinceLastAgentOutput: 5000, dmSteerWindowMs: 10000 })).decision).toBe("steer");
85
+ expect(autoClassifyMidTurnInbound(base({ isDm: true, incomingThreadId: null, activeTurnThreadId: null, msSinceLastAgentOutput: 15000, dmSteerWindowMs: 10000 })).decision).toBe("queue");
86
+ });
87
+ });