npm - switchroom - Versions diffs - 0.14.62 → 0.14.63 - Mend

switchroom 0.14.62 → 0.14.63

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/dist/cli/switchroom.js +2 -2
package/package.json +1 -1
package/telegram-plugin/dist/gateway/gateway.js +148 -35
package/telegram-plugin/gateway/auto-classify-mid-turn.ts +119 -0
package/telegram-plugin/gateway/escalation-drive.ts +79 -0
package/telegram-plugin/gateway/gateway.ts +146 -52
package/telegram-plugin/gateway/obligation-ledger.ts +45 -3
package/telegram-plugin/hooks/tool-label-pretool.mjs +32 -12
package/telegram-plugin/tests/auto-classify-mid-turn.test.ts +87 -0
package/telegram-plugin/tests/escalation-drive.test.ts +123 -0
package/telegram-plugin/tests/obligation-determinism.test.ts +63 -3
package/telegram-plugin/tests/obligation-ledger.test.ts +92 -0

package/telegram-plugin/gateway/gateway.ts CHANGED Viewed

@@ -142,6 +142,7 @@ import {
   resolveRetentionDays as resolveRegistryRetentionDays,
 } from '../registry/reaper.js'
 import { parseQueuePrefix, parseSteerPrefix, formatPriorAssistantPreview, formatReplyToText } from '../steering.js'
+import { autoClassifyMidTurnInbound } from './auto-classify-mid-turn.js'
 import {
   renderOperatorEvent,
   shouldEmitOperatorEvent,
@@ -289,7 +290,7 @@ import {
   obligationEscalationText,
 } from './obligation-ledger.js'
 import { loadObligations, persistObligations } from './obligation-store.js'
-import { withDeadline } from './with-deadline.js'
+import { driveEscalation } from './escalation-drive.js'
 import { createInboundSpool } from './inbound-spool.js'
 import { purgeStaleTurnsForChat } from './turn-state-purge.js'
 import { decideInboundDelivery } from './inbound-delivery-gate.js'
@@ -1423,6 +1424,47 @@ const OBLIGATION_ESCALATE_MAX = 3
 // bounded escalate ladder to a terminal. 45s comfortably exceeds robustApiCall's
 // 3-attempt network backoff so a legitimate slow send isn't cut short.
 const OBLIGATION_ESCALATE_SEND_DEADLINE_MS = 45_000
+// Escalate-grace window. A slow / background-worker / multi-segment turn ends
+// (the in-flight gate clears) BEFORE its trailing answer's reply lands, and the
+// 5s sweep would re-present/escalate in that gap — a false "⚠️ I may have missed
+// this" on a message that's actively being answered (fuzz-confirmed on v0.14.62:
+// ~14% of marko's no-reply turn-ends had the answer in flight). An obligation
+// whose handling turn ended < this ago is skipped by decideAtIdle, giving the
+// trailing answer's close a beat to fire. Bounded: each re-present is itself a
+// turn that re-stamps once, representCount is capped → the ladder still
+// terminates. The 45s default covers the usual (<40s) case of the observed "answer lands within ~60s" gap, not its full tail.
+// Kill switch: SWITCHROOM_OBLIGATION_ESCALATE_GRACE_MS=0 → pre-grace behaviour.
+const OBLIGATION_ESCALATE_GRACE_MS = (() => {
+  const raw = process.env.SWITCHROOM_OBLIGATION_ESCALATE_GRACE_MS
+  if (raw == null || raw === '') return 45_000 // unset or empty → default
+  const n = Number(raw) // NOTE(review): a whitespace-only value parses as 0 here, i.e. acts as the kill switch — confirm intended
+  return Number.isFinite(n) && n >= 0 ? n : 45_000 // non-numeric, infinite or negative → default
+})()
+// ─── Mid-turn auto-classify (steer-vs-queue), SHADOW mode ─────────────────────
+// Today a no-prefix mid-turn message always QUEUES. autoClassifyMidTurnInbound
+// (auto-classify-mid-turn.ts) is the basis for a smarter default using
+// topic-vs-active-turn + reply-recency. Phase 1 ships SHADOW-ONLY: when this
+// flag is on we COMPUTE + LOG what we'd decide (decision/reason/same_topic/
+// ms_since_out) but the behaviour is UNCHANGED (still queue) — to gather the
+// real-world distribution (how often mid-turn messages are same-topic
+// continuations vs cross-topic, and the recency spread) before any action flips
+// on. Default OFF → zero overhead. Shadow never acts on the result: the call site passes dmSteerWindowMs=0 and a candidate topicSteerWindowMs=8_000 and only logs the decision.
+const AUTOCLASSIFY_MIDTURN_SHADOW = process.env.SWITCHROOM_AUTOCLASSIFY_MIDTURN_SHADOW === '1' // only the exact string '1' enables it
+// Per-(chat,thread) wall-clock ms of the agent's LAST visible output — the
+// recency clock the classifier uses (NOT turn age: a long actively-narrating
+// worker turn must not read "stale"). Stamped beside signalTracker.noteOutbound.
+// Bounded to LAST_OUTPUT_MAX_KEYS entries, evicting the least-recently-STAMPED key (a .get() read does not refresh recency), so a long-lived gateway with many topics can't grow unboundedly.
+const lastAgentOutputAt = new Map<string, number>()
+const LAST_OUTPUT_MAX_KEYS = 512
+function noteAgentOutputAt(key: string, ts: number): void { // records `ts` as the latest output time for `key`, then enforces the size cap
+  lastAgentOutputAt.delete(key) // re-insert → most-recently-used at the tail
+  lastAgentOutputAt.set(key, ts)
+  if (lastAgentOutputAt.size > LAST_OUTPUT_MAX_KEYS) {
+    const oldest = lastAgentOutputAt.keys().next().value // Map iterates in insertion order, so the first key is the least recently stamped
+    if (oldest !== undefined) lastAgentOutputAt.delete(oldest)
+  }
+}
 // Durable snapshot of the open obligation set on the persistent per-agent
 // volume (STATE_DIR = /state/agent/telegram in prod). Closes the restart hole:
 // the in-memory ledger alone empties on restart and the spool's boot-replay
@@ -1999,6 +2041,29 @@ let pendingDeferredInterrupt: PendingDeferredInterrupt | null = null
  * Idempotent: nulls the slot and clears the timer before doing any work so a
  * boundary event and the timeout can't double-fire.
  */
+/**
+ * An `!` interrupt SIGINT-kills the in-flight turn. That turn was handling a
+ * user message with an open obligation, and the killed turn does NOT reliably
+ * emit turn_end (so endCurrentTurnAtomic never closes it) — so without this the
+ * obligation survives and the idle sweep later re-presents/escalates "you have
+ * an earlier message you never answered" for a question the user EXPLICITLY
+ * cancelled. An interrupt is a deliberate redirect, so closing that obligation
+ * is the correct terminal (the user chose to interrupt; they can re-ask). Only
+ * the interrupted turn's OWN obligation is closed — queued siblings (other open
+ * obligations) are untouched. No-op when the flag is off, no turn is in flight,
+ * or the turn isn't a tracked obligation (synthetic / already closed).
+ *
+ * Reads the module-level `currentTurn` at call time, so it must run while
+ * `currentTurn` still refers to the interrupted turn. Writes one stderr line
+ * only when `obligationLedger.close()` returns a truthy value.
+ *
+ * NOTE(review): both call sites in this change invoke it after the SIGINT
+ * try/catch, so it also runs when the SIGINT itself failed — confirm that
+ * closing the obligation is still intended in that case.
+ * NOTE(review): this declaration sits between what appears to be
+ * fireDeferredInterrupt's doc comment ("Idempotent: nulls the slot …") and
+ * that function, which detaches the comment from it — consider relocating.
+ */
+function cancelInterruptedObligation(): void {
+  if (!OBLIGATION_LEDGER_ENABLED) return
+  const turn = currentTurn
+  if (turn == null) return
+  if (obligationLedger.close(turn.turnId)) {
+    process.stderr.write(
+      `telegram gateway: obligation cancelled by interrupt origin=${turn.turnId}\n`,
+    )
+  }
+}
 async function fireDeferredInterrupt(reason: 'boundary' | 'timeout'): Promise<void> {
   const pending = pendingDeferredInterrupt
   if (pending == null) return
@@ -2027,6 +2092,10 @@ async function fireDeferredInterrupt(reason: 'boundary' | 'timeout'): Promise<vo
     process.stderr.write(`telegram gateway: deferred-interrupt SIGINT failed: ${(err as Error).message}\n`)
   }
+  // The SIGINT just killed the in-flight turn — cancel its obligation so the
+  // interrupted (user-redirected) question isn't re-presented/escalated later.
+  cancelInterruptedObligation()
   // Deliver the replacement body as a fresh turn to the freshly-killed
   // bridge — same sendToAgent + buffer-on-miss primitive the synchronous
   // interrupt carve-out uses at the handleInbound delivery site.
@@ -2426,8 +2495,17 @@ function endCurrentTurnAtomic(turn: CurrentTurn): void {
   // finalAnswerDelivered===false → stays open → re-presented (the intended
   // catch). close() is a no-op for synthetic turns (turnId not in the ledger).
   // No-op when the flag is off.
-  if (OBLIGATION_LEDGER_ENABLED && turn.finalAnswerDelivered) {
-    obligationLedger.close(turn.turnId)
+  if (OBLIGATION_LEDGER_ENABLED) {
+    if (turn.finalAnswerDelivered) {
+      obligationLedger.close(turn.turnId)
+    } else {
+      // Turn ended WITHOUT a final answer. If this turn was handling an open
+      // obligation, stamp its grace clock so the idle sweep waits before
+      // re-presenting/escalating — a slow/worker answer may still be in flight
+      // (the over-escalation fix). No-op when turn.turnId isn't an open
+      // obligation (synthetic / already-closed turn).
+      obligationLedger.noteTurnEnded(turn.turnId, Date.now())
+    }
   }
   // Component 2 — clear any prior no-reply drain timer for this turn; a
   // fresh end re-evaluates below. (Idempotent — null when never armed.)
@@ -4925,7 +5003,13 @@ function obligationSweep(): void {
   if (!obligationLedger.hasOpen()) return
   if (turnInFlightForGate()) return // a turn is running — let it finish/answer
   const agent = process.env.SWITCHROOM_AGENT_NAME ?? ''
-  const decision = obligationLedger.decideAtIdle()
+  // Grace window: skip an obligation whose handling turn ended < grace ago — its
+  // trailing slow/worker answer may still be landing (over-escalation fix).
+  const decision = obligationLedger.decideAtIdle(
+    OBLIGATION_ESCALATE_GRACE_MS > 0
+      ? { now: Date.now(), graceMs: OBLIGATION_ESCALATE_GRACE_MS }
+      : undefined,
+  )
   const o = decision.obligation
   if (decision.action === 'none' || o == null) return
   if (decision.action === 'represent') {
@@ -4950,54 +5034,30 @@ function obligationSweep(): void {
   // (dead topic even after thread-fallback, blocked bot) is bounded by
   // OBLIGATION_ESCALATE_MAX → close best-effort (the user is unreachable, so a
   // bounded give-up beats an infinite loop / a boot-surviving poison record).
-  if (obligationEscalateInFlight.has(o.originTurnId)) return // a send is already awaiting
-  const escId = o.originTurnId
-  const attempt = obligationLedger.markEscalateAttempt(escId)
-  obligationEscalateInFlight.add(escId)
-  process.stderr.write(
-    `telegram gateway: obligation escalating (exhausted ${OBLIGATION_REPRESENT_MAX} re-presents) origin=${escId} attempt=${attempt}/${OBLIGATION_ESCALATE_MAX}\n`,
-  )
-  // retryWithThreadFallback: a stale/renumbered topic returns THREAD_NOT_FOUND;
-  // retry WITHOUT the thread so the nudge still lands in the chat (the #2096
-  // pattern) instead of being permanently undeliverable to a dead topic.
-  // withDeadline: grammy/fetch impose no request timeout and `.finally` (which
-  // clears the in-flight flag) only runs on settle — so a hung send would leak
-  // the flag forever and wedge this obligation OPEN. Racing against a deadline
-  // guarantees the chain settles, the flag always clears, and a hang becomes a
-  // bounded reject handled exactly like any other failed attempt.
-  void withDeadline(
-    retryWithThreadFallback(
-      robustApiCall,
-      (tid) =>
-        bot.api.sendMessage(o.chatId, obligationEscalationText(o), {
-          ...(tid != null ? { message_thread_id: tid } : {}),
-        }),
-      { threadId: o.threadId, chat_id: o.chatId, verb: 'obligation.escalate' },
-    ),
-    OBLIGATION_ESCALATE_SEND_DEADLINE_MS,
-    'obligation escalation send timed out',
-  )
-    .then(() => {
-      obligationLedger.close(escId)
-      process.stderr.write(
-        `telegram gateway: obligation escalation delivered + closed origin=${escId}\n`,
-      )
-    })
-    .catch((err) => {
-      if (attempt >= OBLIGATION_ESCALATE_MAX) {
-        obligationLedger.close(escId)
-        process.stderr.write(
-          `telegram gateway: obligation escalation PERMANENTLY undeliverable after ${attempt} attempts — closing best-effort origin=${escId}: ${err}\n`,
-        )
-      } else {
-        process.stderr.write(
-          `telegram gateway: obligation escalation send failed (attempt ${attempt}/${OBLIGATION_ESCALATE_MAX}), retrying next sweep origin=${escId}: ${err}\n`,
-        )
-      }
-    })
-    .finally(() => {
-      obligationEscalateInFlight.delete(escId)
-    })
+  // Drive one escalation attempt. The send is a direct Telegram nudge
+  // (retryWithThreadFallback: a stale/renumbered topic → THREAD_NOT_FOUND retries
+  // thread-less, the #2096 pattern). driveEscalation guards against concurrent
+  // sends, bounds the send with withDeadline (so a hung send can't leak the
+  // in-flight flag and wedge the obligation OPEN), closes only after a successful
+  // send, and bounds permanent failures to a best-effort close. Extracted so the
+  // hang → bounded → terminal path is executable in escalation-drive.test.ts —
+  // the path neither mtcute (can't hang Telegram) nor a synchronous test reaches.
+  void driveEscalation({
+    escId: o.originTurnId,
+    inFlight: obligationEscalateInFlight,
+    ledger: obligationLedger,
+    send: () =>
+      retryWithThreadFallback(
+        robustApiCall,
+        (tid) =>
+          bot.api.sendMessage(o.chatId, obligationEscalationText(o), {
+            ...(tid != null ? { message_thread_id: tid } : {}),
+          }),
+        { threadId: o.threadId, chat_id: o.chatId, verb: 'obligation.escalate' },
+      ),
+    maxAttempts: OBLIGATION_ESCALATE_MAX,
+    deadlineMs: OBLIGATION_ESCALATE_SEND_DEADLINE_MS,
+  })
 }
 if (!STATIC && OBLIGATION_LEDGER_ENABLED) {
   setInterval(obligationSweep, OBLIGATION_SWEEP_MS).unref()
@@ -6503,6 +6563,10 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
   // silence-poke clock so the next poke is measured from this send.
   signalTracker.noteOutbound(statusKey(chat_id, threadId), Date.now())
   silencePoke.noteOutbound(statusKey(chat_id, threadId), Date.now())
+  // Mid-turn auto-classify recency clock: the agent just produced visible output
+  // in this chat/thread (cross-turn, unlike silencePoke's per-turn lastOutboundAt).
+  // Only maintained when the shadow flag is on → truly zero overhead by default.
+  if (AUTOCLASSIFY_MIDTURN_SHADOW) noteAgentOutputAt(statusKey(chat_id, threadId), Date.now())
   // PR3b-cutover: feed lastOutboundAt to the delivery machine so its
   // TTL `tick` suppresses the fallback for a long-but-active turn
   // (model streaming past 5 min) — parity with silencePoke's own
@@ -10808,6 +10872,9 @@ async function handleInbound(
       } catch (err) {
         process.stderr.write(`telegram gateway: interrupt-marker SIGINT failed: ${(err as Error).message}\n`)
       }
+      // The SIGINT just killed the in-flight turn — cancel its obligation so the
+      // interrupted (user-redirected) question isn't re-presented/escalated later.
+      cancelInterruptedObligation()
     }
     if (interrupt.emptyBody) {
       // #1075: thread-id-bearing — route through swallowingApiCall so
@@ -11414,6 +11481,33 @@ async function handleInbound(
     isSteering = priorTurnInFlight && isSteerPrefix
     if (priorTurnInFlight) priorTurnStartedAt = activeTurnStartedAt.get(key)
+    // Mid-turn auto-classify SHADOW: compute what a topic+recency classifier
+    // WOULD decide and log it — behaviour is UNCHANGED (isSteering above is
+    // untouched). Gathers the real-world distribution (same-topic continuation
+    // vs cross-topic, recency spread) to tune auto-steer before it ever acts.
+    // No-op unless the shadow flag is on AND a turn is in flight (the only case
+    // a steer-vs-queue decision is meaningful).
+    if (AUTOCLASSIFY_MIDTURN_SHADOW && priorTurnInFlight) {
+      const lastOut = lastAgentOutputAt.get(key)
+      const msSinceOut = lastOut != null ? Date.now() - lastOut : null
+      const shadow = autoClassifyMidTurnInbound({
+        isSteerPrefix,
+        isQueuePrefix: isQueuedPrefix,
+        priorTurnInFlight,
+        isDm: isDmChatId(chat_id),
+        incomingThreadId: messageThreadId ?? null,
+        activeTurnThreadId: currentTurn?.sessionThreadId ?? null,
+        msSinceLastAgentOutput: msSinceOut,
+        dmSteerWindowMs: 0, // DM auto-steer stays off (the April regime)
+        topicSteerWindowMs: 8_000, // candidate window — what we're tuning
+      })
+      process.stderr.write(
+        `telegram gateway: autoclassify-shadow chat_id=${chat_id} ` +
+        `would=${shadow.decision} reason=${shadow.reason} same_topic=${shadow.sameTopic ?? '-'} ` +
+        `ms_since_out=${msSinceOut ?? '-'} actual=${isSteering ? 'steer' : 'queue'}\n`,
+      )
+    }
     if (access.statusReactions !== false) {
       if (isSteering) {
         // Explicit steer: mark with 🤝 on the inbound message; leave the

package/telegram-plugin/gateway/obligation-ledger.ts CHANGED Viewed

@@ -44,6 +44,17 @@ export interface Obligation {
    *  can't loop forever — and, because it is part of the durable snapshot,
    *  can't become a boot-surviving poison record either. */
   escalateAttempts?: number
+  /** Wall-clock ms the most recent turn handling THIS obligation ended (stamped
+   *  at turn_end via noteTurnEnded). Drives the escalate-grace window: a slow /
+   *  background-worker / multi-segment turn ends (the in-flight gate clears)
+   *  before its trailing answer's reply lands, and the sweep would otherwise
+   *  re-present/escalate in that gap — a false "I may have missed this" on a
+   *  message that's actively being answered (fuzz-confirmed on v0.14.62). The
+   *  decision waits `graceMs` after this stamp before acting, so the trailing
+   *  answer's close has a beat to fire. Bounded: each re-present is itself a turn
+   *  that re-stamps this once, and representCount is capped, so the ladder still
+   *  terminates. Durable (part of the snapshot) so the grace survives restart. */
+  lastTurnEndedAt?: number
 }
 /** What the gateway should do for the oldest open obligation at an idle boundary. */
@@ -162,19 +173,50 @@ export class ObligationLedger {
    * does not mutate. The caller performs the side effect then calls
    * markRepresented / close accordingly.
    *
-   *  - 'none'      → no open obligation; the agent may idle.
+   *  - 'none'      → no open obligation (or all open ones are within their
+   *                  escalate-grace window); the agent may idle.
    *  - 'represent' → re-present `obligation` as a fresh must-answer turn.
    *  - 'escalate'  → it has already been re-presented maxRepresents times; send
    *                  ONE operator-visible "did I miss this?" and close it
    *                  (caller calls close) rather than loop forever.
+   *
+   * GRACE WINDOW (opts.graceMs > 0): an obligation whose handling turn ended less
+   * than `graceMs` ago is SKIPPED — its trailing answer may still be in flight
+   * (a worker / long-think / multi-segment turn ends the in-flight gate before
+   * the reply lands). We pick the oldest obligation that is OUT of grace, so a
+   * genuinely-stale one is still acted on while a freshly-ended one waits. Pure
+   * (clock injected via opts.now, mirroring the builder convention). With no opts
+   * (or graceMs<=0) this is the pre-grace behaviour exactly.
    */
-  decideAtIdle(): LedgerDecision {
-    const o = this.oldest()
+  decideAtIdle(opts?: { now: number; graceMs: number }): LedgerDecision {
+    const o =
+      opts != null && opts.graceMs > 0 ? this.oldestEligible(opts.now, opts.graceMs) : this.oldest()
     if (o === undefined) return { action: 'none' }
     if (o.representCount >= this.maxRepresents) return { action: 'escalate', obligation: o }
     return { action: 'represent', obligation: o }
   }
+  /** The oldest open obligation whose handling turn ended at least `graceMs` ago
+   *  (or never ended — a still-queued obligation has no lastTurnEndedAt and is
+   *  always eligible; it can't have a trailing answer in flight).
+   *
+   *  "Oldest" means the smallest `openedAt`; on a tie the first one met while
+   *  iterating `this.open` wins (the comparison is strict). Returns undefined
+   *  when nothing is open or every open obligation is still within grace.
+   *  Does not mutate the ledger.
+   *
+   *  @param now     current wall-clock ms, injected by the caller
+   *  @param graceMs minimum ms that must have passed since `lastTurnEndedAt`
+   *
+   *  NOTE(review): a `lastTurnEndedAt` later than `now` (e.g. a persisted stamp
+   *  read back after the wall clock stepped backwards) makes the difference
+   *  negative, which counts as "within grace" until the clock catches up —
+   *  confirm that is acceptable. */
+  private oldestEligible(now: number, graceMs: number): Obligation | undefined {
+    let best: Obligation | undefined
+    for (const o of this.open.values()) {
+      if (o.lastTurnEndedAt != null && now - o.lastTurnEndedAt < graceMs) continue // within grace
+      if (best === undefined || o.openedAt < best.openedAt) best = o
+    }
+    return best
+  }
+  /** Stamp that the most recent turn handling `originTurnId` just ended (drives
+   *  the escalate-grace window). No-op if the obligation isn't open. Persists.
+   *
+   *  @param originTurnId key of the obligation in `this.open`
+   *  @param ts           wall-clock ms stored as `lastTurnEndedAt`; overwrites
+   *                      any earlier stamp, so each ended turn restarts the
+   *                      grace window for this obligation. */
+  noteTurnEnded(originTurnId: string, ts: number): void {
+    const o = this.open.get(originTurnId)
+    if (o === undefined) return
+    o.lastTurnEndedAt = ts
+    this.persist()
+  }
   /**
    * Decide which obligation a substantive reply discharges — DETERMINISTICALLY,
    * holding for any model behavior:

package/telegram-plugin/hooks/tool-label-pretool.mjs CHANGED Viewed

@@ -35,6 +35,26 @@ function readStdin() {
   }
 }
+/**
+ * Coerce a tool-input field to display text WITHOUT the `[object Object]`
+ * trap. Only primitives carry a meaningful label: strings pass through,
+ * numbers/booleans stringify cleanly. Objects and arrays return '' so the
+ * caller falls through to its next fallback (a sibling field, or the
+ * humanized tool name) instead of surfacing literal "[object Object]".
+ *
+ * This guards the MCP-tool path in particular: an operator-configured
+ * server (e.g. Brevo CRM) may pass a filter/query OBJECT in `query` /
+ * `description` / `title`, and the old `String(i.query ?? '')` coercion
+ * rendered that as "[object Object]" on the live activity feed. The
+ * renderer's own `clip()` already rejects non-strings; this mirrors that
+ * contract at the hook so the bad value never reaches the sidecar JSONL.
+ *
+ * @param v any tool-input field value
+ * @returns `v` unchanged for a string; `String(v)` for a number or boolean
+ *   (so NaN renders as "NaN" and false as "false"); '' for everything
+ *   else — objects, arrays, null, undefined, bigint, symbol, function.
+ */
+function asText(v) {
+  if (typeof v === 'string') return v
+  if (typeof v === 'number' || typeof v === 'boolean') return String(v)
+  return ''
+}
 /**
  * One-line, length-bounded escape of a value for inclusion in a label.
  * Newlines collapsed, very long strings truncated with an ellipsis.
@@ -82,10 +102,10 @@ export function computeLabel(toolName, input) {
   // for Bash/Task, matching the gateway's describeToolUse rendering.
   switch (toolName) {
     case 'Bash':
-      return clip(String(i.description ?? ''), 70).trim() || 'Running a command'
+      return clip(asText(i.description), 70).trim() || 'Running a command'
     case 'Task':
     case 'Agent': {
-      const d = clip(String(i.description ?? ''), 60).trim()
+      const d = clip(asText(i.description), 60).trim()
       return d ? `Delegating: ${d}` : 'Delegating to a sub-agent'
     }
     case 'TodoWrite':
@@ -103,16 +123,16 @@ export function computeLabel(toolName, input) {
     case 'Write':
       return `Writing ${clip(safeBasename(i.file_path))}`.trim()
     case 'Grep': {
-      const path = i.path ? clip(String(i.path), 40) : '.'
-      const pat = clip(String(i.pattern ?? ''), 40)
+      const path = i.path ? clip(asText(i.path), 40) : '.'
+      const pat = clip(asText(i.pattern), 40)
       return `Searching ${path} for ${pat}`
     }
     case 'Glob':
-      return `Finding files matching ${clip(String(i.pattern ?? ''), 60)}`
+      return `Finding files matching ${clip(asText(i.pattern), 60)}`
     case 'WebFetch':
       return `Fetching ${clip(urlHostPath(i.url), 60)}`
     case 'WebSearch':
-      return `Searching the web for ${clip(String(i.query ?? ''), 60)}`
+      return `Searching the web for ${clip(asText(i.query), 60)}`
     case 'NotebookEdit':
       return `Editing notebook ${clip(safeBasename(i.notebook_path))}`
     case 'BashOutput':
@@ -128,7 +148,7 @@ export function computeLabel(toolName, input) {
       // sidecar JSONL and recover which skill fired per turn —
       // the progress card path that used to surface this was retired
       // when `progressDriver` was nulled out in #1122 PR3.
-      const slug = clip(String(i.skill ?? ''), 64)
+      const slug = clip(asText(i.skill), 64)
       return slug ? `Running skill ${slug}` : null
     }
   }
@@ -141,7 +161,7 @@ export function computeLabel(toolName, input) {
       case 'mcp__switchroom-telegram__stream_reply':
         return 'Replying'
       case 'mcp__switchroom-telegram__react': {
-        const emoji = clip(String(i.emoji ?? ''), 8)
+        const emoji = clip(asText(i.emoji), 8)
         return emoji ? `Reacting ${emoji}` : 'Reacting'
       }
       case 'mcp__switchroom-telegram__get_recent_messages':
@@ -177,7 +197,7 @@ export function computeLabel(toolName, input) {
       return 'Looking through your files'
     if (server === 'notion' || server === 'claude_ai_notion') return 'Checking your notes'
     if (server === 'perplexity') {
-      const q = clip(String(i.query ?? i.description ?? ''), 60).trim()
+      const q = clip(asText(i.query) || asText(i.description), 60).trim()
       return q ? `Searching the web for ${q}` : 'Searching the web'
     }
     if (server === 'webkite') {
@@ -186,9 +206,9 @@ export function computeLabel(toolName, input) {
     }
     // Unknown MCP server: prefer a model-authored field, else humanized tool.
     const desc =
-      clip(String(i.description ?? ''), 60).trim() ||
-      clip(String(i.query ?? ''), 50).trim() ||
-      clip(String(i.title ?? ''), 50).trim()
+      clip(asText(i.description), 60).trim() ||
+      clip(asText(i.query), 50).trim() ||
+      clip(asText(i.title), 50).trim()
     if (desc) return desc
     return `Using ${tool.replace(/[-_]+/g, ' ')}`
   }

package/telegram-plugin/tests/auto-classify-mid-turn.test.ts ADDED Viewed

@@ -0,0 +1,87 @@
+import { describe, it, expect } from "vitest";
+import { autoClassifyMidTurnInbound, type AutoClassifyInput } from "../gateway/auto-classify-mid-turn.js";
+function base(over: Partial<AutoClassifyInput> = {}): AutoClassifyInput { // default fixture: no prefix, turn in flight, non-DM, same thread (3), output 2s ago; `over` replaces any field
+  return {
+    isSteerPrefix: false,
+    isQueuePrefix: false,
+    priorTurnInFlight: true,
+    isDm: false,
+    incomingThreadId: 3,
+    activeTurnThreadId: 3,
+    msSinceLastAgentOutput: 2000,
+    dmSteerWindowMs: 0, // DM auto-steer off by default
+    topicSteerWindowMs: 8000,
+    ...over,
+  };
+}
+describe("autoClassifyMidTurnInbound", () => { // each case builds its input with base({...overrides}) and pins decision / reason / sameTopic
+  it("explicit /steer prefix always wins", () => {
+    const r = autoClassifyMidTurnInbound(base({ isSteerPrefix: true, incomingThreadId: 9, activeTurnThreadId: 3 }));
+    expect(r.decision).toBe("steer");
+    expect(r.reason).toBe("steer_prefix");
+  });
+  it("explicit /queue prefix always wins", () => {
+    expect(autoClassifyMidTurnInbound(base({ isQueuePrefix: true })).decision).toBe("queue");
+  });
+  it("no turn in flight → queue (fresh turn, not our decision)", () => {
+    const r = autoClassifyMidTurnInbound(base({ priorTurnInFlight: false }));
+    expect(r.decision).toBe("queue");
+    expect(r.reason).toBe("not_mid_turn");
+  });
+  // ── Supergroup: topic is the strong signal ──
+  it("supergroup, DIFFERENT topic than the active turn → queue (cross_topic), regardless of recency", () => {
+    const r = autoClassifyMidTurnInbound(base({ incomingThreadId: 5, activeTurnThreadId: 3, msSinceLastAgentOutput: 100 }));
+    expect(r.decision).toBe("queue");
+    expect(r.reason).toBe("cross_topic");
+    expect(r.sameTopic).toBe(false);
+  });
+  it("supergroup, SAME topic + recent → steer", () => {
+    const r = autoClassifyMidTurnInbound(base({ msSinceLastAgentOutput: 3000, topicSteerWindowMs: 8000 }));
+    expect(r.decision).toBe("steer");
+    expect(r.reason).toBe("same_topic_recent");
+    expect(r.sameTopic).toBe(true);
+  });
+  it("supergroup, SAME topic but STALE (older than window) → queue", () => {
+    const r = autoClassifyMidTurnInbound(base({ msSinceLastAgentOutput: 20000, topicSteerWindowMs: 8000 }));
+    expect(r.decision).toBe("queue");
+    expect(r.reason).toBe("same_topic_stale");
+  });
+  it("supergroup, no recency recorded (null) → queue (not treated as recent)", () => {
+    const r = autoClassifyMidTurnInbound(base({ msSinceLastAgentOutput: null }));
+    expect(r.decision).toBe("queue");
+    expect(r.reason).toBe("same_topic_stale"); // a missing recency stamp reports the same reason as a stale one
+  });
+  it("topicSteerWindowMs=0 (auto-steer off) → queue, still reports sameTopic", () => {
+    const r = autoClassifyMidTurnInbound(base({ topicSteerWindowMs: 0, incomingThreadId: 3, activeTurnThreadId: 3 }));
+    expect(r.decision).toBe("queue");
+    expect(r.reason).toBe("topic_disabled");
+    expect(r.sameTopic).toBe(true);
+  });
+  it("canonical thread compare: null/undefined/0 collapse to the same no-thread bucket", () => {
+    expect(autoClassifyMidTurnInbound(base({ incomingThreadId: 0, activeTurnThreadId: null })).sameTopic).toBe(true);
+    expect(autoClassifyMidTurnInbound(base({ incomingThreadId: undefined, activeTurnThreadId: 0 })).sameTopic).toBe(true);
+    expect(autoClassifyMidTurnInbound(base({ incomingThreadId: 1, activeTurnThreadId: 0 })).sameTopic).toBe(false);
+  });
+  // ── DM: timing-only, off by default ──
+  it("DM with dmSteerWindowMs=0 (default) → queue even if recent (DM auto-steer off)", () => {
+    const r = autoClassifyMidTurnInbound(base({ isDm: true, incomingThreadId: null, activeTurnThreadId: null, msSinceLastAgentOutput: 500, dmSteerWindowMs: 0 }));
+    expect(r.decision).toBe("queue");
+    expect(r.reason).toBe("dm_disabled");
+  });
+  it("DM with dmSteerWindowMs>0 + recent → steer; stale → queue", () => {
+    expect(autoClassifyMidTurnInbound(base({ isDm: true, incomingThreadId: null, activeTurnThreadId: null, msSinceLastAgentOutput: 5000, dmSteerWindowMs: 10000 })).decision).toBe("steer");
+    expect(autoClassifyMidTurnInbound(base({ isDm: true, incomingThreadId: null, activeTurnThreadId: null, msSinceLastAgentOutput: 15000, dmSteerWindowMs: 10000 })).decision).toBe("queue");
+  });
+});

package/telegram-plugin/tests/escalation-drive.test.ts ADDED Viewed

@@ -0,0 +1,123 @@
+import { describe, it, expect } from "vitest";
+import { driveEscalation } from "../gateway/escalation-drive.js";
+import { ObligationLedger } from "../gateway/obligation-ledger.js";
+// Drives the REAL escalation step (the code obligationSweep calls) with the REAL
+// ObligationLedger and the REAL withDeadline — including a fake hanging send,
+// the exact path the total proof flagged and that mtcute / a synchronous test
+// cannot reach. This is the executable verification of the hang-wedge fix.
+function openEscalatable(L: ObligationLedger, id: string) { // opens one obligation keyed `id` (chat "-100", thread 3, openedAt 0) on ledger L
+  L.openIfAbsent({ originTurnId: id, chatId: "-100", threadId: 3, messageId: 1, text: "x", openedAt: 0 });
+}
+const MAX = 3; // passed as maxAttempts to every driveEscalation call below
+const DEADLINE = 15; // ms — short so the hang case settles fast and deterministically
+describe("driveEscalation — the obligation escalation step is bounded and always reaches a terminal", () => { // every case uses a fresh ledger, a fresh in-flight Set and a silent logger
+  it("a successful send closes the obligation and clears the in-flight flag", async () => {
+    const L = new ObligationLedger(2);
+    openEscalatable(L, "c#1");
+    const inFlight = new Set<string>();
+    await driveEscalation({
+      escId: "c#1",
+      inFlight,
+      ledger: L,
+      send: () => Promise.resolve("sent"),
+      maxAttempts: MAX,
+      deadlineMs: DEADLINE,
+      log: () => {},
+    });
+    expect(L.isOpen("c#1")).toBe(false); // closed
+    expect(inFlight.has("c#1")).toBe(false); // flag cleared
+  });
+  it("a transient failure below the cap stays OPEN and clears the flag (retried next sweep)", async () => {
+    const L = new ObligationLedger(2);
+    openEscalatable(L, "c#1");
+    const inFlight = new Set<string>();
+    await driveEscalation({
+      escId: "c#1",
+      inFlight,
+      ledger: L,
+      send: () => Promise.reject(new Error("network blip")),
+      maxAttempts: MAX,
+      deadlineMs: DEADLINE,
+      log: () => {},
+    });
+    expect(L.isOpen("c#1")).toBe(true); // still open — will retry
+    expect(inFlight.has("c#1")).toBe(false); // flag cleared, so the next sweep can re-enter
+  });
+  it("THE FIX: a send that NEVER settles still clears the flag (bounded by the deadline)", async () => {
+    const L = new ObligationLedger(2);
+    openEscalatable(L, "c#1");
+    const inFlight = new Set<string>();
+    let sendInvoked = 0;
+    const start = Date.now();
+    // A promise that never resolves/rejects — the stalled send that, pre-fix,
+    // left the in-flight flag set forever and wedged the obligation OPEN.
+    await driveEscalation({
+      escId: "c#1",
+      inFlight,
+      ledger: L,
+      send: () => {
+        sendInvoked++;
+        return new Promise(() => {});
+      },
+      maxAttempts: MAX,
+      deadlineMs: DEADLINE,
+      log: () => {},
+    });
+    expect(sendInvoked).toBe(1);
+    expect(inFlight.has("c#1")).toBe(false); // cleared despite the hang — the wedge is gone
+    expect(Date.now() - start).toBeLessThan(DEADLINE + 500); // settled at the deadline, not "never"
+  });
+  it("repeated hung sends reach a bounded terminal (close best-effort), never an infinite loop", async () => {
+    const L = new ObligationLedger(2);
+    openEscalatable(L, "c#1");
+    const inFlight = new Set<string>();
+    let sends = 0;
+    let drives = 0;
+    // Simulate the 5s sweep firing repeatedly while every send hangs.
+    while (L.isOpen("c#1") && drives < 20) { // 20 is a safety cap: a regression fails the assertions below instead of hanging the test
+      drives++;
+      const p = driveEscalation({
+        escId: "c#1",
+        inFlight,
+        ledger: L,
+        send: () => {
+          sends++;
+          return new Promise(() => {});
+        },
+        maxAttempts: MAX,
+        deadlineMs: DEADLINE,
+        log: () => {},
+      });
+      if (p) await p; // each attempt settles within the deadline
+    }
+    expect(L.isOpen("c#1")).toBe(false); // reached a terminal (closed best-effort)
+    expect(inFlight.has("c#1")).toBe(false);
+    expect(sends).toBe(MAX); // exactly maxAttempts sends, then close — bounded
+    expect(drives).toBeLessThanOrEqual(MAX + 1);
+  });
+  it("the in-flight guard prevents a concurrent second send for the same obligation", async () => {
+    const L = new ObligationLedger(2);
+    openEscalatable(L, "c#1");
+    const inFlight = new Set<string>();
+    let sends = 0;
+    const hang = () => {
+      sends++;
+      return new Promise<void>(() => {});
+    };
+    const p1 = driveEscalation({ escId: "c#1", inFlight, ledger: L, send: hang, maxAttempts: MAX, deadlineMs: 60, log: () => {} });
+    // Second call while the first is still awaiting → must be a no-op.
+    const p2 = driveEscalation({ escId: "c#1", inFlight, ledger: L, send: hang, maxAttempts: MAX, deadlineMs: 60, log: () => {} });
+    expect(p2).toBeUndefined(); // guarded
+    expect(sends).toBe(1); // only one send fired
+    expect(L.list()[0].escalateAttempts).toBe(1); // only one attempt recorded
+    await p1; // let the first settle so we don't leak a pending timer
+  });
+});