npm - switchroom - Versions diffs - 0.14.61 → 0.14.63 - Mend

switchroom 0.14.61 → 0.14.63

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/dist/cli/switchroom.js +73 -62
package/package.json +1 -1
package/telegram-plugin/dist/gateway/gateway.js +2617 -2081
package/telegram-plugin/gateway/auth-broker-client.ts +18 -8
package/telegram-plugin/gateway/auto-classify-mid-turn.ts +119 -0
package/telegram-plugin/gateway/escalation-drive.ts +79 -0
package/telegram-plugin/gateway/gateway.ts +448 -43
package/telegram-plugin/gateway/microsoft-connect-flow.ts +226 -0
package/telegram-plugin/gateway/obligation-ledger.ts +45 -3
package/telegram-plugin/gateway/with-deadline.ts +43 -0
package/telegram-plugin/hooks/tool-label-pretool.mjs +32 -12
package/telegram-plugin/tests/auto-classify-mid-turn.test.ts +87 -0
package/telegram-plugin/tests/escalation-drive.test.ts +123 -0
package/telegram-plugin/tests/microsoft-connect-flow.test.ts +185 -0
package/telegram-plugin/tests/obligation-determinism.test.ts +85 -25
package/telegram-plugin/tests/obligation-ledger.test.ts +92 -0
package/telegram-plugin/tests/with-deadline.test.ts +61 -0

package/telegram-plugin/gateway/microsoft-connect-flow.ts ADDED Viewed

@@ -0,0 +1,226 @@
+/**
+ * Telegram-native Microsoft connect — device-code flow (RFC #1873 /
+ * out-of-box, Phase 2).
+ *
+ * The headline "connect from your phone" path: a user runs
+ * `/connect microsoft`, the gateway shows a card with a Microsoft
+ * sign-in link + a short code, the user approves on their phone, and the
+ * gateway registers the resulting account with the auth-broker — no host
+ * shell, no Azure portal (the shipped default app is used unless the
+ * operator BYO'd one).
+ *
+ * This module is the framework-agnostic core: it talks to Microsoft's
+ * device-code endpoints (RFC 8628, engine in `src/microsoft/oauth.ts`)
+ * and the auth-broker, and returns plain data. The gateway owns the
+ * Telegram surface (card rendering, edits, callbacks). All network +
+ * broker boundaries are injectable so the flow is testable without
+ * hitting Microsoft or a live broker, and it contains NO raw bot.api
+ * calls (the bot-api-wrapping lint trap lives only in the gateway).
+ *
+ * Mirrors `auth-add-flow.ts` (the Anthropic `/auth add` template) but
+ * needs no child subprocess and no pasted code: device-code consent
+ * happens entirely on Microsoft's domain, so nothing secret is ever
+ * pasted into chat (strictly better than paste-back — no redaction
+ * needed). Personal Microsoft accounts (outlook.com/hotmail) are the
+ * clean case at `/common`; a work/school account that fails device-code
+ * at `/common` surfaces a clear "use the host CLI" error (the documented
+ * "personal-first, work best-effort" boundary).
+ */
+import {
+  requestDeviceCode as realRequestDeviceCode,
+  pollDeviceToken as realPollDeviceToken,
+  type MicrosoftDeviceCodeResponse,
+  type MicrosoftOAuthClientConfig,
+} from '../../src/microsoft/oauth.js'
+import { selectMicrosoftScopes } from '../../src/microsoft/scopes.js'
+import { buildMicrosoftCredentials } from '../../src/microsoft/credentials.js'
+import { resolveMicrosoftClientId } from '../../src/auth/default-oauth-clients.js'
+import { isVaultReference } from '../../src/vault/resolver.js'
+import { addAccountViaBroker } from './auth-broker-client.js'
+import type { MicrosoftAddAccountCredentials } from '../../src/auth/broker/client.js'
+/** A connect flow in flight, keyed by `chatKey(chatId, threadId)`. */
+export interface PendingMicrosoftConnectFlow {
+  /** Telegram user id that started the flow (consent owner; Phase 3). */
+  initiatedBy: string
+  /** Card we posted, so the poll loop can edit it on completion. */
+  cardChatId: number | string
+  /** Message id of the posted card (paired with `cardChatId`). */
+  cardMessageId: number
+  /** Device-code response for this flow; `runMicrosoftConnectPoll` hands it to the token poll. */
+  device: MicrosoftDeviceCodeResponse
+  /** OAuth client id; `runMicrosoftConnectPoll` uses it for the poll config and the built credentials. */
+  clientId: string
+  /** Scopes for the poll config; also joined with spaces as the fallback scope when building credentials. */
+  scopes: string[]
+  /** NOTE(review): presumably the ms timestamp at which the flow began — not read in this module; confirm against the gateway. */
+  startedAt: number
+  /** Flipped by cancel so the in-flight poll bails without writing. */
+  cancelled: boolean
+}
+/**
+ * Module-level registry of connect flows that are currently pending.
+ * Nothing in this module reads or writes it: per the note on
+ * `startMicrosoftConnect`, the gateway stores the entry after it posts
+ * the card. Keys are presumably `chatKey(chatId, threadId)` strings (see
+ * the interface comment) — verify against the gateway.
+ */
+export const pendingMicrosoftConnectFlows = new Map<
+  string,
+  PendingMicrosoftConnectFlow
+>()
+/**
+ * Optional inputs and injectable boundaries for the connect flow. Every
+ * member may be omitted; the function members then fall back to the real
+ * implementations imported by this module, and the clock to `Date.now`.
+ */
+export interface MicrosoftConnectDeps {
+  /** `config.microsoft_workspace?.microsoft_client_id` (may be a vault: ref). */
+  configClientId?: string
+  /** Passed to `selectMicrosoftScopes`; treated as `false` when omitted. */
+  orgMode?: boolean
+  /** Replaces the real device-code request in `startMicrosoftConnect`. */
+  requestDeviceCode?: (
+    cfg: MicrosoftOAuthClientConfig,
+  ) => Promise<MicrosoftDeviceCodeResponse>
+  /** Replaces the real token poll in `runMicrosoftConnectPoll`. */
+  pollDeviceToken?: typeof realPollDeviceToken
+  /** Replaces the broker registration step; defaults to `defaultAddAccount`. */
+  addAccount?: (
+    label: string,
+    credentials: MicrosoftAddAccountCredentials,
+    opts: { replace?: boolean; provider: 'microsoft' },
+  ) => Promise<{ label: string; expiresAt?: number }>
+  /** Clock handed to the token poll and the credential builder; defaults to `Date.now`. */
+  now?: () => number
+}
+/** Outcome of `startMicrosoftConnect`, discriminated on `kind`. */
+export type StartResult =
+  | {
+      // A device code was issued. Carries the device response plus the
+      // client id and scopes that were used to request it.
+      kind: 'started'
+      device: MicrosoftDeviceCodeResponse
+      clientId: string
+      scopes: string[]
+      /** 'default' = shipped app; 'config'/'env' = BYO. */
+      source: 'env' | 'config' | 'default'
+    }
+  | {
+      // The operator BYO'd a Microsoft client via a vault: reference,
+      // which the gateway can't resolve in-process — host CLI only.
+      kind: 'byo-vault'
+      ref: string
+    }
+  // Requesting the device code threw; `message` carries the error text.
+  | { kind: 'error'; message: string }
+/**
+ * Request a device code and return the data the gateway needs to render
+ * the connect card. Does NOT mutate the pending map — the gateway stores
+ * the pending entry (with the card message id) after it posts the card.
+ *
+ * @param deps Optional config client id, org-mode flag and an injectable
+ *   `requestDeviceCode`; anything omitted falls back to the real
+ *   implementation.
+ * @returns `started` with the device response, client id, scopes and
+ *   client-id source; `byo-vault` when the resolved client id is a vault
+ *   reference; `error` when the device-code request throws.
+ */
+export async function startMicrosoftConnect(
+  deps: MicrosoftConnectDeps = {},
+): Promise<StartResult> {
+  const resolved = resolveMicrosoftClientId(deps.configClientId)
+  // A vaulted BYO client_id can't be resolved from the gateway process
+  // (the gateway has no passphrase / vault-broker read path here). The
+  // shipped default and literal config values are fine.
+  if (isVaultReference(resolved.clientId)) {
+    return { kind: 'byo-vault', ref: resolved.clientId }
+  }
+  const scopes = selectMicrosoftScopes(deps.orgMode ?? false)
+  const cfg: MicrosoftOAuthClientConfig = {
+    client_id: resolved.clientId,
+    scopes,
+  }
+  try {
+    const device = await (deps.requestDeviceCode ?? realRequestDeviceCode)(cfg)
+    return {
+      kind: 'started',
+      device,
+      clientId: resolved.clientId,
+      scopes,
+      source: resolved.source,
+    }
+  } catch (err) {
+    // A rejection is not guaranteed to be an Error instance. Narrow before
+    // reading `.message`, so a thrown string still yields a usable message
+    // and a thrown null/undefined cannot raise a TypeError inside this
+    // handler (which would reject instead of returning `kind: 'error'`).
+    const message = err instanceof Error ? err.message : String(err)
+    return { kind: 'error', message }
+  }
+}
+/** Outcome of `runMicrosoftConnectPoll`, discriminated on `kind`. */
+export type PollResult =
+  | {
+      // The account was registered with the broker. `account` is the
+      // resolved email; `accountType` and `expiresAt` are copied from the
+      // built credentials.
+      kind: 'connected'
+      account: string
+      accountType: 'personal' | 'work'
+      expiresAt: number
+    }
+  // `flow.cancelled` was set by the time the token poll returned.
+  | { kind: 'cancelled' }
+  // Tokens arrived without a refresh token; nothing was registered.
+  | { kind: 'no-refresh-token' }
+  // The token poll threw, no account identity was resolved, or the broker call threw.
+  | { kind: 'failed'; message: string }
+/**
+ * Poll Microsoft for consent completion, then register the account with
+ * the broker. Blocks (with the device-code `interval`) up to the
+ * device's `expires_in`. Returns a discriminated result the gateway
+ * turns into a card edit. Reads `flow.cancelled` after the (potentially
+ * long) poll so a `/connect cancel` between consent and write is
+ * honored.
+ *
+ * @param flow The pending flow's device response, client id, scopes and
+ *   cancel flag. `cancelled` is read once, after the token poll returns.
+ * @param deps Injectable `pollDeviceToken`, `addAccount` and `now`;
+ *   anything omitted falls back to the real implementation / `Date.now`.
+ * @returns `connected` on success; `cancelled`, `no-refresh-token` or
+ *   `failed` otherwise. Failures of the poll or the broker call are
+ *   reported as `failed` rather than thrown.
+ */
+export async function runMicrosoftConnectPoll(
+  flow: Pick<
+    PendingMicrosoftConnectFlow,
+    'device' | 'clientId' | 'scopes' | 'cancelled'
+  >,
+  deps: MicrosoftConnectDeps = {},
+): Promise<PollResult> {
+  const now = deps.now ?? Date.now
+  // A rejection is not guaranteed to be an Error instance. Narrow before
+  // reading `.message`, so a thrown string still yields a usable message
+  // and a thrown null/undefined cannot raise a TypeError inside a catch
+  // (which would reject instead of returning `kind: 'failed'`).
+  const describeError = (err: unknown): string =>
+    err instanceof Error ? err.message : String(err)
+  const cfg: MicrosoftOAuthClientConfig = {
+    client_id: flow.clientId,
+    scopes: flow.scopes,
+  }
+  let tokens
+  try {
+    tokens = await (deps.pollDeviceToken ?? realPollDeviceToken)(
+      cfg,
+      flow.device,
+      { now },
+    )
+  } catch (err) {
+    return { kind: 'failed', message: describeError(err) }
+  }
+  // Checked only after the poll returns: a cancel issued while we were
+  // waiting must stop us before anything is written to the broker.
+  if (flow.cancelled) return { kind: 'cancelled' }
+  const built = buildMicrosoftCredentials({
+    tokens,
+    clientId: flow.clientId,
+    accountEmail: '', // device-code learns the email from the id_token
+    fallbackScope: flow.scopes.join(' '),
+    now,
+  })
+  // offline_access is requested, so a refresh token is expected; without
+  // one the account dies at the first access-token expiry — fail loud
+  // rather than register an un-refreshable account.
+  if (!built.credentials.microsoftOauth.refreshToken) {
+    return { kind: 'no-refresh-token' }
+  }
+  const account = built.resolvedEmail
+  if (!account) {
+    return {
+      kind: 'failed',
+      message: 'Microsoft did not return an account identity (no id_token).',
+    }
+  }
+  const addAccount = deps.addAccount ?? defaultAddAccount
+  try {
+    await addAccount(account, built.credentials as MicrosoftAddAccountCredentials, {
+      provider: 'microsoft',
+      // replace:true so reconnecting an already-linked account just
+      // refreshes its tokens rather than erroring.
+      replace: true,
+    })
+  } catch (err) {
+    return { kind: 'failed', message: describeError(err) }
+  }
+  return {
+    kind: 'connected',
+    account,
+    accountType: built.credentials.microsoftOauth.accountType,
+    expiresAt: built.credentials.microsoftOauth.expiresAt,
+  }
+}
+/**
+ * Default account-registration step: forwards `label`, `credentials` and
+ * `opts` unchanged to `addAccountViaBroker` and returns its promise.
+ * `runMicrosoftConnectPoll` uses this when `deps.addAccount` is not
+ * supplied.
+ */
+function defaultAddAccount(
+  label: string,
+  credentials: MicrosoftAddAccountCredentials,
+  opts: { replace?: boolean; provider: 'microsoft' },
+): Promise<{ label: string; expiresAt?: number }> {
+  return addAccountViaBroker(label, credentials, opts)
+}

package/telegram-plugin/gateway/obligation-ledger.ts CHANGED Viewed

@@ -44,6 +44,17 @@ export interface Obligation {
    *  can't loop forever — and, because it is part of the durable snapshot,
    *  can't become a boot-surviving poison record either. */
   escalateAttempts?: number
+  /** Wall-clock ms the most recent turn handling THIS obligation ended (stamped
+   *  at turn_end via noteTurnEnded). Drives the escalate-grace window: a slow /
+   *  background-worker / multi-segment turn ends (the in-flight gate clears)
+   *  before its trailing answer's reply lands, and the sweep would otherwise
+   *  re-present/escalate in that gap — a false "I may have missed this" on a
+   *  message that's actively being answered (fuzz-confirmed on v0.14.62). The
+   *  decision waits `graceMs` after this stamp before acting, so the trailing
+   *  answer's close has a beat to fire. Bounded: each re-present is itself a turn
+   *  that re-stamps this once, and representCount is capped, so the ladder still
+   *  terminates. Durable (part of the snapshot) so the grace survives restart. */
+  lastTurnEndedAt?: number
 }
 /** What the gateway should do for the oldest open obligation at an idle boundary. */
@@ -162,19 +173,50 @@ export class ObligationLedger {
    * does not mutate. The caller performs the side effect then calls
    * markRepresented / close accordingly.
    *
-   *  - 'none'      → no open obligation; the agent may idle.
+   *  - 'none'      → no open obligation (or all open ones are within their
+   *                  escalate-grace window); the agent may idle.
    *  - 'represent' → re-present `obligation` as a fresh must-answer turn.
    *  - 'escalate'  → it has already been re-presented maxRepresents times; send
    *                  ONE operator-visible "did I miss this?" and close it
    *                  (caller calls close) rather than loop forever.
+   *
+   * GRACE WINDOW (opts.graceMs > 0): an obligation whose handling turn ended less
+   * than `graceMs` ago is SKIPPED — its trailing answer may still be in flight
+   * (a worker / long-think / multi-segment turn ends the in-flight gate before
+   * the reply lands). We pick the oldest obligation that is OUT of grace, so a
+   * genuinely-stale one is still acted on while a freshly-ended one waits. Pure
+   * (clock injected via opts.now, mirroring the builder convention). With no opts
+   * (or graceMs<=0) this is the pre-grace behaviour exactly.
    */
-  decideAtIdle(): LedgerDecision {
-    const o = this.oldest()
+  decideAtIdle(opts?: { now: number; graceMs: number }): LedgerDecision {
+    const o =
+      opts != null && opts.graceMs > 0 ? this.oldestEligible(opts.now, opts.graceMs) : this.oldest()
     if (o === undefined) return { action: 'none' }
     if (o.representCount >= this.maxRepresents) return { action: 'escalate', obligation: o }
     return { action: 'represent', obligation: o }
   }
+  /** Scan the open obligations and return the one with the smallest `openedAt`
+   *  among those not currently inside their grace window. An obligation is in
+   *  grace when it carries a `lastTurnEndedAt` stamp less than `graceMs` before
+   *  `now`; one with no stamp is never filtered out. Returns undefined when
+   *  nothing qualifies. On equal `openedAt` the first one iterated is kept. */
+  private oldestEligible(now: number, graceMs: number): Obligation | undefined {
+    let oldest: Obligation | undefined
+    for (const candidate of this.open.values()) {
+      const endedAt = candidate.lastTurnEndedAt
+      const inGrace = endedAt != null && now - endedAt < graceMs
+      if (inGrace) continue
+      // Strict less-than: an equal openedAt never displaces the current pick.
+      if (oldest !== undefined && !(candidate.openedAt < oldest.openedAt)) continue
+      oldest = candidate
+    }
+    return oldest
+  }
+  /** Record `ts` as the end time of the latest turn that handled `originTurnId`
+   *  (the value the escalate-grace check compares against), then persist. Does
+   *  nothing — and does not persist — when no open obligation has that id. */
+  noteTurnEnded(originTurnId: string, ts: number): void {
+    const target = this.open.get(originTurnId)
+    if (target !== undefined) {
+      target.lastTurnEndedAt = ts
+      this.persist()
+    }
+  }
   /**
    * Decide which obligation a substantive reply discharges — DETERMINISTICALLY,
    * holding for any model behavior:

package/telegram-plugin/gateway/with-deadline.ts ADDED Viewed

@@ -0,0 +1,43 @@
+/**
+ * withDeadline — bound a promise so the chain off it ALWAYS settles within `ms`.
+ *
+ * Why this exists (the obligation-ledger determinism hole): the escalation send
+ * in `obligationSweep` is fire-and-forget and clears its in-flight guard
+ * (`obligationEscalateInFlight`) only in a `.finally` — which runs only if the
+ * awaited promise SETTLES. grammy's `bot.api` has no request timeout
+ * (`new Bot(TOKEN)`, no `client.timeoutSeconds`) and `retryApiCall`'s `await
+ * fn()` does not bound a hang (its retry cap applies to rejections, not to a
+ * promise that never resolves). So a stalled send (half-open TCP, unresponsive
+ * Telegram) would never settle → `.finally` never fires → the in-flight id is
+ * leaked forever → every later sweep early-returns at the guard → the
+ * obligation is stuck OPEN: never re-presented, never escalated, never closed.
+ * That is a silent loss of the "every inbound is answered-or-escalated"
+ * guarantee — the one liveness hole a total state-machine proof surfaced (a
+ * sampling test cannot, because its model never includes "send never settles").
+ *
+ * Racing the send against a deadline makes the wait bounded BY CONSTRUCTION:
+ * the returned promise settles in ≤ `ms`, so the caller's `.then/.catch/.finally`
+ * always run and the in-flight flag always clears. A hang becomes a bounded
+ * rejection that feeds the already-bounded escalate ladder
+ * (`escalateAttempts → OBLIGATION_ESCALATE_MAX`) to a terminal. The losing
+ * (still-pending) promise is given a no-op `.catch` so its eventual rejection
+ * is not an unhandled rejection, and the timer is cleared + unref'd so it
+ * neither leaks nor keeps the event loop alive.
+ *
+ * Pure (no gateway/Telegram coupling) ⇒ unit-testable; see
+ * tests/with-deadline.test.ts.
+ */
+export function withDeadline<T>(p: Promise<T>, ms: number, timeoutMessage: string): Promise<T> {
+  // Attach a no-op rejection handler up front: if the deadline wins and `p`
+  // rejects afterwards, that late rejection must not surface as unhandled.
+  p.catch(() => {})
+  let handle: ReturnType<typeof setTimeout> | undefined
+  // Cancels the pending timer once the race has settled either way.
+  const disarm = (): void => {
+    if (handle !== undefined) clearTimeout(handle)
+  }
+  const expiry = new Promise<never>((_fulfil, fail) => {
+    handle = setTimeout(() => fail(new Error(timeoutMessage)), ms)
+    // unref (where the runtime provides it) so a pending deadline alone
+    // cannot hold the process open.
+    ;(handle as unknown as { unref?: () => void }).unref?.()
+  })
+  const bounded = Promise.race([p, expiry])
+  return bounded.finally(disarm) as Promise<T>
+}

package/telegram-plugin/hooks/tool-label-pretool.mjs CHANGED Viewed

@@ -35,6 +35,26 @@ function readStdin() {
   }
 }
+/**
+ * Coerce a tool-input field to display text WITHOUT the `[object Object]`
+ * trap. Only primitives carry a meaningful label: strings pass through,
+ * numbers/booleans stringify cleanly. Objects and arrays return '' so the
+ * caller falls through to its next fallback (a sibling field, or the
+ * humanized tool name) instead of surfacing literal "[object Object]".
+ *
+ * This guards the MCP-tool path in particular: an operator-configured
+ * server (e.g. Brevo CRM) may pass a filter/query OBJECT in `query` /
+ * `description` / `title`, and the old `String(i.query ?? '')` coercion
+ * rendered that as "[object Object]" on the live activity feed. The
+ * renderer's own `clip()` already rejects non-strings; this mirrors that
+ * contract at the hook so the bad value never reaches the sidecar JSONL.
+ */
+function asText(v) {
+  if (typeof v === 'string') return v
+  if (typeof v === 'number' || typeof v === 'boolean') return String(v)
+  return ''
+}
 /**
  * One-line, length-bounded escape of a value for inclusion in a label.
  * Newlines collapsed, very long strings truncated with an ellipsis.
@@ -82,10 +102,10 @@ export function computeLabel(toolName, input) {
   // for Bash/Task, matching the gateway's describeToolUse rendering.
   switch (toolName) {
     case 'Bash':
-      return clip(String(i.description ?? ''), 70).trim() || 'Running a command'
+      return clip(asText(i.description), 70).trim() || 'Running a command'
     case 'Task':
     case 'Agent': {
-      const d = clip(String(i.description ?? ''), 60).trim()
+      const d = clip(asText(i.description), 60).trim()
       return d ? `Delegating: ${d}` : 'Delegating to a sub-agent'
     }
     case 'TodoWrite':
@@ -103,16 +123,16 @@ export function computeLabel(toolName, input) {
     case 'Write':
       return `Writing ${clip(safeBasename(i.file_path))}`.trim()
     case 'Grep': {
-      const path = i.path ? clip(String(i.path), 40) : '.'
-      const pat = clip(String(i.pattern ?? ''), 40)
+      const path = i.path ? clip(asText(i.path), 40) : '.'
+      const pat = clip(asText(i.pattern), 40)
       return `Searching ${path} for ${pat}`
     }
     case 'Glob':
-      return `Finding files matching ${clip(String(i.pattern ?? ''), 60)}`
+      return `Finding files matching ${clip(asText(i.pattern), 60)}`
     case 'WebFetch':
       return `Fetching ${clip(urlHostPath(i.url), 60)}`
     case 'WebSearch':
-      return `Searching the web for ${clip(String(i.query ?? ''), 60)}`
+      return `Searching the web for ${clip(asText(i.query), 60)}`
     case 'NotebookEdit':
       return `Editing notebook ${clip(safeBasename(i.notebook_path))}`
     case 'BashOutput':
@@ -128,7 +148,7 @@ export function computeLabel(toolName, input) {
       // sidecar JSONL and recover which skill fired per turn —
       // the progress card path that used to surface this was retired
       // when `progressDriver` was nulled out in #1122 PR3.
-      const slug = clip(String(i.skill ?? ''), 64)
+      const slug = clip(asText(i.skill), 64)
       return slug ? `Running skill ${slug}` : null
     }
   }
@@ -141,7 +161,7 @@ export function computeLabel(toolName, input) {
       case 'mcp__switchroom-telegram__stream_reply':
         return 'Replying'
       case 'mcp__switchroom-telegram__react': {
-        const emoji = clip(String(i.emoji ?? ''), 8)
+        const emoji = clip(asText(i.emoji), 8)
         return emoji ? `Reacting ${emoji}` : 'Reacting'
       }
       case 'mcp__switchroom-telegram__get_recent_messages':
@@ -177,7 +197,7 @@ export function computeLabel(toolName, input) {
       return 'Looking through your files'
     if (server === 'notion' || server === 'claude_ai_notion') return 'Checking your notes'
     if (server === 'perplexity') {
-      const q = clip(String(i.query ?? i.description ?? ''), 60).trim()
+      const q = clip(asText(i.query) || asText(i.description), 60).trim()
       return q ? `Searching the web for ${q}` : 'Searching the web'
     }
     if (server === 'webkite') {
@@ -186,9 +206,9 @@ export function computeLabel(toolName, input) {
     }
     // Unknown MCP server: prefer a model-authored field, else humanized tool.
     const desc =
-      clip(String(i.description ?? ''), 60).trim() ||
-      clip(String(i.query ?? ''), 50).trim() ||
-      clip(String(i.title ?? ''), 50).trim()
+      clip(asText(i.description), 60).trim() ||
+      clip(asText(i.query), 50).trim() ||
+      clip(asText(i.title), 50).trim()
     if (desc) return desc
     return `Using ${tool.replace(/[-_]+/g, ' ')}`
   }

package/telegram-plugin/tests/auto-classify-mid-turn.test.ts ADDED Viewed

@@ -0,0 +1,87 @@
+import { describe, it, expect } from "vitest";
+import { autoClassifyMidTurnInbound, type AutoClassifyInput } from "../gateway/auto-classify-mid-turn.js";
+/**
+ * Build a classifier input for a test case: a fixed baseline (turn in
+ * flight, non-DM, incoming and active thread both 3, last agent output
+ * 2000 ms ago, DM window 0, topic window 8000) with any fields in `over`
+ * replacing the baseline values.
+ */
+function base(over: Partial<AutoClassifyInput> = {}): AutoClassifyInput {
+  const defaults: AutoClassifyInput = {
+    isSteerPrefix: false,
+    isQueuePrefix: false,
+    priorTurnInFlight: true,
+    isDm: false,
+    incomingThreadId: 3,
+    activeTurnThreadId: 3,
+    msSinceLastAgentOutput: 2000,
+    dmSteerWindowMs: 0, // DM auto-steer off by default
+    topicSteerWindowMs: 8000,
+  };
+  return { ...defaults, ...over };
+}
+// Suite for autoClassifyMidTurnInbound. Each case overrides a few fields of
+// `base()` and checks the returned `decision`, `reason` and/or `sameTopic`.
+describe("autoClassifyMidTurnInbound", () => {
+  it("explicit /steer prefix always wins", () => {
+    const r = autoClassifyMidTurnInbound(base({ isSteerPrefix: true, incomingThreadId: 9, activeTurnThreadId: 3 }));
+    expect(r.decision).toBe("steer");
+    expect(r.reason).toBe("steer_prefix");
+  });
+  it("explicit /queue prefix always wins", () => {
+    expect(autoClassifyMidTurnInbound(base({ isQueuePrefix: true })).decision).toBe("queue");
+  });
+  it("no turn in flight → queue (fresh turn, not our decision)", () => {
+    const r = autoClassifyMidTurnInbound(base({ priorTurnInFlight: false }));
+    expect(r.decision).toBe("queue");
+    expect(r.reason).toBe("not_mid_turn");
+  });
+  // ── Supergroup: topic is the strong signal ──
+  it("supergroup, DIFFERENT topic than the active turn → queue (cross_topic), regardless of recency", () => {
+    const r = autoClassifyMidTurnInbound(base({ incomingThreadId: 5, activeTurnThreadId: 3, msSinceLastAgentOutput: 100 }));
+    expect(r.decision).toBe("queue");
+    expect(r.reason).toBe("cross_topic");
+    expect(r.sameTopic).toBe(false);
+  });
+  it("supergroup, SAME topic + recent → steer", () => {
+    const r = autoClassifyMidTurnInbound(base({ msSinceLastAgentOutput: 3000, topicSteerWindowMs: 8000 }));
+    expect(r.decision).toBe("steer");
+    expect(r.reason).toBe("same_topic_recent");
+    expect(r.sameTopic).toBe(true);
+  });
+  it("supergroup, SAME topic but STALE (older than window) → queue", () => {
+    const r = autoClassifyMidTurnInbound(base({ msSinceLastAgentOutput: 20000, topicSteerWindowMs: 8000 }));
+    expect(r.decision).toBe("queue");
+    expect(r.reason).toBe("same_topic_stale");
+  });
+  it("supergroup, no recency recorded (null) → queue (not treated as recent)", () => {
+    const r = autoClassifyMidTurnInbound(base({ msSinceLastAgentOutput: null }));
+    expect(r.decision).toBe("queue");
+    expect(r.reason).toBe("same_topic_stale");
+  });
+  it("topicSteerWindowMs=0 (auto-steer off) → queue, still reports sameTopic", () => {
+    const r = autoClassifyMidTurnInbound(base({ topicSteerWindowMs: 0, incomingThreadId: 3, activeTurnThreadId: 3 }));
+    expect(r.decision).toBe("queue");
+    expect(r.reason).toBe("topic_disabled");
+    expect(r.sameTopic).toBe(true);
+  });
+  it("canonical thread compare: null/undefined/0 collapse to the same no-thread bucket", () => {
+    expect(autoClassifyMidTurnInbound(base({ incomingThreadId: 0, activeTurnThreadId: null })).sameTopic).toBe(true);
+    expect(autoClassifyMidTurnInbound(base({ incomingThreadId: undefined, activeTurnThreadId: 0 })).sameTopic).toBe(true);
+    expect(autoClassifyMidTurnInbound(base({ incomingThreadId: 1, activeTurnThreadId: 0 })).sameTopic).toBe(false);
+  });
+  // ── DM: timing-only, off by default ──
+  it("DM with dmSteerWindowMs=0 (default) → queue even if recent (DM auto-steer off)", () => {
+    const r = autoClassifyMidTurnInbound(base({ isDm: true, incomingThreadId: null, activeTurnThreadId: null, msSinceLastAgentOutput: 500, dmSteerWindowMs: 0 }));
+    expect(r.decision).toBe("queue");
+    expect(r.reason).toBe("dm_disabled");
+  });
+  it("DM with dmSteerWindowMs>0 + recent → steer; stale → queue", () => {
+    // Same 10000 ms window in both calls; only the elapsed time differs (5000 vs 15000).
+    expect(autoClassifyMidTurnInbound(base({ isDm: true, incomingThreadId: null, activeTurnThreadId: null, msSinceLastAgentOutput: 5000, dmSteerWindowMs: 10000 })).decision).toBe("steer");
+    expect(autoClassifyMidTurnInbound(base({ isDm: true, incomingThreadId: null, activeTurnThreadId: null, msSinceLastAgentOutput: 15000, dmSteerWindowMs: 10000 })).decision).toBe("queue");
+  });
+});

package/telegram-plugin/tests/escalation-drive.test.ts ADDED Viewed

@@ -0,0 +1,123 @@
+import { describe, it, expect } from "vitest";
+import { driveEscalation } from "../gateway/escalation-drive.js";
+import { ObligationLedger } from "../gateway/obligation-ledger.js";
+// Drives the REAL escalation step (the code obligationSweep calls) with the REAL
+// ObligationLedger and the REAL withDeadline — including a fake hanging send,
+// the exact path the total proof flagged and that mtcute / a synchronous test
+// cannot reach. This is the executable verification of the hang-wedge fix.
+/** Open a fixed fixture obligation on ledger `L`, using `id` as its `originTurnId`. */
+function openEscalatable(L: ObligationLedger, id: string) {
+  const fixture = {
+    originTurnId: id,
+    chatId: "-100",
+    threadId: 3,
+    messageId: 1,
+    text: "x",
+    openedAt: 0,
+  };
+  L.openIfAbsent(fixture);
+}
+// Passed as `maxAttempts` to every driveEscalation call in this file.
+const MAX = 3;
+// Passed as `deadlineMs` by every case except the in-flight-guard case, which uses 60.
+const DEADLINE = 15; // ms — short so the hang case settles fast and deterministically
+// Each case builds a fresh ObligationLedger with one open obligation ("c#1")
+// and an empty `inFlight` set, then calls driveEscalation with a different
+// `send` behaviour (resolve, reject, or never settle) and checks the ledger
+// and the in-flight set afterwards.
+describe("driveEscalation — the obligation escalation step is bounded and always reaches a terminal", () => {
+  it("a successful send closes the obligation and clears the in-flight flag", async () => {
+    const L = new ObligationLedger(2);
+    openEscalatable(L, "c#1");
+    const inFlight = new Set<string>();
+    await driveEscalation({
+      escId: "c#1",
+      inFlight,
+      ledger: L,
+      send: () => Promise.resolve("sent"),
+      maxAttempts: MAX,
+      deadlineMs: DEADLINE,
+      log: () => {},
+    });
+    expect(L.isOpen("c#1")).toBe(false); // closed
+    expect(inFlight.has("c#1")).toBe(false); // flag cleared
+  });
+  it("a transient failure below the cap stays OPEN and clears the flag (retried next sweep)", async () => {
+    const L = new ObligationLedger(2);
+    openEscalatable(L, "c#1");
+    const inFlight = new Set<string>();
+    await driveEscalation({
+      escId: "c#1",
+      inFlight,
+      ledger: L,
+      send: () => Promise.reject(new Error("network blip")),
+      maxAttempts: MAX,
+      deadlineMs: DEADLINE,
+      log: () => {},
+    });
+    expect(L.isOpen("c#1")).toBe(true); // still open — will retry
+    expect(inFlight.has("c#1")).toBe(false); // flag cleared, so the next sweep can re-enter
+  });
+  it("THE FIX: a send that NEVER settles still clears the flag (bounded by the deadline)", async () => {
+    const L = new ObligationLedger(2);
+    openEscalatable(L, "c#1");
+    const inFlight = new Set<string>();
+    let sendInvoked = 0;
+    const start = Date.now();
+    // A promise that never resolves/rejects — the stalled send that, pre-fix,
+    // left the in-flight flag set forever and wedged the obligation OPEN.
+    await driveEscalation({
+      escId: "c#1",
+      inFlight,
+      ledger: L,
+      send: () => {
+        sendInvoked++;
+        return new Promise(() => {});
+      },
+      maxAttempts: MAX,
+      deadlineMs: DEADLINE,
+      log: () => {},
+    });
+    expect(sendInvoked).toBe(1);
+    expect(inFlight.has("c#1")).toBe(false); // cleared despite the hang — the wedge is gone
+    expect(Date.now() - start).toBeLessThan(DEADLINE + 500); // settled at the deadline, not "never"
+  });
+  it("repeated hung sends reach a bounded terminal (close best-effort), never an infinite loop", async () => {
+    const L = new ObligationLedger(2);
+    openEscalatable(L, "c#1");
+    const inFlight = new Set<string>();
+    let sends = 0;
+    let drives = 0;
+    // Simulate the 5s sweep firing repeatedly while every send hangs.
+    // The `drives < 20` bound is a safety cap so a regression cannot spin here forever.
+    while (L.isOpen("c#1") && drives < 20) {
+      drives++;
+      const p = driveEscalation({
+        escId: "c#1",
+        inFlight,
+        ledger: L,
+        send: () => {
+          sends++;
+          return new Promise(() => {});
+        },
+        maxAttempts: MAX,
+        deadlineMs: DEADLINE,
+        log: () => {},
+      });
+      if (p) await p; // each attempt settles within the deadline
+    }
+    expect(L.isOpen("c#1")).toBe(false); // reached a terminal (closed best-effort)
+    expect(inFlight.has("c#1")).toBe(false);
+    expect(sends).toBe(MAX); // exactly maxAttempts sends, then close — bounded
+    expect(drives).toBeLessThanOrEqual(MAX + 1);
+  });
+  it("the in-flight guard prevents a concurrent second send for the same obligation", async () => {
+    const L = new ObligationLedger(2);
+    openEscalatable(L, "c#1");
+    const inFlight = new Set<string>();
+    let sends = 0;
+    const hang = () => {
+      sends++;
+      return new Promise<void>(() => {});
+    };
+    const p1 = driveEscalation({ escId: "c#1", inFlight, ledger: L, send: hang, maxAttempts: MAX, deadlineMs: 60, log: () => {} });
+    // Second call while the first is still awaiting → must be a no-op.
+    const p2 = driveEscalation({ escId: "c#1", inFlight, ledger: L, send: hang, maxAttempts: MAX, deadlineMs: 60, log: () => {} });
+    expect(p2).toBeUndefined(); // guarded
+    expect(sends).toBe(1); // only one send fired
+    expect(L.list()[0].escalateAttempts).toBe(1); // only one attempt recorded
+    await p1; // let the first settle so we don't leak a pending timer
+  });
+});