switchroom 0.14.61 → 0.14.63

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,226 @@
1
+ /**
2
+ * Telegram-native Microsoft connect — device-code flow (RFC #1873 /
3
+ * out-of-box, Phase 2).
4
+ *
5
+ * The headline "connect from your phone" path: a user runs
6
+ * `/connect microsoft`, the gateway shows a card with a Microsoft
7
+ * sign-in link + a short code, the user approves on their phone, and the
8
+ * gateway registers the resulting account with the auth-broker — no host
9
+ * shell, no Azure portal (the shipped default app is used unless the
10
+ * operator BYO'd one).
11
+ *
12
+ * This module is the framework-agnostic core: it talks to Microsoft's
13
+ * device-code endpoints (RFC 8628, engine in `src/microsoft/oauth.ts`)
14
+ * and the auth-broker, and returns plain data. The gateway owns the
15
+ * Telegram surface (card rendering, edits, callbacks). All network +
16
+ * broker boundaries are injectable so the flow is testable without
17
+ * hitting Microsoft or a live broker, and it contains NO raw bot.api
18
+ * calls (the bot-api-wrapping lint trap lives only in the gateway).
19
+ *
20
+ * Mirrors `auth-add-flow.ts` (the Anthropic `/auth add` template) but
21
+ * needs no child subprocess and no pasted code: device-code consent
22
+ * happens entirely on Microsoft's domain, so nothing secret is ever
23
+ * pasted into chat (strictly better than paste-back — no redaction
24
+ * needed). Personal Microsoft accounts (outlook.com/hotmail) are the
25
+ * clean case at `/common`; a work/school account that fails device-code
26
+ * at `/common` surfaces a clear "use the host CLI" error (the documented
27
+ * "personal-first, work best-effort" boundary).
28
+ */
29
+
30
+ import {
31
+ requestDeviceCode as realRequestDeviceCode,
32
+ pollDeviceToken as realPollDeviceToken,
33
+ type MicrosoftDeviceCodeResponse,
34
+ type MicrosoftOAuthClientConfig,
35
+ } from '../../src/microsoft/oauth.js'
36
+ import { selectMicrosoftScopes } from '../../src/microsoft/scopes.js'
37
+ import { buildMicrosoftCredentials } from '../../src/microsoft/credentials.js'
38
+ import { resolveMicrosoftClientId } from '../../src/auth/default-oauth-clients.js'
39
+ import { isVaultReference } from '../../src/vault/resolver.js'
40
+ import { addAccountViaBroker } from './auth-broker-client.js'
41
+ import type { MicrosoftAddAccountCredentials } from '../../src/auth/broker/client.js'
42
+
43
/**
 * State of one Microsoft connect flow that is currently in flight.
 * Entries are keyed by `chatKey(chatId, threadId)` in the pending map.
 */
export interface PendingMicrosoftConnectFlow {
  /** Telegram user id that started the flow (consent owner; Phase 3). */
  initiatedBy: string

  /** Chat holding the card we posted, so the poll loop can edit it on completion. */
  cardChatId: number | string
  /** Message id of that card within `cardChatId`. */
  cardMessageId: number

  /** Device-code response this flow is polling against. */
  device: MicrosoftDeviceCodeResponse
  /** OAuth client id the device code was requested with. */
  clientId: string
  /** Scopes the device code was requested with. */
  scopes: string[]

  /** Start timestamp (presumably epoch ms — confirm against the gateway caller). */
  startedAt: number

  /** Flipped by cancel so the in-flight poll bails without writing. */
  cancelled: boolean
}
57
+
58
/**
 * Connect flows currently in flight, keyed by `chatKey(chatId, threadId)`.
 * `startMicrosoftConnect` never writes here; the gateway stores the entry
 * itself once it has posted the card and knows the card's message id.
 */
export const pendingMicrosoftConnectFlows = new Map<string, PendingMicrosoftConnectFlow>()
62
+
63
/**
 * Injectable boundaries for the connect flow. Every member is optional;
 * the functions in this module fall back to the real implementation when a
 * member is omitted, so tests can replace the network and the broker.
 */
export interface MicrosoftConnectDeps {
  /** `config.microsoft_workspace?.microsoft_client_id` (may be a vault: ref). */
  configClientId?: string

  /** Forwarded to `selectMicrosoftScopes`; treated as `false` when omitted. */
  orgMode?: boolean

  /** Replacement for the real device-code request. */
  requestDeviceCode?: (
    cfg: MicrosoftOAuthClientConfig,
  ) => Promise<MicrosoftDeviceCodeResponse>

  /** Replacement for the real device-token poll (same signature). */
  pollDeviceToken?: typeof realPollDeviceToken

  /** Replacement for the broker registration call. */
  addAccount?: (
    label: string,
    credentials: MicrosoftAddAccountCredentials,
    opts: { replace?: boolean; provider: 'microsoft' },
  ) => Promise<{ label: string; expiresAt?: number }>

  /** Clock source; `Date.now` is used when omitted. */
  now?: () => number
}
78
+
79
/** Outcome of `startMicrosoftConnect`, discriminated on `kind`. */
export type StartResult =
  | {
      /** A device code was issued; the gateway can render the connect card. */
      kind: 'started'
      device: MicrosoftDeviceCodeResponse
      clientId: string
      scopes: string[]
      /** 'default' = shipped app; 'config'/'env' = BYO. */
      source: 'env' | 'config' | 'default'
    }
  | {
      // The operator BYO'd a Microsoft client via a vault: reference,
      // which the gateway can't resolve in-process — host CLI only.
      kind: 'byo-vault'
      /** The unresolved vault reference, as configured. */
      ref: string
    }
  | {
      /** The device-code request itself failed. */
      kind: 'error'
      message: string
    }
95
+
96
/**
 * Request a device code and return the data the gateway needs to render
 * the connect card. Does NOT mutate the pending map — the gateway stores
 * the pending entry (with the card message id) after it posts the card.
 *
 * @param deps Injectable boundaries; omitted members fall back to the real
 *   implementations (`orgMode` falls back to `false`).
 * @returns `'started'` with the device-code payload, `'byo-vault'` when the
 *   resolved client id is a vault reference the gateway cannot resolve, or
 *   `'error'` when the device-code request throws. Failures of the request
 *   are reported in the result rather than thrown.
 */
export async function startMicrosoftConnect(
  deps: MicrosoftConnectDeps = {},
): Promise<StartResult> {
  const resolved = resolveMicrosoftClientId(deps.configClientId)

  // A vaulted BYO client_id can't be resolved from the gateway process
  // (the gateway has no passphrase / vault-broker read path here). The
  // shipped default and literal config values are fine.
  if (isVaultReference(resolved.clientId)) {
    return { kind: 'byo-vault', ref: resolved.clientId }
  }

  const scopes = selectMicrosoftScopes(deps.orgMode ?? false)
  const cfg: MicrosoftOAuthClientConfig = {
    client_id: resolved.clientId,
    scopes,
  }
  const requestDeviceCode = deps.requestDeviceCode ?? realRequestDeviceCode

  try {
    const device = await requestDeviceCode(cfg)
    return {
      kind: 'started',
      device,
      clientId: resolved.clientId,
      scopes,
      source: resolved.source,
    }
  } catch (err: unknown) {
    // Anything can be thrown, not just Error instances; narrow instead of
    // casting so `message` is always a real string as the result type says.
    return {
      kind: 'error',
      message: err instanceof Error ? err.message : String(err),
    }
  }
}
131
+
132
/** Outcome of `runMicrosoftConnectPoll`, discriminated on `kind`. */
export type PollResult =
  | {
      /** Consent completed and the account was registered with the broker. */
      kind: 'connected'
      account: string
      accountType: 'personal' | 'work'
      expiresAt: number
    }
  /** The flow's `cancelled` flag was set by the time the poll returned. */
  | { kind: 'cancelled' }
  /** Tokens arrived without a refresh token; nothing was registered. */
  | { kind: 'no-refresh-token' }
  /** Polling, identity resolution, or broker registration failed. */
  | { kind: 'failed'; message: string }
142
+
143
/**
 * Poll Microsoft for consent completion, then register the account with
 * the broker. Blocks (with the device-code `interval`) up to the
 * device's `expires_in`. Returns a discriminated result the gateway
 * turns into a card edit. Reads `flow.cancelled` after the (potentially
 * long) poll so a `/connect cancel` between consent and write is
 * honored.
 *
 * NOTE(review): `cancelled` is read from the object passed in, so the
 * caller presumably has to pass the live pending-flow entry (not a copy)
 * for a later cancel to be visible here — confirm at the call site.
 *
 * @param flow The device-code response, client id, scopes and cancel flag
 *   of the flow being completed.
 * @param deps Injectable boundaries; omitted members fall back to the real
 *   poll, the broker client and `Date.now`.
 * @returns `'connected'` on success, `'cancelled'` if the flag was set
 *   while polling, `'no-refresh-token'` if no refresh token came back, or
 *   `'failed'` with a message. Poll and broker failures are reported in
 *   the result rather than thrown.
 */
export async function runMicrosoftConnectPoll(
  flow: Pick<
    PendingMicrosoftConnectFlow,
    'device' | 'clientId' | 'scopes' | 'cancelled'
  >,
  deps: MicrosoftConnectDeps = {},
): Promise<PollResult> {
  const now = deps.now ?? Date.now
  const cfg: MicrosoftOAuthClientConfig = {
    client_id: flow.clientId,
    scopes: flow.scopes,
  }

  // Typed from the real poller so the variable is not an implicit `any`.
  let tokens: Awaited<ReturnType<typeof realPollDeviceToken>>
  try {
    tokens = await (deps.pollDeviceToken ?? realPollDeviceToken)(
      cfg,
      flow.device,
      { now },
    )
  } catch (err: unknown) {
    // Narrow rather than cast: a non-Error throw must still yield a string.
    return {
      kind: 'failed',
      message: err instanceof Error ? err.message : String(err),
    }
  }

  // Checked only after the poll returns: a cancel issued while we were
  // waiting must prevent the broker write below.
  if (flow.cancelled) return { kind: 'cancelled' }

  const built = buildMicrosoftCredentials({
    tokens,
    clientId: flow.clientId,
    accountEmail: '', // device-code learns the email from the id_token
    fallbackScope: flow.scopes.join(' '),
    now,
  })

  // offline_access is requested, so a refresh token is expected; without
  // one the account dies at the first access-token expiry — fail loud
  // rather than register an un-refreshable account.
  if (!built.credentials.microsoftOauth.refreshToken) {
    return { kind: 'no-refresh-token' }
  }

  const account = built.resolvedEmail
  if (!account) {
    return {
      kind: 'failed',
      message: 'Microsoft did not return an account identity (no id_token).',
    }
  }

  const addAccount = deps.addAccount ?? defaultAddAccount
  try {
    await addAccount(account, built.credentials as MicrosoftAddAccountCredentials, {
      provider: 'microsoft',
      // replace:true so reconnecting an already-linked account just
      // refreshes its tokens rather than erroring.
      replace: true,
    })
  } catch (err: unknown) {
    return {
      kind: 'failed',
      message: err instanceof Error ? err.message : String(err),
    }
  }

  return {
    kind: 'connected',
    account,
    accountType: built.credentials.microsoftOauth.accountType,
    expiresAt: built.credentials.microsoftOauth.expiresAt,
  }
}
219
+
220
/**
 * Default `addAccount` boundary used when `deps.addAccount` is not
 * injected: hands the label, credentials and options straight to
 * `addAccountViaBroker` and returns its promise untouched.
 */
function defaultAddAccount(
  label: string,
  credentials: MicrosoftAddAccountCredentials,
  opts: { replace?: boolean; provider: 'microsoft' },
): Promise<{ label: string; expiresAt?: number }> {
  const registration = addAccountViaBroker(label, credentials, opts)
  return registration
}
@@ -44,6 +44,17 @@ export interface Obligation {
44
44
  * can't loop forever — and, because it is part of the durable snapshot,
45
45
  * can't become a boot-surviving poison record either. */
46
46
  escalateAttempts?: number
47
+ /** Wall-clock ms the most recent turn handling THIS obligation ended (stamped
48
+ * at turn_end via noteTurnEnded). Drives the escalate-grace window: a slow /
49
+ * background-worker / multi-segment turn ends (the in-flight gate clears)
50
+ * before its trailing answer's reply lands, and the sweep would otherwise
51
+ * re-present/escalate in that gap — a false "I may have missed this" on a
52
+ * message that's actively being answered (fuzz-confirmed on v0.14.62). The
53
+ * decision waits `graceMs` after this stamp before acting, so the trailing
54
+ * answer's close has a beat to fire. Bounded: each re-present is itself a turn
55
+ * that re-stamps this once, and representCount is capped, so the ladder still
56
+ * terminates. Durable (part of the snapshot) so the grace survives restart. */
57
+ lastTurnEndedAt?: number
47
58
  }
48
59
 
49
60
  /** What the gateway should do for the oldest open obligation at an idle boundary. */
@@ -162,19 +173,50 @@ export class ObligationLedger {
162
173
  * does not mutate. The caller performs the side effect then calls
163
174
  * markRepresented / close accordingly.
164
175
  *
165
- * - 'none' → no open obligation; the agent may idle.
176
+ * - 'none' → no open obligation (or all open ones are within their
177
+ * escalate-grace window); the agent may idle.
166
178
  * - 'represent' → re-present `obligation` as a fresh must-answer turn.
167
179
  * - 'escalate' → it has already been re-presented maxRepresents times; send
168
180
  * ONE operator-visible "did I miss this?" and close it
169
181
  * (caller calls close) rather than loop forever.
182
+ *
183
+ * GRACE WINDOW (opts.graceMs > 0): an obligation whose handling turn ended less
184
+ * than `graceMs` ago is SKIPPED — its trailing answer may still be in flight
185
+ * (a worker / long-think / multi-segment turn ends the in-flight gate before
186
+ * the reply lands). We pick the oldest obligation that is OUT of grace, so a
187
+ * genuinely-stale one is still acted on while a freshly-ended one waits. Pure
188
+ * (clock injected via opts.now, mirroring the builder convention). With no opts
189
+ * (or graceMs<=0) this is the pre-grace behaviour exactly.
170
190
  */
171
- decideAtIdle(): LedgerDecision {
172
- const o = this.oldest()
191
+ decideAtIdle(opts?: { now: number; graceMs: number }): LedgerDecision {
192
+ const o =
193
+ opts != null && opts.graceMs > 0 ? this.oldestEligible(opts.now, opts.graceMs) : this.oldest()
173
194
  if (o === undefined) return { action: 'none' }
174
195
  if (o.representCount >= this.maxRepresents) return { action: 'escalate', obligation: o }
175
196
  return { action: 'represent', obligation: o }
176
197
  }
177
198
 
199
+ /** The oldest open obligation whose handling turn ended at least `graceMs` ago
200
+ * (or never ended — a still-queued obligation has no lastTurnEndedAt and is
201
+ * always eligible; it can't have a trailing answer in flight). */
202
+ private oldestEligible(now: number, graceMs: number): Obligation | undefined {
203
+ let best: Obligation | undefined
204
+ for (const o of this.open.values()) {
205
+ if (o.lastTurnEndedAt != null && now - o.lastTurnEndedAt < graceMs) continue // within grace
206
+ if (best === undefined || o.openedAt < best.openedAt) best = o
207
+ }
208
+ return best
209
+ }
210
+
211
+ /** Stamp that the most recent turn handling `originTurnId` just ended (drives
212
+ * the escalate-grace window). No-op if the obligation isn't open. Persists. */
213
+ noteTurnEnded(originTurnId: string, ts: number): void {
214
+ const o = this.open.get(originTurnId)
215
+ if (o === undefined) return
216
+ o.lastTurnEndedAt = ts
217
+ this.persist()
218
+ }
219
+
178
220
  /**
179
221
  * Decide which obligation a substantive reply discharges — DETERMINISTICALLY,
180
222
  * holding for any model behavior:
@@ -0,0 +1,43 @@
1
/**
 * withDeadline — bound a promise so the chain off it ALWAYS settles within `ms`.
 *
 * Why this exists (the obligation-ledger determinism hole): the escalation send
 * in `obligationSweep` is fire-and-forget and clears its in-flight guard
 * (`obligationEscalateInFlight`) only in a `.finally` — which runs only if the
 * awaited promise SETTLES. A send that never settles would leak the in-flight
 * id forever, every later sweep would early-return at the guard, and the
 * obligation would be stuck OPEN: never re-presented, never escalated, never
 * closed.
 *
 * Racing the send against a deadline makes the wait bounded BY CONSTRUCTION:
 * the returned promise settles in ≤ `ms`, so the caller's `.then/.catch/.finally`
 * always run. The losing (still-pending) promise is given a no-op `.catch` so
 * its eventual rejection is not an unhandled rejection, and the timer is
 * cleared + unref'd so it neither leaks nor keeps the event loop alive.
 *
 * Pure (no gateway/Telegram coupling) ⇒ unit-testable; see
 * tests/with-deadline.test.ts.
 *
 * @param p The promise to bound. It is not cancelled on timeout; only the
 *   returned promise stops waiting for it.
 * @param ms Deadline in milliseconds. Values above the largest delay a timer
 *   can hold (2^31 - 1 ms, which includes `Infinity`) are clamped to that
 *   maximum instead of being passed through, because Node would otherwise
 *   coerce them to a 1 ms delay and the deadline would fire immediately.
 * @param timeoutMessage Message of the `Error` the result rejects with when
 *   the deadline wins the race.
 * @returns A promise that mirrors `p` if it settles first, otherwise rejects
 *   with `new Error(timeoutMessage)`.
 */
export function withDeadline<T>(p: Promise<T>, ms: number, timeoutMessage: string): Promise<T> {
  // Swallow a late rejection from the loser after the race has already settled,
  // so a hung-then-eventually-rejected send is never an unhandled rejection.
  p.catch(() => {})

  // Largest delay setTimeout honours (signed 32-bit milliseconds).
  const MAX_TIMER_DELAY_MS = 0x7fffffff
  const delay = ms > MAX_TIMER_DELAY_MS ? MAX_TIMER_DELAY_MS : ms

  let timer: ReturnType<typeof setTimeout> | undefined
  const deadline = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(() => reject(new Error(timeoutMessage)), delay)
    // Don't keep the process alive solely for this timer. `unref` is probed
    // structurally because browser-style timers are plain numbers.
    ;(timer as unknown as { unref?: () => void }).unref?.()
  })

  // Whichever side wins, drop the timer so it cannot fire (or linger) later.
  return Promise.race([p, deadline]).finally(() => {
    if (timer !== undefined) clearTimeout(timer)
  }) as Promise<T>
}
@@ -35,6 +35,26 @@ function readStdin() {
35
35
  }
36
36
  }
37
37
 
38
/**
 * Turn a tool-input field into display text while avoiding the
 * `[object Object]` trap.
 *
 * Only primitives make a meaningful label. Strings are returned as-is and
 * numbers/booleans are stringified. Everything else (objects, arrays,
 * null, undefined, ...) becomes '' so the caller's `||` chain moves on to
 * its next fallback (a sibling field or the humanized tool name).
 *
 * This matters most on the MCP-tool path, where an operator-configured
 * server may put a filter/query OBJECT in `query` / `description` /
 * `title`; a bare `String(...)` coercion rendered that as the literal
 * "[object Object]" on the activity feed.
 *
 * @param {unknown} v Raw field value from the tool input.
 * @returns {string} The display text, or '' when `v` is not a usable primitive.
 */
function asText(v) {
  switch (typeof v) {
    case 'string':
      return v
    case 'number':
    case 'boolean':
      return String(v)
    default:
      return ''
  }
}
57
+
38
58
  /**
39
59
  * One-line, length-bounded escape of a value for inclusion in a label.
40
60
  * Newlines collapsed, very long strings truncated with an ellipsis.
@@ -82,10 +102,10 @@ export function computeLabel(toolName, input) {
82
102
  // for Bash/Task, matching the gateway's describeToolUse rendering.
83
103
  switch (toolName) {
84
104
  case 'Bash':
85
- return clip(String(i.description ?? ''), 70).trim() || 'Running a command'
105
+ return clip(asText(i.description), 70).trim() || 'Running a command'
86
106
  case 'Task':
87
107
  case 'Agent': {
88
- const d = clip(String(i.description ?? ''), 60).trim()
108
+ const d = clip(asText(i.description), 60).trim()
89
109
  return d ? `Delegating: ${d}` : 'Delegating to a sub-agent'
90
110
  }
91
111
  case 'TodoWrite':
@@ -103,16 +123,16 @@ export function computeLabel(toolName, input) {
103
123
  case 'Write':
104
124
  return `Writing ${clip(safeBasename(i.file_path))}`.trim()
105
125
  case 'Grep': {
106
- const path = i.path ? clip(String(i.path), 40) : '.'
107
- const pat = clip(String(i.pattern ?? ''), 40)
126
+ const path = i.path ? clip(asText(i.path), 40) : '.'
127
+ const pat = clip(asText(i.pattern), 40)
108
128
  return `Searching ${path} for ${pat}`
109
129
  }
110
130
  case 'Glob':
111
- return `Finding files matching ${clip(String(i.pattern ?? ''), 60)}`
131
+ return `Finding files matching ${clip(asText(i.pattern), 60)}`
112
132
  case 'WebFetch':
113
133
  return `Fetching ${clip(urlHostPath(i.url), 60)}`
114
134
  case 'WebSearch':
115
- return `Searching the web for ${clip(String(i.query ?? ''), 60)}`
135
+ return `Searching the web for ${clip(asText(i.query), 60)}`
116
136
  case 'NotebookEdit':
117
137
  return `Editing notebook ${clip(safeBasename(i.notebook_path))}`
118
138
  case 'BashOutput':
@@ -128,7 +148,7 @@ export function computeLabel(toolName, input) {
128
148
  // sidecar JSONL and recover which skill fired per turn —
129
149
  // the progress card path that used to surface this was retired
130
150
  // when `progressDriver` was nulled out in #1122 PR3.
131
- const slug = clip(String(i.skill ?? ''), 64)
151
+ const slug = clip(asText(i.skill), 64)
132
152
  return slug ? `Running skill ${slug}` : null
133
153
  }
134
154
  }
@@ -141,7 +161,7 @@ export function computeLabel(toolName, input) {
141
161
  case 'mcp__switchroom-telegram__stream_reply':
142
162
  return 'Replying'
143
163
  case 'mcp__switchroom-telegram__react': {
144
- const emoji = clip(String(i.emoji ?? ''), 8)
164
+ const emoji = clip(asText(i.emoji), 8)
145
165
  return emoji ? `Reacting ${emoji}` : 'Reacting'
146
166
  }
147
167
  case 'mcp__switchroom-telegram__get_recent_messages':
@@ -177,7 +197,7 @@ export function computeLabel(toolName, input) {
177
197
  return 'Looking through your files'
178
198
  if (server === 'notion' || server === 'claude_ai_notion') return 'Checking your notes'
179
199
  if (server === 'perplexity') {
180
- const q = clip(String(i.query ?? i.description ?? ''), 60).trim()
200
+ const q = clip(asText(i.query) || asText(i.description), 60).trim()
181
201
  return q ? `Searching the web for ${q}` : 'Searching the web'
182
202
  }
183
203
  if (server === 'webkite') {
@@ -186,9 +206,9 @@ export function computeLabel(toolName, input) {
186
206
  }
187
207
  // Unknown MCP server: prefer a model-authored field, else humanized tool.
188
208
  const desc =
189
- clip(String(i.description ?? ''), 60).trim() ||
190
- clip(String(i.query ?? ''), 50).trim() ||
191
- clip(String(i.title ?? ''), 50).trim()
209
+ clip(asText(i.description), 60).trim() ||
210
+ clip(asText(i.query), 50).trim() ||
211
+ clip(asText(i.title), 50).trim()
192
212
  if (desc) return desc
193
213
  return `Using ${tool.replace(/[-_]+/g, ' ')}`
194
214
  }
@@ -0,0 +1,87 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { autoClassifyMidTurnInbound, type AutoClassifyInput } from "../gateway/auto-classify-mid-turn.js";
3
+
4
/**
 * Build an `AutoClassifyInput` for a test case: a supergroup message in the
 * same topic (3) as an in-flight turn, 2s after the last agent output, with
 * topic auto-steer on (8s window) and DM auto-steer off. Fields in `over`
 * replace the corresponding defaults.
 */
function base(over: Partial<AutoClassifyInput> = {}): AutoClassifyInput {
  const defaults: AutoClassifyInput = {
    isSteerPrefix: false,
    isQueuePrefix: false,
    priorTurnInFlight: true,
    isDm: false,
    incomingThreadId: 3,
    activeTurnThreadId: 3,
    msSinceLastAgentOutput: 2000,
    dmSteerWindowMs: 0, // DM auto-steer off by default
    topicSteerWindowMs: 8000,
  };
  return { ...defaults, ...over };
}
18
+
19
+ describe("autoClassifyMidTurnInbound", () => {
20
+ it("explicit /steer prefix always wins", () => {
21
+ const r = autoClassifyMidTurnInbound(base({ isSteerPrefix: true, incomingThreadId: 9, activeTurnThreadId: 3 }));
22
+ expect(r.decision).toBe("steer");
23
+ expect(r.reason).toBe("steer_prefix");
24
+ });
25
+
26
+ it("explicit /queue prefix always wins", () => {
27
+ expect(autoClassifyMidTurnInbound(base({ isQueuePrefix: true })).decision).toBe("queue");
28
+ });
29
+
30
+ it("no turn in flight → queue (fresh turn, not our decision)", () => {
31
+ const r = autoClassifyMidTurnInbound(base({ priorTurnInFlight: false }));
32
+ expect(r.decision).toBe("queue");
33
+ expect(r.reason).toBe("not_mid_turn");
34
+ });
35
+
36
+ // ── Supergroup: topic is the strong signal ──
37
+ it("supergroup, DIFFERENT topic than the active turn → queue (cross_topic), regardless of recency", () => {
38
+ const r = autoClassifyMidTurnInbound(base({ incomingThreadId: 5, activeTurnThreadId: 3, msSinceLastAgentOutput: 100 }));
39
+ expect(r.decision).toBe("queue");
40
+ expect(r.reason).toBe("cross_topic");
41
+ expect(r.sameTopic).toBe(false);
42
+ });
43
+
44
+ it("supergroup, SAME topic + recent → steer", () => {
45
+ const r = autoClassifyMidTurnInbound(base({ msSinceLastAgentOutput: 3000, topicSteerWindowMs: 8000 }));
46
+ expect(r.decision).toBe("steer");
47
+ expect(r.reason).toBe("same_topic_recent");
48
+ expect(r.sameTopic).toBe(true);
49
+ });
50
+
51
+ it("supergroup, SAME topic but STALE (older than window) → queue", () => {
52
+ const r = autoClassifyMidTurnInbound(base({ msSinceLastAgentOutput: 20000, topicSteerWindowMs: 8000 }));
53
+ expect(r.decision).toBe("queue");
54
+ expect(r.reason).toBe("same_topic_stale");
55
+ });
56
+
57
+ it("supergroup, no recency recorded (null) → queue (not treated as recent)", () => {
58
+ const r = autoClassifyMidTurnInbound(base({ msSinceLastAgentOutput: null }));
59
+ expect(r.decision).toBe("queue");
60
+ expect(r.reason).toBe("same_topic_stale");
61
+ });
62
+
63
+ it("topicSteerWindowMs=0 (auto-steer off) → queue, still reports sameTopic", () => {
64
+ const r = autoClassifyMidTurnInbound(base({ topicSteerWindowMs: 0, incomingThreadId: 3, activeTurnThreadId: 3 }));
65
+ expect(r.decision).toBe("queue");
66
+ expect(r.reason).toBe("topic_disabled");
67
+ expect(r.sameTopic).toBe(true);
68
+ });
69
+
70
+ it("canonical thread compare: null/undefined/0 collapse to the same no-thread bucket", () => {
71
+ expect(autoClassifyMidTurnInbound(base({ incomingThreadId: 0, activeTurnThreadId: null })).sameTopic).toBe(true);
72
+ expect(autoClassifyMidTurnInbound(base({ incomingThreadId: undefined, activeTurnThreadId: 0 })).sameTopic).toBe(true);
73
+ expect(autoClassifyMidTurnInbound(base({ incomingThreadId: 1, activeTurnThreadId: 0 })).sameTopic).toBe(false);
74
+ });
75
+
76
+ // ── DM: timing-only, off by default ──
77
+ it("DM with dmSteerWindowMs=0 (default) → queue even if recent (DM auto-steer off)", () => {
78
+ const r = autoClassifyMidTurnInbound(base({ isDm: true, incomingThreadId: null, activeTurnThreadId: null, msSinceLastAgentOutput: 500, dmSteerWindowMs: 0 }));
79
+ expect(r.decision).toBe("queue");
80
+ expect(r.reason).toBe("dm_disabled");
81
+ });
82
+
83
+ it("DM with dmSteerWindowMs>0 + recent → steer; stale → queue", () => {
84
+ expect(autoClassifyMidTurnInbound(base({ isDm: true, incomingThreadId: null, activeTurnThreadId: null, msSinceLastAgentOutput: 5000, dmSteerWindowMs: 10000 })).decision).toBe("steer");
85
+ expect(autoClassifyMidTurnInbound(base({ isDm: true, incomingThreadId: null, activeTurnThreadId: null, msSinceLastAgentOutput: 15000, dmSteerWindowMs: 10000 })).decision).toBe("queue");
86
+ });
87
+ });
@@ -0,0 +1,123 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { driveEscalation } from "../gateway/escalation-drive.js";
3
+ import { ObligationLedger } from "../gateway/obligation-ledger.js";
4
+
5
+ // Drives the REAL escalation step (the code obligationSweep calls) with the REAL
6
+ // ObligationLedger and the REAL withDeadline — including a fake hanging send,
7
+ // the exact path the total proof flagged and that mtcute / a synchronous test
8
+ // cannot reach. This is the executable verification of the hang-wedge fix.
9
+
10
/** Open a fixed fixture obligation under `id` on ledger `L` (no-op if one already exists). */
function openEscalatable(L: ObligationLedger, id: string) {
  const fixture = {
    originTurnId: id,
    chatId: "-100",
    threadId: 3,
    messageId: 1,
    text: "x",
    openedAt: 0,
  };
  L.openIfAbsent(fixture);
}
13
+
14
+ const MAX = 3;
15
+ const DEADLINE = 15; // ms — short so the hang case settles fast and deterministically
16
+
17
+ describe("driveEscalation — the obligation escalation step is bounded and always reaches a terminal", () => {
18
+ it("a successful send closes the obligation and clears the in-flight flag", async () => {
19
+ const L = new ObligationLedger(2);
20
+ openEscalatable(L, "c#1");
21
+ const inFlight = new Set<string>();
22
+ await driveEscalation({
23
+ escId: "c#1",
24
+ inFlight,
25
+ ledger: L,
26
+ send: () => Promise.resolve("sent"),
27
+ maxAttempts: MAX,
28
+ deadlineMs: DEADLINE,
29
+ log: () => {},
30
+ });
31
+ expect(L.isOpen("c#1")).toBe(false); // closed
32
+ expect(inFlight.has("c#1")).toBe(false); // flag cleared
33
+ });
34
+
35
+ it("a transient failure below the cap stays OPEN and clears the flag (retried next sweep)", async () => {
36
+ const L = new ObligationLedger(2);
37
+ openEscalatable(L, "c#1");
38
+ const inFlight = new Set<string>();
39
+ await driveEscalation({
40
+ escId: "c#1",
41
+ inFlight,
42
+ ledger: L,
43
+ send: () => Promise.reject(new Error("network blip")),
44
+ maxAttempts: MAX,
45
+ deadlineMs: DEADLINE,
46
+ log: () => {},
47
+ });
48
+ expect(L.isOpen("c#1")).toBe(true); // still open — will retry
49
+ expect(inFlight.has("c#1")).toBe(false); // flag cleared, so the next sweep can re-enter
50
+ });
51
+
52
+ it("THE FIX: a send that NEVER settles still clears the flag (bounded by the deadline)", async () => {
53
+ const L = new ObligationLedger(2);
54
+ openEscalatable(L, "c#1");
55
+ const inFlight = new Set<string>();
56
+ let sendInvoked = 0;
57
+ const start = Date.now();
58
+ // A promise that never resolves/rejects — the stalled send that, pre-fix,
59
+ // left the in-flight flag set forever and wedged the obligation OPEN.
60
+ await driveEscalation({
61
+ escId: "c#1",
62
+ inFlight,
63
+ ledger: L,
64
+ send: () => {
65
+ sendInvoked++;
66
+ return new Promise(() => {});
67
+ },
68
+ maxAttempts: MAX,
69
+ deadlineMs: DEADLINE,
70
+ log: () => {},
71
+ });
72
+ expect(sendInvoked).toBe(1);
73
+ expect(inFlight.has("c#1")).toBe(false); // cleared despite the hang — the wedge is gone
74
+ expect(Date.now() - start).toBeLessThan(DEADLINE + 500); // settled at the deadline, not "never"
75
+ });
76
+
77
+ it("repeated hung sends reach a bounded terminal (close best-effort), never an infinite loop", async () => {
78
+ const L = new ObligationLedger(2);
79
+ openEscalatable(L, "c#1");
80
+ const inFlight = new Set<string>();
81
+ let sends = 0;
82
+ let drives = 0;
83
+ // Simulate the 5s sweep firing repeatedly while every send hangs.
84
+ while (L.isOpen("c#1") && drives < 20) {
85
+ drives++;
86
+ const p = driveEscalation({
87
+ escId: "c#1",
88
+ inFlight,
89
+ ledger: L,
90
+ send: () => {
91
+ sends++;
92
+ return new Promise(() => {});
93
+ },
94
+ maxAttempts: MAX,
95
+ deadlineMs: DEADLINE,
96
+ log: () => {},
97
+ });
98
+ if (p) await p; // each attempt settles within the deadline
99
+ }
100
+ expect(L.isOpen("c#1")).toBe(false); // reached a terminal (closed best-effort)
101
+ expect(inFlight.has("c#1")).toBe(false);
102
+ expect(sends).toBe(MAX); // exactly maxAttempts sends, then close — bounded
103
+ expect(drives).toBeLessThanOrEqual(MAX + 1);
104
+ });
105
+
106
+ it("the in-flight guard prevents a concurrent second send for the same obligation", async () => {
107
+ const L = new ObligationLedger(2);
108
+ openEscalatable(L, "c#1");
109
+ const inFlight = new Set<string>();
110
+ let sends = 0;
111
+ const hang = () => {
112
+ sends++;
113
+ return new Promise<void>(() => {});
114
+ };
115
+ const p1 = driveEscalation({ escId: "c#1", inFlight, ledger: L, send: hang, maxAttempts: MAX, deadlineMs: 60, log: () => {} });
116
+ // Second call while the first is still awaiting → must be a no-op.
117
+ const p2 = driveEscalation({ escId: "c#1", inFlight, ledger: L, send: hang, maxAttempts: MAX, deadlineMs: 60, log: () => {} });
118
+ expect(p2).toBeUndefined(); // guarded
119
+ expect(sends).toBe(1); // only one send fired
120
+ expect(L.list()[0].escalateAttempts).toBe(1); // only one attempt recorded
121
+ await p1; // let the first settle so we don't leak a pending timer
122
+ });
123
+ });