switchroom 0.14.61 → 0.14.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +73 -62
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +2617 -2081
- package/telegram-plugin/gateway/auth-broker-client.ts +18 -8
- package/telegram-plugin/gateway/auto-classify-mid-turn.ts +119 -0
- package/telegram-plugin/gateway/escalation-drive.ts +79 -0
- package/telegram-plugin/gateway/gateway.ts +448 -43
- package/telegram-plugin/gateway/microsoft-connect-flow.ts +226 -0
- package/telegram-plugin/gateway/obligation-ledger.ts +45 -3
- package/telegram-plugin/gateway/with-deadline.ts +43 -0
- package/telegram-plugin/hooks/tool-label-pretool.mjs +32 -12
- package/telegram-plugin/tests/auto-classify-mid-turn.test.ts +87 -0
- package/telegram-plugin/tests/escalation-drive.test.ts +123 -0
- package/telegram-plugin/tests/microsoft-connect-flow.test.ts +185 -0
- package/telegram-plugin/tests/obligation-determinism.test.ts +85 -25
- package/telegram-plugin/tests/obligation-ledger.test.ts +92 -0
- package/telegram-plugin/tests/with-deadline.test.ts +61 -0
|
@@ -116,6 +116,11 @@ import {
|
|
|
116
116
|
import type { AuthBrokerClient } from './auth-command.js'
|
|
117
117
|
import type { ListStateData } from './auth-line.js'
|
|
118
118
|
import { getAuthBrokerClient, addAccountViaBroker } from './auth-broker-client.js'
|
|
119
|
+
import {
|
|
120
|
+
pendingMicrosoftConnectFlows,
|
|
121
|
+
startMicrosoftConnect,
|
|
122
|
+
runMicrosoftConnectPoll,
|
|
123
|
+
} from './microsoft-connect-flow.js'
|
|
119
124
|
import { resolveAuthBrokerSocketPath } from '../../src/auth/broker/client.js'
|
|
120
125
|
import { createFleetFallbackGate } from '../fleet-fallback-gate.js'
|
|
121
126
|
import {
|
|
@@ -137,6 +142,7 @@ import {
|
|
|
137
142
|
resolveRetentionDays as resolveRegistryRetentionDays,
|
|
138
143
|
} from '../registry/reaper.js'
|
|
139
144
|
import { parseQueuePrefix, parseSteerPrefix, formatPriorAssistantPreview, formatReplyToText } from '../steering.js'
|
|
145
|
+
import { autoClassifyMidTurnInbound } from './auto-classify-mid-turn.js'
|
|
140
146
|
import {
|
|
141
147
|
renderOperatorEvent,
|
|
142
148
|
shouldEmitOperatorEvent,
|
|
@@ -284,6 +290,7 @@ import {
|
|
|
284
290
|
obligationEscalationText,
|
|
285
291
|
} from './obligation-ledger.js'
|
|
286
292
|
import { loadObligations, persistObligations } from './obligation-store.js'
|
|
293
|
+
import { driveEscalation } from './escalation-drive.js'
|
|
287
294
|
import { createInboundSpool } from './inbound-spool.js'
|
|
288
295
|
import { purgeStaleTurnsForChat } from './turn-state-purge.js'
|
|
289
296
|
import { decideInboundDelivery } from './inbound-delivery-gate.js'
|
|
@@ -1406,6 +1413,58 @@ const OBLIGATION_SWEEP_MS = 5_000
|
|
|
1406
1413
|
// attempts the gateway closes best-effort (loud log): the user is genuinely
|
|
1407
1414
|
// unreachable, so a bounded give-up beats an infinite/poison loop.
|
|
1408
1415
|
const OBLIGATION_ESCALATE_MAX = 3
|
|
1416
|
+
// Deadline for a single escalation send. grammy/fetch impose NO request timeout,
|
|
1417
|
+
// and the in-flight guard (obligationEscalateInFlight) is cleared only in the
|
|
1418
|
+
// send's `.finally` — which never runs if the send hangs. Without a bound, one
|
|
1419
|
+
// stalled send leaks the in-flight flag forever and the obligation is stuck OPEN
|
|
1420
|
+
// (never re-escalated, never closed) — the sole liveness hole a total proof
|
|
1421
|
+
// found. Racing the send against this deadline makes the wait bounded BY
|
|
1422
|
+
// CONSTRUCTION (see with-deadline.ts): the chain always settles, `.finally`
|
|
1423
|
+
// always clears the flag, and a hang becomes a bounded reject that feeds the
|
|
1424
|
+
// bounded escalate ladder to a terminal. 45s comfortably exceeds robustApiCall's
|
|
1425
|
+
// 3-attempt network backoff so a legitimate slow send isn't cut short.
|
|
1426
|
+
const OBLIGATION_ESCALATE_SEND_DEADLINE_MS = 45_000
|
|
1427
|
+
// Escalate-grace window. A slow / background-worker / multi-segment turn ends
|
|
1428
|
+
// (the in-flight gate clears) BEFORE its trailing answer's reply lands, and the
|
|
1429
|
+
// 5s sweep would re-present/escalate in that gap — a false "⚠️ I may have missed
|
|
1430
|
+
// this" on a message that's actively being answered (fuzz-confirmed on v0.14.62:
|
|
1431
|
+
// ~14% of marko's no-reply turn-ends had the answer in flight). An obligation
|
|
1432
|
+
// whose handling turn ended < this ago is skipped by decideAtIdle, giving the
|
|
1433
|
+
// trailing answer's close a beat to fire. Bounded: each re-present is itself a
|
|
1434
|
+
// turn that re-stamps once, representCount is capped → the ladder still
|
|
1435
|
+
// terminates. 45s covers the usual case of the observed "answer lands within ~60s, usually <40s" gap.
|
|
1436
|
+
// Kill switch: SWITCHROOM_OBLIGATION_ESCALATE_GRACE_MS=0 → pre-grace behaviour.
|
|
1437
|
+
/**
 * Parse the escalate-grace override (milliseconds).
 *
 * Returns the 45s default when the value is unset, empty, whitespace-only,
 * non-numeric, non-finite or negative. An explicit `0` is honoured — that is
 * the documented kill switch.
 *
 * Why the trim: `Number(' ')` is `0`, so a blank-but-not-empty value (easy to
 * produce from an env file or a unit override) used to read as the kill switch
 * and silently disable the grace window. Blank now means "not configured".
 *
 * @param raw Raw env value; defaults to SWITCHROOM_OBLIGATION_ESCALATE_GRACE_MS.
 * @returns The grace window in milliseconds (>= 0).
 */
function parseObligationEscalateGraceMs(raw = process.env.SWITCHROOM_OBLIGATION_ESCALATE_GRACE_MS) {
  const DEFAULT_GRACE_MS = 45_000
  const text = raw?.trim()
  if (text == null || text === '') return DEFAULT_GRACE_MS
  const n = Number(text)
  return Number.isFinite(n) && n >= 0 ? n : DEFAULT_GRACE_MS
}
const OBLIGATION_ESCALATE_GRACE_MS = parseObligationEscalateGraceMs()
|
|
1443
|
+
|
|
1444
|
+
// ─── Mid-turn auto-classify (steer-vs-queue), SHADOW mode ─────────────────────
|
|
1445
|
+
// Today a no-prefix mid-turn message always QUEUES. autoClassifyMidTurnInbound
|
|
1446
|
+
// (auto-classify-mid-turn.ts) is the basis for a smarter default using
|
|
1447
|
+
// topic-vs-active-turn + reply-recency. Phase 1 ships SHADOW-ONLY: when this
|
|
1448
|
+
// flag is on we COMPUTE + LOG what we'd decide (decision/reason/same_topic/
|
|
1449
|
+
// ms_since_out) but the behaviour is UNCHANGED (still queue) — to gather the
|
|
1450
|
+
// real-world distribution (how often mid-turn messages are same-topic
|
|
1451
|
+
// continuations vs cross-topic, and the recency spread) before any action flips
|
|
1452
|
+
// on. Default OFF → zero overhead. The action windows below stay 0 in shadow.
|
|
1453
|
+
const AUTOCLASSIFY_MIDTURN_SHADOW = process.env.SWITCHROOM_AUTOCLASSIFY_MIDTURN_SHADOW === '1'
|
|
1454
|
+
// Per-(chat,thread) wall-clock ms of the agent's LAST visible output — the
|
|
1455
|
+
// recency clock the classifier uses (NOT turn age: a long actively-narrating
|
|
1456
|
+
// worker turn must not read "stale"). Stamped beside signalTracker.noteOutbound.
|
|
1457
|
+
// LRU-bounded so a long-lived gateway with many topics can't grow unboundedly.
|
|
1458
|
+
// Insertion-ordered map used as a tiny LRU: head = least recently stamped key,
// tail = most recently stamped key.
const lastAgentOutputAt = new Map<string, number>()
// Upper bound on tracked (chat,thread) keys before the head is evicted.
const LAST_OUTPUT_MAX_KEYS = 512

/**
 * Record `ts` as the latest visible-output time for `key`.
 *
 * The key is removed before being written so it always lands at the tail of
 * the map's insertion order. If the write pushes the map past
 * LAST_OUTPUT_MAX_KEYS, the single entry at the head is dropped.
 *
 * @param key Map key for the chat/thread being stamped.
 * @param ts  Timestamp to store for that key.
 */
function noteAgentOutputAt(key: string, ts: number): void {
  // Move-to-tail: a plain set() on an existing key would keep its old position.
  lastAgentOutputAt.delete(key)
  lastAgentOutputAt.set(key, ts)

  if (lastAgentOutputAt.size <= LAST_OUTPUT_MAX_KEYS) return

  // Over capacity by exactly one — evict the head of the insertion order.
  const head = lastAgentOutputAt.keys().next()
  if (!head.done) lastAgentOutputAt.delete(head.value)
}
|
|
1409
1468
|
// Durable snapshot of the open obligation set on the persistent per-agent
|
|
1410
1469
|
// volume (STATE_DIR = /state/agent/telegram in prod). Closes the restart hole:
|
|
1411
1470
|
// the in-memory ledger alone empties on restart and the spool's boot-replay
|
|
@@ -1982,6 +2041,29 @@ let pendingDeferredInterrupt: PendingDeferredInterrupt | null = null
|
|
|
1982
2041
|
* Idempotent: nulls the slot and clears the timer before doing any work so a
|
|
1983
2042
|
* boundary event and the timeout can't double-fire.
|
|
1984
2043
|
*/
|
|
2044
|
+
/**
 * Close the obligation that belongs to the turn an `!` interrupt just killed.
 *
 * Rationale: the interrupt SIGINT-kills the in-flight turn, and a killed turn
 * does NOT reliably emit turn_end, so endCurrentTurnAtomic never gets the
 * chance to close the obligation for the user message it was handling. Left
 * open, the idle sweep would later re-present/escalate "you have an earlier
 * message you never answered" for a question the user EXPLICITLY cancelled.
 * An interrupt is a deliberate redirect, so closing is the correct terminal
 * state (the user chose to interrupt; they can re-ask).
 *
 * Scope: only the interrupted turn's OWN obligation is closed — queued
 * siblings (other open obligations) are untouched.
 *
 * No-op when the ledger flag is off, when no turn is in flight, or when the
 * turn is not a tracked obligation (synthetic / already closed).
 */
function cancelInterruptedObligation(): void {
  // Flag off → don't even look at the current turn.
  const interrupted = OBLIGATION_LEDGER_ENABLED ? currentTurn : null
  if (interrupted == null) return

  const { turnId } = interrupted
  const closed = obligationLedger.close(turnId)
  if (!closed) return // nothing was open for this turn — stay silent

  process.stderr.write(
    `telegram gateway: obligation cancelled by interrupt origin=${turnId}\n`,
  )
}
|
|
2066
|
+
|
|
1985
2067
|
async function fireDeferredInterrupt(reason: 'boundary' | 'timeout'): Promise<void> {
|
|
1986
2068
|
const pending = pendingDeferredInterrupt
|
|
1987
2069
|
if (pending == null) return
|
|
@@ -2010,6 +2092,10 @@ async function fireDeferredInterrupt(reason: 'boundary' | 'timeout'): Promise<vo
|
|
|
2010
2092
|
process.stderr.write(`telegram gateway: deferred-interrupt SIGINT failed: ${(err as Error).message}\n`)
|
|
2011
2093
|
}
|
|
2012
2094
|
|
|
2095
|
+
// The SIGINT just killed the in-flight turn — cancel its obligation so the
|
|
2096
|
+
// interrupted (user-redirected) question isn't re-presented/escalated later.
|
|
2097
|
+
cancelInterruptedObligation()
|
|
2098
|
+
|
|
2013
2099
|
// Deliver the replacement body as a fresh turn to the freshly-killed
|
|
2014
2100
|
// bridge — same sendToAgent + buffer-on-miss primitive the synchronous
|
|
2015
2101
|
// interrupt carve-out uses at the handleInbound delivery site.
|
|
@@ -2409,8 +2495,17 @@ function endCurrentTurnAtomic(turn: CurrentTurn): void {
|
|
|
2409
2495
|
// finalAnswerDelivered===false → stays open → re-presented (the intended
|
|
2410
2496
|
// catch). close() is a no-op for synthetic turns (turnId not in the ledger).
|
|
2411
2497
|
// No-op when the flag is off.
|
|
2412
|
-
if (OBLIGATION_LEDGER_ENABLED
|
|
2413
|
-
|
|
2498
|
+
if (OBLIGATION_LEDGER_ENABLED) {
|
|
2499
|
+
if (turn.finalAnswerDelivered) {
|
|
2500
|
+
obligationLedger.close(turn.turnId)
|
|
2501
|
+
} else {
|
|
2502
|
+
// Turn ended WITHOUT a final answer. If this turn was handling an open
|
|
2503
|
+
// obligation, stamp its grace clock so the idle sweep waits before
|
|
2504
|
+
// re-presenting/escalating — a slow/worker answer may still be in flight
|
|
2505
|
+
// (the over-escalation fix). No-op when turn.turnId isn't an open
|
|
2506
|
+
// obligation (synthetic / already-closed turn).
|
|
2507
|
+
obligationLedger.noteTurnEnded(turn.turnId, Date.now())
|
|
2508
|
+
}
|
|
2414
2509
|
}
|
|
2415
2510
|
// Component 2 — clear any prior no-reply drain timer for this turn; a
|
|
2416
2511
|
// fresh end re-evaluates below. (Idempotent — null when never armed.)
|
|
@@ -3745,6 +3840,17 @@ const pendingStateReaper = setInterval(() => {
|
|
|
3745
3840
|
for (const [k, v] of awaitingAuthCodeAt) {
|
|
3746
3841
|
if (now - v > AUTH_CODE_CONTEXT_TTL_MS) awaitingAuthCodeAt.delete(k)
|
|
3747
3842
|
}
|
|
3843
|
+
// Microsoft connect flows self-expire at the device code's own expiry
|
|
3844
|
+
// (~15 min) — sweep past that + grace so an abandoned card doesn't pin
|
|
3845
|
+
// its key. Setting cancelled makes any still-running poll bail. Placed
|
|
3846
|
+
// AFTER the OAuth-code cluster above, which secret-detect-oauth-code.
|
|
3847
|
+
// test.ts pins as contiguous within the first 800 chars of the reaper.
|
|
3848
|
+
for (const [k, v] of pendingMicrosoftConnectFlows) {
|
|
3849
|
+
if (now - v.startedAt > v.device.expires_in * 1000 + 30_000) {
|
|
3850
|
+
v.cancelled = true
|
|
3851
|
+
pendingMicrosoftConnectFlows.delete(k)
|
|
3852
|
+
}
|
|
3853
|
+
}
|
|
3748
3854
|
// Auth-refresh throttle entries decay quickly (5s window); sweep
|
|
3749
3855
|
// anything older than 60s so abandoned snapshot messages don't pin
|
|
3750
3856
|
// their key forever.
|
|
@@ -4874,16 +4980,45 @@ const pendingInboundBuffer = createPendingInboundBuffer({ spool: inboundSpool })
|
|
|
4874
4980
|
// OPEN via meta.source; reuses tested delivery). Bounded: after maxRepresents,
|
|
4875
4981
|
// escalate to ONE operator-visible "did I miss this?" and close — no loop.
|
|
4876
4982
|
// No-op unless the flag is on; gated on the same idle predicate as the drains.
|
|
4983
|
+
// DETERMINISM (closed-form, not sampled). The obligation ledger itself
|
|
4984
|
+
// (obligation-ledger.ts) is a finite FSM with a total transition function and a
|
|
4985
|
+
// strictly-decreasing measure μ = (REPRESENT_MAX - representCount) +
|
|
4986
|
+
// (ESCALATE_MAX - escalateAttempts): every markRepresented/markEscalateAttempt
|
|
4987
|
+
// decreases μ, both terms are bounded below, and at the floor the only move is
|
|
4988
|
+
// close → terminal. So on the FSM, every OPEN reaches answered | escalation-
|
|
4989
|
+
// delivered | bounded give-up (no cycle re-increments a counter). This sweep is
|
|
4990
|
+
// the FSM's driver; its termination rests on three liveness facts, all bounded:
|
|
4991
|
+
// (1) the 5s setInterval keeps firing;
|
|
4992
|
+
// (2) the gate (turnInFlightForGate) opens — claudeBusyKeys is cleared at
|
|
4993
|
+
// turn-end (purgeReactionTracking / releaseTurnBufferGate), on bridge
|
|
4994
|
+
// disconnect (disconnect-flush.ts), and by the 300s silence-poke watchdog;
|
|
4995
|
+
// (3) the escalation send settles — bounded BY CONSTRUCTION via withDeadline
|
|
4996
|
+
// below (grammy has no request timeout, so an unbounded send was the one
|
|
4997
|
+
// way an obligation could get stuck OPEN forever — now closed).
|
|
4998
|
+
// The only residual liveness assumption is the bridge eventually reconnecting /
|
|
4999
|
+
// the process restarting, which the entire gateway's inbound delivery already
|
|
5000
|
+
// depends on and which durable spool + boot-replay make self-healing.
|
|
4877
5001
|
function obligationSweep(): void {
|
|
4878
5002
|
if (!OBLIGATION_LEDGER_ENABLED) return
|
|
4879
5003
|
if (!obligationLedger.hasOpen()) return
|
|
4880
5004
|
if (turnInFlightForGate()) return // a turn is running — let it finish/answer
|
|
4881
5005
|
const agent = process.env.SWITCHROOM_AGENT_NAME ?? ''
|
|
4882
|
-
|
|
4883
|
-
|
|
5006
|
+
// Grace window: skip an obligation whose handling turn ended < grace ago — its
|
|
5007
|
+
// trailing slow/worker answer may still be landing (over-escalation fix).
|
|
5008
|
+
const decision = obligationLedger.decideAtIdle(
|
|
5009
|
+
OBLIGATION_ESCALATE_GRACE_MS > 0
|
|
5010
|
+
? { now: Date.now(), graceMs: OBLIGATION_ESCALATE_GRACE_MS }
|
|
5011
|
+
: undefined,
|
|
5012
|
+
)
|
|
4884
5013
|
const o = decision.obligation
|
|
4885
5014
|
if (decision.action === 'none' || o == null) return
|
|
4886
5015
|
if (decision.action === 'represent') {
|
|
5016
|
+
// Re-present goes through the bridge → buffer. Only the represent path is
|
|
5017
|
+
// gated on an empty buffer (let the existing drain run first, avoid
|
|
5018
|
+
// double-presenting). Escalation below is NOT gated on the buffer — it is a
|
|
5019
|
+
// direct Telegram send, independent of the bridge, so a represent stranded
|
|
5020
|
+
// behind a dead bridge can never block the operator nudge.
|
|
5021
|
+
if (pendingInboundBuffer.depth(agent) > 0) return
|
|
4887
5022
|
pendingInboundBuffer.push(agent, buildObligationRepresentInbound(o, Date.now()))
|
|
4888
5023
|
const attempt = obligationLedger.markRepresented(o.originTurnId)
|
|
4889
5024
|
process.stderr.write(
|
|
@@ -4899,45 +5034,30 @@ function obligationSweep(): void {
|
|
|
4899
5034
|
// (dead topic even after thread-fallback, blocked bot) is bounded by
|
|
4900
5035
|
// OBLIGATION_ESCALATE_MAX → close best-effort (the user is unreachable, so a
|
|
4901
5036
|
// bounded give-up beats an infinite loop / a boot-surviving poison record).
|
|
4902
|
-
|
|
4903
|
-
|
|
4904
|
-
|
|
4905
|
-
|
|
4906
|
-
|
|
4907
|
-
|
|
4908
|
-
|
|
4909
|
-
//
|
|
4910
|
-
|
|
4911
|
-
|
|
4912
|
-
|
|
4913
|
-
|
|
4914
|
-
(
|
|
4915
|
-
|
|
4916
|
-
|
|
4917
|
-
|
|
4918
|
-
|
|
4919
|
-
|
|
4920
|
-
|
|
4921
|
-
|
|
4922
|
-
|
|
4923
|
-
|
|
4924
|
-
|
|
4925
|
-
|
|
4926
|
-
.catch((err) => {
|
|
4927
|
-
if (attempt >= OBLIGATION_ESCALATE_MAX) {
|
|
4928
|
-
obligationLedger.close(escId)
|
|
4929
|
-
process.stderr.write(
|
|
4930
|
-
`telegram gateway: obligation escalation PERMANENTLY undeliverable after ${attempt} attempts — closing best-effort origin=${escId}: ${err}\n`,
|
|
4931
|
-
)
|
|
4932
|
-
} else {
|
|
4933
|
-
process.stderr.write(
|
|
4934
|
-
`telegram gateway: obligation escalation send failed (attempt ${attempt}/${OBLIGATION_ESCALATE_MAX}), retrying next sweep origin=${escId}: ${err}\n`,
|
|
4935
|
-
)
|
|
4936
|
-
}
|
|
4937
|
-
})
|
|
4938
|
-
.finally(() => {
|
|
4939
|
-
obligationEscalateInFlight.delete(escId)
|
|
4940
|
-
})
|
|
5037
|
+
// Drive one escalation attempt. The send is a direct Telegram nudge
|
|
5038
|
+
// (retryWithThreadFallback: a stale/renumbered topic → THREAD_NOT_FOUND retries
|
|
5039
|
+
// thread-less, the #2096 pattern). driveEscalation guards against concurrent
|
|
5040
|
+
// sends, bounds the send with withDeadline (so a hung send can't leak the
|
|
5041
|
+
// in-flight flag and wedge the obligation OPEN), closes only after a successful
|
|
5042
|
+
// send, and bounds permanent failures to a best-effort close. Extracted so the
|
|
5043
|
+
// hang → bounded → terminal path is executable in escalation-drive.test.ts —
|
|
5044
|
+
// the path neither mtcute (can't hang Telegram) nor a synchronous test reaches.
|
|
5045
|
+
void driveEscalation({
|
|
5046
|
+
escId: o.originTurnId,
|
|
5047
|
+
inFlight: obligationEscalateInFlight,
|
|
5048
|
+
ledger: obligationLedger,
|
|
5049
|
+
send: () =>
|
|
5050
|
+
retryWithThreadFallback(
|
|
5051
|
+
robustApiCall,
|
|
5052
|
+
(tid) =>
|
|
5053
|
+
bot.api.sendMessage(o.chatId, obligationEscalationText(o), {
|
|
5054
|
+
...(tid != null ? { message_thread_id: tid } : {}),
|
|
5055
|
+
}),
|
|
5056
|
+
{ threadId: o.threadId, chat_id: o.chatId, verb: 'obligation.escalate' },
|
|
5057
|
+
),
|
|
5058
|
+
maxAttempts: OBLIGATION_ESCALATE_MAX,
|
|
5059
|
+
deadlineMs: OBLIGATION_ESCALATE_SEND_DEADLINE_MS,
|
|
5060
|
+
})
|
|
4941
5061
|
}
|
|
4942
5062
|
if (!STATIC && OBLIGATION_LEDGER_ENABLED) {
|
|
4943
5063
|
setInterval(obligationSweep, OBLIGATION_SWEEP_MS).unref()
|
|
@@ -6443,6 +6563,10 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
|
|
|
6443
6563
|
// silence-poke clock so the next poke is measured from this send.
|
|
6444
6564
|
signalTracker.noteOutbound(statusKey(chat_id, threadId), Date.now())
|
|
6445
6565
|
silencePoke.noteOutbound(statusKey(chat_id, threadId), Date.now())
|
|
6566
|
+
// Mid-turn auto-classify recency clock: the agent just produced visible output
|
|
6567
|
+
// in this chat/thread (cross-turn, unlike silencePoke's per-turn lastOutboundAt).
|
|
6568
|
+
// Only maintained when the shadow flag is on → truly zero overhead by default.
|
|
6569
|
+
if (AUTOCLASSIFY_MIDTURN_SHADOW) noteAgentOutputAt(statusKey(chat_id, threadId), Date.now())
|
|
6446
6570
|
// PR3b-cutover: feed lastOutboundAt to the delivery machine so its
|
|
6447
6571
|
// TTL `tick` suppresses the fallback for a long-but-active turn
|
|
6448
6572
|
// (model streaming past 5 min) — parity with silencePoke's own
|
|
@@ -10748,6 +10872,9 @@ async function handleInbound(
|
|
|
10748
10872
|
} catch (err) {
|
|
10749
10873
|
process.stderr.write(`telegram gateway: interrupt-marker SIGINT failed: ${(err as Error).message}\n`)
|
|
10750
10874
|
}
|
|
10875
|
+
// The SIGINT just killed the in-flight turn — cancel its obligation so the
|
|
10876
|
+
// interrupted (user-redirected) question isn't re-presented/escalated later.
|
|
10877
|
+
cancelInterruptedObligation()
|
|
10751
10878
|
}
|
|
10752
10879
|
if (interrupt.emptyBody) {
|
|
10753
10880
|
// #1075: thread-id-bearing — route through swallowingApiCall so
|
|
@@ -11354,6 +11481,33 @@ async function handleInbound(
|
|
|
11354
11481
|
isSteering = priorTurnInFlight && isSteerPrefix
|
|
11355
11482
|
if (priorTurnInFlight) priorTurnStartedAt = activeTurnStartedAt.get(key)
|
|
11356
11483
|
|
|
11484
|
+
// Mid-turn auto-classify SHADOW: compute what a topic+recency classifier
|
|
11485
|
+
// WOULD decide and log it — behaviour is UNCHANGED (isSteering above is
|
|
11486
|
+
// untouched). Gathers the real-world distribution (same-topic continuation
|
|
11487
|
+
// vs cross-topic, recency spread) to tune auto-steer before it ever acts.
|
|
11488
|
+
// No-op unless the shadow flag is on AND a turn is in flight (the only case
|
|
11489
|
+
// a steer-vs-queue decision is meaningful).
|
|
11490
|
+
if (AUTOCLASSIFY_MIDTURN_SHADOW && priorTurnInFlight) {
|
|
11491
|
+
const lastOut = lastAgentOutputAt.get(key)
|
|
11492
|
+
const msSinceOut = lastOut != null ? Date.now() - lastOut : null
|
|
11493
|
+
const shadow = autoClassifyMidTurnInbound({
|
|
11494
|
+
isSteerPrefix,
|
|
11495
|
+
isQueuePrefix: isQueuedPrefix,
|
|
11496
|
+
priorTurnInFlight,
|
|
11497
|
+
isDm: isDmChatId(chat_id),
|
|
11498
|
+
incomingThreadId: messageThreadId ?? null,
|
|
11499
|
+
activeTurnThreadId: currentTurn?.sessionThreadId ?? null,
|
|
11500
|
+
msSinceLastAgentOutput: msSinceOut,
|
|
11501
|
+
dmSteerWindowMs: 0, // DM auto-steer stays off (the April regime)
|
|
11502
|
+
topicSteerWindowMs: 8_000, // candidate window — what we're tuning
|
|
11503
|
+
})
|
|
11504
|
+
process.stderr.write(
|
|
11505
|
+
`telegram gateway: autoclassify-shadow chat_id=${chat_id} ` +
|
|
11506
|
+
`would=${shadow.decision} reason=${shadow.reason} same_topic=${shadow.sameTopic ?? '-'} ` +
|
|
11507
|
+
`ms_since_out=${msSinceOut ?? '-'} actual=${isSteering ? 'steer' : 'queue'}\n`,
|
|
11508
|
+
)
|
|
11509
|
+
}
|
|
11510
|
+
|
|
11357
11511
|
if (access.statusReactions !== false) {
|
|
11358
11512
|
if (isSteering) {
|
|
11359
11513
|
// Explicit steer: mark with 🤝 on the inbound message; leave the
|
|
@@ -14385,6 +14539,226 @@ async function runQuotaWatch(): Promise<void> {
|
|
|
14385
14539
|
}
|
|
14386
14540
|
}
|
|
14387
14541
|
|
|
14542
|
+
/**
 * Edit a Microsoft connect card from the BACKGROUND device-code poll
 * (no `ctx` — we hold the chat+message id). Wrapped in robustApiCall.
 * RFC #1873 Phase 2.
 *
 * Best-effort: a failed edit (deleted card, closed topic, rejected markup)
 * must not crash the poll, so the error is never rethrown. It IS logged,
 * though — a silently dropped edit means the user never sees the outcome of
 * the connect flow, and without a log line that is impossible to diagnose.
 *
 * @param chatId    Chat that holds the card.
 * @param messageId Message id of the card to edit.
 * @param html      Replacement text, sent with `parse_mode: 'HTML'`; the caller
 *                  is responsible for escaping dynamic content.
 * @returns Resolves once the edit settled (successfully or not); never rejects.
 */
async function editMicrosoftConnectCard(
  chatId: number | string,
  messageId: number,
  html: string,
): Promise<void> {
  try {
    await robustApiCall(
      // allow-raw-bot-api: background connect-card edit by message id (no thread_id; not the reply chunk loop)
      () => bot.api.editMessageText(chatId, messageId, html, {
        parse_mode: 'HTML',
        link_preview_options: { is_disabled: true },
      }),
      { chat_id: String(chatId), verb: 'microsoftConnectCard' },
    )
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err)
    process.stderr.write(
      `telegram gateway: microsoft connect card edit failed chat_id=${chatId} message_id=${messageId}: ${reason}\n`,
    )
  }
}
|
|
14562
|
+
|
|
14563
|
+
/**
 * Background half of `/connect microsoft`: poll Microsoft for consent,
 * register the account with the broker, then edit the card to the
 * outcome. Fire-and-forget from the command handler.
 *
 * @param key Key of the flow in `pendingMicrosoftConnectFlows`. The flow
 *            object is captured up front; if the entry is missing the call
 *            is a no-op.
 * @returns Resolves when the poll finished and the card edit (if any) settled.
 */
async function finalizeMicrosoftConnect(key: string): Promise<void> {
  const flow = pendingMicrosoftConnectFlows.get(key)
  if (!flow) return
  const agentName = getMyAgentName()
  const result = await runMicrosoftConnectPoll(flow)

  // Ownership check by IDENTITY, not by key presence. While we were polling,
  // this flow may have been cancelled/reaped (entry dropped) and a NEW flow
  // started under the same key. Deleting by key alone would remove that newer
  // flow's entry — leaving it uncancellable and its outcome card unwritten.
  // Only drop the entry when it is still ours.
  const stillOwnsKey = pendingMicrosoftConnectFlows.get(key) === flow
  if (stillOwnsKey) pendingMicrosoftConnectFlows.delete(key)

  // A `/connect cancel` (command or button) between consent and write
  // already edited the card and dropped the entry — don't clobber it.
  if (result.kind === 'cancelled' || !stillOwnsKey) return

  let html: string
  if (result.kind === 'connected') {
    const safeAccount = escapeHtmlForTg(result.account)
    const safeAgent = escapeHtmlForTg(agentName)
    html =
      `✅ <b>Connected</b> <code>${safeAccount}</code> ` +
      `(${escapeHtmlForTg(String(result.accountType))}).\n\n` +
      `To let <b>${safeAgent}</b> use it, run on the host:\n` +
      `<code>switchroom auth microsoft enable ${safeAccount} ${safeAgent}</code>\n` +
      `then restart ${safeAgent}.`
  } else if (result.kind === 'no-refresh-token') {
    html =
      `⚠ Microsoft returned no refresh token (the account would expire in ~1h), ` +
      `so it was not registered. Try <code>/connect microsoft</code> again, or ` +
      `connect from the host CLI.`
  } else {
    // The `<email>` placeholder must be entity-escaped: with parse_mode HTML
    // Telegram rejects any tag it does not support, which would make this
    // edit fail and the user would never see the failure message.
    html =
      `❌ <b>Connect failed:</b> ${escapeHtmlForTg(result.message)}\n\n` +
      `<i>Work/school accounts can't use the phone flow at /common — connect those ` +
      `from the host: <code>switchroom auth microsoft account add &lt;email&gt;</code>.</i>`
  }
  await editMicrosoftConnectCard(flow.cardChatId, flow.cardMessageId, html)
}
|
|
14602
|
+
|
|
14603
|
+
/**
 * `/connect microsoft` — Telegram-native device-code connect for a
 * Microsoft account (RFC #1873 Phase 2). The user signs in on their
 * phone; we register the account with the auth-broker (shipped default
 * app unless the operator BYO'd one). Admin-gated like `/auth add`.
 * `/connect cancel` aborts a pending flow. Google stays host-CLI.
 *
 * Handler order: operator-private gate → agent-admin gate → `cancel` →
 * usage for unknown arguments → "already in progress" guard → start the
 * device-code flow → post the card → record the flow → kick off the
 * background poll.
 */
bot.command('connect', async ctx => {
  // Credential-plane admin is OPERATOR-PRIVATE (WS7-F2 / #1408), exactly
  // like `/auth`: honor `/connect` ONLY in a private chat from a strict
  // `access.allowFrom` sender — never the group-permissive
  // `isAuthorizedSender` (an empty group `allowFrom` = allow-all, so a
  // forum member could otherwise bind THEIR OWN Microsoft account as the
  // agent's credential). The agent-`admin:true` flag check below is
  // orthogonal and the broker enforces it server-side on add-account.
  const senderId = String(ctx.from?.id ?? '')
  const operatorPrivate =
    ctx.chat?.type === 'private' && loadAccess().allowFrom.includes(senderId)
  if (!operatorPrivate) {
    // Only non-private chats get a log line and an explanation; a private
    // chat from a sender who is not on the allowlist is dropped silently.
    if (ctx.chat?.type !== 'private') {
      process.stderr.write(
        `telegram gateway: /connect refused (not operator-private) agent=${process.env.SWITCHROOM_AGENT_NAME ?? '-'} chat=${ctx.chat?.type ?? '?'} sender=${senderId}\n`,
      )
      await switchroomReply(
        ctx,
        `⚠️ <code>/connect</code> links account credentials — it is <b>operator-private</b>. ` +
          `Send it as a direct message to me from your operator account (a private chat where ` +
          `your Telegram ID is on the access allowlist), not in a group or forum.`,
        { html: true },
      ).catch(() => {})
    }
    return
  }

  const arg = String(ctx.match ?? '').trim().toLowerCase()
  const chatId = String(ctx.chat?.id ?? '')
  const key = chatKey(chatId, ctx.message?.message_thread_id ?? null) as string

  // Agent-`admin:true` gate (orthogonal to operator-private above; same
  // source as /auth + the broker's server-side add-account enforcement).
  let isAdmin = false
  try {
    const cfg = loadSwitchroomConfig()
    const me = (cfg as unknown as { agents?: Record<string, { admin?: boolean }> })
      ?.agents?.[getMyAgentName()]
    isAdmin = me?.admin === true
  } catch { /* non-admin is the safe default */ }
  if (!isAuthAdmin({ isAdmin })) {
    await switchroomReply(
      ctx,
      `<b>Not authorized.</b> <code>/connect</code> requires this agent to have ` +
        `<code>admin: true</code> in switchroom.yaml.`,
      { html: true },
    )
    return
  }

  if (arg === 'cancel') {
    const existing = pendingMicrosoftConnectFlows.get(key)
    if (!existing) {
      await switchroomReply(ctx, '<i>No pending connect flow in this chat.</i>', { html: true })
      return
    }
    // Flag first so the still-running background poll bails, then drop the entry.
    existing.cancelled = true
    pendingMicrosoftConnectFlows.delete(key)
    await switchroomReply(ctx, 'Cancelled.', { html: true })
    return
  }

  // Bare `/connect` is treated the same as `/connect microsoft` / `/connect ms`.
  if (arg !== '' && arg !== 'microsoft' && arg !== 'ms') {
    await switchroomReply(
      ctx,
      `<b>Usage:</b> <code>/connect microsoft</code> to link a Microsoft account ` +
        `(Outlook / Office 365), or <code>/connect cancel</code>.\n` +
        `<i>Google accounts are connected from the host CLI.</i>`,
      { html: true },
    )
    return
  }

  if (pendingMicrosoftConnectFlows.has(key)) {
    await switchroomReply(
      ctx,
      '<i>A Microsoft connect flow is already in progress here. Finish it on your phone, ' +
        'or send <code>/connect cancel</code>.</i>',
      { html: true },
    )
    return
  }

  let configClientId: string | undefined
  let orgMode = false
  try {
    const cfg = loadSwitchroomConfig() as unknown as {
      microsoft_workspace?: { microsoft_client_id?: string; org_mode?: boolean }
    }
    configClientId = cfg?.microsoft_workspace?.microsoft_client_id
    orgMode = cfg?.microsoft_workspace?.org_mode === true
  } catch { /* fall back to the shipped default */ }

  const started = await startMicrosoftConnect({ configClientId, orgMode })
  if (started.kind === 'byo-vault') {
    // The `<email>` placeholder is entity-escaped: in Telegram's HTML mode a
    // literal `<email>` is an unsupported tag and the message is rejected.
    await switchroomReply(
      ctx,
      `<b>Can't connect from chat:</b> this install uses a vaulted custom Microsoft ` +
        `app (<code>${escapeHtmlForTg(started.ref)}</code>) only the host CLI can read. ` +
        `Run <code>switchroom auth microsoft account add &lt;email&gt;</code> on the host.`,
      { html: true },
    )
    return
  }
  if (started.kind === 'error') {
    await switchroomReply(
      ctx,
      `<b>/connect failed:</b> ${escapeHtmlForTg(started.message)}`,
      { html: true },
    )
    return
  }

  const appNote =
    started.source === 'default'
      ? "<i>Using switchroom's shipped Microsoft app.</i>"
      : '<i>Using your configured Microsoft app.</i>'
  // The Cancel button carries the flow key so the callback handler can find
  // the pending entry without any other state.
  const keyboard = new InlineKeyboard()
    .url('🔐 Sign in to Microsoft', started.device.verification_uri)
    .row()
    .text('✖ Cancel', `cn:cancel:${key}`)
  const sent = await ctx.reply(
    `🔗 <b>Connect a Microsoft account</b>\n\n` +
      `1. Tap <b>Sign in to Microsoft</b> below.\n` +
      `2. Enter this code: <code>${escapeHtmlForTg(started.device.user_code)}</code>\n` +
      `3. Approve the requested permissions (Mail, Calendar, Files).\n\n` +
      `I'll confirm here once it's connected (within ~15 min).\n${appNote}`,
    {
      parse_mode: 'HTML',
      link_preview_options: { is_disabled: true },
      reply_markup: keyboard,
      ...(ctx.message?.message_thread_id != null
        ? { message_thread_id: ctx.message.message_thread_id }
        : {}),
    },
  )

  // Record the flow only after the card exists — the background poll edits
  // the card by (chat id, message id).
  pendingMicrosoftConnectFlows.set(key, {
    initiatedBy: String(ctx.from?.id ?? ''),
    cardChatId: ctx.chat!.id,
    cardMessageId: sent.message_id,
    device: started.device,
    clientId: started.clientId,
    scopes: started.scopes,
    startedAt: Date.now(),
    cancelled: false,
  })

  // Background: poll Microsoft, register the account, edit the card.
  void finalizeMicrosoftConnect(key)
})
|
|
14761
|
+
|
|
14388
14762
|
bot.command("auth", async ctx => {
|
|
14389
14763
|
// sec WS7-F2b (#1394): `/auth` drives the auth-broker credential
|
|
14390
14764
|
// lifecycle (`/auth add` mints/attaches an Anthropic account token,
|
|
@@ -17130,6 +17504,37 @@ bot.on('callback_query:data', async ctx => {
|
|
|
17130
17504
|
return
|
|
17131
17505
|
}
|
|
17132
17506
|
|
|
17507
|
+
// `cn:cancel:<key>` — cancel a pending Microsoft connect flow (the
|
|
17508
|
+
// Cancel button on the /connect card). RFC #1873 Phase 2.
|
|
17509
|
+
if (data.startsWith('cn:')) {
|
|
17510
|
+
const access = loadAccess()
|
|
17511
|
+
const senderId = String(ctx.from?.id ?? '')
|
|
17512
|
+
if (!access.allowFrom.includes(senderId)) {
|
|
17513
|
+
await ctx.answerCallbackQuery({ text: 'Not authorized.' })
|
|
17514
|
+
return
|
|
17515
|
+
}
|
|
17516
|
+
const rest = data.slice('cn:'.length)
|
|
17517
|
+
const sep = rest.indexOf(':')
|
|
17518
|
+
const action = sep >= 0 ? rest.slice(0, sep) : rest
|
|
17519
|
+
const flowKey = sep >= 0 ? rest.slice(sep + 1) : ''
|
|
17520
|
+
if (action === 'cancel') {
|
|
17521
|
+
const pending = pendingMicrosoftConnectFlows.get(flowKey)
|
|
17522
|
+
if (pending) {
|
|
17523
|
+
pending.cancelled = true
|
|
17524
|
+
pendingMicrosoftConnectFlows.delete(flowKey)
|
|
17525
|
+
}
|
|
17526
|
+
await ctx.answerCallbackQuery({ text: 'Connect cancelled.' })
|
|
17527
|
+
await ctx
|
|
17528
|
+
.editMessageText('Microsoft connect cancelled.', {
|
|
17529
|
+
reply_markup: { inline_keyboard: [] },
|
|
17530
|
+
})
|
|
17531
|
+
.catch(() => {})
|
|
17532
|
+
} else {
|
|
17533
|
+
await ctx.answerCallbackQuery()
|
|
17534
|
+
}
|
|
17535
|
+
return
|
|
17536
|
+
}
|
|
17537
|
+
|
|
17133
17538
|
// RFC B §6.1: apv:<request_id>:<choice>[:<param>] — approval kernel taps.
|
|
17134
17539
|
// Routed through the generic kernel handler so any surface that uses
|
|
17135
17540
|
// buildApprovalCard inherits consume → record → confirmation UX without
|