npm - switchroom - Versions diffs - 0.15.37 → 0.15.38 - Mend

switchroom 0.15.37 → 0.15.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72) hide show

package/telegram-plugin/gateway/auto-classify-mid-turn.ts CHANGED Viewed

@@ -7,7 +7,7 @@
  *
  * Today a no-prefix mid-turn message always QUEUES (the default flipped
  * 2026-04-17 away from the blunt "everything steers" — see
- * reference/steer-or-queue-mid-flight.md). This module is the basis for a
+ * reference/jobs/steer-or-queue-mid-flight.md). This module is the basis for a
  * smarter default. It ships first in SHADOW mode (the gateway logs what it WOULD
  * decide but still queues), to gather real-world data — how often mid-turn
  * messages are same-topic continuations vs cross-topic new tasks, and the

package/telegram-plugin/gateway/boot-card.ts CHANGED Viewed

@@ -46,6 +46,7 @@ import {
   probeBroker,
   probeKernel,
   probeSkills,
+  probeConnections,
   watchAgentProcess,
   AGENT_LIVE_WINDOW_MS,
   AGENT_LIVE_POLL_INTERVAL_MS,
@@ -120,6 +121,7 @@ export type ProbeKey =
   | 'broker'
   | 'kernel'
   | 'skills'
+  | 'connections'
 export type ProbeMap = Partial<Record<ProbeKey, ProbeResult | null>>
@@ -253,11 +255,12 @@ const PROBE_LABELS: Record<ProbeKey, string> = {
   broker:    'Broker',
   kernel:    'Kernel',
   skills:    'Skills',
+  connections: 'Connections',
 }
 const PROBE_KEYS: ReadonlyArray<ProbeKey> = [
   'account', 'agent', 'gateway', 'quota', 'hindsight',
-  'scheduler', 'broker', 'kernel', 'skills',
+  'scheduler', 'broker', 'kernel', 'skills', 'connections',
 ]
 const REASON_EMOJI: Record<RestartReason, string> = {
@@ -617,6 +620,7 @@ export async function runAllProbes(opts: RunProbesOpts): Promise<ProbeMap> {
     probeBroker(undefined, { dockerMode: opts.dockerMode }).then(r => { probes.broker = r }),
     probeKernel(undefined, { dockerMode: opts.dockerMode }).then(r => { probes.kernel = r }),
     probeSkills(opts.agentDir, { agentName: opts.agentSlug ?? opts.agentName }).then(r => { probes.skills = r }),
+    probeConnections(opts.agentDir).then(r => { probes.connections = r }),
   ])
   return probes

package/telegram-plugin/gateway/boot-probes.ts CHANGED Viewed

@@ -1421,6 +1421,68 @@ function renderBucketedSkills(switchroom: string[], agent: string[]): string {
   return parts.length === 0 ? 'none resolved' : parts.join(' · ')
 }
+// ─── Probe: Connections (configured-but-unauthed MCP integrations) ───────────
+/**
+ * Surface configured-but-unauthed MCP connections at agent start. The auth
+ * verdict can't be computed in-container (this boot probe must not do
+ * vault/grant work — see the module header), so `switchroom apply` computes
+ * it host-side and drops a snapshot at
+ * `<agentDir>/.claude/connection-health.json` (src/agents/connection-health.ts).
+ * This probe just reads it.
+ *
+ *   ok       — snapshot missing/unparseable (not yet computed → assume
+ *              healthy, don't nag; detail reads `no issues`) OR zero issues
+ *              (detail reads `all authed`)
+ *   degraded — ≥1 connection configured but not authed; detail names the
+ *              servers (deduplicated, at most four listed, the rest folded
+ *              into `+N more`), nextStep carries the first issue's `fix`
+ *
+ * Never `fail`: an unauthed integration degrades that one capability, it
+ * doesn't take the agent down, and the silent-when-healthy boot card
+ * should not turn an agent red over a missing third-party token.
+ *
+ * `agentDir` is the directory that contains `.claude/connection-health.json`.
+ * `opts.readFileImpl` optionally replaces the synchronous file read (default:
+ * `readFileSync(path, 'utf8')`); it receives the snapshot path and returns the
+ * file text. The whole probe body runs under `withTimeout('Connections', …)`.
+ *
+ * Only `issues` being an array is checked; individual entries are used as
+ * parsed and are not validated against `ConnectionIssueShape`.
+ */
+export interface ConnectionIssueShape {
+  server: string // server name; shown (deduplicated) in the degraded detail
+  key: string // not read by probeConnections
+  kind: string // not read by probeConnections
+  detail: string // not read by probeConnections
+  fix: string // the first issue's value becomes the probe's nextStep
+}
+export async function probeConnections(
+  agentDir: string,
+  opts: { readFileImpl?: (path: string) => string } = {},
+): Promise<ProbeResult> {
+  return withTimeout('Connections', (async (): Promise<ProbeResult> => {
+    const path = join(agentDir, '.claude', 'connection-health.json')
+    const read = opts.readFileImpl ?? ((p: string) => readFileSync(p, 'utf8'))
+    let issues: ConnectionIssueShape[] = []
+    try {
+      const parsed = JSON.parse(read(path)) as { issues?: ConnectionIssueShape[] }
+      issues = Array.isArray(parsed.issues) ? parsed.issues : [] // a snapshot without an `issues` array counts as zero issues
+    } catch {
+      // ENOENT (never applied with this build) or malformed — assume healthy. Any other throw from the read or the parse lands here too.
+      return { status: 'ok', label: 'Connections', detail: 'no issues' }
+    }
+    if (issues.length === 0) {
+      return { status: 'ok', label: 'Connections', detail: 'all authed' }
+    }
+    const servers = [...new Set(issues.map((i) => i.server))] // unique server names, first-seen order
+    const named = servers.slice(0, 4).join(', ') // list at most four names
+    const more = servers.length > 4 ? ` +${servers.length - 4} more` : '' // how many names were left out
+    const first = issues[0]
+    const extra = // counts the remaining ISSUES (not servers), so it can differ from `more`
+      issues.length > 1
+        ? ` (+${issues.length - 1} more — run \`switchroom doctor\`)`
+        : ''
+    return {
+      status: 'degraded',
+      label: 'Connections',
+      detail: `${servers.length} integration(s) configured but not authed: ${named}${more}`,
+      nextStep: `${first.fix}${extra}`,
+    }
+  })())
+}
 export interface SkillsFsImpl {
   readdir: (p: string) => string[]
   exists: (p: string) => boolean

package/telegram-plugin/gateway/cron-session.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * Cheap-cron session identity — docs/rfcs/cheap-cron-sessions.md §3.3.
+ * Cheap-cron session identity — reference/rfcs/cheap-cron-sessions.md §3.3.
  *
  * Rather than rekey the gateway's hardened single-bridge machinery
  * (agentIndex / pendingInboundBuffer / handleRegister, each carrying

package/telegram-plugin/gateway/gateway.ts CHANGED Viewed

@@ -67,6 +67,13 @@ import { DeferredDoneReactions } from '../reaction-defer.js'
 import { createWorkerActivityFeed, isWorkerActivityFeedEnabled } from '../worker-activity-feed.js'
 import { formatTurnLifecycle, detectStatusSurfaceDegraded } from './status-surface-log.js'
 import { parseSourceMessageId } from './source-message-id.js'
+import {
+  permissionSignature,
+  timeoutDenyMessage,
+  duplicateDenyMessage,
+  isRecentTimeoutDuplicate,
+} from './permission-timeout.js'
+import { pickRecoveredPermissionOrigin } from './permission-card-origin.js'
 import { isTelegramReplyTool, isTelegramSurfaceTool } from '../tool-names.js'
 import { appendActivityLabel, renderActivityFeedWithNested } from '../tool-activity-summary.js'
 import { toolLabel } from '../tool-labels.js'
@@ -563,7 +570,7 @@ const INBOX_DIR = join(STATE_DIR, 'inbox')
  *     different agent's container from inside our own (no docker.sock).
  *   - else (v0.6 legacy non-docker path, scheduled for removal in
  *     Phase 3 of the host-control daemon rollout — see
- *     `docs/rfcs/host-control-daemon.md`): detached `systemctl --user
+ *     `reference/rfcs/host-control-daemon.md`): detached `systemctl --user
  *     restart` of the two units. This branch is never reached on
  *     v0.7+ docker installs (the `isDocker` guard above takes the
  *     docker branch); only callable on legacy systemd hosts that
@@ -1901,7 +1908,7 @@ type CurrentTurn = {
   // #1675 (over-ping safety net): wall-clock ms of the first reply
   // this turn that landed with `disable_notification: false` (a real
   // device ping). The conversational-pacing contract
-  // (`reference/conversational-pacing.md` beat 5) says EXACTLY ONE
+  // (`reference/rfcs/conversational-pacing.md` beat 5) says EXACTLY ONE
   // ping per turn — the final answer. When the model violates that
   // (sends a substantive answer pinged + a wrap-up "Delivered…" or
   // meta-narration also pinged), subsequent reply calls with
@@ -3280,6 +3287,29 @@ function resolvePermissionCardTargets(): Array<{ chatId: string; threadId: numbe
   if (turn != null) {
     return [{ chatId: turn.sessionChatId, threadId: turn.sessionThreadId }]
   }
+  // currentTurn was nulled — most commonly because the orphaned-reply backstop
+  // force-closed the turn while the single claude session kept running and then
+  // hit a permission-gated tool (e.g. a retry after a first card auto-denied:
+  // marko Rentals-budget, 2026-06-17). Recover the originating topic from the
+  // recently-started turn registry so the card lands where the operator is
+  // working, instead of fanning out to operator DMs (thread-stripped) where it
+  // sits unseen until the 10-min TTL auto-denies it. Kill switch
+  // (SWITCHROOM_PERMISSION_CARD_ORIGIN_RECOVERY=0) restores the legacy DM fan-out.
+  if (PERMISSION_CARD_ORIGIN_RECOVERY_ENABLED) {
+    const recovered = pickRecoveredPermissionOrigin(
+      recentTurnsById.values(),
+      Date.now(),
+      PERMISSION_CARD_ORIGIN_MAX_AGE_MS,
+    )
+    if (recovered != null) {
+      process.stderr.write(
+        `telegram gateway: permission-card origin recovered from recent turn ` +
+        `chat=${recovered.chatId} thread=${recovered.threadId ?? '-'} ` +
+        `(currentTurn was null — force-closed turn)\n`,
+      )
+      return [recovered]
+    }
+  }
   const sg = resolveAgentSupergroupChatId()
   const topic = resolveAgentOutboundTopic({
     kind: 'permission',
@@ -3699,6 +3729,39 @@ const STATUS_QUERY_RE = /^\s*status\??\s*$/i
 const PERMISSION_REPLY_RE = /^\s*(y|yes|n|no)\s+([a-km-z]{5})\s*$/i
 const pendingPermissions = new Map<string, { tool_name: string; description: string; input_preview: string; startedAt: number }>()
 const PERMISSION_TTL_MS = 10 * 60_000
+// No-repeat-on-timeout (marko Rentals-budget loop, 2026-06-17). When a card
+// auto-denies on TTL, the model is told it was a TIMEOUT (not a denial) so it
+// doesn't retry; if it retries the identical (tool, input) anyway while the
+// operator is still absent, we short-circuit-deny it WITHOUT posting a second
+// card. `permissionTimeoutSignatures` maps signature → last-timeout epoch ms;
+// it is cleared the moment the operator is active again (answers any card, or
+// sends a message), so suppression only ever holds during genuine absence.
+// Kill switch: SWITCHROOM_PERMISSION_NO_REPEAT=0.
+const PERMISSION_NO_REPEAT_ENABLED =
+  process.env.SWITCHROOM_PERMISSION_NO_REPEAT !== '0'
+// Safety cap on how long a timed-out signature suppresses retries even if the
+// operator-activity reset is somehow missed; the reset is the primary bound.
+const PERMISSION_DUPLICATE_WINDOW_MS = 60 * 60_000
+const permissionTimeoutSignatures = new Map<string, number>()
+function clearPermissionTimeoutSuppression(reason: string): void { // empties the no-repeat map; `reason` only appears in the log line
+  if (permissionTimeoutSignatures.size === 0) return // nothing recorded — return without logging
+  const n = permissionTimeoutSignatures.size // read the count before clear() resets it to 0
+  permissionTimeoutSignatures.clear()
+  process.stderr.write(
+    `telegram gateway: permission no-repeat suppression cleared (${n} sig(s)) — ${reason}\n`,
+  )
+}
+// Permission/approval-card origin recovery (marko Rentals-budget, 2026-06-17).
+// When `currentTurn` was force-closed by the orphaned-reply backstop but the
+// claude session kept running into a permission-gated tool, recover the card's
+// origin topic from the recently-started turn registry instead of fanning out
+// to operator DMs. Kill switch: SWITCHROOM_PERMISSION_CARD_ORIGIN_RECOVERY=0.
+const PERMISSION_CARD_ORIGIN_RECOVERY_ENABLED =
+  process.env.SWITCHROOM_PERMISSION_CARD_ORIGIN_RECOVERY !== '0'
+// A backstop-closed turn is seconds-to-minutes old; bound recovery so a
+// long-idle agent's stale registry entry can't mis-route a much later
+// permission into an old topic (it falls back to the operator-DM fan-out).
+const PERMISSION_CARD_ORIGIN_MAX_AGE_MS = 30 * 60_000
 // #1977 — single-tap correlation for the durable "🔁 Always allow"
 // flow. When the gateway dispatches a `config_propose_edit` to hostd in
@@ -4305,23 +4368,46 @@ const pendingStateReaper = setInterval(() => {
       // permission (or takes a fallback). Routed through
       // dispatchPermissionVerdict so it's buffered+redelivered too if
       // the bridge is also offline at sweep time.
-      dispatchPermissionVerdict({ type: 'permission', requestId: k, behavior: 'deny' })
+      // Carry a TIMEOUT reason to the model (claude renders it as "…the user
+      // said: …") so it can tell a timeout from a real denial and not retry
+      // the identical call — the duplicate-card loop this series closes.
+      const timeoutMinutes = Math.round(PERMISSION_TTL_MS / 60000)
+      dispatchPermissionVerdict({
+        type: 'permission',
+        requestId: k,
+        behavior: 'deny',
+        message: timeoutDenyMessage(timeoutMinutes),
+      })
       // The auto-deny un-parks the suspended turn — flip 🙏 → working so
       // it doesn't sit on the awaiting glyph (or stall) after the timeout.
       resumeReactionAfterVerdict()
       postPermissionResumeMessage({
         behavior: 'deny',
         action: naturalAction(v.tool_name, v.input_preview),
-        timeoutMinutes: Math.round(PERMISSION_TTL_MS / 60000),
+        timeoutMinutes,
       })
+      // Remember this (tool, input) timed out so an immediate identical retry
+      // (while the operator is still absent) is short-circuited without a
+      // second card. Cleared on operator activity.
+      if (PERMISSION_NO_REPEAT_ENABLED) {
+        permissionTimeoutSignatures.set(
+          permissionSignature(v.tool_name, v.input_preview),
+          now,
+        )
+      }
       process.stderr.write(
         `telegram gateway: permission TTL expired — auto-deny request=${k} ` +
         `tool=${v.tool_name} (no operator response in ` +
-        `${Math.round(PERMISSION_TTL_MS / 60000)}m)\n`,
+        `${timeoutMinutes}m)\n`,
       )
       pendingPermissions.delete(k)
     }
   }
+  // Drop no-repeat suppression entries past the safety-cap window (the primary
+  // bound is the operator-activity reset; this just keeps the map from growing).
+  for (const [sig, at] of permissionTimeoutSignatures) {
+    if (now - at > PERMISSION_DUPLICATE_WINDOW_MS) permissionTimeoutSignatures.delete(sig)
+  }
   for (const [k, v] of vaultPassphraseCache) {
     if (now > v.expiresAt) vaultPassphraseCache.delete(k)
   }
@@ -5803,7 +5889,7 @@ const ipcServer: IpcServer = createIpcServer({
     // (5-min cooldown per agent), and skipped if no boot chat resolves.
     // Claude responds NO_REPLY per inline instruction; existing
     // silent-marker suppression at gateway.ts:5906 swallows the
-    // outbound. See docs/rfcs/cold-start-ttfo.md Option A.
+    // outbound. See reference/rfcs/cold-start-ttfo.md Option A.
     if (client.agentName != null) {
       maybeFireWarmup({
         selfAgent: client.agentName,
@@ -6097,6 +6183,30 @@ const ipcServer: IpcServer = createIpcServer({
         return
       }
     }
+    // No-repeat short-circuit: this exact (tool, input) already timed out and
+    // the operator hasn't been active since (the suppression map is cleared on
+    // any operator activity). Deny it WITH a timeout-duplicate reason and post
+    // NO second card — the model retrying into an absent operator is the loop
+    // this closes. The turn still unblocks (deny verdict), and a returning
+    // operator resets suppression so the next ask gets a fresh card.
+    if (PERMISSION_NO_REPEAT_ENABLED) {
+      const sig = permissionSignature(toolName, inputPreview)
+      if (isRecentTimeoutDuplicate(permissionTimeoutSignatures, sig, Date.now(), PERMISSION_DUPLICATE_WINDOW_MS)) {
+        // no-card-verdict: no card was posted and the turn was never parked on
+        // the awaiting glyph, so we omit the resume-reaction flip / resume msg.
+        dispatchPermissionVerdict({
+          type: 'permission',
+          requestId,
+          behavior: 'deny',
+          message: duplicateDenyMessage,
+        })
+        process.stderr.write(
+          `telegram gateway: permission no-repeat short-circuit — duplicate of a ` +
+          `timed-out request tool=${toolName} request=${requestId} (no card posted)\n`,
+        )
+        return
+      }
+    }
     pendingPermissions.set(requestId, { tool_name: toolName, description, input_preview: inputPreview, startedAt: Date.now() })
     // Natural-language card body — a plain sentence ("Gymbro wants to
     // edit: supplement-log.md" + a why-line), never a raw tool id.
@@ -6586,7 +6696,7 @@ const ipcServer: IpcServer = createIpcServer({
     const source = typeof msg.inbound.meta?.source === 'string'
       ? msg.inbound.meta.source
       : 'unknown'
-    // Cheap-cron (docs/rfcs/cheap-cron-sessions.md §3.3): a Tier-1 fire
+    // Cheap-cron (reference/rfcs/cheap-cron-sessions.md §3.3): a Tier-1 fire
     // carries meta.session='cron' → route to the derived `<agent>-cron`
     // bridge (a 2nd interactive Sonnet session in the same container).
     // Every other fire (and all of today's callers) routes to the agent
@@ -7148,7 +7258,7 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
   let disableNotification = args.disable_notification === true
   // #1675 over-ping safety net. The conversational-pacing contract
-  // (`reference/conversational-pacing.md` beat 5) says EXACTLY ONE
+  // (`reference/rfcs/conversational-pacing.md` beat 5) says EXACTLY ONE
   // device ping per turn — the final answer. The model sometimes
   // violates this by sending a substantive answer pinged + a wrap-up
   // ("Delivered all three steps…", "Sent.", or meta-narration) ALSO
@@ -10254,7 +10364,7 @@ function handleSessionEvent(ev: SessionEvent): void {
             //   only fires for text-only turns where the stream IS the
             //   answer): PING. The user reached for the agent and the
             //   model produced an answer; per beat 5 of
-            //   `reference/conversational-pacing.md` the final answer MUST
+            //   `reference/rfcs/conversational-pacing.md` the final answer MUST
             //   ping the device exactly once. Without this carve-out, a
             //   short text-only turn ("on it" being the whole response)
             //   lands silently and the user has no notification to know
@@ -11580,6 +11690,11 @@ async function handleInbound(
     return
   }
+  // A real message from an allowed sender (gate passed) ⇒ the operator is
+  // present, so reset any no-repeat suppression: the next time the agent asks
+  // for something that timed out earlier, they should see a fresh card.
+  clearPermissionTimeoutSuppression('operator inbound')
   // Capture wall-clock receive time for inbound_ack metric (#203).
   // Must be after gate() so early-exit paths (drop/pair) don't skew the delta.
   //
@@ -11699,7 +11814,7 @@ async function handleInbound(
   }
   // `!`-prefix interrupt (#575). Closes
-  // `reference/steer-or-queue-mid-flight.md`'s correction path.
+  // `reference/jobs/steer-or-queue-mid-flight.md`'s correction path.
   //
   // Behavior:
   //   1. SIGINT the agent service. This kills any in-flight turn —
@@ -13156,7 +13271,7 @@ function resolveBootChatId(
   // operator sees lifecycle events in a predictable lane instead of
   // chat-root. For fleet-mode / DM agents the helper returns undefined
   // → behavior unchanged (lands at chat-root as today). PR4b of
-  // supergroup-mode rollout (docs/rfcs/supergroup-mode.md).
+  // supergroup-mode rollout (reference/rfcs/supergroup-mode.md).
   const supergroupBootTopic = resolveAgentOutboundTopic({ kind: 'boot' })
   const bootSupergroup = resolveAgentSupergroupChatId()
   // The boot topic is valid only in the agent's supergroup — attach it per
@@ -14254,7 +14369,7 @@ async function buildLiveProbeRows(agentName: string): Promise<StatusProbeRow[]>
     // Render order matches the boot card's PROBE_KEYS so the two
     // surfaces tell the same story in the same order.
     const order = ['account', 'agent', 'gateway', 'quota', 'hindsight',
-      'scheduler', 'broker', 'kernel', 'skills'] as const
+      'scheduler', 'broker', 'kernel', 'skills', 'connections'] as const
     for (const k of order) {
       const r = probes[k]
       if (!r) continue
@@ -15134,6 +15249,8 @@ async function handlePermissionSlash(ctx: Context, behavior: 'allow' | 'deny'):
     )
     return
   }
+  // Operator answered via slash ⇒ present; reset no-repeat suppression.
+  clearPermissionTimeoutSuppression('operator answered via /approve|/deny')
   // Forward to connected bridges — same IPC the button handler uses.
   dispatchPermissionVerdict({ type: 'permission', requestId: request_id, behavior })
   resumeReactionAfterVerdict()
@@ -19640,6 +19757,9 @@ bot.on('callback_query:data', async ctx => {
   // scopes (resolveTimeBox → null) and the disabled tier (ttl<=0) stay truly
   // once. The verdict is still dispatched WITHOUT a `rule` (below), so the
   // bridge never caches it untimed — the window lives only in scopedGrants.
+  // Operator tapped a verdict ⇒ they are present; reset no-repeat suppression
+  // so a later identical ask is shown fresh rather than silently short-circuited.
+  clearPermissionTimeoutSuppression('operator answered a permission card')
   const pd = pendingPermissions.get(request_id)
   const resumeAction = pd ? naturalAction(pd.tool_name, pd.input_preview) : ''
   const scopedTtl = scopedApprovalTtlMs()
@@ -20919,6 +21039,7 @@ async function shutdown(signal: string): Promise<void> {
   pendingReauthFlows.clear()
   pendingVaultOps.clear()
   pendingPermissions.clear()
+  permissionTimeoutSignatures.clear()
   try {
     await ipcServer.close()

package/telegram-plugin/gateway/grant-restart.ts CHANGED Viewed

@@ -6,7 +6,7 @@
  * turn-deferred-vs-now — unit-tests without gateway.ts's boot side-effects
  * (same pattern as scoped-approval.ts / admin-commands/index.ts).
  *
- * Contract (reference/access-model.md): the restart only ever follows an
+ * Contract (reference/rfcs/access-model.md): the restart only ever follows an
  * operator-approved, single-agent, additive `tools.allow` edit, and only
  * ever bounces the CALLER's own agent — never a peer, never fleet-wide.
  */

package/telegram-plugin/gateway/inbound-delivery-machine-dispatch.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * InboundDeliveryStateMachine — DISPATCH (Phase 2b PR 3a, bridgeUp cutover).
  *
- * Per RFC `docs/rfcs/inbound-delivery-state-machine.md`, the state
+ * Per RFC `reference/rfcs/inbound-delivery-state-machine.md`, the state
  * machine is pure: `transition(state, event) → { state', effects[] }`.
  * The gateway's job is to (a) emit events at the right moments and
  * (b) execute the returned effects against real I/O. This module owns

package/telegram-plugin/gateway/inbound-delivery-machine-shadow.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * InboundDeliveryStateMachine — SHADOW MODE wiring (Phase 2b PR 2).
  *
- * Per RFC `docs/rfcs/inbound-delivery-state-machine.md` Phase 2b PR 2:
+ * Per RFC `reference/rfcs/inbound-delivery-state-machine.md` Phase 2b PR 2:
  * the state machine runs ALONGSIDE the existing imperative gateway
  * code, recording predicted effects to a structured trace. Behavior
  * is unchanged — every existing code path still executes the actual

package/telegram-plugin/gateway/inbound-delivery-machine.ts CHANGED Viewed

@@ -2,7 +2,7 @@
  * InboundDeliveryStateMachine — pure transition function for the
  * gateway's inbound→bridge→outbound pipeline.
  *
- * Per `docs/rfcs/inbound-delivery-state-machine.md` (RFC merged in
+ * Per `reference/rfcs/inbound-delivery-state-machine.md` (RFC merged in
  * PR #1576): the gateway's delivery state was implicit and scattered
  * across 8+ pieces of mutable state. The wedge cluster of 2026-05-19
  * (9 PRs in 36h all patching variants of "inbound stranded → 5-min

package/telegram-plugin/gateway/interrupt-defer.ts CHANGED Viewed

@@ -3,7 +3,7 @@
 // A `!`-prefix interrupt SIGINTs the agent's in-flight turn (tmux C-c) and
 // then resumes with the replacement body as a fresh turn. Firing the SIGINT
 // the instant `!` arrives can land mid-tool-call — a C-c during a Write or a
-// Bash leaves the tool's work half-done. `reference/steer-or-queue-mid-flight.md`
+// Bash leaves the tool's work half-done. `reference/jobs/steer-or-queue-mid-flight.md`
 // names this exact anti-pattern: "Mid-tool-call is not 'amend time.'"
 //
 // We can't pause claude's internal loop (the unmodified-CLI constraint — the

package/telegram-plugin/gateway/ipc-protocol.ts CHANGED Viewed

@@ -38,6 +38,18 @@ export interface PermissionEvent {
    * (`mcp__<server>__*`).
    */
   rule?: string;
+  /**
+   * Optional human-readable reason for the verdict, surfaced to the model
+   * verbatim by claude's permission channel as "…the user said: ${message}".
+   * Only set on `deny`. switchroom uses it to make a TIMEOUT auto-deny (no
+   * operator response within the TTL) distinguishable from a deliberate
+   * operator denial — otherwise both render as the generic "Denied" and the
+   * model retries the identical call, re-raising an identical card 10 min
+   * later (marko Rentals-budget loop, 2026-06-17). When absent, claude falls
+   * back to its default "Denied", so this degrades safely on any claude that
+   * ignores the field.
+   */
+  message?: string;
 }
 export interface StatusEvent {

package/telegram-plugin/gateway/permission-card-origin.ts ADDED Viewed

@@ -0,0 +1,62 @@
+/**
+ * Pure origin-recovery for a permission/approval card when the gateway's live
+ * `currentTurn` has already been nulled.
+ *
+ * Why this exists (marko Rentals-budget incident, 2026-06-17). A
+ * supergroup-owned agent that delivers its final answer as plain transcript
+ * text — never calling the `reply` tool — has its turn force-closed by the
+ * gateway's orphaned-reply backstop ~30s later, which nulls `currentTurn`. If
+ * the single claude session is still running and then calls a permission-gated
+ * tool (the real case: retrying `meta_ads_set_budget` after a first card had
+ * auto-denied), the gate fires with `currentTurn == null`. The card emitter
+ * then fell through to broadcasting the card to the operator-DM allowlist,
+ * thread-stripped — so the card never reached the forum topic the operator was
+ * working in. Unanswered there, it hit the 10-minute TTL and auto-denied, and
+ * an explicitly-approved budget change silently never ran.
+ *
+ * A switchroom agent runs exactly ONE claude session, so a tool permission can
+ * only belong to the turn that session most recently had open. We recover that
+ * origin from the bounded recently-started turn registry: the most-recently-
+ * started turn still within `maxAgeMs`. A turn force-closed by the backstop is,
+ * by construction, seconds-to-minutes old, so the freshness ceiling costs
+ * nothing for the incident class while keeping a long-idle agent's stale
+ * registry entry from mis-routing a much later permission into an old topic —
+ * beyond the ceiling we return null and the caller keeps the existing
+ * operator-DM fan-out. This only ever ADDS topic recovery; it never changes the
+ * idle/turn-less path.
+ */
+/** The subset of a turn this recovery needs — kept structural so the gateway's
+ *  richer `CurrentTurn` satisfies it without a cast.
+ *
+ *  `sessionChatId` / `sessionThreadId` are copied verbatim into the recovered
+ *  origin; `startedAt` is compared against the caller's `now` and `maxAgeMs`,
+ *  so it has to be on the same clock (the gateway passes `Date.now()`). */
+export interface RecoverableTurn {
+  sessionChatId: string
+  sessionThreadId: number | undefined
+  startedAt: number
+}
+export interface PermissionCardOrigin { // where a recovered permission card should be posted
+  chatId: string // copied from the chosen turn's sessionChatId
+  threadId: number | undefined // copied from the chosen turn's sessionThreadId; may be undefined
+}
+/**
+ * Pick the most-recently-started turn within the freshness window as the
+ * permission card's origin, or null when none qualifies (caller falls back to
+ * the operator-DM fan-out). Order-independent — selects by `startedAt`, not by
+ * the iteration order of the source registry, so it is robust to any
+ * out-of-order insertion. The one exception is an exact `startedAt` tie,
+ * where the entry iterated later wins.
+ *
+ * @param recentTurns Candidate turns; iterated once.
+ * @param now Current time on the same clock as `startedAt` (the gateway
+ *   passes `Date.now()`).
+ * @param maxAgeMs Freshness ceiling: a turn is skipped when
+ *   `now - startedAt` exceeds it (an age exactly equal to it still qualifies).
+ * @returns `{ chatId, threadId }` copied from the winning turn's
+ *   `sessionChatId` / `sessionThreadId`, or null when no turn qualifies.
+ */
+export function pickRecoveredPermissionOrigin(
+  recentTurns: Iterable<RecoverableTurn>,
+  now: number,
+  maxAgeMs: number,
+): PermissionCardOrigin | null {
+  let best: RecoverableTurn | null = null
+  for (const t of recentTurns) {
+    if (now - t.startedAt > maxAgeMs) continue // too old to be trusted as the origin
+    if (best == null || t.startedAt >= best.startedAt) best = t // `>=`: on a tie the later-iterated turn replaces the earlier one
+  }
+  return best == null
+    ? null
+    : { chatId: best.sessionChatId, threadId: best.sessionThreadId }
+}

package/telegram-plugin/gateway/permission-timeout.ts ADDED Viewed

@@ -0,0 +1,70 @@
+/**
+ * Pure helpers for permission-card TIMEOUT handling — making a "no operator
+ * responded" auto-deny distinguishable from a deliberate denial, and
+ * suppressing the duplicate card a model raises when it retries the identical
+ * call after such a timeout.
+ *
+ * Background (marko Rentals-budget loop, 2026-06-17). switchroom forwards a
+ * permission verdict to claude as `{ behavior, message? }`; with no `message`,
+ * claude renders the generic "the user said: Denied". A 10-minute TTL
+ * auto-deny was therefore indistinguishable from a real operator "Deny", so
+ * the model read it as transient and retried the SAME tool call — re-raising
+ * an identical card 10 minutes later, in a loop the operator never asked for.
+ *
+ * Two levers, both pure here and wired in gateway.ts:
+ *  1. `timeoutDenyMessage` — the `message` we attach ONLY to a TTL auto-deny,
+ *     telling the model it was a timeout (not a denial) and not to retry.
+ *  2. `permissionSignature` + `isRecentTimeoutDuplicate` — recognise a retry of
+ *     the exact same (tool, input) shortly after it timed out, so the gateway
+ *     can short-circuit it (deny with `duplicateDenyMessage`) WITHOUT posting a
+ *     second identical card. The suppression is reset on operator activity
+ *     (handled gateway-side), so it only ever holds while the operator is
+ *     genuinely absent — re-showing a card to an absent operator is the noise
+ *     this removes.
+ */
+// NUL — assumed to appear in neither a tool name nor a rendered input preview
+// (nothing in this module enforces that), so it safely delimits the two halves
+// of a signature (a printable separator could collide: ("a b","c") vs
+// ("a","b c")). Built at runtime so the SOURCE file
+// stays plain text (a literal NUL byte would make git treat it as binary).
+const SIGNATURE_SEP = String.fromCharCode(0)
+/**
+ * Stable identity for a permission request: the tool plus its input preview
+ * (the same string the card renders). Same tool + same preview ⇒ same action.
+ *
+ * @param toolName Tool identifier; used verbatim.
+ * @param inputPreview Input preview text; used verbatim.
+ * @returns `toolName`, a single NUL character, then `inputPreview`.
+ */
+export function permissionSignature(toolName: string, inputPreview: string): string {
+  return toolName + SIGNATURE_SEP + inputPreview
+}
+/** The `message` attached to a TTL auto-deny so the model treats it as a
+ *  timeout, not a denial, and does not retry the identical call.
+ *
+ *  `timeoutMinutes` is interpolated as-is and is always followed by the word
+ *  "minutes" (there is no singular form and no rounding here). The result is
+ *  one string with no line breaks. */
+export function timeoutDenyMessage(timeoutMinutes: number): string {
+  return (
+    `No operator responded within ${timeoutMinutes} minutes, so this request timed out. ` +
+    `This is a TIMEOUT, not a denial — the operator is likely away. ` +
+    `Do NOT retry this exact action automatically. Tell the user it is still ` +
+    `awaiting their approval, then continue with other work or stop.`
+  )
+}
+/** The `message` attached when we short-circuit a duplicate retry of an
+ *  already-timed-out request (no new card posted). A fixed string with no
+ *  interpolated values and no line breaks. */
+export const duplicateDenyMessage =
+  `This exact action already timed out awaiting the operator, and they have not ` +
+  `responded since. Do NOT keep re-requesting it — tell the user it needs their ` +
+  `approval when they are back, and move on to other work or stop.`
+/**
+ * True when `sig` timed out within `windowMs` of `now` (so a fresh request for
+ * it is a retry to suppress). `timeouts` maps signature → last-timeout epoch ms.
+ *
+ * @param timeouts Signature → timestamp map; only read, never modified.
+ * @param sig Signature to look up (see `permissionSignature`).
+ * @param now Current time in the same unit as the stored timestamps.
+ * @param windowMs Maximum age that still counts; the bound is inclusive
+ *   (`now - at <= windowMs`).
+ * @returns false when `sig` has no entry or its entry is older than
+ *   `windowMs`; true otherwise — including when the stored timestamp is later
+ *   than `now`, because the difference is then negative.
+ */
+export function isRecentTimeoutDuplicate(
+  timeouts: ReadonlyMap<string, number>,
+  sig: string,
+  now: number,
+  windowMs: number,
+): boolean {
+  const at = timeouts.get(sig)
+  return at != null && now - at <= windowMs
+}

package/telegram-plugin/gateway/prefix-warmup.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * Prefix-cache warmup turn — opt-in cold-start TTFO optimization.
  *
- * Per cold-start TTFO RFC (docs/rfcs/cold-start-ttfo.md, PR #1589),
+ * Per cold-start TTFO RFC (reference/rfcs/cold-start-ttfo.md, PR #1589),
  * Option A. On every bridge-up after a restart, synthesize a synthetic
  * inbound (`__WARMUP_PING__`, meta.source="warmup") and deliver it to
  * the just-registered bridge. Claude processes the message — paying

package/telegram-plugin/gateway/webhook-ingest-server.test.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /**
  * Tests for the peercred-gated webhook ingest UDS server
- * (RFC docs/rfcs/webhook-via-gateway-socket.md).
+ * (RFC reference/rfcs/webhook-via-gateway-socket.md).
  *
  * MUST run under `bun test`: the peer-credential gate calls
  * `getPeerCred` (bun:ffi getsockopt SO_PEERCRED), which returns null

package/telegram-plugin/gateway/webhook-ingest-server.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * Webhook ingest UDS server (RFC docs/rfcs/webhook-via-gateway-socket.md).
+ * Webhook ingest UDS server (RFC reference/rfcs/webhook-via-gateway-socket.md).
  *
  * A dedicated, peercred-gated Unix socket the host-side web receiver
  * forwards verified webhook events to. It is deliberately SEPARATE from

package/telegram-plugin/hooks/subagent-tracker-pretool.mjs CHANGED Viewed

@@ -20,7 +20,7 @@
  *        writing to a registry.db nobody read, leaving every bg sub-agent
  *        invisible to the watcher. Surfaced by
  *        bg-sub-agent-dispatch-dm.test.ts; see RFC Phase 2 §Bug 2 in
- *        reference/sub-agent-visibility-rfc.md.
+ *        reference/rfcs/sub-agent-visibility.md.
  *     3. process.cwd() (legacy fallback for ad-hoc invocations).
  *
  * Performance: the actual DB write is deferred via setImmediate (Node 22+

package/telegram-plugin/interrupt-marker.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * `!`-prefix interrupt marker — closes #575 / part of `reference/steer-or-queue-mid-flight.md`.
+ * `!`-prefix interrupt marker — closes #575 / part of `reference/jobs/steer-or-queue-mid-flight.md`.
  *
  * The product contract: when the user starts a Telegram message with
  * `!`, they're saying "drop what you're doing and handle this