npm - switchroom - Versions diffs - 0.15.36 → 0.15.38 - Mend

switchroom 0.15.36 → 0.15.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

package/telegram-plugin/gateway/gateway.ts CHANGED Viewed

@@ -67,6 +67,13 @@ import { DeferredDoneReactions } from '../reaction-defer.js'
 import { createWorkerActivityFeed, isWorkerActivityFeedEnabled } from '../worker-activity-feed.js'
 import { formatTurnLifecycle, detectStatusSurfaceDegraded } from './status-surface-log.js'
 import { parseSourceMessageId } from './source-message-id.js'
+import {
+  permissionSignature,
+  timeoutDenyMessage,
+  duplicateDenyMessage,
+  isRecentTimeoutDuplicate,
+} from './permission-timeout.js'
+import { pickRecoveredPermissionOrigin } from './permission-card-origin.js'
 import { isTelegramReplyTool, isTelegramSurfaceTool } from '../tool-names.js'
 import { appendActivityLabel, renderActivityFeedWithNested } from '../tool-activity-summary.js'
 import { toolLabel } from '../tool-labels.js'
@@ -487,7 +494,10 @@ import {
   listGrantsViaBroker,
   revokeGrantViaBroker,
 } from '../../src/vault/broker/client.js'
-import { emitLinearAgentActivity, createLinearIssue } from './linear-activity.js'
+import { emitLinearAgentActivity, createLinearIssue, buildLinearAuthDeadMessage, brokerRefreshIO, type LinearAuthDeadReason } from './linear-activity.js'
+import { runLinearAgentSetup } from './linear-setup.js'
+import { runLinearAuthCheck } from './linear-auth-watch.js'
+import { performLinearRefresh } from '../../src/linear/oauth-refresh.js'
 import {
   approvalRequest,
   approvalConsume,
@@ -560,7 +570,7 @@ const INBOX_DIR = join(STATE_DIR, 'inbox')
  *     different agent's container from inside our own (no docker.sock).
  *   - else (v0.6 legacy non-docker path, scheduled for removal in
  *     Phase 3 of the host-control daemon rollout — see
- *     `docs/rfcs/host-control-daemon.md`): detached `systemctl --user
+ *     `reference/rfcs/host-control-daemon.md`): detached `systemctl --user
  *     restart` of the two units. This branch is never reached on
  *     v0.7+ docker installs (the `isDocker` guard above takes the
  *     docker branch); only callable on legacy systemd hosts that
@@ -1898,7 +1908,7 @@ type CurrentTurn = {
   // #1675 (over-ping safety net): wall-clock ms of the first reply
   // this turn that landed with `disable_notification: false` (a real
   // device ping). The conversational-pacing contract
-  // (`reference/conversational-pacing.md` beat 5) says EXACTLY ONE
+  // (`reference/rfcs/conversational-pacing.md` beat 5) says EXACTLY ONE
   // ping per turn — the final answer. When the model violates that
   // (sends a substantive answer pinged + a wrap-up "Delivered…" or
   // meta-narration also pinged), subsequent reply calls with
@@ -3277,6 +3287,29 @@ function resolvePermissionCardTargets(): Array<{ chatId: string; threadId: numbe
   if (turn != null) {
     return [{ chatId: turn.sessionChatId, threadId: turn.sessionThreadId }]
   }
+  // currentTurn was nulled — most commonly because the orphaned-reply backstop
+  // force-closed the turn while the single claude session kept running and then
+  // hit a permission-gated tool (e.g. a retry after a first card auto-denied:
+  // marko Rentals-budget, 2026-06-17). Recover the originating topic from the
+  // recently-started turn registry so the card lands where the operator is
+  // working, instead of fanning out to operator DMs (thread-stripped) where it
+  // sits unseen until the 10-min TTL auto-denies it. Kill switch (=0) restores
+  // the legacy DM fan-out.
+  if (PERMISSION_CARD_ORIGIN_RECOVERY_ENABLED) {
+    const recovered = pickRecoveredPermissionOrigin(
+      recentTurnsById.values(),
+      Date.now(),
+      PERMISSION_CARD_ORIGIN_MAX_AGE_MS,
+    )
+    if (recovered != null) {
+      process.stderr.write(
+        `telegram gateway: permission-card origin recovered from recent turn ` +
+        `chat=${recovered.chatId} thread=${recovered.threadId ?? '-'} ` +
+        `(currentTurn was null — force-closed turn)\n`,
+      )
+      return [recovered]
+    }
+  }
   const sg = resolveAgentSupergroupChatId()
   const topic = resolveAgentOutboundTopic({
     kind: 'permission',
@@ -3696,6 +3729,39 @@ const STATUS_QUERY_RE = /^\s*status\??\s*$/i
 const PERMISSION_REPLY_RE = /^\s*(y|yes|n|no)\s+([a-km-z]{5})\s*$/i
 const pendingPermissions = new Map<string, { tool_name: string; description: string; input_preview: string; startedAt: number }>()
 const PERMISSION_TTL_MS = 10 * 60_000
+// No-repeat-on-timeout (marko Rentals-budget loop, 2026-06-17). When a card
+// auto-denies on TTL, the model is told it was a TIMEOUT (not a denial) so it
+// doesn't retry; if it retries the identical (tool, input) anyway while the
+// operator is still absent, we short-circuit-deny it WITHOUT posting a second
+// card. `permissionTimeoutSignatures` maps signature → last-timeout epoch ms;
+// it is cleared the moment the operator is active again (answers any card, or
+// sends a message), so suppression only ever holds during genuine absence.
+// Kill switch: SWITCHROOM_PERMISSION_NO_REPEAT=0.
+const PERMISSION_NO_REPEAT_ENABLED =
+  process.env.SWITCHROOM_PERMISSION_NO_REPEAT !== '0'
+// Safety cap on how long a timed-out signature suppresses retries even if the
+// operator-activity reset is somehow missed; the reset is the primary bound.
+const PERMISSION_DUPLICATE_WINDOW_MS = 60 * 60_000
+const permissionTimeoutSignatures = new Map<string, number>()
+function clearPermissionTimeoutSuppression(reason: string): void { // empties the no-repeat suppression map; `reason` is only used in the log line
+  if (permissionTimeoutSignatures.size === 0) return // nothing suppressed → no clear and no log noise
+  const n = permissionTimeoutSignatures.size // capture the count before clear() zeroes it
+  permissionTimeoutSignatures.clear()
+  process.stderr.write(
+    `telegram gateway: permission no-repeat suppression cleared (${n} sig(s)) — ${reason}\n`,
+  )
+}
+// Permission/approval-card origin recovery (marko Rentals-budget, 2026-06-17).
+// When `currentTurn` was force-closed by the orphaned-reply backstop but the
+// claude session kept running into a permission-gated tool, recover the card's
+// origin topic from the recently-started turn registry instead of fanning out
+// to operator DMs. Kill switch: SWITCHROOM_PERMISSION_CARD_ORIGIN_RECOVERY=0.
+const PERMISSION_CARD_ORIGIN_RECOVERY_ENABLED =
+  process.env.SWITCHROOM_PERMISSION_CARD_ORIGIN_RECOVERY !== '0'
+// A backstop-closed turn is seconds-to-minutes old; bound recovery so a
+// long-idle agent's stale registry entry can't mis-route a much later
+// permission into an old topic (it falls back to the operator-DM fan-out).
+const PERMISSION_CARD_ORIGIN_MAX_AGE_MS = 30 * 60_000
 // #1977 — single-tap correlation for the durable "🔁 Always allow"
 // flow. When the gateway dispatches a `config_propose_edit` to hostd in
@@ -4302,23 +4368,46 @@ const pendingStateReaper = setInterval(() => {
       // permission (or takes a fallback). Routed through
       // dispatchPermissionVerdict so it's buffered+redelivered too if
       // the bridge is also offline at sweep time.
-      dispatchPermissionVerdict({ type: 'permission', requestId: k, behavior: 'deny' })
+      // Carry a TIMEOUT reason to the model (claude renders it as "…the user
+      // said: …") so it can tell a timeout from a real denial and not retry
+      // the identical call — the duplicate-card loop this series closes.
+      const timeoutMinutes = Math.round(PERMISSION_TTL_MS / 60000)
+      dispatchPermissionVerdict({
+        type: 'permission',
+        requestId: k,
+        behavior: 'deny',
+        message: timeoutDenyMessage(timeoutMinutes),
+      })
       // The auto-deny un-parks the suspended turn — flip 🙏 → working so
       // it doesn't sit on the awaiting glyph (or stall) after the timeout.
       resumeReactionAfterVerdict()
       postPermissionResumeMessage({
         behavior: 'deny',
         action: naturalAction(v.tool_name, v.input_preview),
-        timeoutMinutes: Math.round(PERMISSION_TTL_MS / 60000),
+        timeoutMinutes,
       })
+      // Remember this (tool, input) timed out so an immediate identical retry
+      // (while the operator is still absent) is short-circuited without a
+      // second card. Cleared on operator activity.
+      if (PERMISSION_NO_REPEAT_ENABLED) {
+        permissionTimeoutSignatures.set(
+          permissionSignature(v.tool_name, v.input_preview),
+          now,
+        )
+      }
       process.stderr.write(
         `telegram gateway: permission TTL expired — auto-deny request=${k} ` +
         `tool=${v.tool_name} (no operator response in ` +
-        `${Math.round(PERMISSION_TTL_MS / 60000)}m)\n`,
+        `${timeoutMinutes}m)\n`,
       )
       pendingPermissions.delete(k)
     }
   }
+  // Drop no-repeat suppression entries past the safety-cap window (the primary
+  // bound is the operator-activity reset; this just keeps the map from growing).
+  for (const [sig, at] of permissionTimeoutSignatures) {
+    if (now - at > PERMISSION_DUPLICATE_WINDOW_MS) permissionTimeoutSignatures.delete(sig)
+  }
   for (const [k, v] of vaultPassphraseCache) {
     if (now > v.expiresAt) vaultPassphraseCache.delete(k)
   }
@@ -5800,7 +5889,7 @@ const ipcServer: IpcServer = createIpcServer({
     // (5-min cooldown per agent), and skipped if no boot chat resolves.
     // Claude responds NO_REPLY per inline instruction; existing
     // silent-marker suppression at gateway.ts:5906 swallows the
-    // outbound. See docs/rfcs/cold-start-ttfo.md Option A.
+    // outbound. See reference/rfcs/cold-start-ttfo.md Option A.
     if (client.agentName != null) {
       maybeFireWarmup({
         selfAgent: client.agentName,
@@ -6094,6 +6183,30 @@ const ipcServer: IpcServer = createIpcServer({
         return
       }
     }
+    // No-repeat short-circuit: this exact (tool, input) already timed out and
+    // the operator hasn't been active since (the suppression map is cleared on
+    // any operator activity). Deny it WITH a timeout-duplicate reason and post
+    // NO second card — the model retrying into an absent operator is the loop
+    // this closes. The turn still unblocks (deny verdict), and a returning
+    // operator resets suppression so the next ask gets a fresh card.
+    if (PERMISSION_NO_REPEAT_ENABLED) {
+      const sig = permissionSignature(toolName, inputPreview)
+      if (isRecentTimeoutDuplicate(permissionTimeoutSignatures, sig, Date.now(), PERMISSION_DUPLICATE_WINDOW_MS)) {
+        // no-card-verdict: no card was posted and the turn was never parked on
+        // the awaiting glyph, so we omit the resume-reaction flip / resume msg.
+        dispatchPermissionVerdict({
+          type: 'permission',
+          requestId,
+          behavior: 'deny',
+          message: duplicateDenyMessage,
+        })
+        process.stderr.write(
+          `telegram gateway: permission no-repeat short-circuit — duplicate of a ` +
+          `timed-out request tool=${toolName} request=${requestId} (no card posted)\n`,
+        )
+        return
+      }
+    }
     pendingPermissions.set(requestId, { tool_name: toolName, description, input_preview: inputPreview, startedAt: Date.now() })
     // Natural-language card body — a plain sentence ("Gymbro wants to
     // edit: supplement-log.md" + a why-line), never a raw tool id.
@@ -6583,7 +6696,7 @@ const ipcServer: IpcServer = createIpcServer({
     const source = typeof msg.inbound.meta?.source === 'string'
       ? msg.inbound.meta.source
       : 'unknown'
-    // Cheap-cron (docs/rfcs/cheap-cron-sessions.md §3.3): a Tier-1 fire
+    // Cheap-cron (reference/rfcs/cheap-cron-sessions.md §3.3): a Tier-1 fire
     // carries meta.session='cron' → route to the derived `<agent>-cron`
     // bridge (a 2nd interactive Sonnet session in the same container).
     // Every other fire (and all of today's callers) routes to the agent
@@ -6883,6 +6996,7 @@ const ALLOWED_TOOLS = new Set([
   'request_secret',
   'linear_agent_activity',
   'linear_create_issue',
+  'linear_agent_setup',
 ])
 async function executeToolCall(tool: string, args: Record<string, unknown>): Promise<unknown> {
@@ -6932,6 +7046,8 @@ async function executeToolCall(tool: string, args: Record<string, unknown>): Pro
       return executeLinearAgentActivity(args)
     case 'linear_create_issue':
       return executeLinearCreateIssue(args)
+    case 'linear_agent_setup':
+      return executeLinearAgentSetup(args)
     default:
       throw new Error(`unknown tool: ${tool}`)
   }
@@ -6963,12 +7079,66 @@ async function executeSendChecklist(args: Record<string, unknown>): Promise<{ co
   return { content: [{ type: 'text', text: `checklist sent (id: ${sent.message_id})` }] }
 }
+/**
+ * Per-(agent,reason) cooldown for the Linear-auth-dead operator alert. The
+ * triggering 401 recurs on every Linear call once the token expires, so
+ * without a cooldown the operator would be paged on every capture/activity.
+ * One alert per reason per window is enough to surface the action item.
+ */
+const linearAuthAlertLast = new Map<string, number>()
+const LINEAR_AUTH_ALERT_COOLDOWN_MS = 6 * 60 * 60 * 1000
+/**
+ * Surface an un-healable Linear auth failure (no refresh bundle / revoked
+ * refresh token) to the operator as a Telegram message — not just a gateway
+ * log line. Deduped per (agent,reason) via `linearAuthAlertLast`, and disabled
+ * entirely when SWITCHROOM_LINEAR_AUTH_ALERT=0.
+ * Best-effort: a failed send never affects the agent's turn.
+ *
+ * The function returns immediately; the send runs in a detached async task
+ * whose errors are caught and discarded. The alert goes to the first entry of
+ * `loadAccess().allowFrom` only; when that entry is missing or empty nothing
+ * is sent and the cooldown is not stamped.
+ *
+ * NOTE(review): the cooldown is checked synchronously but stamped only after
+ * the awaited send, so calls that arrive while a send is still in flight all
+ * pass the check and may each post an alert — confirm this is acceptable.
+ * NOTE(review): "stamp only after a successful send" holds only if
+ * `swallowingApiCall` rejects on failure; if it resolves instead (as its name
+ * suggests) the cooldown is stamped after failed sends too — confirm.
+ *
+ * @param info - agent name and the un-healable reason; `info.detail` is
+ *   accepted but not used by this function.
+ */
+function notifyLinearAuthDead(info: { agent: string; reason: LinearAuthDeadReason; detail: string }): void {
+  if (process.env.SWITCHROOM_LINEAR_AUTH_ALERT === '0') return
+  const key = `${info.agent}:${info.reason}`
+  const now = Date.now() // taken before the send; this is the value stamped into the cooldown map
+  const last = linearAuthAlertLast.get(key)
+  if (last != null && now - last < LINEAR_AUTH_ALERT_COOLDOWN_MS) return
+  void (async () => {
+    try {
+      const chatId = loadAccess().allowFrom[0] // first allowlisted entry only
+      if (!chatId) return
+      const threadId = topicForRecipient({
+        recipientChatId: chatId,
+        resolvedTopic: resolveAgentOutboundTopic({ kind: 'linear-auth' }) ?? chatThreadMap.get(chatId),
+        supergroupChatId: resolveAgentSupergroupChatId(),
+      })
+      const text = buildLinearAuthDeadMessage(info.agent, info.reason)
+      await swallowingApiCall(
+        () =>
+          bot.api.sendMessage(chatId, text, {
+            parse_mode: 'HTML',
+            ...(threadId != null ? { message_thread_id: threadId } : {}),
+          }),
+        { chat_id: chatId, verb: 'linearAuthDead' },
+      )
+      // Stamp the cooldown only after a successful send so a transient
+      // Telegram failure doesn't burn the 6h window (the 401 recurs and will
+      // retry the page on the next Linear call).
+      linearAuthAlertLast.set(key, now)
+      process.stderr.write(`telegram gateway: linear auth-dead alert sent agent=${info.agent} reason=${info.reason}\n`)
+    } catch {
+      /* best-effort */
+    }
+  })()
+}
 async function executeLinearAgentActivity(args: Record<string, unknown>): Promise<{ content: Array<{ type: string; text: string }> }> {
-  return emitLinearAgentActivity(args)
+  return emitLinearAgentActivity(args, { onAuthUnrecoverable: notifyLinearAuthDead })
 }
 async function executeLinearCreateIssue(args: Record<string, unknown>): Promise<{ content: Array<{ type: string; text: string }> }> {
-  return createLinearIssue(args)
+  return createLinearIssue(args, { onAuthUnrecoverable: notifyLinearAuthDead })
+}
+async function executeLinearAgentSetup(args: Record<string, unknown>): Promise<{ content: Array<{ type: string; text: string }> }> {
+  return runLinearAgentSetup(args)
 }
 async function executeUpdateChecklist(args: Record<string, unknown>): Promise<{ content: Array<{ type: string; text: string }> }> {
@@ -7088,7 +7258,7 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
   let disableNotification = args.disable_notification === true
   // #1675 over-ping safety net. The conversational-pacing contract
-  // (`reference/conversational-pacing.md` beat 5) says EXACTLY ONE
+  // (`reference/rfcs/conversational-pacing.md` beat 5) says EXACTLY ONE
   // device ping per turn — the final answer. The model sometimes
   // violates this by sending a substantive answer pinged + a wrap-up
   // ("Delivered all three steps…", "Sent.", or meta-narration) ALSO
@@ -10194,7 +10364,7 @@ function handleSessionEvent(ev: SessionEvent): void {
             //   only fires for text-only turns where the stream IS the
             //   answer): PING. The user reached for the agent and the
             //   model produced an answer; per beat 5 of
-            //   `reference/conversational-pacing.md` the final answer MUST
+            //   `reference/rfcs/conversational-pacing.md` the final answer MUST
             //   ping the device exactly once. Without this carve-out, a
             //   short text-only turn ("on it" being the whole response)
             //   lands silently and the user has no notification to know
@@ -11520,6 +11690,11 @@ async function handleInbound(
     return
   }
+  // A real message from an allowed sender (gate passed) ⇒ the operator is
+  // present, so reset any no-repeat suppression: the next time the agent asks
+  // for something that timed out earlier, they should see a fresh card.
+  clearPermissionTimeoutSuppression('operator inbound')
   // Capture wall-clock receive time for inbound_ack metric (#203).
   // Must be after gate() so early-exit paths (drop/pair) don't skew the delta.
   //
@@ -11639,7 +11814,7 @@ async function handleInbound(
   }
   // `!`-prefix interrupt (#575). Closes
-  // `reference/steer-or-queue-mid-flight.md`'s correction path.
+  // `reference/jobs/steer-or-queue-mid-flight.md`'s correction path.
   //
   // Behavior:
   //   1. SIGINT the agent service. This kills any in-flight turn —
@@ -13096,7 +13271,7 @@ function resolveBootChatId(
   // operator sees lifecycle events in a predictable lane instead of
   // chat-root. For fleet-mode / DM agents the helper returns undefined
   // → behavior unchanged (lands at chat-root as today). PR4b of
-  // supergroup-mode rollout (docs/rfcs/supergroup-mode.md).
+  // supergroup-mode rollout (reference/rfcs/supergroup-mode.md).
   const supergroupBootTopic = resolveAgentOutboundTopic({ kind: 'boot' })
   const bootSupergroup = resolveAgentSupergroupChatId()
   // The boot topic is valid only in the agent's supergroup — attach it per
@@ -13179,6 +13354,46 @@ function resolveAgentSupergroupChatId(): string | undefined {
   }
 }
+/** Whether THIS agent has `channels.telegram.linear_agent.enabled`. Used by the
+ *  proactive Linear-auth watch to skip agents that aren't Linear actors.
+ *
+ *  "This agent" is the name in `SWITCHROOM_AGENT_NAME`. Fails closed: returns
+ *  `false` when that variable is unset or empty, when the config has no entry
+ *  for the agent, when the flag is anything other than literal `true`, or
+ *  when loading or resolving the config throws.
+ *
+ *  @returns `true` only when the resolved config explicitly enables the
+ *    Linear agent. */
+function isSelfLinearAgentEnabled(): boolean {
+  const agentName = process.env.SWITCHROOM_AGENT_NAME
+  if (!agentName) return false
+  try {
+    const cfg = loadSwitchroomConfig()
+    const rawAgent = cfg.agents?.[agentName]
+    if (!rawAgent) return false
+    const resolved = resolveAgentConfig(cfg.defaults, cfg.profiles, rawAgent)
+    const la = (resolved.channels?.telegram as { linear_agent?: { enabled?: boolean } } | undefined)?.linear_agent
+    return la?.enabled === true
+  } catch {
+    return false
+  }
+}
+/**
+ * One proactive Linear-auth check for this agent (boot + interval). Reads the
+ * refresh bundle via the broker; missing → operator alert, near-expiry →
+ * proactive rotate, revoked → operator alert. Best-effort, never throws.
+ * Disabled with SWITCHROOM_LINEAR_AUTH_WATCH_POLL_MS=0.
+ *
+ * Returns without doing anything when `SWITCHROOM_AGENT_NAME` is unset or
+ * empty. The check itself is delegated to `runLinearAuthCheck`; this wrapper
+ * only wires in the broker I/O, the refresh call, the operator alert and the
+ * stderr log sink, then logs any status other than `'disabled'` or `'fresh'`.
+ *
+ * NOTE(review): nothing in this function catches, so "never throws" depends
+ * on `brokerRefreshIO` and `runLinearAuthCheck` not throwing/rejecting —
+ * confirm. The scheduled callers attach their own `.catch`.
+ */
+async function runLinearAuthWatch(): Promise<void> {
+  const agent = process.env.SWITCHROOM_AGENT_NAME
+  if (!agent) return
+  const io = brokerRefreshIO(agent)
+  const status = await runLinearAuthCheck({
+    agent,
+    linearEnabled: isSelfLinearAgentEnabled, // passed as a callback, not evaluated here
+    readBundle: io.readBundle,
+    refresh: () => performLinearRefresh(io),
+    onAuthDead: notifyLinearAuthDead,
+    log: (s) => process.stderr.write(s),
+  })
+  if (status !== 'disabled' && status !== 'fresh') {
+    process.stderr.write(`telegram gateway: linear-auth-watch agent=${agent} status=${status}\n`)
+  }
+}
 /**
  * Stamp a user-facing restart reason into the clean-shutdown marker
  * (same file the SIGTERM handler writes to and the next session greeting
@@ -14154,7 +14369,7 @@ async function buildLiveProbeRows(agentName: string): Promise<StatusProbeRow[]>
     // Render order matches the boot card's PROBE_KEYS so the two
     // surfaces tell the same story in the same order.
     const order = ['account', 'agent', 'gateway', 'quota', 'hindsight',
-      'scheduler', 'broker', 'kernel', 'skills'] as const
+      'scheduler', 'broker', 'kernel', 'skills', 'connections'] as const
     for (const k of order) {
       const r = probes[k]
       if (!r) continue
@@ -15034,6 +15249,8 @@ async function handlePermissionSlash(ctx: Context, behavior: 'allow' | 'deny'):
     )
     return
   }
+  // Operator answered via slash ⇒ present; reset no-repeat suppression.
+  clearPermissionTimeoutSuppression('operator answered via /approve|/deny')
   // Forward to connected bridges — same IPC the button handler uses.
   dispatchPermissionVerdict({ type: 'permission', requestId: request_id, behavior })
   resumeReactionAfterVerdict()
@@ -19540,6 +19757,9 @@ bot.on('callback_query:data', async ctx => {
   // scopes (resolveTimeBox → null) and the disabled tier (ttl<=0) stay truly
   // once. The verdict is still dispatched WITHOUT a `rule` (below), so the
   // bridge never caches it untimed — the window lives only in scopedGrants.
+  // Operator tapped a verdict ⇒ they are present; reset no-repeat suppression
+  // so a later identical ask is shown fresh rather than silently short-circuited.
+  clearPermissionTimeoutSuppression('operator answered a permission card')
   const pd = pendingPermissions.get(request_id)
   const resumeAction = pd ? naturalAction(pd.tool_name, pd.input_preview) : ''
   const scopedTtl = scopedApprovalTtlMs()
@@ -20819,6 +21039,7 @@ async function shutdown(signal: string): Promise<void> {
   pendingReauthFlows.clear()
   pendingVaultOps.clear()
   pendingPermissions.clear()
+  permissionTimeoutSignatures.clear()
   try {
     await ipcServer.close()
@@ -21401,6 +21622,24 @@ void (async () => {
           }, QUOTA_WATCH_POLL_MS).unref()
         }
+        // Proactive Linear-auth watch (FIX 3): catch a dead/missing/near-expiry
+        // Linear bundle BEFORE the agent needs Linear, instead of only on a live
+        // 401. Boot run (delayed so the broker connection settles) + interval.
+        // SWITCHROOM_LINEAR_AUTH_WATCH_POLL_MS=0 disables it.
+        const LINEAR_AUTH_WATCH_POLL_MS = Number(process.env.SWITCHROOM_LINEAR_AUTH_WATCH_POLL_MS ?? 6 * 60 * 60_000) // default 6h; an empty value parses to 0 and a non-numeric one to NaN
+        if (LINEAR_AUTH_WATCH_POLL_MS > 0) { // 0, negative and NaN all fail this test, so they disable the watch too
+          setTimeout(() => {
+            void runLinearAuthWatch().catch((err) => {
+              process.stderr.write(`telegram gateway: linear-auth-watch initial run failed: ${err}\n`)
+            })
+          }, 35_000) // one-shot boot run after 35s; unlike the interval below this timer is not unref()'d
+          setInterval(() => {
+            void runLinearAuthWatch().catch((err) => {
+              process.stderr.write(`telegram gateway: linear-auth-watch scheduled run failed: ${err}\n`)
+            })
+          }, LINEAR_AUTH_WATCH_POLL_MS).unref() // unref: the interval on its own does not keep the process alive
+        }
         // Restart-watchdog: poll systemd's NRestarts for the agent unit.
         // When the count ticks up without a corresponding restart-pending
         // marker (= user-initiated /restart), emit an operator event.

package/telegram-plugin/gateway/grant-restart.ts CHANGED Viewed

@@ -6,7 +6,7 @@
  * turn-deferred-vs-now — unit-tests without gateway.ts's boot side-effects
  * (same pattern as scoped-approval.ts / admin-commands/index.ts).
  *
- * Contract (reference/access-model.md): the restart only ever follows an
+ * Contract (reference/rfcs/access-model.md): the restart only ever follows an
  * operator-approved, single-agent, additive `tools.allow` edit, and only
  * ever bounces the CALLER's own agent — never a peer, never fleet-wide.
  */

package/telegram-plugin/gateway/inbound-delivery-machine-dispatch.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * InboundDeliveryStateMachine — DISPATCH (Phase 2b PR 3a, bridgeUp cutover).
  *
- * Per RFC `docs/rfcs/inbound-delivery-state-machine.md`, the state
+ * Per RFC `reference/rfcs/inbound-delivery-state-machine.md`, the state
  * machine is pure: `transition(state, event) → { state', effects[] }`.
  * The gateway's job is to (a) emit events at the right moments and
  * (b) execute the returned effects against real I/O. This module owns

package/telegram-plugin/gateway/inbound-delivery-machine-shadow.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * InboundDeliveryStateMachine — SHADOW MODE wiring (Phase 2b PR 2).
  *
- * Per RFC `docs/rfcs/inbound-delivery-state-machine.md` Phase 2b PR 2:
+ * Per RFC `reference/rfcs/inbound-delivery-state-machine.md` Phase 2b PR 2:
  * the state machine runs ALONGSIDE the existing imperative gateway
  * code, recording predicted effects to a structured trace. Behavior
  * is unchanged — every existing code path still executes the actual

package/telegram-plugin/gateway/inbound-delivery-machine.ts CHANGED Viewed

@@ -2,7 +2,7 @@
  * InboundDeliveryStateMachine — pure transition function for the
  * gateway's inbound→bridge→outbound pipeline.
  *
- * Per `docs/rfcs/inbound-delivery-state-machine.md` (RFC merged in
+ * Per `reference/rfcs/inbound-delivery-state-machine.md` (RFC merged in
  * PR #1576): the gateway's delivery state was implicit and scattered
  * across 8+ pieces of mutable state. The wedge cluster of 2026-05-19
  * (9 PRs in 36h all patching variants of "inbound stranded → 5-min

package/telegram-plugin/gateway/interrupt-defer.ts CHANGED Viewed

@@ -3,7 +3,7 @@
 // A `!`-prefix interrupt SIGINTs the agent's in-flight turn (tmux C-c) and
 // then resumes with the replacement body as a fresh turn. Firing the SIGINT
 // the instant `!` arrives can land mid-tool-call — a C-c during a Write or a
-// Bash leaves the tool's work half-done. `reference/steer-or-queue-mid-flight.md`
+// Bash leaves the tool's work half-done. `reference/jobs/steer-or-queue-mid-flight.md`
 // names this exact anti-pattern: "Mid-tool-call is not 'amend time.'"
 //
 // We can't pause claude's internal loop (the unmodified-CLI constraint — the

package/telegram-plugin/gateway/ipc-protocol.ts CHANGED Viewed

@@ -38,6 +38,18 @@ export interface PermissionEvent {
    * (`mcp__<server>__*`).
    */
   rule?: string;
+  /**
+   * Optional human-readable reason for the verdict, surfaced to the model
+   * verbatim by claude's permission channel as "…the user said: ${message}".
+   * Only set on `deny`. switchroom uses it to make a TIMEOUT auto-deny (no
+   * operator response within the TTL) distinguishable from a deliberate
+   * operator denial — otherwise both render as the generic "Denied" and the
+   * model retries the identical call, re-raising an identical card 10 min
+   * later (marko Rentals-budget loop, 2026-06-17). When absent, claude falls
+   * back to its default "Denied", so this degrades safely on any claude that
+   * ignores the field.
+   */
+  message?: string;
 }
 export interface StatusEvent {

package/telegram-plugin/gateway/linear-activity.ts CHANGED Viewed

@@ -24,6 +24,37 @@ import { performLinearRefresh, type RefreshIO } from '../../src/linear/oauth-ref
 export const LINEAR_GRAPHQL_ENDPOINT = 'https://api.linear.app/graphql'
+/** The two operator-action reasons a Linear 401 can't self-heal. */
+export type LinearAuthDeadReason = 'no_bundle' | 'revoked'
+/** Minimal HTML-escape (Telegram parse_mode: 'HTML'). Kept local so the
+ *  message builder is self-contained + unit-testable without reaching into a
+ *  gateway-only escaper (the bug that shipped the first cut of this alert).
+ *
+ *  Replaces `&`, `<` and `>` with their entities. `&` is handled first so the
+ *  ampersands introduced by the `&lt;` / `&gt;` replacements are not escaped a
+ *  second time. Quotes are left untouched, so the result is meant for element
+ *  text, not for attribute values.
+ *
+ *  @param s - raw text to embed in an HTML-formatted message.
+ *  @returns `s` with `&`, `<` and `>` escaped. */
+function escapeHtmlMin(s: string): string {
+  return s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;')
+}
+/**
+ * Build the operator-facing Telegram alert (HTML) for an un-healable Linear
+ * auth failure. Pure + self-escaping so it can be unit-tested directly. The
+ * gateway's `notifyLinearAuthDead` only handles dedup + transport.
+ *
+ * The agent name is HTML-escaped once and that escaped form is used at every
+ * interpolation point (heading, vault path, suggested command). The
+ * explanation is chosen by a two-way branch: `'no_bundle'` gets the
+ * missing-credentials text, and any other reason gets the revoked-token text.
+ *
+ * @param agent - agent name as configured; escaped before being embedded.
+ * @param reason - why the token cannot self-heal.
+ * @returns the full alert text, ready to send with `parse_mode: 'HTML'`.
+ */
+export function buildLinearAuthDeadMessage(agent: string, reason: LinearAuthDeadReason): string {
+  const a = escapeHtmlMin(agent)
+  const why =
+    reason === 'no_bundle'
+      ? `no refresh credentials are stored (<code>linear/${a}/oauth</code> is missing), so its daily-expiring token can't renew`
+      : `its Linear refresh token was revoked`
+  return (
+    `🔑 <b>Linear auth needs you</b>\n` +
+    `<b>${a}</b> can't reach Linear — ${why}. ` +
+    `Its access token will keep failing until you re-authorize.\n\n` +
+    `Re-auth (actor=app) then run <code>switchroom linear-agent setup --agent ${a} ` +
+    `--token … --refresh-token … --client-id … --client-secret …</code> on the host, ` +
+    `or ask me to walk you through it.`
+  )
+}
 export type LinearTokenResult =
   | { ok: true; token: string }
   | { ok: false; reason: 'denied' | 'unreachable' | 'not_found' | 'unknown' }
@@ -44,6 +75,14 @@ export interface LinearActivityDeps {
   defaultTeamId?: string
   /** Log sink — stderr in production. */
   log?: (line: string) => void
+  /** Invoked when a Linear 401 CANNOT self-heal because the situation needs
+   *  an operator to act: `no_bundle` (no refresh credentials were ever
+   *  stored — the silent-setup-failure case) or `revoked` (the refresh token
+   *  itself is dead). The gateway wires this to a deduped operator-facing
+   *  Telegram alert so a daily-expiring token stops failing invisibly. NOT
+   *  called for transient reasons (network/http_error/bad_response) — those
+   *  retry on their own. */
+  onAuthUnrecoverable?: (info: { agent: string; reason: LinearAuthDeadReason; detail: string }) => void
 }
 export type ToolTextResult = { content: Array<{ type: string; text: string }> }
@@ -106,6 +145,7 @@ async function linearPostWithRefresh(
   fetchImpl: typeof fetch,
   log: (s: string) => void,
   refreshIO?: (agent: string) => RefreshIO,
+  onAuthUnrecoverable?: (info: { agent: string; reason: LinearAuthDeadReason; detail: string }) => void,
 ): Promise<{ resp: Response; token: string }> {
   const post = (t: string) =>
     fetchImpl(LINEAR_GRAPHQL_ENDPOINT, {
@@ -125,7 +165,21 @@ async function linearPostWithRefresh(
         `telegram gateway: linear token REVOKED agent=${agent} — refresh token is dead; ` +
           `operator must re-authorize (linear-agent setup --refresh-token …)\n`,
       )
+      onAuthUnrecoverable?.({ agent, reason: 'revoked', detail: refreshed.detail })
+    } else if (refreshed.reason === 'no_bundle') {
+      // No refresh bundle was ever stored (the silent-setup-failure case):
+      // the access token expires ~daily and there is nothing to renew from.
+      // This is invisible in the gateway log alone — surface it to the
+      // operator so they can re-provision instead of the agent failing
+      // every day forever.
+      log(
+        `telegram gateway: linear token DEAD agent=${agent} — no refresh bundle stored ` +
+          `(linear/${agent}/oauth absent); operator must re-authorize\n`,
+      )
+      onAuthUnrecoverable?.({ agent, reason: 'no_bundle', detail: refreshed.detail })
     } else {
+      // Transient (network / http_error / bad_response): retries on its own,
+      // no operator action — log only, don't page.
       log(`telegram gateway: linear token refresh failed agent=${agent} reason=${refreshed.reason}\n`)
     }
     return { resp, token } // surface the original 401
@@ -206,6 +260,7 @@ export async function emitLinearAgentActivity(
       fetchImpl,
       log,
       deps.refreshIO,
+      deps.onAuthUnrecoverable,
     ))
   } catch (err) {
     return {
@@ -312,6 +367,7 @@ export async function createLinearIssue(
         fetchImpl,
         log,
         deps.refreshIO,
+        deps.onAuthUnrecoverable,
       )
       resp = out.resp
       activeToken = out.token