switchroom 0.15.36 → 0.15.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/dist/agent-scheduler/index.js +10 -9
  2. package/dist/auth-broker/index.js +9 -9
  3. package/dist/cli/autoaccept-poll.js +13 -7
  4. package/dist/cli/notion-write-pretool.mjs +9 -9
  5. package/dist/cli/switchroom.js +480 -217
  6. package/dist/cli/ui/index.html +87 -17
  7. package/dist/host-control/main.js +10 -10
  8. package/dist/vault/approvals/kernel-server.js +9 -9
  9. package/dist/vault/broker/server.js +9 -9
  10. package/package.json +1 -1
  11. package/profiles/_base/cron-session.sh.hbs +1 -1
  12. package/profiles/_base/start.sh.hbs +1 -1
  13. package/profiles/_shared/agent-self-service.md.hbs +25 -0
  14. package/skills/switchroom-manage/SKILL.md +1 -1
  15. package/skills/switchroom-runtime/SKILL.md +1 -1
  16. package/telegram-plugin/answer-stream.ts +1 -1
  17. package/telegram-plugin/bridge/bridge.ts +50 -1
  18. package/telegram-plugin/bridge/ipc-client.ts +4 -1
  19. package/telegram-plugin/bridge/tool-filter.ts +77 -0
  20. package/telegram-plugin/chat-lock.ts +1 -1
  21. package/telegram-plugin/credits-watch.ts +1 -1
  22. package/telegram-plugin/dist/bridge/bridge.js +60 -3
  23. package/telegram-plugin/dist/gateway/gateway.js +753 -207
  24. package/telegram-plugin/dist/server.js +64 -4
  25. package/telegram-plugin/gateway/auto-classify-mid-turn.ts +1 -1
  26. package/telegram-plugin/gateway/boot-card.ts +5 -1
  27. package/telegram-plugin/gateway/boot-probes.ts +62 -0
  28. package/telegram-plugin/gateway/cron-session.ts +1 -1
  29. package/telegram-plugin/gateway/gateway.ts +254 -15
  30. package/telegram-plugin/gateway/grant-restart.ts +1 -1
  31. package/telegram-plugin/gateway/inbound-delivery-machine-dispatch.ts +1 -1
  32. package/telegram-plugin/gateway/inbound-delivery-machine-shadow.ts +1 -1
  33. package/telegram-plugin/gateway/inbound-delivery-machine.ts +1 -1
  34. package/telegram-plugin/gateway/interrupt-defer.ts +1 -1
  35. package/telegram-plugin/gateway/ipc-protocol.ts +12 -0
  36. package/telegram-plugin/gateway/linear-activity.ts +56 -0
  37. package/telegram-plugin/gateway/linear-auth-watch.ts +102 -0
  38. package/telegram-plugin/gateway/linear-setup.ts +196 -0
  39. package/telegram-plugin/gateway/permission-card-origin.ts +62 -0
  40. package/telegram-plugin/gateway/permission-timeout.ts +70 -0
  41. package/telegram-plugin/gateway/prefix-warmup.ts +1 -1
  42. package/telegram-plugin/gateway/webhook-ingest-server.test.ts +1 -1
  43. package/telegram-plugin/gateway/webhook-ingest-server.ts +1 -1
  44. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +1 -1
  45. package/telegram-plugin/interrupt-marker.ts +1 -1
  46. package/telegram-plugin/over-ping-safety-net.ts +1 -1
  47. package/telegram-plugin/scoped-approval.ts +1 -1
  48. package/telegram-plugin/secret-detect/vault-error.ts +1 -1
  49. package/telegram-plugin/silence-poke.ts +2 -2
  50. package/telegram-plugin/silent-reply-anchor.ts +1 -1
  51. package/telegram-plugin/slot-banner-driver.ts +1 -1
  52. package/telegram-plugin/startup-reset.ts +1 -1
  53. package/telegram-plugin/tests/boot-probes-connections.test.ts +66 -0
  54. package/telegram-plugin/tests/gateway-startup-reset.test.ts +1 -1
  55. package/telegram-plugin/tests/inbound-delivery-machine.test.ts +1 -1
  56. package/telegram-plugin/tests/linear-agent-activity.test.ts +77 -0
  57. package/telegram-plugin/tests/linear-agent-setup.test.ts +132 -0
  58. package/telegram-plugin/tests/linear-auth-watch.test.ts +79 -0
  59. package/telegram-plugin/tests/linear-create-issue.test.ts +3 -1
  60. package/telegram-plugin/tests/permission-card-origin.test.ts +97 -0
  61. package/telegram-plugin/tests/permission-card-routing.test.ts +23 -0
  62. package/telegram-plugin/tests/permission-no-repeat-wiring.test.ts +76 -0
  63. package/telegram-plugin/tests/permission-timeout.test.ts +87 -0
  64. package/telegram-plugin/tests/scoped-approval.test.ts +1 -1
  65. package/telegram-plugin/tests/silence-poke.test.ts +1 -1
  66. package/telegram-plugin/tests/tool-filter.test.ts +87 -0
  67. package/telegram-plugin/tests/turn-flush-safety.test.ts +1 -1
  68. package/telegram-plugin/turn-flush-safety.ts +1 -1
  69. package/telegram-plugin/uat/assertions.ts +1 -1
  70. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +1 -1
  71. package/telegram-plugin/uat/scenarios/fuzz-extended-dm.test.ts +1 -1
  72. package/telegram-plugin/uat/scenarios/jtbd-fast-ack-dm.test.ts +1 -1
  73. package/telegram-plugin/uat/scenarios/jtbd-fast-trivial-dm.test.ts +2 -2
  74. package/telegram-plugin/uat/scenarios/jtbd-forwarded-burst-dm.test.ts +1 -1
  75. package/telegram-plugin/uat/scenarios/jtbd-memory-survives-restart-dm.test.ts +1 -1
  76. package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +1 -1
  77. package/telegram-plugin/uat/scenarios/jtbd-reflective-status-reaction-dm.test.ts +1 -1
  78. package/telegram-plugin/uat/scenarios/jtbd-wake-audit-content-dm.test.ts +1 -1
@@ -67,6 +67,13 @@ import { DeferredDoneReactions } from '../reaction-defer.js'
67
67
  import { createWorkerActivityFeed, isWorkerActivityFeedEnabled } from '../worker-activity-feed.js'
68
68
  import { formatTurnLifecycle, detectStatusSurfaceDegraded } from './status-surface-log.js'
69
69
  import { parseSourceMessageId } from './source-message-id.js'
70
+ import {
71
+ permissionSignature,
72
+ timeoutDenyMessage,
73
+ duplicateDenyMessage,
74
+ isRecentTimeoutDuplicate,
75
+ } from './permission-timeout.js'
76
+ import { pickRecoveredPermissionOrigin } from './permission-card-origin.js'
70
77
  import { isTelegramReplyTool, isTelegramSurfaceTool } from '../tool-names.js'
71
78
  import { appendActivityLabel, renderActivityFeedWithNested } from '../tool-activity-summary.js'
72
79
  import { toolLabel } from '../tool-labels.js'
@@ -487,7 +494,10 @@ import {
487
494
  listGrantsViaBroker,
488
495
  revokeGrantViaBroker,
489
496
  } from '../../src/vault/broker/client.js'
490
- import { emitLinearAgentActivity, createLinearIssue } from './linear-activity.js'
497
+ import { emitLinearAgentActivity, createLinearIssue, buildLinearAuthDeadMessage, brokerRefreshIO, type LinearAuthDeadReason } from './linear-activity.js'
498
+ import { runLinearAgentSetup } from './linear-setup.js'
499
+ import { runLinearAuthCheck } from './linear-auth-watch.js'
500
+ import { performLinearRefresh } from '../../src/linear/oauth-refresh.js'
491
501
  import {
492
502
  approvalRequest,
493
503
  approvalConsume,
@@ -560,7 +570,7 @@ const INBOX_DIR = join(STATE_DIR, 'inbox')
560
570
  * different agent's container from inside our own (no docker.sock).
561
571
  * - else (v0.6 legacy non-docker path, scheduled for removal in
562
572
  * Phase 3 of the host-control daemon rollout — see
563
- * `docs/rfcs/host-control-daemon.md`): detached `systemctl --user
573
+ * `reference/rfcs/host-control-daemon.md`): detached `systemctl --user
564
574
  * restart` of the two units. This branch is never reached on
565
575
  * v0.7+ docker installs (the `isDocker` guard above takes the
566
576
  * docker branch); only callable on legacy systemd hosts that
@@ -1898,7 +1908,7 @@ type CurrentTurn = {
1898
1908
  // #1675 (over-ping safety net): wall-clock ms of the first reply
1899
1909
  // this turn that landed with `disable_notification: false` (a real
1900
1910
  // device ping). The conversational-pacing contract
1901
- // (`reference/conversational-pacing.md` beat 5) says EXACTLY ONE
1911
+ // (`reference/rfcs/conversational-pacing.md` beat 5) says EXACTLY ONE
1902
1912
  // ping per turn — the final answer. When the model violates that
1903
1913
  // (sends a substantive answer pinged + a wrap-up "Delivered…" or
1904
1914
  // meta-narration also pinged), subsequent reply calls with
@@ -3277,6 +3287,29 @@ function resolvePermissionCardTargets(): Array<{ chatId: string; threadId: numbe
3277
3287
  if (turn != null) {
3278
3288
  return [{ chatId: turn.sessionChatId, threadId: turn.sessionThreadId }]
3279
3289
  }
3290
+ // currentTurn was nulled — most commonly because the orphaned-reply backstop
3291
+ // force-closed the turn while the single claude session kept running and then
3292
+ // hit a permission-gated tool (e.g. a retry after a first card auto-denied:
3293
+ // marko Rentals-budget, 2026-06-17). Recover the originating topic from the
3294
+ // recently-started turn registry so the card lands where the operator is
3295
+ // working, instead of fanning out to operator DMs (thread-stripped) where it
3296
+ // sits unseen until the 10-min TTL auto-denies it. Kill switch (=0) restores
3297
+ // the legacy DM fan-out.
3298
+ if (PERMISSION_CARD_ORIGIN_RECOVERY_ENABLED) {
3299
+ const recovered = pickRecoveredPermissionOrigin(
3300
+ recentTurnsById.values(),
3301
+ Date.now(),
3302
+ PERMISSION_CARD_ORIGIN_MAX_AGE_MS,
3303
+ )
3304
+ if (recovered != null) {
3305
+ process.stderr.write(
3306
+ `telegram gateway: permission-card origin recovered from recent turn ` +
3307
+ `chat=${recovered.chatId} thread=${recovered.threadId ?? '-'} ` +
3308
+ `(currentTurn was null — force-closed turn)\n`,
3309
+ )
3310
+ return [recovered]
3311
+ }
3312
+ }
3280
3313
  const sg = resolveAgentSupergroupChatId()
3281
3314
  const topic = resolveAgentOutboundTopic({
3282
3315
  kind: 'permission',
@@ -3696,6 +3729,39 @@ const STATUS_QUERY_RE = /^\s*status\??\s*$/i
3696
3729
  const PERMISSION_REPLY_RE = /^\s*(y|yes|n|no)\s+([a-km-z]{5})\s*$/i
3697
3730
  const pendingPermissions = new Map<string, { tool_name: string; description: string; input_preview: string; startedAt: number }>()
3698
3731
  const PERMISSION_TTL_MS = 10 * 60_000
3732
+ // No-repeat-on-timeout (marko Rentals-budget loop, 2026-06-17). When a card
3733
+ // auto-denies on TTL, the model is told it was a TIMEOUT (not a denial) so it
3734
+ // doesn't retry; if it retries the identical (tool, input) anyway while the
3735
+ // operator is still absent, we short-circuit-deny it WITHOUT posting a second
3736
+ // card. `permissionTimeoutSignatures` maps signature → last-timeout epoch ms;
3737
+ // it is cleared the moment the operator is active again (answers any card, or
3738
+ // sends a message), so suppression only ever holds during genuine absence.
3739
+ // Kill switch: SWITCHROOM_PERMISSION_NO_REPEAT=0.
3740
+ const PERMISSION_NO_REPEAT_ENABLED =
3741
+ process.env.SWITCHROOM_PERMISSION_NO_REPEAT !== '0'
3742
+ // Safety cap on how long a timed-out signature suppresses retries even if the
3743
+ // operator-activity reset is somehow missed; the reset is the primary bound.
3744
+ const PERMISSION_DUPLICATE_WINDOW_MS = 60 * 60_000
3745
+ const permissionTimeoutSignatures = new Map<string, number>()
3746
+ function clearPermissionTimeoutSuppression(reason: string): void {
3747
+ if (permissionTimeoutSignatures.size === 0) return
3748
+ const n = permissionTimeoutSignatures.size
3749
+ permissionTimeoutSignatures.clear()
3750
+ process.stderr.write(
3751
+ `telegram gateway: permission no-repeat suppression cleared (${n} sig(s)) — ${reason}\n`,
3752
+ )
3753
+ }
3754
+ // Permission/approval-card origin recovery (marko Rentals-budget, 2026-06-17).
3755
+ // When `currentTurn` was force-closed by the orphaned-reply backstop but the
3756
+ // claude session kept running into a permission-gated tool, recover the card's
3757
+ // origin topic from the recently-started turn registry instead of fanning out
3758
+ // to operator DMs. Kill switch: SWITCHROOM_PERMISSION_CARD_ORIGIN_RECOVERY=0.
3759
+ const PERMISSION_CARD_ORIGIN_RECOVERY_ENABLED =
3760
+ process.env.SWITCHROOM_PERMISSION_CARD_ORIGIN_RECOVERY !== '0'
3761
+ // A backstop-closed turn is seconds-to-minutes old; bound recovery so a
3762
+ // long-idle agent's stale registry entry can't mis-route a much later
3763
+ // permission into an old topic (it falls back to the operator-DM fan-out).
3764
+ const PERMISSION_CARD_ORIGIN_MAX_AGE_MS = 30 * 60_000
3699
3765
 
3700
3766
  // #1977 — single-tap correlation for the durable "🔁 Always allow"
3701
3767
  // flow. When the gateway dispatches a `config_propose_edit` to hostd in
@@ -4302,23 +4368,46 @@ const pendingStateReaper = setInterval(() => {
4302
4368
  // permission (or takes a fallback). Routed through
4303
4369
  // dispatchPermissionVerdict so it's buffered+redelivered too if
4304
4370
  // the bridge is also offline at sweep time.
4305
- dispatchPermissionVerdict({ type: 'permission', requestId: k, behavior: 'deny' })
4371
+ // Carry a TIMEOUT reason to the model (claude renders it as "…the user
4372
+ // said: …") so it can tell a timeout from a real denial and not retry
4373
+ // the identical call — the duplicate-card loop this series closes.
4374
+ const timeoutMinutes = Math.round(PERMISSION_TTL_MS / 60000)
4375
+ dispatchPermissionVerdict({
4376
+ type: 'permission',
4377
+ requestId: k,
4378
+ behavior: 'deny',
4379
+ message: timeoutDenyMessage(timeoutMinutes),
4380
+ })
4306
4381
  // The auto-deny un-parks the suspended turn — flip 🙏 → working so
4307
4382
  // it doesn't sit on the awaiting glyph (or stall) after the timeout.
4308
4383
  resumeReactionAfterVerdict()
4309
4384
  postPermissionResumeMessage({
4310
4385
  behavior: 'deny',
4311
4386
  action: naturalAction(v.tool_name, v.input_preview),
4312
- timeoutMinutes: Math.round(PERMISSION_TTL_MS / 60000),
4387
+ timeoutMinutes,
4313
4388
  })
4389
+ // Remember this (tool, input) timed out so an immediate identical retry
4390
+ // (while the operator is still absent) is short-circuited without a
4391
+ // second card. Cleared on operator activity.
4392
+ if (PERMISSION_NO_REPEAT_ENABLED) {
4393
+ permissionTimeoutSignatures.set(
4394
+ permissionSignature(v.tool_name, v.input_preview),
4395
+ now,
4396
+ )
4397
+ }
4314
4398
  process.stderr.write(
4315
4399
  `telegram gateway: permission TTL expired — auto-deny request=${k} ` +
4316
4400
  `tool=${v.tool_name} (no operator response in ` +
4317
- `${Math.round(PERMISSION_TTL_MS / 60000)}m)\n`,
4401
+ `${timeoutMinutes}m)\n`,
4318
4402
  )
4319
4403
  pendingPermissions.delete(k)
4320
4404
  }
4321
4405
  }
4406
+ // Drop no-repeat suppression entries past the safety-cap window (the primary
4407
+ // bound is the operator-activity reset; this just keeps the map from growing).
4408
+ for (const [sig, at] of permissionTimeoutSignatures) {
4409
+ if (now - at > PERMISSION_DUPLICATE_WINDOW_MS) permissionTimeoutSignatures.delete(sig)
4410
+ }
4322
4411
  for (const [k, v] of vaultPassphraseCache) {
4323
4412
  if (now > v.expiresAt) vaultPassphraseCache.delete(k)
4324
4413
  }
@@ -5800,7 +5889,7 @@ const ipcServer: IpcServer = createIpcServer({
5800
5889
  // (5-min cooldown per agent), and skipped if no boot chat resolves.
5801
5890
  // Claude responds NO_REPLY per inline instruction; existing
5802
5891
  // silent-marker suppression at gateway.ts:5906 swallows the
5803
- // outbound. See docs/rfcs/cold-start-ttfo.md Option A.
5892
+ // outbound. See reference/rfcs/cold-start-ttfo.md Option A.
5804
5893
  if (client.agentName != null) {
5805
5894
  maybeFireWarmup({
5806
5895
  selfAgent: client.agentName,
@@ -6094,6 +6183,30 @@ const ipcServer: IpcServer = createIpcServer({
6094
6183
  return
6095
6184
  }
6096
6185
  }
6186
+ // No-repeat short-circuit: this exact (tool, input) already timed out and
6187
+ // the operator hasn't been active since (the suppression map is cleared on
6188
+ // any operator activity). Deny it WITH a timeout-duplicate reason and post
6189
+ // NO second card — the model retrying into an absent operator is the loop
6190
+ // this closes. The turn still unblocks (deny verdict), and a returning
6191
+ // operator resets suppression so the next ask gets a fresh card.
6192
+ if (PERMISSION_NO_REPEAT_ENABLED) {
6193
+ const sig = permissionSignature(toolName, inputPreview)
6194
+ if (isRecentTimeoutDuplicate(permissionTimeoutSignatures, sig, Date.now(), PERMISSION_DUPLICATE_WINDOW_MS)) {
6195
+ // no-card-verdict: no card was posted and the turn was never parked on
6196
+ // the awaiting glyph, so we omit the resume-reaction flip / resume msg.
6197
+ dispatchPermissionVerdict({
6198
+ type: 'permission',
6199
+ requestId,
6200
+ behavior: 'deny',
6201
+ message: duplicateDenyMessage,
6202
+ })
6203
+ process.stderr.write(
6204
+ `telegram gateway: permission no-repeat short-circuit — duplicate of a ` +
6205
+ `timed-out request tool=${toolName} request=${requestId} (no card posted)\n`,
6206
+ )
6207
+ return
6208
+ }
6209
+ }
6097
6210
  pendingPermissions.set(requestId, { tool_name: toolName, description, input_preview: inputPreview, startedAt: Date.now() })
6098
6211
  // Natural-language card body — a plain sentence ("Gymbro wants to
6099
6212
  // edit: supplement-log.md" + a why-line), never a raw tool id.
@@ -6583,7 +6696,7 @@ const ipcServer: IpcServer = createIpcServer({
6583
6696
  const source = typeof msg.inbound.meta?.source === 'string'
6584
6697
  ? msg.inbound.meta.source
6585
6698
  : 'unknown'
6586
- // Cheap-cron (docs/rfcs/cheap-cron-sessions.md §3.3): a Tier-1 fire
6699
+ // Cheap-cron (reference/rfcs/cheap-cron-sessions.md §3.3): a Tier-1 fire
6587
6700
  // carries meta.session='cron' → route to the derived `<agent>-cron`
6588
6701
  // bridge (a 2nd interactive Sonnet session in the same container).
6589
6702
  // Every other fire (and all of today's callers) routes to the agent
@@ -6883,6 +6996,7 @@ const ALLOWED_TOOLS = new Set([
6883
6996
  'request_secret',
6884
6997
  'linear_agent_activity',
6885
6998
  'linear_create_issue',
6999
+ 'linear_agent_setup',
6886
7000
  ])
6887
7001
 
6888
7002
  async function executeToolCall(tool: string, args: Record<string, unknown>): Promise<unknown> {
@@ -6932,6 +7046,8 @@ async function executeToolCall(tool: string, args: Record<string, unknown>): Pro
6932
7046
  return executeLinearAgentActivity(args)
6933
7047
  case 'linear_create_issue':
6934
7048
  return executeLinearCreateIssue(args)
7049
+ case 'linear_agent_setup':
7050
+ return executeLinearAgentSetup(args)
6935
7051
  default:
6936
7052
  throw new Error(`unknown tool: ${tool}`)
6937
7053
  }
@@ -6963,12 +7079,66 @@ async function executeSendChecklist(args: Record<string, unknown>): Promise<{ co
6963
7079
  return { content: [{ type: 'text', text: `checklist sent (id: ${sent.message_id})` }] }
6964
7080
  }
6965
7081
 
7082
+ /**
7083
+ * Per-(agent,reason) cooldown for the Linear-auth-dead operator alert. The
7084
+ * triggering 401 recurs on every Linear call once the token expires, so
7085
+ * without a cooldown the operator would be paged on every capture/activity.
7086
+ * One alert per reason per window is enough to surface the action item.
7087
+ */
7088
+ const linearAuthAlertLast = new Map<string, number>()
7089
+ const LINEAR_AUTH_ALERT_COOLDOWN_MS = 6 * 60 * 60 * 1000
7090
+
7091
+ /**
7092
+ * Surface an un-healable Linear auth failure (no refresh bundle / revoked
7093
+ * refresh token) to the operator as a Telegram message — not just a gateway
7094
+ * log line. Deduped per (agent,reason) and gated by SWITCHROOM_LINEAR_AUTH_ALERT=0.
7095
+ * Best-effort: a failed send never affects the agent's turn.
7096
+ */
7097
+ function notifyLinearAuthDead(info: { agent: string; reason: LinearAuthDeadReason; detail: string }): void {
7098
+ if (process.env.SWITCHROOM_LINEAR_AUTH_ALERT === '0') return
7099
+ const key = `${info.agent}:${info.reason}`
7100
+ const now = Date.now()
7101
+ const last = linearAuthAlertLast.get(key)
7102
+ if (last != null && now - last < LINEAR_AUTH_ALERT_COOLDOWN_MS) return
7103
+ void (async () => {
7104
+ try {
7105
+ const chatId = loadAccess().allowFrom[0]
7106
+ if (!chatId) return
7107
+ const threadId = topicForRecipient({
7108
+ recipientChatId: chatId,
7109
+ resolvedTopic: resolveAgentOutboundTopic({ kind: 'linear-auth' }) ?? chatThreadMap.get(chatId),
7110
+ supergroupChatId: resolveAgentSupergroupChatId(),
7111
+ })
7112
+ const text = buildLinearAuthDeadMessage(info.agent, info.reason)
7113
+ await swallowingApiCall(
7114
+ () =>
7115
+ bot.api.sendMessage(chatId, text, {
7116
+ parse_mode: 'HTML',
7117
+ ...(threadId != null ? { message_thread_id: threadId } : {}),
7118
+ }),
7119
+ { chat_id: chatId, verb: 'linearAuthDead' },
7120
+ )
7121
+ // Stamp the cooldown only after a successful send so a transient
7122
+ // Telegram failure doesn't burn the 6h window (the 401 recurs and will
7123
+ // retry the page on the next Linear call).
7124
+ linearAuthAlertLast.set(key, now)
7125
+ process.stderr.write(`telegram gateway: linear auth-dead alert sent agent=${info.agent} reason=${info.reason}\n`)
7126
+ } catch {
7127
+ /* best-effort */
7128
+ }
7129
+ })()
7130
+ }
7131
+
6966
7132
  async function executeLinearAgentActivity(args: Record<string, unknown>): Promise<{ content: Array<{ type: string; text: string }> }> {
6967
- return emitLinearAgentActivity(args)
7133
+ return emitLinearAgentActivity(args, { onAuthUnrecoverable: notifyLinearAuthDead })
6968
7134
  }
6969
7135
 
6970
7136
  async function executeLinearCreateIssue(args: Record<string, unknown>): Promise<{ content: Array<{ type: string; text: string }> }> {
6971
- return createLinearIssue(args)
7137
+ return createLinearIssue(args, { onAuthUnrecoverable: notifyLinearAuthDead })
7138
+ }
7139
+
7140
+ async function executeLinearAgentSetup(args: Record<string, unknown>): Promise<{ content: Array<{ type: string; text: string }> }> {
7141
+ return runLinearAgentSetup(args)
6972
7142
  }
6973
7143
 
6974
7144
  async function executeUpdateChecklist(args: Record<string, unknown>): Promise<{ content: Array<{ type: string; text: string }> }> {
@@ -7088,7 +7258,7 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
7088
7258
  let disableNotification = args.disable_notification === true
7089
7259
 
7090
7260
  // #1675 over-ping safety net. The conversational-pacing contract
7091
- // (`reference/conversational-pacing.md` beat 5) says EXACTLY ONE
7261
+ // (`reference/rfcs/conversational-pacing.md` beat 5) says EXACTLY ONE
7092
7262
  // device ping per turn — the final answer. The model sometimes
7093
7263
  // violates this by sending a substantive answer pinged + a wrap-up
7094
7264
  // ("Delivered all three steps…", "Sent.", or meta-narration) ALSO
@@ -10194,7 +10364,7 @@ function handleSessionEvent(ev: SessionEvent): void {
10194
10364
  // only fires for text-only turns where the stream IS the
10195
10365
  // answer): PING. The user reached for the agent and the
10196
10366
  // model produced an answer; per beat 5 of
10197
- // `reference/conversational-pacing.md` the final answer MUST
10367
+ // `reference/rfcs/conversational-pacing.md` the final answer MUST
10198
10368
  // ping the device exactly once. Without this carve-out, a
10199
10369
  // short text-only turn ("on it" being the whole response)
10200
10370
  // lands silently and the user has no notification to know
@@ -11520,6 +11690,11 @@ async function handleInbound(
11520
11690
  return
11521
11691
  }
11522
11692
 
11693
+ // A real message from an allowed sender (gate passed) ⇒ the operator is
11694
+ // present, so reset any no-repeat suppression: the next time the agent asks
11695
+ // for something that timed out earlier, they should see a fresh card.
11696
+ clearPermissionTimeoutSuppression('operator inbound')
11697
+
11523
11698
  // Capture wall-clock receive time for inbound_ack metric (#203).
11524
11699
  // Must be after gate() so early-exit paths (drop/pair) don't skew the delta.
11525
11700
  //
@@ -11639,7 +11814,7 @@ async function handleInbound(
11639
11814
  }
11640
11815
 
11641
11816
  // `!`-prefix interrupt (#575). Closes
11642
- // `reference/steer-or-queue-mid-flight.md`'s correction path.
11817
+ // `reference/jobs/steer-or-queue-mid-flight.md`'s correction path.
11643
11818
  //
11644
11819
  // Behavior:
11645
11820
  // 1. SIGINT the agent service. This kills any in-flight turn —
@@ -13096,7 +13271,7 @@ function resolveBootChatId(
13096
13271
  // operator sees lifecycle events in a predictable lane instead of
13097
13272
  // chat-root. For fleet-mode / DM agents the helper returns undefined
13098
13273
  // → behavior unchanged (lands at chat-root as today). PR4b of
13099
- // supergroup-mode rollout (docs/rfcs/supergroup-mode.md).
13274
+ // supergroup-mode rollout (reference/rfcs/supergroup-mode.md).
13100
13275
  const supergroupBootTopic = resolveAgentOutboundTopic({ kind: 'boot' })
13101
13276
  const bootSupergroup = resolveAgentSupergroupChatId()
13102
13277
  // The boot topic is valid only in the agent's supergroup — attach it per
@@ -13179,6 +13354,46 @@ function resolveAgentSupergroupChatId(): string | undefined {
13179
13354
  }
13180
13355
  }
13181
13356
 
13357
+ /** Whether THIS agent has `channels.telegram.linear_agent.enabled`. Used by the
13358
+ * proactive Linear-auth watch to skip agents that aren't Linear actors. */
13359
+ function isSelfLinearAgentEnabled(): boolean {
13360
+ const agentName = process.env.SWITCHROOM_AGENT_NAME
13361
+ if (!agentName) return false
13362
+ try {
13363
+ const cfg = loadSwitchroomConfig()
13364
+ const rawAgent = cfg.agents?.[agentName]
13365
+ if (!rawAgent) return false
13366
+ const resolved = resolveAgentConfig(cfg.defaults, cfg.profiles, rawAgent)
13367
+ const la = (resolved.channels?.telegram as { linear_agent?: { enabled?: boolean } } | undefined)?.linear_agent
13368
+ return la?.enabled === true
13369
+ } catch {
13370
+ return false
13371
+ }
13372
+ }
13373
+
13374
+ /**
13375
+ * One proactive Linear-auth check for this agent (boot + interval). Reads the
13376
+ * refresh bundle via the broker; missing → operator alert, near-expiry →
13377
+ * proactive rotate, revoked → operator alert. Best-effort, never throws.
13378
+ * Disabled with SWITCHROOM_LINEAR_AUTH_WATCH_POLL_MS=0.
13379
+ */
13380
+ async function runLinearAuthWatch(): Promise<void> {
13381
+ const agent = process.env.SWITCHROOM_AGENT_NAME
13382
+ if (!agent) return
13383
+ const io = brokerRefreshIO(agent)
13384
+ const status = await runLinearAuthCheck({
13385
+ agent,
13386
+ linearEnabled: isSelfLinearAgentEnabled,
13387
+ readBundle: io.readBundle,
13388
+ refresh: () => performLinearRefresh(io),
13389
+ onAuthDead: notifyLinearAuthDead,
13390
+ log: (s) => process.stderr.write(s),
13391
+ })
13392
+ if (status !== 'disabled' && status !== 'fresh') {
13393
+ process.stderr.write(`telegram gateway: linear-auth-watch agent=${agent} status=${status}\n`)
13394
+ }
13395
+ }
13396
+
13182
13397
  /**
13183
13398
  * Stamp a user-facing restart reason into the clean-shutdown marker
13184
13399
  * (same file the SIGTERM handler writes to and the next session greeting
@@ -14154,7 +14369,7 @@ async function buildLiveProbeRows(agentName: string): Promise<StatusProbeRow[]>
14154
14369
  // Render order matches the boot card's PROBE_KEYS so the two
14155
14370
  // surfaces tell the same story in the same order.
14156
14371
  const order = ['account', 'agent', 'gateway', 'quota', 'hindsight',
14157
- 'scheduler', 'broker', 'kernel', 'skills'] as const
14372
+ 'scheduler', 'broker', 'kernel', 'skills', 'connections'] as const
14158
14373
  for (const k of order) {
14159
14374
  const r = probes[k]
14160
14375
  if (!r) continue
@@ -15034,6 +15249,8 @@ async function handlePermissionSlash(ctx: Context, behavior: 'allow' | 'deny'):
15034
15249
  )
15035
15250
  return
15036
15251
  }
15252
+ // Operator answered via slash ⇒ present; reset no-repeat suppression.
15253
+ clearPermissionTimeoutSuppression('operator answered via /approve|/deny')
15037
15254
  // Forward to connected bridges — same IPC the button handler uses.
15038
15255
  dispatchPermissionVerdict({ type: 'permission', requestId: request_id, behavior })
15039
15256
  resumeReactionAfterVerdict()
@@ -19540,6 +19757,9 @@ bot.on('callback_query:data', async ctx => {
19540
19757
  // scopes (resolveTimeBox → null) and the disabled tier (ttl<=0) stay truly
19541
19758
  // once. The verdict is still dispatched WITHOUT a `rule` (below), so the
19542
19759
  // bridge never caches it untimed — the window lives only in scopedGrants.
19760
+ // Operator tapped a verdict ⇒ they are present; reset no-repeat suppression
19761
+ // so a later identical ask is shown fresh rather than silently short-circuited.
19762
+ clearPermissionTimeoutSuppression('operator answered a permission card')
19543
19763
  const pd = pendingPermissions.get(request_id)
19544
19764
  const resumeAction = pd ? naturalAction(pd.tool_name, pd.input_preview) : ''
19545
19765
  const scopedTtl = scopedApprovalTtlMs()
@@ -20819,6 +21039,7 @@ async function shutdown(signal: string): Promise<void> {
20819
21039
  pendingReauthFlows.clear()
20820
21040
  pendingVaultOps.clear()
20821
21041
  pendingPermissions.clear()
21042
+ permissionTimeoutSignatures.clear()
20822
21043
 
20823
21044
  try {
20824
21045
  await ipcServer.close()
@@ -21401,6 +21622,24 @@ void (async () => {
21401
21622
  }, QUOTA_WATCH_POLL_MS).unref()
21402
21623
  }
21403
21624
 
21625
+ // Proactive Linear-auth watch (FIX 3): catch a dead/missing/near-expiry
21626
+ // Linear bundle BEFORE the agent needs Linear, instead of only on a live
21627
+ // 401. Boot run (delayed so the broker connection settles) + interval.
21628
+ // SWITCHROOM_LINEAR_AUTH_WATCH_POLL_MS=0 disables it.
21629
+ const LINEAR_AUTH_WATCH_POLL_MS = Number(process.env.SWITCHROOM_LINEAR_AUTH_WATCH_POLL_MS ?? 6 * 60 * 60_000)
21630
+ if (LINEAR_AUTH_WATCH_POLL_MS > 0) {
21631
+ setTimeout(() => {
21632
+ void runLinearAuthWatch().catch((err) => {
21633
+ process.stderr.write(`telegram gateway: linear-auth-watch initial run failed: ${err}\n`)
21634
+ })
21635
+ }, 35_000)
21636
+ setInterval(() => {
21637
+ void runLinearAuthWatch().catch((err) => {
21638
+ process.stderr.write(`telegram gateway: linear-auth-watch scheduled run failed: ${err}\n`)
21639
+ })
21640
+ }, LINEAR_AUTH_WATCH_POLL_MS).unref()
21641
+ }
21642
+
21404
21643
  // Restart-watchdog: poll systemd's NRestarts for the agent unit.
21405
21644
  // When the count ticks up without a corresponding restart-pending
21406
21645
  // marker (= user-initiated /restart), emit an operator event.
@@ -6,7 +6,7 @@
6
6
  * turn-deferred-vs-now — unit-tests without gateway.ts's boot side-effects
7
7
  * (same pattern as scoped-approval.ts / admin-commands/index.ts).
8
8
  *
9
- * Contract (reference/access-model.md): the restart only ever follows an
9
+ * Contract (reference/rfcs/access-model.md): the restart only ever follows an
10
10
  * operator-approved, single-agent, additive `tools.allow` edit, and only
11
11
  * ever bounces the CALLER's own agent — never a peer, never fleet-wide.
12
12
  */
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * InboundDeliveryStateMachine — DISPATCH (Phase 2b PR 3a, bridgeUp cutover).
3
3
  *
4
- * Per RFC `docs/rfcs/inbound-delivery-state-machine.md`, the state
4
+ * Per RFC `reference/rfcs/inbound-delivery-state-machine.md`, the state
5
5
  * machine is pure: `transition(state, event) → { state', effects[] }`.
6
6
  * The gateway's job is to (a) emit events at the right moments and
7
7
  * (b) execute the returned effects against real I/O. This module owns
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * InboundDeliveryStateMachine — SHADOW MODE wiring (Phase 2b PR 2).
3
3
  *
4
- * Per RFC `docs/rfcs/inbound-delivery-state-machine.md` Phase 2b PR 2:
4
+ * Per RFC `reference/rfcs/inbound-delivery-state-machine.md` Phase 2b PR 2:
5
5
  * the state machine runs ALONGSIDE the existing imperative gateway
6
6
  * code, recording predicted effects to a structured trace. Behavior
7
7
  * is unchanged — every existing code path still executes the actual
@@ -2,7 +2,7 @@
2
2
  * InboundDeliveryStateMachine — pure transition function for the
3
3
  * gateway's inbound→bridge→outbound pipeline.
4
4
  *
5
- * Per `docs/rfcs/inbound-delivery-state-machine.md` (RFC merged in
5
+ * Per `reference/rfcs/inbound-delivery-state-machine.md` (RFC merged in
6
6
  * PR #1576): the gateway's delivery state was implicit and scattered
7
7
  * across 8+ pieces of mutable state. The wedge cluster of 2026-05-19
8
8
  * (9 PRs in 36h all patching variants of "inbound stranded → 5-min
@@ -3,7 +3,7 @@
3
3
  // A `!`-prefix interrupt SIGINTs the agent's in-flight turn (tmux C-c) and
4
4
  // then resumes with the replacement body as a fresh turn. Firing the SIGINT
5
5
  // the instant `!` arrives can land mid-tool-call — a C-c during a Write or a
6
- // Bash leaves the tool's work half-done. `reference/steer-or-queue-mid-flight.md`
6
+ // Bash leaves the tool's work half-done. `reference/jobs/steer-or-queue-mid-flight.md`
7
7
  // names this exact anti-pattern: "Mid-tool-call is not 'amend time.'"
8
8
  //
9
9
  // We can't pause claude's internal loop (the unmodified-CLI constraint — the
@@ -38,6 +38,18 @@ export interface PermissionEvent {
38
38
  * (`mcp__<server>__*`).
39
39
  */
40
40
  rule?: string;
41
+ /**
42
+ * Optional human-readable reason for the verdict, surfaced to the model
43
+ * verbatim by claude's permission channel as "…the user said: ${message}".
44
+ * Only set on `deny`. switchroom uses it to make a TIMEOUT auto-deny (no
45
+ * operator response within the TTL) distinguishable from a deliberate
46
+ * operator denial — otherwise both render as the generic "Denied" and the
47
+ * model retries the identical call, re-raising an identical card 10 min
48
+ * later (marko Rentals-budget loop, 2026-06-17). When absent, claude falls
49
+ * back to its default "Denied", so this degrades safely on any claude that
50
+ * ignores the field.
51
+ */
52
+ message?: string;
41
53
  }
42
54
 
43
55
  export interface StatusEvent {
@@ -24,6 +24,37 @@ import { performLinearRefresh, type RefreshIO } from '../../src/linear/oauth-ref
24
24
 
25
25
  export const LINEAR_GRAPHQL_ENDPOINT = 'https://api.linear.app/graphql'
26
26
 
27
+ /** The two operator-action reasons a Linear 401 can't self-heal. */
28
+ export type LinearAuthDeadReason = 'no_bundle' | 'revoked'
29
+
30
+ /** Minimal HTML-escape (Telegram parse_mode: 'HTML'). Kept local so the
31
+ * message builder is self-contained + unit-testable without reaching into a
32
+ * gateway-only escaper (the bug that shipped the first cut of this alert). */
33
+ function escapeHtmlMin(s: string): string {
34
+ return s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;')
35
+ }
36
+
37
+ /**
38
+ * Build the operator-facing Telegram alert (HTML) for an un-healable Linear
39
+ * auth failure. Pure + self-escaping so it can be unit-tested directly. The
40
+ * gateway's `notifyLinearAuthDead` only handles dedup + transport.
41
+ */
42
+ export function buildLinearAuthDeadMessage(agent: string, reason: LinearAuthDeadReason): string {
43
+ const a = escapeHtmlMin(agent)
44
+ const why =
45
+ reason === 'no_bundle'
46
+ ? `no refresh credentials are stored (<code>linear/${a}/oauth</code> is missing), so its daily-expiring token can't renew`
47
+ : `its Linear refresh token was revoked`
48
+ return (
49
+ `🔑 <b>Linear auth needs you</b>\n` +
50
+ `<b>${a}</b> can't reach Linear — ${why}. ` +
51
+ `Its access token will keep failing until you re-authorize.\n\n` +
52
+ `Re-auth (actor=app) then run <code>switchroom linear-agent setup --agent ${a} ` +
53
+ `--token … --refresh-token … --client-id … --client-secret …</code> on the host, ` +
54
+ `or ask me to walk you through it.`
55
+ )
56
+ }
57
+
27
58
  export type LinearTokenResult =
28
59
  | { ok: true; token: string }
29
60
  | { ok: false; reason: 'denied' | 'unreachable' | 'not_found' | 'unknown' }
@@ -44,6 +75,14 @@ export interface LinearActivityDeps {
44
75
  defaultTeamId?: string
45
76
  /** Log sink — stderr in production. */
46
77
  log?: (line: string) => void
78
+ /** Invoked when a Linear 401 CANNOT self-heal because the situation needs
79
+ * an operator to act: `no_bundle` (no refresh credentials were ever
80
+ * stored — the silent-setup-failure case) or `revoked` (the refresh token
81
+ * itself is dead). The gateway wires this to a deduped operator-facing
82
+ * Telegram alert so a daily-expiring token stops failing invisibly. NOT
83
+ * called for transient reasons (network/http_error/bad_response) — those
84
+ * retry on their own. */
85
+ onAuthUnrecoverable?: (info: { agent: string; reason: LinearAuthDeadReason; detail: string }) => void
47
86
  }
48
87
 
49
88
  export type ToolTextResult = { content: Array<{ type: string; text: string }> }
@@ -106,6 +145,7 @@ async function linearPostWithRefresh(
106
145
  fetchImpl: typeof fetch,
107
146
  log: (s: string) => void,
108
147
  refreshIO?: (agent: string) => RefreshIO,
148
+ onAuthUnrecoverable?: (info: { agent: string; reason: LinearAuthDeadReason; detail: string }) => void,
109
149
  ): Promise<{ resp: Response; token: string }> {
110
150
  const post = (t: string) =>
111
151
  fetchImpl(LINEAR_GRAPHQL_ENDPOINT, {
@@ -125,7 +165,21 @@ async function linearPostWithRefresh(
125
165
  `telegram gateway: linear token REVOKED agent=${agent} — refresh token is dead; ` +
126
166
  `operator must re-authorize (linear-agent setup --refresh-token …)\n`,
127
167
  )
168
+ onAuthUnrecoverable?.({ agent, reason: 'revoked', detail: refreshed.detail })
169
+ } else if (refreshed.reason === 'no_bundle') {
170
+ // No refresh bundle was ever stored (the silent-setup-failure case):
171
+ // the access token expires ~daily and there is nothing to renew from.
172
+ // This is invisible in the gateway log alone — surface it to the
173
+ // operator so they can re-provision instead of the agent failing
174
+ // every day forever.
175
+ log(
176
+ `telegram gateway: linear token DEAD agent=${agent} — no refresh bundle stored ` +
177
+ `(linear/${agent}/oauth absent); operator must re-authorize\n`,
178
+ )
179
+ onAuthUnrecoverable?.({ agent, reason: 'no_bundle', detail: refreshed.detail })
128
180
  } else {
181
+ // Transient (network / http_error / bad_response): retries on its own,
182
+ // no operator action — log only, don't page.
129
183
  log(`telegram gateway: linear token refresh failed agent=${agent} reason=${refreshed.reason}\n`)
130
184
  }
131
185
  return { resp, token } // surface the original 401
@@ -206,6 +260,7 @@ export async function emitLinearAgentActivity(
206
260
  fetchImpl,
207
261
  log,
208
262
  deps.refreshIO,
263
+ deps.onAuthUnrecoverable,
209
264
  ))
210
265
  } catch (err) {
211
266
  return {
@@ -312,6 +367,7 @@ export async function createLinearIssue(
312
367
  fetchImpl,
313
368
  log,
314
369
  deps.refreshIO,
370
+ deps.onAuthUnrecoverable,
315
371
  )
316
372
  resp = out.resp
317
373
  activeToken = out.token