switchroom 0.15.36 → 0.15.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/dist/agent-scheduler/index.js +10 -9
  2. package/dist/auth-broker/index.js +9 -9
  3. package/dist/cli/autoaccept-poll.js +13 -7
  4. package/dist/cli/notion-write-pretool.mjs +9 -9
  5. package/dist/cli/switchroom.js +480 -217
  6. package/dist/cli/ui/index.html +87 -17
  7. package/dist/host-control/main.js +10 -10
  8. package/dist/vault/approvals/kernel-server.js +9 -9
  9. package/dist/vault/broker/server.js +9 -9
  10. package/package.json +1 -1
  11. package/profiles/_base/cron-session.sh.hbs +1 -1
  12. package/profiles/_base/start.sh.hbs +1 -1
  13. package/profiles/_shared/agent-self-service.md.hbs +25 -0
  14. package/skills/switchroom-manage/SKILL.md +1 -1
  15. package/skills/switchroom-runtime/SKILL.md +1 -1
  16. package/telegram-plugin/answer-stream.ts +1 -1
  17. package/telegram-plugin/bridge/bridge.ts +50 -1
  18. package/telegram-plugin/bridge/ipc-client.ts +4 -1
  19. package/telegram-plugin/bridge/tool-filter.ts +77 -0
  20. package/telegram-plugin/chat-lock.ts +1 -1
  21. package/telegram-plugin/credits-watch.ts +1 -1
  22. package/telegram-plugin/dist/bridge/bridge.js +60 -3
  23. package/telegram-plugin/dist/gateway/gateway.js +753 -207
  24. package/telegram-plugin/dist/server.js +64 -4
  25. package/telegram-plugin/gateway/auto-classify-mid-turn.ts +1 -1
  26. package/telegram-plugin/gateway/boot-card.ts +5 -1
  27. package/telegram-plugin/gateway/boot-probes.ts +62 -0
  28. package/telegram-plugin/gateway/cron-session.ts +1 -1
  29. package/telegram-plugin/gateway/gateway.ts +254 -15
  30. package/telegram-plugin/gateway/grant-restart.ts +1 -1
  31. package/telegram-plugin/gateway/inbound-delivery-machine-dispatch.ts +1 -1
  32. package/telegram-plugin/gateway/inbound-delivery-machine-shadow.ts +1 -1
  33. package/telegram-plugin/gateway/inbound-delivery-machine.ts +1 -1
  34. package/telegram-plugin/gateway/interrupt-defer.ts +1 -1
  35. package/telegram-plugin/gateway/ipc-protocol.ts +12 -0
  36. package/telegram-plugin/gateway/linear-activity.ts +56 -0
  37. package/telegram-plugin/gateway/linear-auth-watch.ts +102 -0
  38. package/telegram-plugin/gateway/linear-setup.ts +196 -0
  39. package/telegram-plugin/gateway/permission-card-origin.ts +62 -0
  40. package/telegram-plugin/gateway/permission-timeout.ts +70 -0
  41. package/telegram-plugin/gateway/prefix-warmup.ts +1 -1
  42. package/telegram-plugin/gateway/webhook-ingest-server.test.ts +1 -1
  43. package/telegram-plugin/gateway/webhook-ingest-server.ts +1 -1
  44. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +1 -1
  45. package/telegram-plugin/interrupt-marker.ts +1 -1
  46. package/telegram-plugin/over-ping-safety-net.ts +1 -1
  47. package/telegram-plugin/scoped-approval.ts +1 -1
  48. package/telegram-plugin/secret-detect/vault-error.ts +1 -1
  49. package/telegram-plugin/silence-poke.ts +2 -2
  50. package/telegram-plugin/silent-reply-anchor.ts +1 -1
  51. package/telegram-plugin/slot-banner-driver.ts +1 -1
  52. package/telegram-plugin/startup-reset.ts +1 -1
  53. package/telegram-plugin/tests/boot-probes-connections.test.ts +66 -0
  54. package/telegram-plugin/tests/gateway-startup-reset.test.ts +1 -1
  55. package/telegram-plugin/tests/inbound-delivery-machine.test.ts +1 -1
  56. package/telegram-plugin/tests/linear-agent-activity.test.ts +77 -0
  57. package/telegram-plugin/tests/linear-agent-setup.test.ts +132 -0
  58. package/telegram-plugin/tests/linear-auth-watch.test.ts +79 -0
  59. package/telegram-plugin/tests/linear-create-issue.test.ts +3 -1
  60. package/telegram-plugin/tests/permission-card-origin.test.ts +97 -0
  61. package/telegram-plugin/tests/permission-card-routing.test.ts +23 -0
  62. package/telegram-plugin/tests/permission-no-repeat-wiring.test.ts +76 -0
  63. package/telegram-plugin/tests/permission-timeout.test.ts +87 -0
  64. package/telegram-plugin/tests/scoped-approval.test.ts +1 -1
  65. package/telegram-plugin/tests/silence-poke.test.ts +1 -1
  66. package/telegram-plugin/tests/tool-filter.test.ts +87 -0
  67. package/telegram-plugin/tests/turn-flush-safety.test.ts +1 -1
  68. package/telegram-plugin/turn-flush-safety.ts +1 -1
  69. package/telegram-plugin/uat/assertions.ts +1 -1
  70. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +1 -1
  71. package/telegram-plugin/uat/scenarios/fuzz-extended-dm.test.ts +1 -1
  72. package/telegram-plugin/uat/scenarios/jtbd-fast-ack-dm.test.ts +1 -1
  73. package/telegram-plugin/uat/scenarios/jtbd-fast-trivial-dm.test.ts +2 -2
  74. package/telegram-plugin/uat/scenarios/jtbd-forwarded-burst-dm.test.ts +1 -1
  75. package/telegram-plugin/uat/scenarios/jtbd-memory-survives-restart-dm.test.ts +1 -1
  76. package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +1 -1
  77. package/telegram-plugin/uat/scenarios/jtbd-reflective-status-reaction-dm.test.ts +1 -1
  78. package/telegram-plugin/uat/scenarios/jtbd-wake-audit-content-dm.test.ts +1 -1
@@ -0,0 +1,102 @@
1
+ /**
2
+ * Proactive Linear auth watch (FIX 3 — observability).
3
+ *
4
+ * Before this, Linear auth was only ever checked REACTIVELY: a refresh (and the
5
+ * "🔑 Linear auth needs you" operator alert) happened only when an agent made a
6
+ * live Linear call and got a 401. A linear-enabled agent that rarely calls
7
+ * Linear could therefore sit dead-auth (missing bundle / revoked refresh /
8
+ * silently-expired token) completely unnoticed until the moment it needed
9
+ * Linear.
10
+ *
11
+ * This runs a small check on boot + on an interval (mirrors quota-watch):
12
+ * - bundle missing/invalid → fire the operator alert (no_bundle) NOW.
13
+ * - bundle present + access token within the refresh skew → proactively
14
+ * rotate it (so the next real call never eats a 401), and surface a revoked
15
+ * refresh token via the operator alert.
16
+ * - bundle present + token fresh → nothing.
17
+ *
18
+ * Pure orchestration over injected deps so it is unit-testable without a broker
19
+ * or the network. The gateway wires the broker-backed deps + notifyLinearAuthDead.
20
+ */
21
+
22
+ import {
23
+ parseBundle,
24
+ needsRefresh,
25
+ type PerformRefreshResult,
26
+ } from '../../src/linear/oauth-refresh.js'
27
+
28
/** Outcome of a single proactive check; returned so the caller can log it. */
export type LinearAuthWatchStatus =
  | 'disabled'
  | 'fresh'
  | 'no_bundle'
  | 'refreshed'
  | 'revoked'
  | 'refresh_failed'

/** Injected collaborators for {@link runLinearAuthCheck}. */
export interface LinearAuthWatchDeps {
  agent: string
  /** Whether this agent has linear_agent enabled (reads config). */
  linearEnabled: () => boolean
  /** Read the raw JSON bundle from linear/<agent>/oauth (broker). */
  readBundle: () => Promise<string | null>
  /** Rotate the token via the stored bundle (performLinearRefresh over broker). */
  refresh: () => Promise<PerformRefreshResult>
  /** Operator alert (gateway's notifyLinearAuthDead). */
  onAuthDead: (info: { agent: string; reason: 'no_bundle' | 'revoked'; detail: string }) => void
  /** Epoch seconds (injectable for tests). */
  nowSec?: () => number
  log?: (line: string) => void
}

/** Readable text for an arbitrary thrown value (a rejection need not be an Error). */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err)
}

/**
 * One proactive check. Never throws — every failure of an injected dependency
 * is caught, logged, and folded into the returned status.
 *
 * Status mapping:
 * - `disabled`       — `linearEnabled()` returned false; nothing else ran.
 * - `no_bundle`      — the bundle did not parse, or the refresh reported
 *                      `no_bundle`; the operator is alerted.
 * - `fresh`          — `needsRefresh` said no rotation is due.
 * - `refreshed`      — the proactive rotation succeeded.
 * - `revoked`        — the refresh reported `revoked`; the operator is alerted.
 * - `refresh_failed` — any non-paging failure: a bundle read error, a refresh
 *                      that failed for another reason or threw, or an
 *                      unexpected exception from a dependency.
 *
 * @param deps Injected collaborators; `nowSec` defaults to the wall clock and
 *             `log` defaults to a no-op.
 * @returns The status of this check.
 */
export async function runLinearAuthCheck(deps: LinearAuthWatchDeps): Promise<LinearAuthWatchStatus> {
  const log = deps.log ?? (() => {})
  const prefix = `telegram gateway: linear-auth-watch agent=${deps.agent}`

  // A failing alert sink must not mask the status we already determined.
  const alertOperator = (reason: 'no_bundle' | 'revoked', detail: string): void => {
    try {
      deps.onAuthDead({ agent: deps.agent, reason, detail })
    } catch (err) {
      log(`${prefix} operator alert failed: ${describeError(err)}\n`)
    }
  }

  try {
    if (!deps.linearEnabled()) return 'disabled'

    let raw: string | null
    try {
      raw = await deps.readBundle()
    } catch (err) {
      // A broker read failure is transient infra, not an auth problem — don't
      // page the operator, just log.
      log(`${prefix} bundle read error: ${describeError(err)}\n`)
      return 'refresh_failed'
    }

    const bundle = parseBundle(raw)
    if (!bundle) {
      // The silent-setup-failure case: linear_agent is enabled but no refresh
      // bundle was ever stored. Surface it proactively.
      log(`${prefix} — no refresh bundle (proactive)\n`)
      alertOperator('no_bundle', 'proactive watch: linear/<agent>/oauth missing or invalid')
      return 'no_bundle'
    }

    const now = deps.nowSec ? deps.nowSec() : Math.floor(Date.now() / 1000)
    if (!needsRefresh(bundle.expiresAt, now)) {
      // Fresh, or expiry-untracked (older bundle) — the reactive-on-401 path
      // covers untracked bundles; nothing to do proactively.
      return 'fresh'
    }

    // Within the refresh skew → rotate now so the next real call never 401s.
    let res: PerformRefreshResult
    try {
      res = await deps.refresh()
    } catch (err) {
      // A rejected refresh is handled like any other transient failure.
      log(`${prefix} proactive refresh threw: ${describeError(err)}\n`)
      return 'refresh_failed'
    }

    if (res.ok) {
      log(`${prefix} proactively refreshed (was near expiry)\n`)
      return 'refreshed'
    }
    if (res.reason === 'revoked') {
      log(`${prefix} refresh REVOKED (proactive)\n`)
      alertOperator('revoked', res.detail)
      return 'revoked'
    }
    if (res.reason === 'no_bundle') {
      alertOperator('no_bundle', res.detail)
      return 'no_bundle'
    }
    // Transient (network/http_error/bad_response/persist_failed) — log, don't page.
    log(`${prefix} proactive refresh failed reason=${res.reason}\n`)
    return 'refresh_failed'
  } catch (err) {
    // Last resort so the documented never-throws contract holds even when a
    // dependency such as linearEnabled() throws.
    log(`${prefix} unexpected error: ${describeError(err)}\n`)
    return 'refresh_failed'
  }
}
@@ -0,0 +1,196 @@
1
+ /**
2
+ * `linear_agent_setup` MCP tool — in-container, operator-approved Linear
3
+ * `actor=app` OAuth provisioning (FIX 2).
4
+ *
5
+ * Background: `switchroom linear-agent setup` is host-only (it writes the
6
+ * vault file directly with the operator passphrase). Run from inside an agent
7
+ * container it silently no-ops — there is no mounted vault and no passphrase —
8
+ * which is exactly how clerk/carrie ended up with an access token but no
9
+ * refresh bundle (a daily 401 with no self-heal). This tool gives the agent a
10
+ * sanctioned in-container path that uses ONLY operator-approved primitives:
11
+ *
12
+ * 1. `action: "authorize_url"` — pure. Returns the browser authorize URL the
13
+ * operator opens to consent. No side effects, no approval.
14
+ * 2. `action: "complete"` — exchanges the `code` from the redirect for an
15
+ * access token + refresh token, then writes BOTH
16
+ * `linear/<agent>/token` (access) and `linear/<agent>/oauth` (the durable
17
+ * refresh bundle) via the broker. Creating these NEW keys requires a
18
+ * write-grant — `vault_request_access(scope: "write")` for each, which the
19
+ * operator approves. On a vault denial the tool returns the exact
20
+ * next-step text (mirrors `linear_agent_activity`'s vault_request_access
21
+ * guidance) rather than failing opaquely.
22
+ *
23
+ * The durable `secrets[]` ACL + the `linear_agent` config block are added by
24
+ * the agent via `config_propose_edit` (also operator-approved) — see the
25
+ * returned guidance and the self-service playbook. The secret VALUES never
26
+ * pass through config (no leak); only the access token + bundle go to the
27
+ * broker, and the OAuth client_secret/code are used in-process for the
28
+ * exchange and never stored or logged.
29
+ */
30
+
31
+ import { putViaBroker, readVaultTokenFile } from '../../src/vault/broker/client.js'
32
+ import {
33
+ buildLinearAuthorizeUrl,
34
+ exchangeLinearAuthCode,
35
+ serializeBundle,
36
+ } from '../../src/linear/oauth-refresh.js'
37
+
38
/** MCP tool result as produced by this module: a list of `text` content parts. */
export type ToolTextResult = { content: Array<{ type: string; text: string }> }

/** Result of a single broker put (new-key create). */
type PutOutcome =
  | { kind: 'ok' }
  | { kind: 'denied'; msg: string }
  | { kind: 'not_found'; msg: string }
  | { kind: 'unreachable'; msg: string }

export interface LinearSetupDeps {
  /** Agent slug (defaults to SWITCHROOM_AGENT_NAME). */
  agent?: string
  /** Injectable fetch (tests). */
  fetchImpl?: typeof fetch
  /** Write `linear/<agent>/token`. Defaults to a broker put. */
  putToken?: (agent: string, accessToken: string) => Promise<PutOutcome>
  /** Write `linear/<agent>/oauth` (the JSON bundle). Defaults to a broker put. */
  putBundle?: (agent: string, bundleJson: string) => Promise<PutOutcome>
  /** Log sink — stderr in production. NEVER receives secret values. */
  log?: (line: string) => void
}

/** Vault key holding the agent's Linear access token. */
const tokenKey = (agent: string) => `linear/${agent}/token`
/** Vault key holding the agent's Linear OAuth refresh bundle. */
const bundleKey = (agent: string) => `linear/${agent}/oauth`

/** Default broker put: path-as-identity + the agent's standing write-grant
 *  token (so a new key authorized by `vault_request_access(write)` can be
 *  created). Mirrors `brokerRefreshIO` in linear-activity.ts.
 *  Any broker result other than ok / unreachable / not_found is reported as
 *  `denied`. */
function defaultPut(agent: string, key: string, value: string): Promise<PutOutcome> {
  const token = readVaultTokenFile(agent) ?? undefined
  const opt = token ? { token } : {}
  return putViaBroker(key, { kind: 'string', value }, opt).then((r) => {
    if (r.kind === 'ok') return { kind: 'ok' as const }
    if (r.kind === 'unreachable') return { kind: 'unreachable' as const, msg: r.msg }
    if (r.kind === 'not_found') return { kind: 'not_found' as const, msg: r.msg }
    return { kind: 'denied' as const, msg: r.msg }
  })
}

/** Wrap a string as a single-part MCP text result. */
function text(s: string): ToolTextResult {
  return { content: [{ type: 'text', text: s }] }
}

/**
 * Guidance the agent shows the operator + itself after a write is blocked
 * because the key doesn't exist yet (no write-grant). This is the expected
 * first-run path: the operator approves the grant, then the agent retries.
 *
 * The retry needs a NEW authorization code. In this module a write is only
 * attempted after the code exchange succeeded, so the code that led here has
 * already been spent.
 */
function writeGrantGuidance(agent: string): string {
  return (
    `I need write access to store the Linear credentials. Call:\n` +
    `• vault_request_access(key: "${tokenKey(agent)}", scope: "write", reason: "store Linear app access token")\n` +
    `• vault_request_access(key: "${bundleKey(agent)}", scope: "write", reason: "store Linear OAuth refresh bundle")\n` +
    `Once the operator approves both, re-run linear_agent_setup with action "complete" — with a FRESH code: ` +
    `the code just used was consumed by this attempt (codes are single-use), so re-open the authorize URL first.`
  )
}

/** Guidance for the durable config (ACL + linear_agent block) the agent emits
 *  after the values are stored, via the operator-approved config_propose_edit. */
function durableConfigGuidance(agent: string): string {
  return (
    `Stored. To make this durable (survive restarts + enable auto-refresh), propose a config edit ` +
    `(config_propose_edit) that, under agents.${agent}:\n` +
    ` • adds channels.telegram.linear_agent: { enabled: true, token: "vault:${tokenKey(agent)}" }\n` +
    ` • adds "${tokenKey(agent)}" and "${bundleKey(agent)}" to secrets[]\n` +
    `Then the operator approves it and you restart to pick up the linear_agent block.`
  )
}

/** Read a tool argument as a trimmed, non-empty string. Anything else
 *  (missing, blank, or not a string) yields undefined, so malformed tool
 *  input becomes a validation message rather than a TypeError. */
function stringArg(args: Record<string, unknown>, name: string): string | undefined {
  const value = args[name]
  if (typeof value !== 'string') return undefined
  const trimmed = value.trim()
  return trimmed === '' ? undefined : trimmed
}

/** Run one vault write. Returns null on success, otherwise the tool result to
 *  hand back. A put that throws is reported as `unreachable` so the caller's
 *  never-throws contract holds. Only the outcome kind is logged, never the
 *  value being written. */
async function storeOrExplain(
  put: () => Promise<PutOutcome>,
  what: { logLabel: string; human: string },
  agent: string,
  log: (line: string) => void,
): Promise<ToolTextResult | null> {
  let outcome: PutOutcome
  try {
    outcome = await put()
  } catch (err) {
    outcome = { kind: 'unreachable', msg: err instanceof Error ? err.message : String(err) }
  }
  if (outcome.kind === 'ok') return null
  if (outcome.kind === 'not_found' || outcome.kind === 'denied') {
    return text(writeGrantGuidance(agent))
  }
  log(`telegram gateway: linear_agent_setup ${what.logLabel} write ${outcome.kind} agent=${agent}\n`)
  return text(`linear_agent_setup failed: couldn't store the ${what.human} (broker ${outcome.kind}: ${outcome.msg}).`)
}

/**
 * Run the `linear_agent_setup` tool. Validates args, performs the requested
 * step, and returns actionable MCP text. Never throws on a vault failure or
 * on malformed arguments — returns guidance the agent can act on.
 *
 * @param args Raw tool arguments. `action` must be "authorize_url" or
 *             "complete"; `client_id` and `redirect_uri` are always required;
 *             `client_secret` and `code` are required for "complete".
 * @param deps Injectable collaborators; every field has a production default.
 * @returns MCP text: the authorize URL, a success message with durable-config
 *          guidance, or a failure/next-step message.
 */
export async function runLinearAgentSetup(
  args: Record<string, unknown>,
  deps: LinearSetupDeps = {},
): Promise<ToolTextResult> {
  const log = deps.log ?? ((s) => process.stderr.write(s))
  const agent = deps.agent ?? process.env.SWITCHROOM_AGENT_NAME ?? '-'
  if (agent === '-' || !/^[a-z][a-z0-9_-]{0,63}$/.test(agent)) {
    return text(`linear_agent_setup failed: could not resolve a valid agent name (got '${agent}').`)
  }

  const action = args.action
  if (action !== 'authorize_url' && action !== 'complete') {
    return text(`linear_agent_setup failed: action must be "authorize_url" or "complete".`)
  }

  const clientId = stringArg(args, 'client_id')
  const redirectUri = stringArg(args, 'redirect_uri')
  if (!clientId) return text('linear_agent_setup failed: client_id is required.')
  if (!redirectUri || !/^https?:\/\//.test(redirectUri)) {
    return text('linear_agent_setup failed: redirect_uri is required and must be an http(s) URL registered on the Linear OAuth app.')
  }

  if (action === 'authorize_url') {
    const url = buildLinearAuthorizeUrl({ clientId, redirectUri })
    return text(
      `Open this URL in a browser to authorize <b>${agent}</b> as a Linear app actor (actor=app):\n\n${url}\n\n` +
        `After you approve, Linear redirects to ${redirectUri}?code=… (it may show a blank/error page — that's fine). ` +
        `Copy the code value from the URL bar, then run linear_agent_setup with action "complete", the same client_id + redirect_uri, ` +
        `your client_secret, and that code.`,
    )
  }

  // action === 'complete'
  const clientSecret = stringArg(args, 'client_secret')
  const code = stringArg(args, 'code')
  if (!clientSecret) return text('linear_agent_setup failed: client_secret is required for action "complete".')
  if (!code) return text('linear_agent_setup failed: code (from the redirect URL) is required for action "complete".')

  const exchanged = await exchangeLinearAuthCode(
    { clientId, clientSecret, code, redirectUri },
    deps.fetchImpl ? { fetchImpl: deps.fetchImpl } : {},
  )
  if (!exchanged.ok) {
    log(`telegram gateway: linear_agent_setup exchange failed agent=${agent} reason=${exchanged.reason}\n`)
    if (exchanged.reason === 'bad_code') {
      return text(
        `linear_agent_setup failed: Linear rejected the authorization code (expired, already used, or wrong redirect_uri). ` +
          `Re-run action "authorize_url", open the fresh URL, and copy a new code.`,
      )
    }
    return text(`linear_agent_setup failed: token exchange ${exchanged.reason} — ${exchanged.detail}. Retry shortly.`)
  }

  const bundle = serializeBundle({
    clientId,
    clientSecret,
    refreshToken: exchanged.refreshToken,
    expiresAt: exchanged.expiresAt,
  })

  const putBundle = deps.putBundle ?? ((a, j) => defaultPut(a, bundleKey(a), j))
  const putToken = deps.putToken ?? ((a, t) => defaultPut(a, tokenKey(a), t))

  // Write the bundle FIRST (same ordering rationale as performLinearRefresh:
  // never leave a fresh access token whose refresh bundle didn't persist).
  const bundleFailure = await storeOrExplain(
    () => putBundle(agent, bundle),
    { logLabel: 'bundle', human: 'refresh bundle' },
    agent,
    log,
  )
  if (bundleFailure) return bundleFailure

  const tokenFailure = await storeOrExplain(
    () => putToken(agent, exchanged.accessToken),
    { logLabel: 'token', human: 'access token' },
    agent,
    log,
  )
  if (tokenFailure) return tokenFailure

  // Rounded, and floored at 1 so the message never reports "~0h".
  const hours = Math.max(1, Math.round((exchanged.expiresAt - Date.now() / 1000) / 3600))
  log(`telegram gateway: linear_agent_setup stored token+bundle agent=${agent} (expires ~${hours}h)\n`)
  return text(
    `✅ Linear app token + refresh bundle stored for ${agent} (access token expires in ~${hours}h; it now auto-renews).\n\n` +
      durableConfigGuidance(agent),
  )
}
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Pure origin-recovery for a permission/approval card when the gateway's live
3
+ * `currentTurn` has already been nulled.
4
+ *
5
+ * Why this exists (marko Rentals-budget incident, 2026-06-17). A
6
+ * supergroup-owned agent that delivers its final answer as plain transcript
7
+ * text — never calling the `reply` tool — has its turn force-closed by the
8
+ * gateway's orphaned-reply backstop ~30s later, which nulls `currentTurn`. If
9
+ * the single claude session is still running and then calls a permission-gated
10
+ * tool (the real case: retrying `meta_ads_set_budget` after a first card had
11
+ * auto-denied), the gate fires with `currentTurn == null`. The card emitter
12
+ * then fell through to broadcasting the card to the operator-DM allowlist,
13
+ * thread-stripped — so the card never reached the forum topic the operator was
14
+ * working in. Unanswered there, it hit the 10-minute TTL and auto-denied, and
15
+ * an explicitly-approved budget change silently never ran.
16
+ *
17
+ * A switchroom agent runs exactly ONE claude session, so a tool permission can
18
+ * only belong to the turn that session most recently had open. We recover that
19
+ * origin from the bounded recently-started turn registry: the most-recently-
20
+ * started turn still within `maxAgeMs`. A turn force-closed by the backstop is,
21
+ * by construction, seconds-to-minutes old, so the freshness ceiling costs
22
+ * nothing for the incident class while keeping a long-idle agent's stale
23
+ * registry entry from mis-routing a much later permission into an old topic —
24
+ * beyond the ceiling we return null and the caller keeps the existing
25
+ * operator-DM fan-out. This only ever ADDS topic recovery; it never changes the
26
+ * idle/turn-less path.
27
+ */
28
+
29
/** The subset of a turn this recovery needs — kept structural so the gateway's
 *  richer `CurrentTurn` satisfies it without a cast. */
export interface RecoverableTurn {
  sessionChatId: string
  sessionThreadId: number | undefined
  startedAt: number
}

/** Where a recovered permission card should be posted. */
export interface PermissionCardOrigin {
  chatId: string
  threadId: number | undefined
}

/**
 * Pick the most-recently-started turn within the freshness window as the
 * permission card's origin, or null when none qualifies (caller falls back to
 * the operator-DM fan-out). Selects by `startedAt`, not by the iteration order
 * of the source registry, so it is robust to out-of-order insertion. The one
 * exception is an exact `startedAt` tie, where the later-iterated turn wins.
 *
 * @param recentTurns Candidate turns, in any order.
 * @param now         Current time, in the same unit and epoch as `startedAt`.
 * @param maxAgeMs    Freshness ceiling; a turn exactly `maxAgeMs` old still
 *                    qualifies.
 * @returns The chat and thread of the chosen turn, or null.
 */
export function pickRecoveredPermissionOrigin(
  recentTurns: Iterable<RecoverableTurn>,
  now: number,
  maxAgeMs: number,
): PermissionCardOrigin | null {
  let newest: RecoverableTurn | null = null
  for (const turn of recentTurns) {
    // A non-finite timestamp can be neither aged nor ranked. NaN would pass
    // the age test (every comparison with NaN is false) and, once chosen,
    // could never be displaced by a genuinely newer turn.
    if (!Number.isFinite(turn.startedAt)) continue
    if (now - turn.startedAt > maxAgeMs) continue
    if (newest === null || turn.startedAt >= newest.startedAt) newest = turn
  }
  return newest === null
    ? null
    : { chatId: newest.sessionChatId, threadId: newest.sessionThreadId }
}
@@ -0,0 +1,70 @@
1
+ /**
2
+ * Pure helpers for permission-card TIMEOUT handling — making a "no operator
3
+ * responded" auto-deny distinguishable from a deliberate denial, and
4
+ * suppressing the duplicate card a model raises when it retries the identical
5
+ * call after such a timeout.
6
+ *
7
+ * Background (marko Rentals-budget loop, 2026-06-17). switchroom forwards a
8
+ * permission verdict to claude as `{ behavior, message? }`; with no `message`,
9
+ * claude renders the generic "the user said: Denied". A 10-minute TTL
10
+ * auto-deny was therefore indistinguishable from a real operator "Deny", so
11
+ * the model read it as transient and retried the SAME tool call — re-raising
12
+ * an identical card 10 minutes later, in a loop the operator never asked for.
13
+ *
14
+ * Two levers, both pure here and wired in gateway.ts:
15
+ * 1. `timeoutDenyMessage` — the `message` we attach ONLY to a TTL auto-deny,
16
+ * telling the model it was a timeout (not a denial) and not to retry.
17
+ * 2. `permissionSignature` + `isRecentTimeoutDuplicate` — recognise a retry of
18
+ * the exact same (tool, input) shortly after it timed out, so the gateway
19
+ * can short-circuit it (deny with `duplicateDenyMessage`) WITHOUT posting a
20
+ * second identical card. The suppression is reset on operator activity
21
+ * (handled gateway-side), so it only ever holds while the operator is
22
+ * genuinely absent — re-showing a card to an absent operator is the noise
23
+ * this removes.
24
+ */
25
+
26
// NUL — can appear in neither a tool name nor a rendered input preview, so it
// safely delimits the two halves of a signature (a printable separator could
// collide: ("a b","c") vs ("a","b c")). Built at runtime so the SOURCE file
// stays plain text (a literal NUL byte would make git treat it as binary).
const SIGNATURE_SEP = String.fromCharCode(0)

/**
 * Stable identity for a permission request: the tool plus its input preview
 * (the same string the card renders). Same tool + same preview ⇒ same action.
 *
 * @param toolName     Name of the permission-gated tool.
 * @param inputPreview Rendered preview of the tool input.
 * @returns `toolName`, a NUL separator, then `inputPreview`.
 */
export function permissionSignature(toolName: string, inputPreview: string): string {
  return toolName + SIGNATURE_SEP + inputPreview
}

/**
 * The `message` attached to a TTL auto-deny so the model treats it as a
 * timeout, not a denial, and does not retry the identical call.
 *
 * @param timeoutMinutes How long the card waited, in minutes; exactly 1 is
 *                       rendered with the singular "minute".
 */
export function timeoutDenyMessage(timeoutMinutes: number): string {
  const unit = timeoutMinutes === 1 ? 'minute' : 'minutes'
  return (
    `No operator responded within ${timeoutMinutes} ${unit}, so this request timed out. ` +
    `This is a TIMEOUT, not a denial — the operator is likely away. ` +
    `Do NOT retry this exact action automatically. Tell the user it is still ` +
    `awaiting their approval, then continue with other work or stop.`
  )
}

/** The `message` attached when we short-circuit a duplicate retry of an
 *  already-timed-out request (no new card posted). */
export const duplicateDenyMessage =
  `This exact action already timed out awaiting the operator, and they have not ` +
  `responded since. Do NOT keep re-requesting it — tell the user it needs their ` +
  `approval when they are back, and move on to other work or stop.`

/**
 * True when `sig` timed out within `windowMs` of `now` (so a fresh request for
 * it is a retry to suppress). `timeouts` maps signature → last-timeout epoch ms.
 * The window is inclusive: a timeout exactly `windowMs` ago still counts. An
 * unknown signature is never a duplicate.
 */
export function isRecentTimeoutDuplicate(
  timeouts: ReadonlyMap<string, number>,
  sig: string,
  now: number,
  windowMs: number,
): boolean {
  const timedOutAt = timeouts.get(sig)
  if (timedOutAt == null) return false
  return now - timedOutAt <= windowMs
}
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Prefix-cache warmup turn — opt-in cold-start TTFO optimization.
3
3
  *
4
- * Per cold-start TTFO RFC (docs/rfcs/cold-start-ttfo.md, PR #1589),
4
+ * Per cold-start TTFO RFC (reference/rfcs/cold-start-ttfo.md, PR #1589),
5
5
  * Option A. On every bridge-up after a restart, synthesize a synthetic
6
6
  * inbound (`__WARMUP_PING__`, meta.source="warmup") and deliver it to
7
7
  * the just-registered bridge. Claude processes the message — paying
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * Tests for the peercred-gated webhook ingest UDS server
3
- * (RFC docs/rfcs/webhook-via-gateway-socket.md).
3
+ * (RFC reference/rfcs/webhook-via-gateway-socket.md).
4
4
  *
5
5
  * MUST run under `bun test`: the peer-credential gate calls
6
6
  * `getPeerCred` (bun:ffi getsockopt SO_PEERCRED), which returns null
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Webhook ingest UDS server (RFC docs/rfcs/webhook-via-gateway-socket.md).
2
+ * Webhook ingest UDS server (RFC reference/rfcs/webhook-via-gateway-socket.md).
3
3
  *
4
4
  * A dedicated, peercred-gated Unix socket the host-side web receiver
5
5
  * forwards verified webhook events to. It is deliberately SEPARATE from
@@ -20,7 +20,7 @@
20
20
  * writing to a registry.db nobody read, leaving every bg sub-agent
21
21
  * invisible to the watcher. Surfaced by
22
22
  * bg-sub-agent-dispatch-dm.test.ts; see RFC Phase 2 §Bug 2 in
23
- * reference/sub-agent-visibility-rfc.md.
23
+ * reference/rfcs/sub-agent-visibility.md.
24
24
  * 3. process.cwd() (legacy fallback for ad-hoc invocations).
25
25
  *
26
26
  * Performance: the actual DB write is deferred via setImmediate (Node 22+
@@ -1,5 +1,5 @@
1
1
  /**
2
- * `!`-prefix interrupt marker — closes #575 / part of `reference/steer-or-queue-mid-flight.md`.
2
+ * `!`-prefix interrupt marker — closes #575 / part of `reference/jobs/steer-or-queue-mid-flight.md`.
3
3
  *
4
4
  * The product contract: when the user starts a Telegram message with
5
5
  * `!`, they're saying "drop what you're doing and handle this
@@ -2,7 +2,7 @@
2
2
  * over-ping-safety-net.ts — pure decision predicate for #1674's
3
3
  * "at-most-one device-ping per turn" framework safety net.
4
4
  *
5
- * Background. `reference/conversational-pacing.md` beat 5 is
5
+ * Background. `reference/rfcs/conversational-pacing.md` beat 5 is
6
6
  * explicit: the model should deliver the answer as a fresh `reply`
7
7
  * omitting `disable_notification` (i.e. pinging the device once).
8
8
  * EXACTLY ONE ping per turn. The model occasionally violates this
@@ -9,7 +9,7 @@
9
9
  * "Allow" means for a narrow safe scope, disclosed honestly on the post-tap
10
10
  * card ("won't ask again about <breadth> for 30 min" vs "allowed once").
11
11
  *
12
- * Design contract (reference/access-model.md — "you hold the leash"):
12
+ * Design contract (reference/rfcs/access-model.md — "you hold the leash"):
13
13
  *
14
14
  * - **Operator-authored only.** Every cache entry is created by an
15
15
  * `allowFrom`-authenticated Telegram tap. No tool call can seed an
@@ -158,7 +158,7 @@ export function renderVaultCliError(
158
158
  // Route the operator at the Telegram-native equivalent for the
159
159
  // verb in flight — only `init` needs a one-time host shell.
160
160
  // Closes the "leave Telegram for a verb that exists in Telegram"
161
- // anti-pattern from reference/talk-to-agents-from-anywhere.md.
161
+ // anti-pattern from reference/jobs/talk-to-agents-from-anywhere.md.
162
162
  return {
163
163
  suppressRaw: true,
164
164
  html:
@@ -10,7 +10,7 @@
10
10
  * 75s, firm at 180s) and the 60s user-visible awareness ping were
11
11
  * retired: their success rate was 0-7% by the design's own KPI, and they
12
12
  * duplicated a job the draft thinking-lane now does natively. See
13
- * `reference/conversational-pacing.md` § Safety net.
13
+ * `reference/rfcs/conversational-pacing.md` § Safety net.
14
14
  *
15
15
  * What remains: ONE silence clock and ONE terminal action.
16
16
  *
@@ -323,7 +323,7 @@ export function silenceMsForKey(key: string, now: number): number | null {
323
323
  * Verbatim framework-fallback text — the user-visible "still working / still
324
324
  * thinking" message the gateway sends at the 300s threshold when the model
325
325
  * hasn't broken its own silence. Wording is load-bearing (see
326
- * `reference/conversational-pacing.md` § Safety net). Two principles:
326
+ * `reference/rfcs/conversational-pacing.md` § Safety net). Two principles:
327
327
  *
328
328
  * 1. The parenthetical `(no update from agent in N min)` is honest —
329
329
  * distinguishes from "the agent said something" so users learn to trust
@@ -3,7 +3,7 @@
3
3
  * "consecutive silent replies edit one growing message" UX fix.
4
4
  *
5
5
  * Background. Modern Claude 2.1.x on this fleet implements
6
- * conversational pacing (`reference/conversational-pacing.md` beats
6
+ * conversational pacing (`reference/rfcs/conversational-pacing.md` beats
7
7
  * 1 + 3 + 5) by calling the `reply` MCP tool multiple times in a
8
8
  * turn — a silent ack, silent per-step updates, and one pinged
9
9
  * final answer. The over-ping safety net (#1674) caps the
@@ -17,7 +17,7 @@
17
17
  * unpinned message.
18
18
  *
19
19
  * See #421 (banner pin lifecycle) and JTBD
20
- * `reference/track-plan-quota-live.md` ("at a glance").
20
+ * `reference/jobs/track-plan-quota-live.md` ("at a glance").
21
21
  */
22
22
 
23
23
  import type { BannerState } from './slot-banner.js';
@@ -17,7 +17,7 @@
17
17
  * idempotent and has no user-visible side effects beyond clearing the
18
18
  * (probably-empty) pending-updates queue.
19
19
  *
20
- * Reference: reference/restart-and-know-what-im-running.md — "silent
20
+ * Reference: reference/jobs/restart-and-know-what-im-running.md — "silent
21
21
  * respawn. Agent comes back and the user has to guess whether it's
22
22
  * the same agent." A gateway stuck in a 409 loop is exactly that
23
23
  * failure mode.
@@ -0,0 +1,66 @@
1
+ /**
2
+ * Unit tests for probeConnections — the boot-card surface for
3
+ * configured-but-unauthed MCP connections (P3). The probe only READS the
4
+ * host-computed snapshot at <agentDir>/.claude/connection-health.json, so
5
+ * we drive it with an injected readFileImpl (no fs / no broker).
6
+ */
7
+
8
+ import { describe, it, expect } from 'bun:test'
9
+ import { probeConnections } from '../gateway/boot-probes.js'
10
+
11
+ const ENOENT = () => {
12
+ const e = new Error('ENOENT') as NodeJS.ErrnoException
13
+ e.code = 'ENOENT'
14
+ throw e
15
+ }
16
+
17
+ describe('probeConnections', () => {
18
+ it('OK (silent) when the snapshot file is absent โ€” assume healthy', async () => {
19
+ const r = await probeConnections('/agent', { readFileImpl: ENOENT })
20
+ expect(r.status).toBe('ok')
21
+ })
22
+
23
+ it('OK when the snapshot is malformed JSON', async () => {
24
+ const r = await probeConnections('/agent', { readFileImpl: () => 'not json{' })
25
+ expect(r.status).toBe('ok')
26
+ })
27
+
28
+ it('OK when there are zero issues', async () => {
29
+ const r = await probeConnections('/agent', {
30
+ readFileImpl: () => JSON.stringify({ computedAt: 1, issues: [] }),
31
+ })
32
+ expect(r.status).toBe('ok')
33
+ expect(r.detail).toContain('all authed')
34
+ })
35
+
36
+ it('DEGRADED (never fail) with named servers + a fix when connections are unauthed', async () => {
37
+ const snapshot = {
38
+ computedAt: 1,
39
+ issues: [
40
+ { server: 'meta', key: 'meta/token', kind: 'missing', detail: 'x', fix: 'switchroom vault set meta/token --allow marko' },
41
+ { server: 'postiz', key: 'postiz/key', kind: 'missing', detail: 'y', fix: 'switchroom vault set postiz/key --allow marko' },
42
+ ],
43
+ }
44
+ const r = await probeConnections('/agent', { readFileImpl: () => JSON.stringify(snapshot) })
45
+ expect(r.status).toBe('degraded')
46
+ expect(r.detail).toContain('2 integration(s)')
47
+ expect(r.detail).toContain('meta')
48
+ expect(r.detail).toContain('postiz')
49
+ // nextStep carries the first fix + a pointer to doctor for the rest.
50
+ expect(r.nextStep).toContain('switchroom vault set meta/token')
51
+ expect(r.nextStep).toContain('+1 more')
52
+ })
53
+
54
+ it('dedupes servers in the detail count', async () => {
55
+ const snapshot = {
56
+ computedAt: 1,
57
+ issues: [
58
+ { server: 'meta', key: 'meta/a', kind: 'missing', detail: 'x', fix: 'fixa' },
59
+ { server: 'meta', key: 'meta/b', kind: 'acl', detail: 'y', fix: 'fixb' },
60
+ ],
61
+ }
62
+ const r = await probeConnections('/agent', { readFileImpl: () => JSON.stringify(snapshot) })
63
+ expect(r.status).toBe('degraded')
64
+ expect(r.detail).toContain('1 integration(s)')
65
+ })
66
+ })
@@ -16,7 +16,7 @@ import { clearStaleTelegramPollingState } from "../startup-reset";
16
16
  *
17
17
  * These tests pin that behaviour so we don't accidentally remove the
18
18
  * call during a future refactor and reintroduce the silent-respawn
19
- * anti-pattern from reference/restart-and-know-what-im-running.md.
19
+ * anti-pattern from reference/jobs/restart-and-know-what-im-running.md.
20
20
  */
21
21
 
22
22
  describe("clearStaleTelegramPollingState", () => {
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Property tests for `inbound-delivery-machine.ts`.
3
3
  *
4
- * Per RFC `docs/rfcs/inbound-delivery-state-machine.md`: 5 invariants
4
+ * Per RFC `reference/rfcs/inbound-delivery-state-machine.md`: 5 invariants
5
5
  * validated over arbitrary event schedules. A counterexample is the
6
6
  * minimal evidence that the machine has a bug. The wedge-cluster
7
7
  * bugs (v0.12.22 boot-wedge, overlapping-turn silence, #1564 sibling