switchroom 0.7.15 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (301) hide show
  1. package/README.md +51 -59
  2. package/bin/run-hook.sh +27 -11
  3. package/bin/timezone-hook.sh +9 -7
  4. package/dist/agent-scheduler/index.js +410 -133
  5. package/dist/auth-broker/index.js +13932 -0
  6. package/dist/cli/switchroom.js +26937 -5601
  7. package/dist/host-control/main.js +12702 -0
  8. package/dist/vault/approvals/kernel-server.js +467 -184
  9. package/dist/vault/broker/server.js +1430 -724
  10. package/examples/minimal.yaml +63 -0
  11. package/examples/personal-google-workspace-mcp/.env.example +34 -0
  12. package/examples/personal-google-workspace-mcp/README.md +194 -0
  13. package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
  14. package/examples/switchroom.yaml +220 -0
  15. package/package.json +7 -4
  16. package/profiles/_base/settings.json.hbs +20 -5
  17. package/profiles/_base/start.sh.hbs +16 -3
  18. package/profiles/_shared/agent-self-service.md.hbs +126 -0
  19. package/profiles/_shared/telegram-style.md.hbs +20 -90
  20. package/profiles/_shared/vault-protocol.md.hbs +68 -0
  21. package/profiles/default/CLAUDE.md +50 -96
  22. package/profiles/default/CLAUDE.md.hbs +36 -6
  23. package/profiles/default/workspace/SOUL.md.hbs +12 -5
  24. package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
  25. package/skills/buildkite-agent-runtime/SKILL.md +44 -11
  26. package/skills/buildkite-api/SKILL.md +31 -8
  27. package/skills/buildkite-cli/SKILL.md +27 -9
  28. package/skills/buildkite-migration/SKILL.md +22 -9
  29. package/skills/buildkite-pipelines/SKILL.md +26 -9
  30. package/skills/buildkite-secure-delivery/SKILL.md +23 -9
  31. package/skills/buildkite-test-engine/SKILL.md +25 -8
  32. package/skills/docx/SKILL.md +1 -1
  33. package/skills/docx/scripts/office/validators/__pycache__/__init__.cpython-313.pyc +0 -0
  34. package/skills/docx/scripts/office/validators/__pycache__/base.cpython-313.pyc +0 -0
  35. package/skills/file-bug/SKILL.md +34 -6
  36. package/skills/humanizer/SKILL.md +15 -0
  37. package/skills/humanizer-calibrate/SKILL.md +7 -1
  38. package/skills/mcp-builder/SKILL.md +1 -1
  39. package/skills/pdf/SKILL.md +1 -1
  40. package/skills/pptx/SKILL.md +1 -1
  41. package/skills/skill-creator/SKILL.md +21 -1
  42. package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
  43. package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
  44. package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
  45. package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
  46. package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
  47. package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
  48. package/skills/switchroom-cli/SKILL.md +63 -64
  49. package/skills/switchroom-health/SKILL.md +23 -10
  50. package/skills/switchroom-install/SKILL.md +3 -3
  51. package/skills/switchroom-manage/SKILL.md +26 -19
  52. package/skills/switchroom-runtime/SKILL.md +191 -0
  53. package/skills/switchroom-status/SKILL.md +27 -2
  54. package/skills/telegram-test-harness/SKILL.md +3 -0
  55. package/skills/token-helpers/SKILL.md +24 -1
  56. package/skills/webapp-testing/SKILL.md +31 -1
  57. package/skills/xlsx/SKILL.md +1 -1
  58. package/telegram-plugin/admin-commands/index.ts +7 -5
  59. package/telegram-plugin/analytics-posthog.ts +191 -0
  60. package/telegram-plugin/bridge/bridge.ts +69 -0
  61. package/telegram-plugin/bridge/ipc-client.ts +4 -1
  62. package/telegram-plugin/dist/bridge/bridge.js +194 -119
  63. package/telegram-plugin/dist/gateway/gateway.js +23611 -19671
  64. package/telegram-plugin/dist/server.js +245 -189
  65. package/telegram-plugin/first-paint.ts +3 -24
  66. package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
  67. package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
  68. package/telegram-plugin/gateway/auth-command.ts +794 -0
  69. package/telegram-plugin/gateway/auth-line.ts +123 -0
  70. package/telegram-plugin/gateway/boot-card.ts +169 -40
  71. package/telegram-plugin/gateway/boot-issue-cache.ts +308 -0
  72. package/telegram-plugin/gateway/boot-probes.ts +166 -123
  73. package/telegram-plugin/gateway/boot-reason.ts +41 -7
  74. package/telegram-plugin/gateway/boot-version.ts +66 -0
  75. package/telegram-plugin/gateway/gateway.ts +3499 -1885
  76. package/telegram-plugin/gateway/hostd-dispatch.ts +117 -0
  77. package/telegram-plugin/gateway/ipc-protocol.ts +18 -0
  78. package/telegram-plugin/gateway/pending-inbound-buffer.ts +106 -0
  79. package/telegram-plugin/gateway/quarantine.ts +69 -0
  80. package/telegram-plugin/gateway/quota-cache.ts +9 -4
  81. package/telegram-plugin/gateway/reaction-trigger.ts +401 -0
  82. package/telegram-plugin/gateway/recent-denials.test.ts +103 -0
  83. package/telegram-plugin/gateway/recent-denials.ts +77 -0
  84. package/telegram-plugin/gateway/startup-network-retry.ts +109 -31
  85. package/telegram-plugin/gateway/vault-grant-inbound-builders.ts +125 -0
  86. package/telegram-plugin/history.ts +91 -0
  87. package/telegram-plugin/hooks/hooks.json +10 -0
  88. package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +130 -0
  89. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +19 -2
  90. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +22 -2
  91. package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
  92. package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
  93. package/telegram-plugin/inbound-classifier.ts +50 -0
  94. package/telegram-plugin/inline-keyboard-callbacks.ts +136 -0
  95. package/telegram-plugin/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json +1 -0
  96. package/telegram-plugin/package.json +4 -2
  97. package/telegram-plugin/permission-rule.ts +51 -0
  98. package/telegram-plugin/permission-title.ts +56 -0
  99. package/telegram-plugin/quota-check.ts +19 -41
  100. package/telegram-plugin/registry/reaper.ts +223 -0
  101. package/telegram-plugin/retry-api-call.ts +80 -0
  102. package/telegram-plugin/runtime-metrics.ts +177 -0
  103. package/telegram-plugin/scripts/build.mjs +0 -1
  104. package/telegram-plugin/secret-detect/index.ts +24 -0
  105. package/telegram-plugin/secret-detect/vault-error.test.ts +64 -12
  106. package/telegram-plugin/secret-detect/vault-error.ts +78 -11
  107. package/telegram-plugin/secret-detect/vault-write.ts +14 -2
  108. package/telegram-plugin/server.js +41795 -0
  109. package/telegram-plugin/session-tail.ts +6 -1
  110. package/telegram-plugin/shared/bot-runtime.ts +5 -4
  111. package/telegram-plugin/silence-poke.ts +420 -0
  112. package/telegram-plugin/silent-end.ts +174 -0
  113. package/telegram-plugin/stream-controller.ts +13 -0
  114. package/telegram-plugin/stream-reply-handler.ts +7 -0
  115. package/telegram-plugin/subagent-watcher.ts +213 -4
  116. package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
  117. package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
  118. package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
  119. package/telegram-plugin/tests/boot-card-issue-dedup.test.ts +247 -0
  120. package/telegram-plugin/tests/boot-card-reason-to-render.test.ts +182 -0
  121. package/telegram-plugin/tests/boot-card-reason.test.ts +65 -2
  122. package/telegram-plugin/tests/boot-card-render.test.ts +146 -0
  123. package/telegram-plugin/tests/boot-card-silent-on-operator.test.ts +103 -0
  124. package/telegram-plugin/tests/boot-probes.test.ts +216 -10
  125. package/telegram-plugin/tests/boot-version-string.test.ts +0 -0
  126. package/telegram-plugin/tests/finalize-callback.test.ts +190 -0
  127. package/telegram-plugin/tests/gateway-message-validator.test.ts +26 -0
  128. package/telegram-plugin/tests/gateway-secret-detect.test.ts +12 -3
  129. package/telegram-plugin/tests/gateway-startup-network-retry.test.ts +104 -0
  130. package/telegram-plugin/tests/history-reaper.test.ts +378 -0
  131. package/telegram-plugin/tests/hostd-dispatch.test.ts +129 -0
  132. package/telegram-plugin/tests/inbound-classifier.test.ts +76 -0
  133. package/telegram-plugin/tests/inbound-message-types.test.ts +267 -0
  134. package/telegram-plugin/tests/issues-card.test.ts +49 -0
  135. package/telegram-plugin/tests/pending-inbound-buffer.test.ts +132 -0
  136. package/telegram-plugin/tests/permission-rule.test.ts +80 -1
  137. package/telegram-plugin/tests/permission-title.test.ts +31 -0
  138. package/telegram-plugin/tests/quota-check.test.ts +5 -35
  139. package/telegram-plugin/tests/races.test.ts +179 -0
  140. package/telegram-plugin/tests/reaction-trigger-flow.test.ts +353 -0
  141. package/telegram-plugin/tests/reaction-trigger.test.ts +397 -0
  142. package/telegram-plugin/tests/retry-api-call.test.ts +152 -1
  143. package/telegram-plugin/tests/runtime-metrics.test.ts +145 -0
  144. package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +155 -0
  145. package/telegram-plugin/tests/secret-detect-delete-must-surface-failures.test.ts +133 -0
  146. package/telegram-plugin/tests/secret-detect-false-positives.test.ts +137 -0
  147. package/telegram-plugin/tests/silence-poke.test.ts +493 -0
  148. package/telegram-plugin/tests/silent-end.test.ts +206 -0
  149. package/telegram-plugin/tests/subagent-tracker-hooks.test.ts +107 -0
  150. package/telegram-plugin/tests/subagent-watcher-env-thresholds.test.ts +224 -0
  151. package/telegram-plugin/tests/subagent-watcher-stall-terminal.test.ts +316 -0
  152. package/telegram-plugin/tests/subagent-watcher.test.ts +263 -0
  153. package/telegram-plugin/tests/turn-signal-tracker.test.ts +81 -0
  154. package/telegram-plugin/tests/vault-approval-posture.test.ts +256 -0
  155. package/telegram-plugin/tests/vault-grant-auto-resume.test.ts +73 -0
  156. package/telegram-plugin/tests/vault-grant-inbound-builders.test.ts +226 -0
  157. package/telegram-plugin/tests/vault-grant-union.test.ts +130 -0
  158. package/telegram-plugin/tests/vault-key-regex-allows-slash.test.ts +140 -0
  159. package/telegram-plugin/tests/vault-posture-quarantine.test.ts +104 -0
  160. package/telegram-plugin/tests/vault-request-access-tool.test.ts +114 -0
  161. package/telegram-plugin/tests/vault-request-access-unlock-resume.test.ts +106 -0
  162. package/telegram-plugin/turn-signal-tracker.ts +100 -24
  163. package/telegram-plugin/uat/SETUP.md +210 -35
  164. package/telegram-plugin/uat/assertions.ts +264 -37
  165. package/telegram-plugin/uat/driver-info.ts +57 -0
  166. package/telegram-plugin/uat/driver.ts +590 -51
  167. package/telegram-plugin/uat/harness.ts +140 -94
  168. package/telegram-plugin/uat/load-env.test.ts +72 -0
  169. package/telegram-plugin/uat/load-env.ts +48 -0
  170. package/telegram-plugin/uat/login.ts +96 -53
  171. package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
  172. package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
  173. package/telegram-plugin/uat/runners/report.ts +150 -0
  174. package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
  175. package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
  176. package/telegram-plugin/uat/runners/scorer.ts +106 -0
  177. package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
  178. package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
  179. package/telegram-plugin/uat/scenarios/ask-user-button-tap-dm.test.ts +141 -0
  180. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +191 -0
  181. package/telegram-plugin/uat/scenarios/fuzz-extended-dm.test.ts +255 -0
  182. package/telegram-plugin/uat/scenarios/fuzz-human-style-dm.test.ts +275 -0
  183. package/telegram-plugin/uat/scenarios/fuzz-random-prompts-dm.test.ts +146 -0
  184. package/telegram-plugin/uat/scenarios/fuzz-status-ask-dm.test.ts +486 -0
  185. package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +67 -0
  186. package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +100 -0
  187. package/telegram-plugin/uat/scenarios/jtbd-soft-commit-dm.test.ts +67 -0
  188. package/telegram-plugin/uat/scenarios/jtbd-status-query-dm.test.ts +49 -0
  189. package/telegram-plugin/uat/scenarios/location-inbound-dm.test.ts +65 -0
  190. package/telegram-plugin/uat/scenarios/midturn-silent-dm.test.ts +175 -0
  191. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +142 -0
  192. package/telegram-plugin/uat/scenarios/reactions-trigger-turn-dm.test.ts +96 -0
  193. package/telegram-plugin/uat/scenarios/secret-redaction-deletes-original-dm.test.ts +123 -0
  194. package/telegram-plugin/uat/scenarios/secret-redaction-no-false-positive-dm.test.ts +87 -0
  195. package/telegram-plugin/uat/scenarios/silence-poke-soft-dm.test.ts +155 -0
  196. package/telegram-plugin/uat/scenarios/silent-end-recovery-dm.test.ts +95 -0
  197. package/telegram-plugin/uat/scenarios/smoke-dm-reply.test.ts +57 -0
  198. package/telegram-plugin/uat/scenarios/subagent-watcher-no-rerun-dm.test.ts +135 -0
  199. package/telegram-plugin/uat/scenarios/vault-approval-posture-telegram-id-dm.test.ts +191 -0
  200. package/telegram-plugin/uat/scenarios/vault-audit-allow-dm.test.ts +108 -0
  201. package/telegram-plugin/uat/scenarios/vault-grant-auto-resume-dm.test.ts +121 -0
  202. package/telegram-plugin/uat/scenarios/vault-request-access-concurrent-dm.test.ts +161 -0
  203. package/telegram-plugin/uat/scenarios/vault-request-access-end-to-end-dm.test.ts +158 -0
  204. package/telegram-plugin/uat/scenarios/voice-inbound-dm.test.ts +65 -0
  205. package/telegram-plugin/vault-approval-posture.ts +42 -0
  206. package/telegram-plugin/welcome-text.ts +1 -0
  207. package/telegram-plugin/active-pins-sweep.ts +0 -204
  208. package/telegram-plugin/active-pins.ts +0 -146
  209. package/telegram-plugin/auth-dashboard.ts +0 -1104
  210. package/telegram-plugin/auth-slot-parser.ts +0 -497
  211. package/telegram-plugin/card-event-log.ts +0 -138
  212. package/telegram-plugin/dist/foreman/foreman.js +0 -31106
  213. package/telegram-plugin/docs/multi-agent-card-design.md +0 -847
  214. package/telegram-plugin/docs/pinned-progress-card-reliability.md +0 -144
  215. package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
  216. package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
  217. package/telegram-plugin/foreman/foreman.ts +0 -1165
  218. package/telegram-plugin/foreman/setup-flow.ts +0 -345
  219. package/telegram-plugin/foreman/setup-state.ts +0 -239
  220. package/telegram-plugin/foreman/state.ts +0 -203
  221. package/telegram-plugin/pin-event-log.ts +0 -76
  222. package/telegram-plugin/progress-card-driver.ts +0 -2886
  223. package/telegram-plugin/progress-card-pin-manager.ts +0 -589
  224. package/telegram-plugin/progress-card-pin-watchdog.ts +0 -98
  225. package/telegram-plugin/progress-card.ts +0 -1409
  226. package/telegram-plugin/tests/HARNESS.md +0 -340
  227. package/telegram-plugin/tests/_progress-card-harness.ts +0 -109
  228. package/telegram-plugin/tests/active-pins-boot-reaper.test.ts +0 -211
  229. package/telegram-plugin/tests/active-pins-sweep.test.ts +0 -309
  230. package/telegram-plugin/tests/active-pins.test.ts +0 -187
  231. package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
  232. package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
  233. package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
  234. package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
  235. package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
  236. package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
  237. package/telegram-plugin/tests/bg-agent-progress-card-757.test.ts +0 -201
  238. package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
  239. package/telegram-plugin/tests/card-event-log.test.ts +0 -145
  240. package/telegram-plugin/tests/first-paint.test.ts +0 -257
  241. package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
  242. package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
  243. package/telegram-plugin/tests/foreman-state.test.ts +0 -164
  244. package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
  245. package/telegram-plugin/tests/harness-ordering-invariants.test.ts +0 -243
  246. package/telegram-plugin/tests/pin-event-log.test.ts +0 -124
  247. package/telegram-plugin/tests/progress-card-api-failure-during-deferred.test.ts +0 -73
  248. package/telegram-plugin/tests/progress-card-close-paths-converge.test.ts +0 -272
  249. package/telegram-plugin/tests/progress-card-cross-turn.test.ts +0 -258
  250. package/telegram-plugin/tests/progress-card-delay-842.test.ts +0 -160
  251. package/telegram-plugin/tests/progress-card-dispose-preservepending.test.ts +0 -81
  252. package/telegram-plugin/tests/progress-card-draft-flag.test.ts +0 -80
  253. package/telegram-plugin/tests/progress-card-driver-eviction.test.ts +0 -215
  254. package/telegram-plugin/tests/progress-card-driver-fleet-shadow.test.ts +0 -123
  255. package/telegram-plugin/tests/progress-card-driver-force-complete-parent-done.test.ts +0 -76
  256. package/telegram-plugin/tests/progress-card-edit-timestamps-budget.test.ts +0 -62
  257. package/telegram-plugin/tests/progress-card-memory-bounds.test.ts +0 -84
  258. package/telegram-plugin/tests/progress-card-pin-failure-paths.test.ts +0 -139
  259. package/telegram-plugin/tests/progress-card-pin-manager.test.ts +0 -773
  260. package/telegram-plugin/tests/progress-card-pin-race-fast-turn.test.ts +0 -66
  261. package/telegram-plugin/tests/progress-card-pin-sidecar-partial-write.test.ts +0 -64
  262. package/telegram-plugin/tests/progress-card-pin-watchdog.test.ts +0 -190
  263. package/telegram-plugin/tests/progress-card-sigterm-pin-flush.test.ts +0 -146
  264. package/telegram-plugin/tests/real-gateway-f1-ladder-integrity.test.ts +0 -123
  265. package/telegram-plugin/tests/real-gateway-f2-instant-draft.test.ts +0 -82
  266. package/telegram-plugin/tests/real-gateway-f3-late-card.test.ts +0 -114
  267. package/telegram-plugin/tests/real-gateway-harness.ts +0 -699
  268. package/telegram-plugin/tests/real-gateway-i6-turn-flush-replay-dedup.test.ts +0 -313
  269. package/telegram-plugin/tests/real-gateway-ipc-lifecycle.test.ts +0 -299
  270. package/telegram-plugin/tests/real-gateway-spec.test.ts +0 -487
  271. package/telegram-plugin/tests/real-gateway.smoke.test.ts +0 -101
  272. package/telegram-plugin/tests/setup-flow.test.ts +0 -510
  273. package/telegram-plugin/tests/setup-state.test.ts +0 -146
  274. package/telegram-plugin/tests/sync-chat-running-subagents.test.ts +0 -116
  275. package/telegram-plugin/tests/turn-end-regressions.test.ts +0 -489
  276. package/telegram-plugin/tests/turn-flush-card-takeover.test.ts +0 -218
  277. package/telegram-plugin/tests/turn-flush-prose-recovery.test.ts +0 -78
  278. package/telegram-plugin/tests/two-zone-bg-carry-full-lifecycle.test.ts +0 -131
  279. package/telegram-plugin/tests/two-zone-bg-detection.test.ts +0 -120
  280. package/telegram-plugin/tests/two-zone-bg-done-when-all-terminal.test.ts +0 -116
  281. package/telegram-plugin/tests/two-zone-bg-early-turn-end.test.ts +0 -87
  282. package/telegram-plugin/tests/two-zone-bg-survives-next-turn.test.ts +0 -211
  283. package/telegram-plugin/tests/two-zone-card-cap.test.ts +0 -62
  284. package/telegram-plugin/tests/two-zone-card-fleet-row.test.ts +0 -101
  285. package/telegram-plugin/tests/two-zone-card-header-phases.test.ts +0 -78
  286. package/telegram-plugin/tests/two-zone-card-html-balance.test.ts +0 -110
  287. package/telegram-plugin/tests/two-zone-card-lifecycle.test.ts +0 -128
  288. package/telegram-plugin/tests/two-zone-card-sanitise.test.ts +0 -58
  289. package/telegram-plugin/tests/two-zone-card-snapshot.test.ts +0 -133
  290. package/telegram-plugin/tests/two-zone-concurrent-turns-isolation.test.ts +0 -155
  291. package/telegram-plugin/tests/two-zone-phasefor-precedence.test.ts +0 -117
  292. package/telegram-plugin/tests/two-zone-snapshot-extras.test.ts +0 -187
  293. package/telegram-plugin/tests/two-zone-stuck-edit-throttle.test.ts +0 -149
  294. package/telegram-plugin/tests/two-zone-stuck-header-escalation.test.ts +0 -101
  295. package/telegram-plugin/tests/two-zone-stuck-per-member.test.ts +0 -114
  296. package/telegram-plugin/tests/two-zone-stuck-recovery.test.ts +0 -105
  297. package/telegram-plugin/tests/waiting-ux-harness.ts +0 -381
  298. package/telegram-plugin/tests/waiting-ux.e2e.test.ts +0 -233
  299. package/telegram-plugin/turn-flush-prose-recovery.ts +0 -40
  300. package/telegram-plugin/two-zone-card.ts +0 -269
  301. package/telegram-plugin/uat/scenarios/smoke-clerk-reply.test.ts +0 -61
@@ -11,12 +11,13 @@
11
11
  * caller as a thrown error — only as ProbeResult{ status:'fail', ... }.
12
12
  */
13
13
 
14
- import { readFileSync, readdirSync, existsSync, mkdirSync, writeFileSync } from 'fs'
14
+ import { readFileSync, readdirSync, existsSync } from 'fs'
15
15
  import { join } from 'path'
16
16
  import { execFile as execFileCb } from 'child_process'
17
17
  import { promisify } from 'util'
18
18
 
19
19
  import { readQuotaCache, writeQuotaCache } from './quota-cache.js'
20
+ import { fetchQuota, formatQuotaLine } from '../quota-check.js'
20
21
 
21
22
  const execFile = promisify(execFileCb)
22
23
 
@@ -28,6 +29,13 @@ export interface ProbeResult {
28
29
  status: ProbeStatus
29
30
  label: string
30
31
  detail: string
32
+ /** Plain-text remediation hint shown beneath the degraded row in the
33
+ * boot card. Per `reference/principles.md` principle 1, every failure
34
+ * should tell the user what to do next — naming the failure without a
35
+ * next step is the explicit ❌ Bad pattern. Omitted on ok rows (they
36
+ * don't render) and on degraded rows where no actionable hint exists.
37
+ */
38
+ nextStep?: string
31
39
  /** True when a 429 caused the probe to skip the live check. Used by
32
40
  * writeQuotaCache to select the short RATE_LIMIT_TTL_MS instead of the
33
41
  * default 5-min TTL. Keying off this boolean avoids matching on the
@@ -111,10 +119,18 @@ const TOKEN_EXPIRING_SOON_DAYS = 7
111
119
  * Read account info from the agent's .claude.json.
112
120
  * agentDir: e.g. /home/user/.switchroom/agents/clerk
113
121
  */
114
- export async function probeAccount(agentDir: string): Promise<ProbeResult> {
122
+ export async function probeAccount(
123
+ agentDir: string,
124
+ opts: { agentName?: string } = {},
125
+ ): Promise<ProbeResult> {
115
126
  return withTimeout('Account', (async (): Promise<ProbeResult> => {
116
127
  const claudeDir = join(agentDir, '.claude')
117
128
  const claudeJsonPath = join(claudeDir, '.claude.json')
129
+ // Fall back to the literal placeholder only when no agentName is plumbed
130
+ // through — the renderer's <code> escape will keep that safe in Telegram
131
+ // HTML, but real call sites should always pass the name so users can
132
+ // tap-to-copy a working command.
133
+ const agentRef = opts.agentName ?? '<agent>'
118
134
  let cfg: ClaudeJson = {}
119
135
  try {
120
136
  const raw = readFileSync(claudeJsonPath, 'utf8')
@@ -125,7 +141,12 @@ export async function probeAccount(agentDir: string): Promise<ProbeResult> {
125
141
 
126
142
  const acc = cfg.oauthAccount
127
143
  if (!acc?.emailAddress) {
128
- return { status: 'degraded', label: 'Account', detail: 'not signed in' }
144
+ return {
145
+ status: 'degraded',
146
+ label: 'Account',
147
+ detail: 'not signed in',
148
+ nextStep: `Run \`switchroom auth login ${agentRef}\` to start the OAuth flow`,
149
+ }
129
150
  }
130
151
 
131
152
  const plan = mapPlan(acc.billingType, acc.hasExtraUsageEnabled)
@@ -154,10 +175,16 @@ export async function probeAccount(agentDir: string): Promise<ProbeResult> {
154
175
  }
155
176
  }
156
177
 
178
+ const nextStep = status === 'fail'
179
+ ? `OAuth token expired — run \`switchroom auth login ${agentRef}\` to re-authenticate`
180
+ : status === 'degraded'
181
+ ? `Token expiring soon — run \`switchroom auth login ${agentRef}\` before it lapses`
182
+ : undefined
157
183
  return {
158
184
  status,
159
185
  label: 'Account',
160
186
  detail: `${acc.emailAddress} · ${plan}${tokenStr}`,
187
+ ...(nextStep ? { nextStep } : {}),
161
188
  }
162
189
  })())
163
190
  }
@@ -378,10 +405,36 @@ export function uptimeMsForStarttime(
378
405
  }
379
406
  }
380
407
 
408
+ /**
409
+ * Compute a remediation hint for a non-active agent systemd state. Returns
410
+ * `undefined` when no actionable hint applies. Per `reference/principles.md`
411
+ * principle 1, every degraded/fail row should tell the user what to do next.
412
+ * Hints share a common journalctl shape so they're greppable across
413
+ * agents.
414
+ */
415
+ function nextStepForAgentState(agentName: string, state: string): string | undefined {
416
+ if (state === 'failed') {
417
+ return `Service failed — inspect with \`journalctl --user -u switchroom-${agentName} -n 100\` then \`switchroom agent restart ${agentName}\``
418
+ }
419
+ if (state === 'inactive') {
420
+ return `Service inactive — start with \`switchroom agent start ${agentName}\` (or \`systemctl --user start switchroom-${agentName}\`)`
421
+ }
422
+ if (state === 'deactivating' || state === 'activating' || state === 'auto-restart') {
423
+ return `Service is in a transient \`${state}\` state — re-check with \`switchroom agent status ${agentName}\` in a few seconds`
424
+ }
425
+ // Unknown state — keep the door open with a generic hint.
426
+ return `Inspect with \`journalctl --user -u switchroom-${agentName} -n 100\``
427
+ }
428
+
381
429
  function probeAgentProcessDocker(): ProbeResult {
382
430
  const found = findAgentProcessInContainer()
383
431
  if (!found) {
384
- return { status: 'fail', label: 'Agent', detail: 'claude process not found' }
432
+ return {
433
+ status: 'fail',
434
+ label: 'Agent',
435
+ detail: 'claude process not found',
436
+ nextStep: 'No claude process in container — check container logs with `docker logs <container>` and restart with `switchroom agent restart <agent>`',
437
+ }
385
438
  }
386
439
  const uptimeMs = uptimeMsForStarttime(found.starttime)
387
440
  const mb = Math.round(found.rssKb / 1024)
@@ -570,7 +623,8 @@ export async function probeAgentProcess(
570
623
  state === 'activating' ||
571
624
  state === 'auto-restart'
572
625
  const status = isTransient ? 'degraded' : 'fail'
573
- return { status, label: 'Agent', detail: `service ${state}` }
626
+ const nextStep = nextStepForAgentState(agentName, state)
627
+ return { status, label: 'Agent', detail: `service ${state}`, ...(nextStep ? { nextStep } : {}) }
574
628
  }
575
629
 
576
630
  // Still within retry budget — wait and try again.
@@ -681,7 +735,8 @@ export async function* watchAgentProcess(
681
735
  state === 'auto-restart' ||
682
736
  state === 'inactive'
683
737
  const status = isTransient ? 'degraded' : 'fail'
684
- return { status, label: 'Agent', detail: `service ${state}` }
738
+ const nextStep = nextStepForAgentState(agentName, state)
739
+ return { status, label: 'Agent', detail: `service ${state}`, ...(nextStep ? { nextStep } : {}) }
685
740
  }
686
741
 
687
742
  while (true) {
@@ -758,144 +813,83 @@ export async function probeGateway(info: GatewayRuntimeInfo): Promise<ProbeResul
758
813
 
759
814
  // ─── Probe: Quota ─────────────────────────────────────────────────────────────
760
815
 
761
- const QUOTA_DEBUG_FILE = 'quota-debug.json'
762
-
763
816
  /**
764
- * Attempt to read quota info via the /api/oauth/usage endpoint.
765
- * The response schema is undocumented — we probe defensively and
766
- * save the raw response to a debug file on first 2xx hit.
817
+ * Read quota utilization via the Pro/Max plan rate-limit headers on a
818
+ * `/v1/messages` probe — the same mechanism `/usage` and `/status` use.
819
+ *
820
+ * Pre-#1163 this hit Anthropic's `/api/oauth/usage` endpoint, which has
821
+ * deprecated/tightened auth and now returns HTTP 403 even for healthy
822
+ * OAuth tokens. That produced the useless boot-card row "Quota HTTP 403
823
+ * — re-authenticate" while `/status` (using the unified-ratelimit
824
+ * headers path) reported the agent as 🟢. See `quota-check.ts` for the
825
+ * underlying probe and `/v1/messages` header surface.
767
826
  *
768
- * Result is cached for 5 min in `~/.switchroom/quota-cache.json` and
769
- * shared across all agents. Without the cache, every gateway boot +
770
- * bridge-reconnect across 4 agents hits the endpoint, triggering 429s
771
- * that surface as 🟡 "rate limited" in the boot card. See `quota-cache.ts`.
827
+ * Result is cached briefly via `quota-cache.ts` so simultaneous fleet
828
+ * restarts (multiple agents booting at once, each with their own gateway)
829
+ * coalesce on the cache instead of each spending a `/v1/messages` token.
772
830
  *
773
831
  * Tests can override the cache path via SWITCHROOM_QUOTA_CACHE_PATH.
774
832
  */
775
833
  export async function probeQuota(
776
834
  claudeConfigDir: string,
777
- agentDir: string,
835
+ _agentDir: string,
778
836
  fetchImpl: typeof fetch = fetch,
779
837
  ): Promise<ProbeResult> {
780
838
  return withTimeout('Quota', (async (): Promise<ProbeResult> => {
781
- // Cache hit → return early (avoids the rate-limit cascade)
782
839
  const cached = readQuotaCache()
783
840
  if (cached) {
784
841
  return cached
785
842
  }
786
843
 
787
- // Read token
788
- let token: string | null = null
844
+ // The fallback per-agent token path is `accounts/default/.oauth-token`;
845
+ // fetchQuota's own resolver only checks the top-level `.oauth-token`,
846
+ // so prefer that, and if it's missing surface the same degraded row
847
+ // we did before (no live probe — that's a setup issue, not a runtime
848
+ // one).
849
+ let claudeDirForProbe: string | null = null
789
850
  for (const candidate of [
790
- join(claudeConfigDir, '.oauth-token'),
791
- join(claudeConfigDir, 'accounts', 'default', '.oauth-token'),
851
+ claudeConfigDir,
852
+ join(claudeConfigDir, 'accounts', 'default'),
792
853
  ]) {
793
- if (existsSync(candidate)) {
794
- try {
795
- const raw = readFileSync(candidate, 'utf8').trim()
796
- if (raw.length > 0) { token = raw; break }
797
- } catch {}
854
+ if (existsSync(join(candidate, '.oauth-token'))) {
855
+ claudeDirForProbe = candidate
856
+ break
798
857
  }
799
858
  }
800
- if (!token) {
801
- return { status: 'degraded', label: 'Quota', detail: 'no OAuth token' }
802
- }
803
-
804
- let resp: Response
805
- try {
806
- const controller = new AbortController()
807
- const t = setTimeout(() => controller.abort(), 1800)
808
- resp = await fetchImpl('https://api.anthropic.com/api/oauth/usage', {
809
- method: 'GET',
810
- headers: {
811
- 'Authorization': `Bearer ${token}`,
812
- 'Accept': 'application/json',
813
- 'anthropic-version': '2023-06-01',
814
- 'anthropic-beta': 'oauth-2025-04-20',
815
- 'User-Agent': 'switchroom-boot/0.1',
816
- },
817
- signal: controller.signal,
818
- })
819
- clearTimeout(t)
820
- } catch (err: unknown) {
821
- return { status: 'fail', label: 'Quota', detail: `request failed: ${(err as Error).message ?? String(err)}` }
822
- }
823
-
824
- if (resp.status === 429) {
825
- // A 429 from /api/oauth/usage means the endpoint is rate-limiting our
826
- // probe calls — it does NOT mean the user is out of quota. Conflating
827
- // the two is the root cause of the false 🟡 "rate limited" alarm
828
- // reported in #210. Return ok-with-note and cache it for 30 s so
829
- // simultaneous fleet restarts read the cached result instead of piling
830
- // up on the same endpoint (see quota-cache.ts: RATE_LIMIT_TTL_MS).
831
- //
832
- // We assume 429 from /api/oauth/usage signals endpoint rate-limiting,
833
- // not quota exhaustion. Anthropic uses 403 / 200-with-flag for the
834
- // latter today; if that changes, revisit this 🟢 mapping.
835
- const rateLimitResult: ProbeResult = {
836
- status: 'ok',
859
+ if (!claudeDirForProbe) {
860
+ return {
861
+ status: 'degraded',
837
862
  label: 'Quota',
838
- detail: 'quota check skipped: rate limited',
839
- rateLimited: true,
863
+ detail: 'no OAuth token',
864
+ nextStep: 'No OAuth token on disk — register a fleet account: `switchroom auth add <label> --from-oauth` then `switchroom auth use <label>` (RFC H)',
840
865
  }
841
- writeQuotaCache(rateLimitResult)
842
- return rateLimitResult
843
- }
844
- if (!resp.ok) {
845
- return { status: 'degraded', label: 'Quota', detail: `HTTP ${resp.status}` }
846
- }
847
-
848
- let body: unknown
849
- try {
850
- body = await resp.json()
851
- } catch {
852
- return { status: 'degraded', label: 'Quota', detail: 'invalid JSON response' }
853
866
  }
854
867
 
855
- // Defensive schema discovery — save raw response for tightening
856
- const debugPath = join(agentDir, 'telegram', QUOTA_DEBUG_FILE)
857
- try {
858
- // Redact token/UUID fields before saving
859
- const redacted = JSON.parse(JSON.stringify(body, (k, v) => {
860
- if (/token|uuid|id|key/i.test(k) && typeof v === 'string' && v.length > 10) return '[REDACTED]'
861
- return v
862
- }))
863
- mkdirSync(join(agentDir, 'telegram'), { recursive: true })
864
- writeFileSync(debugPath, JSON.stringify({ capturedAt: new Date().toISOString(), body: redacted }, null, 2))
865
- } catch {}
866
-
867
- // Try common field paths — schema not yet locked
868
- const b = body as Record<string, unknown>
869
- const sessionQuota =
870
- (b?.['data'] as Record<string, unknown> | undefined)?.['session_quota'] ??
871
- b?.['session_quota'] ??
872
- (b?.['quota'] as Record<string, unknown> | undefined)?.['session'] ??
873
- (b?.['usage'] as Record<string, unknown> | undefined)?.['session']
874
-
875
- if (!sessionQuota) {
868
+ const probe = await fetchQuota({
869
+ claudeConfigDir: claudeDirForProbe,
870
+ fetchImpl,
871
+ timeoutMs: 1800,
872
+ })
873
+ if (!probe.ok) {
874
+ // Auth rejection from /v1/messages is a strong signal — the same
875
+ // endpoint claude itself uses. Other errors are surfaced verbatim
876
+ // so operators can see what's wrong.
877
+ const isAuth = /auth rejected|HTTP 401|HTTP 403/i.test(probe.reason)
876
878
  return {
877
879
  status: 'degraded',
878
880
  label: 'Quota',
879
- detail: `schema unknown — first call captured (debug: ${debugPath})`,
881
+ detail: probe.reason,
882
+ nextStep: isAuth
883
+ ? 'Auth rejected by Anthropic — broker auto-refreshes; if persistent, replace the account: `switchroom auth add <label> --from-oauth --replace`'
884
+ : 'Anthropic quota probe failed — re-check after a minute; broker auto-rotates per `auth.fallback_order`',
880
885
  }
881
886
  }
882
887
 
883
- const sq = sessionQuota as Record<string, unknown>
884
- const parts: string[] = []
885
- if (typeof sq['sonnet_used_pct'] === 'number') parts.push(`Sonnet ${Math.round(sq['sonnet_used_pct'] as number)}%`)
886
- if (typeof sq['opus_used_pct'] === 'number') parts.push(`Opus ${Math.round(sq['opus_used_pct'] as number)}%`)
887
- if (typeof sq['used_pct'] === 'number') parts.push(`${Math.round(sq['used_pct'] as number)}% used`)
888
- if (typeof sq['resets_in_sec'] === 'number') {
889
- const sec = sq['resets_in_sec'] as number
890
- const h = Math.floor(sec / 3600)
891
- const m = Math.round((sec % 3600) / 60)
892
- parts.push(`resets in ${h}h ${m}m`)
888
+ const result: ProbeResult = {
889
+ status: 'ok',
890
+ label: 'Quota',
891
+ detail: formatQuotaLine(probe.data),
893
892
  }
894
-
895
- if (parts.length === 0) {
896
- return { status: 'degraded', label: 'Quota', detail: 'schema unknown — saving raw response' }
897
- }
898
- const result: ProbeResult = { status: 'ok', label: 'Quota', detail: parts.join(' · ') }
899
893
  writeQuotaCache(result)
900
894
  return result
901
895
  })())
@@ -922,7 +916,12 @@ export async function probeHindsight(
922
916
  }
923
917
 
924
918
  if (!resp || !resp.ok) {
925
- return { status: 'fail', label: 'Hindsight', detail: 'unreachable' }
919
+ return {
920
+ status: 'fail',
921
+ label: 'Hindsight',
922
+ detail: 'unreachable',
923
+ nextStep: 'Hindsight server not responding on 127.0.0.1:18888 — start it with `hindsight serve` or check `systemctl --user status hindsight`',
924
+ }
926
925
  }
927
926
 
928
927
  const bankSuffix = bankName ? ` · bank=${bankName}` : ''
@@ -1082,6 +1081,9 @@ export async function probeScheduler(
1082
1081
  status: stillSettling ? 'degraded' : 'fail',
1083
1082
  label: 'Scheduler',
1084
1083
  detail: `sidecar not running (no lockfile)${settlingNote}`,
1084
+ nextStep: stillSettling
1085
+ ? 'Scheduler sidecar still starting — re-check in 30s'
1086
+ : 'Scheduler sidecar not running — restart the agent with `switchroom agent restart <agent>` so the supervisor relaunches it',
1085
1087
  }
1086
1088
  }
1087
1089
  let holderPid: number | null = null
@@ -1090,16 +1092,27 @@ export async function probeScheduler(
1090
1092
  const parsed = Number.parseInt(raw, 10)
1091
1093
  if (Number.isInteger(parsed) && parsed > 0) holderPid = parsed
1092
1094
  } catch {
1093
- return { status: 'degraded', label: 'Scheduler', detail: 'lockfile unreadable' }
1095
+ return {
1096
+ status: 'degraded',
1097
+ label: 'Scheduler',
1098
+ detail: 'lockfile unreadable',
1099
+ nextStep: `Inspect with \`cat ${lockPath}\` — if corrupt, remove it and restart the agent so the supervisor recreates the sidecar`,
1100
+ }
1094
1101
  }
1095
1102
  if (holderPid == null) {
1096
- return { status: 'degraded', label: 'Scheduler', detail: 'lockfile contents invalid' }
1103
+ return {
1104
+ status: 'degraded',
1105
+ label: 'Scheduler',
1106
+ detail: 'lockfile contents invalid',
1107
+ nextStep: `Inspect with \`cat ${lockPath}\` — if corrupt, remove it and restart the agent so the supervisor recreates the sidecar`,
1108
+ }
1097
1109
  }
1098
1110
  if (!isAlive(holderPid)) {
1099
1111
  return {
1100
1112
  status: 'degraded',
1101
1113
  label: 'Scheduler',
1102
1114
  detail: `lock holder pid ${holderPid} not alive (supervisor restart in progress?)`,
1115
+ nextStep: 'Supervisor should relaunch the sidecar shortly — re-check in 30s; if still stale, restart the agent',
1103
1116
  }
1104
1117
  }
1105
1118
 
@@ -1147,14 +1160,24 @@ async function probeUds(
1147
1160
  return { status: 'ok', label, detail: 'n/a (non-docker)' }
1148
1161
  }
1149
1162
  if (!socketPath) {
1150
- return { status: 'fail', label, detail: 'socket path not configured' }
1163
+ return {
1164
+ status: 'fail',
1165
+ label,
1166
+ detail: 'socket path not configured',
1167
+ nextStep: udsNextStep(label, 'unconfigured'),
1168
+ }
1151
1169
  }
1152
1170
  return withTimeout(label, (async (): Promise<ProbeResult> => {
1153
1171
  if (!opts.connectImpl) {
1154
1172
  // Cheap pre-check: stat the file. Saves the connect round-trip on
1155
1173
  // the common "broker container down → bind mount empty" case.
1156
1174
  if (!existsSync(socketPath)) {
1157
- return { status: 'fail', label, detail: `socket missing: ${socketPath}` }
1175
+ return {
1176
+ status: 'fail',
1177
+ label,
1178
+ detail: `socket missing: ${socketPath}`,
1179
+ nextStep: udsNextStep(label, 'missing'),
1180
+ }
1158
1181
  }
1159
1182
  }
1160
1183
  const connect = opts.connectImpl ?? defaultUdsConnect
@@ -1164,13 +1187,31 @@ async function probeUds(
1164
1187
  } catch (err: unknown) {
1165
1188
  const code = (err as NodeJS.ErrnoException)?.code
1166
1189
  const msg = (err as Error)?.message ?? String(err)
1167
- if (code === 'ENOENT') return { status: 'fail', label, detail: 'socket missing' }
1168
- if (code === 'ECONNREFUSED') return { status: 'fail', label, detail: 'connection refused' }
1169
- return { status: 'fail', label, detail: `connect failed: ${msg}` }
1190
+ if (code === 'ENOENT') return { status: 'fail', label, detail: 'socket missing', nextStep: udsNextStep(label, 'missing') }
1191
+ if (code === 'ECONNREFUSED') return { status: 'fail', label, detail: 'connection refused', nextStep: udsNextStep(label, 'refused') }
1192
+ return { status: 'fail', label, detail: `connect failed: ${msg}`, nextStep: udsNextStep(label, 'other') }
1170
1193
  }
1171
1194
  })())
1172
1195
  }
1173
1196
 
1197
/**
 * Build the operator-facing remediation hint for a failed UDS probe
 * (vault-broker / approval-kernel).
 *
 * Both services are run by docker-compose alongside agents, and recovery
 * almost always comes down to "the service container isn't up", so the
 * hint names the matching `docker compose` service for the probe label.
 *
 * @param label Probe label; `broker` (case-insensitive) selects the
 *   `vault-broker` service, any other label selects `approval-kernel`.
 * @param kind Failure class reported by the probe.
 * @returns A one-line next step to show the operator.
 */
function udsNextStep(label: string, kind: 'missing' | 'refused' | 'unconfigured' | 'other'): string {
  const composeService = label.toLowerCase() === 'broker' ? 'vault-broker' : 'approval-kernel'
  switch (kind) {
    case 'unconfigured':
      return `${label} socket path not set — check the compose mount for the agent container`
    case 'refused':
      return `${label} socket present but not accepting connections — restart with \`docker compose restart ${composeService}\``
    default:
      // 'missing' and 'other' share a hint: the usual cause is that the
      // daemon container isn't running.
      return `${label} socket not reachable — bring up the daemon with \`docker compose up -d ${composeService}\` (or check \`docker compose ps\`)`
  }
}
1214
+
1174
1215
  /**
1175
1216
  * Default UDS connect — opens a stream, then immediately closes it.
1176
1217
  * Resolves on `connect` event, rejects on `error`. 1s connect timeout
@@ -1236,7 +1277,7 @@ export async function probeKernel(
1236
1277
  */
1237
1278
  export async function probeSkills(
1238
1279
  agentDir: string,
1239
- opts: { fs?: SkillsFsImpl; maxNamesShown?: number } = {},
1280
+ opts: { fs?: SkillsFsImpl; maxNamesShown?: number; agentName?: string } = {},
1240
1281
  ): Promise<ProbeResult> {
1241
1282
  return withTimeout('Skills', (async (): Promise<ProbeResult> => {
1242
1283
  const fs = opts.fs ?? realSkillsFs
@@ -1282,10 +1323,12 @@ export async function probeSkills(
1282
1323
  }
1283
1324
  const named = dangling.slice(0, max).join(', ')
1284
1325
  const more = dangling.length > max ? ` +${dangling.length - max} more` : ''
1326
+ const reconcileTarget = opts.agentName ? ` ${opts.agentName}` : ''
1285
1327
  return {
1286
1328
  status: 'degraded',
1287
1329
  label: 'Skills',
1288
1330
  detail: `${dangling.length}/${entries.length} dangling: ${named}${more}`,
1331
+ nextStep: `Run \`switchroom agent reconcile${reconcileTarget}\` to rebuild symlinks, or remove unused entries from switchroom.yaml`,
1289
1332
  }
1290
1333
  })())
1291
1334
  }
@@ -14,12 +14,44 @@ import type { SessionMarker } from './session-marker.js'
14
14
  // Re-export so tests can import from a single path
15
15
  export type { RestartReason }
16
16
 
17
+ /**
18
+ * Operator-initiated restart-marker freshness window. Longer than the
19
+ * default `clean-shutdown.json` window (60s) because operator-driven
20
+ * flows — specifically `switchroom update` from the host CLI — stamp
21
+ * the marker BEFORE `docker compose up -d --remove-orphans` runs, and
22
+ * the recreate for a multi-agent fleet can comfortably take longer
23
+ * than 60s to bring every container's gateway back up (9 agents ×
24
+ * docker network/volume setup + gateway boot probes). Without this
25
+ * extended window, the "operator: switchroom update" marker reads
26
+ * stale by the time the late-bootstrapping agent's gateway reads it
27
+ * — `determineRestartReason` falls through to `'crash'` and the
28
+ * boot card renders the planned redeploy as a crash with a noisy
29
+ * `agent-crashed` operator-events broadcast (the very pattern
30
+ * PR #1139 set out to suppress).
31
+ *
32
+ * Five minutes is generous: a 50-agent fleet recreate would still
33
+ * finish well inside it, and we still treat a 5-min-old marker as a
34
+ * crash if the gateway eventually does come up so the longer window
35
+ * isn't a "silent forever" mode. Verified end-to-end against a 9-agent
36
+ * fleet on 2026-05-13: latest-recreated agent's marker age was 97s.
37
+ *
38
+ * Keyed on the reason-text prefix (`operator:`) so user/cli/in-gateway
39
+ * restart paths keep their 60s tight window — those produce a much
40
+ * shorter shutdown-to-boot delta and a 5-min window there would mask
41
+ * a real crash during/after a `/restart`.
42
+ */
43
+ const OPERATOR_MARKER_MAX_AGE_MS = 5 * 60_000
44
+
17
45
  /**
18
46
  * Determine why this gateway is starting up.
19
47
  *
20
48
  * Priority order:
21
49
  * 1. restart-pending.json present + fresh (<5 min) → 'planned'
22
- * 2. clean-shutdown.json present + fresh (<60s default) → 'graceful'
50
+ * 2. clean-shutdown.json present + fresh:
51
+ * - default <60s → 'graceful'
52
+ * - reason starts with `operator:` → <5min → 'graceful' (#1141
53
+ * follow-up: fleet recreate can exceed 60s and still be a
54
+ * planned operator update)
23
55
  * 3. gateway-session.json present (prior process existed) → 'crash'
24
56
  * 4. Otherwise → 'fresh'
25
57
  */
@@ -30,6 +62,7 @@ export function determineRestartReason(opts: {
30
62
  now: number
31
63
  cleanMaxAgeMs?: number
32
64
  markerMaxAgeMs?: number
65
+ operatorMaxAgeMs?: number
33
66
  }): RestartReason {
34
67
  const {
35
68
  marker,
@@ -38,14 +71,15 @@ export function determineRestartReason(opts: {
38
71
  now,
39
72
  cleanMaxAgeMs = CLEAN_SHUTDOWN_MAX_AGE_MS,
40
73
  markerMaxAgeMs = 5 * 60_000,
74
+ operatorMaxAgeMs = OPERATOR_MARKER_MAX_AGE_MS,
41
75
  } = opts
42
76
  if (marker != null && now - marker.ts < markerMaxAgeMs) return 'planned'
43
- if (
44
- cleanMarker != null &&
45
- now - cleanMarker.ts >= 0 &&
46
- now - cleanMarker.ts < cleanMaxAgeMs
47
- )
48
- return 'graceful'
77
+ if (cleanMarker != null && now - cleanMarker.ts >= 0) {
78
+ const isOperator = typeof cleanMarker.reason === 'string'
79
+ && cleanMarker.reason.startsWith('operator:')
80
+ const window = isOperator ? operatorMaxAgeMs : cleanMaxAgeMs
81
+ if (now - cleanMarker.ts < window) return 'graceful'
82
+ }
49
83
  if (sessionMarker != null) return 'crash'
50
84
  return 'fresh'
51
85
  }
@@ -0,0 +1,66 @@
1
+ /**
2
+ * Pure boot-card version-string composer + helpers.
3
+ *
4
+ * Extracted from gateway.ts so the version-string code path can be
5
+ * exercised by property-based tests without dragging in the gateway's
6
+ * runtime side effects (env loading, bot client init, etc.). Live
7
+ * callers stay in gateway.ts; this file is pure functions only.
8
+ */
9
+
10
+ export type BootVersionInputs = { // everything composeBootVersionString needs to render the version string
11
+ version: string // rendered with a leading "v" (e.g. "v0.2.0")
12
+ commitSha: string | null // appended only in the ahead-of-tag shape; skipped when null or empty
13
+ commitDate: string | null // handed to formatRelativeAgo (Date.parse); null drops the age segment
14
+ latestPr: number | null // rendered as "#<n>" only in the on-tag shape; skipped when null
15
+ commitsAheadOfTag: number | null // 0 or null selects the on-tag shape; a positive count selects the "+N" shape
16
+ claudeCliVersion: string | null // whitespace-collapsed and trimmed before embedding; skipped when null or blank
17
+ }
18
+
19
/**
 * Render how long ago an ISO timestamp was, as a compact single-unit
 * string: `"42s ago"`, `"5m ago"`, `"3h ago"` or `"2d ago"`.
 *
 * The reference clock is injectable so the function stays pure and can be
 * exercised deterministically in tests; production callers omit it.
 *
 * @param iso Timestamp in any format accepted by `Date.parse`, or null.
 * @param now Reference time in epoch milliseconds. Defaults to `Date.now()`.
 * @returns The relative-age string, or null when `iso` is null/empty or
 *   unparseable, or when `now` is not a finite number.
 */
export function formatRelativeAgo(iso: string | null, now: number = Date.now()): string | null {
  if (!iso) return null
  const then = Date.parse(iso)
  // A NaN on either side would otherwise fall through every threshold
  // below and render as "NaNd ago".
  if (Number.isNaN(then) || !Number.isFinite(now)) return null
  // Timestamps in the future (clock skew) clamp to "0s ago".
  const diffSec = Math.max(0, Math.floor((now - then) / 1000))
  if (diffSec < 60) return `${diffSec}s ago`
  if (diffSec < 3600) return `${Math.floor(diffSec / 60)}m ago`
  if (diffSec < 86400) return `${Math.floor(diffSec / 3600)}h ago`
  return `${Math.floor(diffSec / 86400)}d ago`
}
29
+
30
/**
 * Compose the version string shown in the boot-card ack line and the
 * status card's Version row. Two shapes, matching the deleted greeting
 * card's behavior:
 *
 *   - on a tag:        "v0.2.0 · #44 · claude 2.1.123 · 2h ago"
 *     (omit "#44 ·" when no PR was parsed; omit claude segment if unavailable)
 *   - ahead of a tag:  "v0.2.0+3 · db6de9e · claude 2.1.123 · 2m ago"
 *     (show the commit SHA when one is available, omit PR)
 *
 * Only a finite, positive `commitsAheadOfTag` selects the ahead-of-tag
 * shape. `0`, `null`, and degenerate values (negative, `NaN`, infinite)
 * render the on-tag shape, so a bad upstream count never produces
 * "v0.2.0+NaN" or "v0.2.0+-1".
 *
 * Age segment is omitted if no commit date is available (npm consumer).
 *
 * Sanitization: claude --version output is whitespace-collapsed before
 * embedding — a malicious or rogue `claude` on PATH must not be able to
 * smuggle newlines into the ack line. HTML escaping happens at the
 * boot-card boundary (see boot-card.ts: escapeHtml(version)).
 *
 * @param inputs Version, commit and CLI metadata gathered at boot.
 * @returns Segments joined with " · ".
 */
export function composeBootVersionString(inputs: BootVersionInputs): string {
  const ahead = inputs.commitsAheadOfTag
  const isAhead = typeof ahead === 'number' && Number.isFinite(ahead) && ahead > 0

  // Collapse every whitespace run (newlines included) to one space; a
  // blank result is treated the same as "no CLI version".
  const claudeVer = inputs.claudeCliVersion?.replace(/\s+/g, ' ').trim() || null
  // A missing date yields no age segment, so the formatter is only called
  // when there is a date to format.
  const ago = inputs.commitDate ? formatRelativeAgo(inputs.commitDate) : null

  const parts: string[] = [isAhead ? `v${inputs.version}+${ahead}` : `v${inputs.version}`]
  if (isAhead) {
    if (inputs.commitSha) parts.push(inputs.commitSha)
  } else if (inputs.latestPr != null) {
    parts.push(`#${inputs.latestPr}`)
  }
  if (claudeVer) parts.push(`claude ${claudeVer}`)
  if (ago) parts.push(ago)
  return parts.join(' · ')
}