switchroom 0.7.15 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (301) hide show
  1. package/README.md +51 -59
  2. package/bin/run-hook.sh +27 -11
  3. package/bin/timezone-hook.sh +9 -7
  4. package/dist/agent-scheduler/index.js +410 -133
  5. package/dist/auth-broker/index.js +13932 -0
  6. package/dist/cli/switchroom.js +26937 -5601
  7. package/dist/host-control/main.js +12702 -0
  8. package/dist/vault/approvals/kernel-server.js +467 -184
  9. package/dist/vault/broker/server.js +1430 -724
  10. package/examples/minimal.yaml +63 -0
  11. package/examples/personal-google-workspace-mcp/.env.example +34 -0
  12. package/examples/personal-google-workspace-mcp/README.md +194 -0
  13. package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
  14. package/examples/switchroom.yaml +220 -0
  15. package/package.json +7 -4
  16. package/profiles/_base/settings.json.hbs +20 -5
  17. package/profiles/_base/start.sh.hbs +16 -3
  18. package/profiles/_shared/agent-self-service.md.hbs +126 -0
  19. package/profiles/_shared/telegram-style.md.hbs +20 -90
  20. package/profiles/_shared/vault-protocol.md.hbs +68 -0
  21. package/profiles/default/CLAUDE.md +50 -96
  22. package/profiles/default/CLAUDE.md.hbs +36 -6
  23. package/profiles/default/workspace/SOUL.md.hbs +12 -5
  24. package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
  25. package/skills/buildkite-agent-runtime/SKILL.md +44 -11
  26. package/skills/buildkite-api/SKILL.md +31 -8
  27. package/skills/buildkite-cli/SKILL.md +27 -9
  28. package/skills/buildkite-migration/SKILL.md +22 -9
  29. package/skills/buildkite-pipelines/SKILL.md +26 -9
  30. package/skills/buildkite-secure-delivery/SKILL.md +23 -9
  31. package/skills/buildkite-test-engine/SKILL.md +25 -8
  32. package/skills/docx/SKILL.md +1 -1
  33. package/skills/docx/scripts/office/validators/__pycache__/__init__.cpython-313.pyc +0 -0
  34. package/skills/docx/scripts/office/validators/__pycache__/base.cpython-313.pyc +0 -0
  35. package/skills/file-bug/SKILL.md +34 -6
  36. package/skills/humanizer/SKILL.md +15 -0
  37. package/skills/humanizer-calibrate/SKILL.md +7 -1
  38. package/skills/mcp-builder/SKILL.md +1 -1
  39. package/skills/pdf/SKILL.md +1 -1
  40. package/skills/pptx/SKILL.md +1 -1
  41. package/skills/skill-creator/SKILL.md +21 -1
  42. package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
  43. package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
  44. package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
  45. package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
  46. package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
  47. package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
  48. package/skills/switchroom-cli/SKILL.md +63 -64
  49. package/skills/switchroom-health/SKILL.md +23 -10
  50. package/skills/switchroom-install/SKILL.md +3 -3
  51. package/skills/switchroom-manage/SKILL.md +26 -19
  52. package/skills/switchroom-runtime/SKILL.md +191 -0
  53. package/skills/switchroom-status/SKILL.md +27 -2
  54. package/skills/telegram-test-harness/SKILL.md +3 -0
  55. package/skills/token-helpers/SKILL.md +24 -1
  56. package/skills/webapp-testing/SKILL.md +31 -1
  57. package/skills/xlsx/SKILL.md +1 -1
  58. package/telegram-plugin/admin-commands/index.ts +7 -5
  59. package/telegram-plugin/analytics-posthog.ts +191 -0
  60. package/telegram-plugin/bridge/bridge.ts +69 -0
  61. package/telegram-plugin/bridge/ipc-client.ts +4 -1
  62. package/telegram-plugin/dist/bridge/bridge.js +194 -119
  63. package/telegram-plugin/dist/gateway/gateway.js +23611 -19671
  64. package/telegram-plugin/dist/server.js +245 -189
  65. package/telegram-plugin/first-paint.ts +3 -24
  66. package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
  67. package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
  68. package/telegram-plugin/gateway/auth-command.ts +794 -0
  69. package/telegram-plugin/gateway/auth-line.ts +123 -0
  70. package/telegram-plugin/gateway/boot-card.ts +169 -40
  71. package/telegram-plugin/gateway/boot-issue-cache.ts +308 -0
  72. package/telegram-plugin/gateway/boot-probes.ts +166 -123
  73. package/telegram-plugin/gateway/boot-reason.ts +41 -7
  74. package/telegram-plugin/gateway/boot-version.ts +66 -0
  75. package/telegram-plugin/gateway/gateway.ts +3499 -1885
  76. package/telegram-plugin/gateway/hostd-dispatch.ts +117 -0
  77. package/telegram-plugin/gateway/ipc-protocol.ts +18 -0
  78. package/telegram-plugin/gateway/pending-inbound-buffer.ts +106 -0
  79. package/telegram-plugin/gateway/quarantine.ts +69 -0
  80. package/telegram-plugin/gateway/quota-cache.ts +9 -4
  81. package/telegram-plugin/gateway/reaction-trigger.ts +401 -0
  82. package/telegram-plugin/gateway/recent-denials.test.ts +103 -0
  83. package/telegram-plugin/gateway/recent-denials.ts +77 -0
  84. package/telegram-plugin/gateway/startup-network-retry.ts +109 -31
  85. package/telegram-plugin/gateway/vault-grant-inbound-builders.ts +125 -0
  86. package/telegram-plugin/history.ts +91 -0
  87. package/telegram-plugin/hooks/hooks.json +10 -0
  88. package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +130 -0
  89. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +19 -2
  90. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +22 -2
  91. package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
  92. package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
  93. package/telegram-plugin/inbound-classifier.ts +50 -0
  94. package/telegram-plugin/inline-keyboard-callbacks.ts +136 -0
  95. package/telegram-plugin/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json +1 -0
  96. package/telegram-plugin/package.json +4 -2
  97. package/telegram-plugin/permission-rule.ts +51 -0
  98. package/telegram-plugin/permission-title.ts +56 -0
  99. package/telegram-plugin/quota-check.ts +19 -41
  100. package/telegram-plugin/registry/reaper.ts +223 -0
  101. package/telegram-plugin/retry-api-call.ts +80 -0
  102. package/telegram-plugin/runtime-metrics.ts +177 -0
  103. package/telegram-plugin/scripts/build.mjs +0 -1
  104. package/telegram-plugin/secret-detect/index.ts +24 -0
  105. package/telegram-plugin/secret-detect/vault-error.test.ts +64 -12
  106. package/telegram-plugin/secret-detect/vault-error.ts +78 -11
  107. package/telegram-plugin/secret-detect/vault-write.ts +14 -2
  108. package/telegram-plugin/server.js +41795 -0
  109. package/telegram-plugin/session-tail.ts +6 -1
  110. package/telegram-plugin/shared/bot-runtime.ts +5 -4
  111. package/telegram-plugin/silence-poke.ts +420 -0
  112. package/telegram-plugin/silent-end.ts +174 -0
  113. package/telegram-plugin/stream-controller.ts +13 -0
  114. package/telegram-plugin/stream-reply-handler.ts +7 -0
  115. package/telegram-plugin/subagent-watcher.ts +213 -4
  116. package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
  117. package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
  118. package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
  119. package/telegram-plugin/tests/boot-card-issue-dedup.test.ts +247 -0
  120. package/telegram-plugin/tests/boot-card-reason-to-render.test.ts +182 -0
  121. package/telegram-plugin/tests/boot-card-reason.test.ts +65 -2
  122. package/telegram-plugin/tests/boot-card-render.test.ts +146 -0
  123. package/telegram-plugin/tests/boot-card-silent-on-operator.test.ts +103 -0
  124. package/telegram-plugin/tests/boot-probes.test.ts +216 -10
  125. package/telegram-plugin/tests/boot-version-string.test.ts +0 -0
  126. package/telegram-plugin/tests/finalize-callback.test.ts +190 -0
  127. package/telegram-plugin/tests/gateway-message-validator.test.ts +26 -0
  128. package/telegram-plugin/tests/gateway-secret-detect.test.ts +12 -3
  129. package/telegram-plugin/tests/gateway-startup-network-retry.test.ts +104 -0
  130. package/telegram-plugin/tests/history-reaper.test.ts +378 -0
  131. package/telegram-plugin/tests/hostd-dispatch.test.ts +129 -0
  132. package/telegram-plugin/tests/inbound-classifier.test.ts +76 -0
  133. package/telegram-plugin/tests/inbound-message-types.test.ts +267 -0
  134. package/telegram-plugin/tests/issues-card.test.ts +49 -0
  135. package/telegram-plugin/tests/pending-inbound-buffer.test.ts +132 -0
  136. package/telegram-plugin/tests/permission-rule.test.ts +80 -1
  137. package/telegram-plugin/tests/permission-title.test.ts +31 -0
  138. package/telegram-plugin/tests/quota-check.test.ts +5 -35
  139. package/telegram-plugin/tests/races.test.ts +179 -0
  140. package/telegram-plugin/tests/reaction-trigger-flow.test.ts +353 -0
  141. package/telegram-plugin/tests/reaction-trigger.test.ts +397 -0
  142. package/telegram-plugin/tests/retry-api-call.test.ts +152 -1
  143. package/telegram-plugin/tests/runtime-metrics.test.ts +145 -0
  144. package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +155 -0
  145. package/telegram-plugin/tests/secret-detect-delete-must-surface-failures.test.ts +133 -0
  146. package/telegram-plugin/tests/secret-detect-false-positives.test.ts +137 -0
  147. package/telegram-plugin/tests/silence-poke.test.ts +493 -0
  148. package/telegram-plugin/tests/silent-end.test.ts +206 -0
  149. package/telegram-plugin/tests/subagent-tracker-hooks.test.ts +107 -0
  150. package/telegram-plugin/tests/subagent-watcher-env-thresholds.test.ts +224 -0
  151. package/telegram-plugin/tests/subagent-watcher-stall-terminal.test.ts +316 -0
  152. package/telegram-plugin/tests/subagent-watcher.test.ts +263 -0
  153. package/telegram-plugin/tests/turn-signal-tracker.test.ts +81 -0
  154. package/telegram-plugin/tests/vault-approval-posture.test.ts +256 -0
  155. package/telegram-plugin/tests/vault-grant-auto-resume.test.ts +73 -0
  156. package/telegram-plugin/tests/vault-grant-inbound-builders.test.ts +226 -0
  157. package/telegram-plugin/tests/vault-grant-union.test.ts +130 -0
  158. package/telegram-plugin/tests/vault-key-regex-allows-slash.test.ts +140 -0
  159. package/telegram-plugin/tests/vault-posture-quarantine.test.ts +104 -0
  160. package/telegram-plugin/tests/vault-request-access-tool.test.ts +114 -0
  161. package/telegram-plugin/tests/vault-request-access-unlock-resume.test.ts +106 -0
  162. package/telegram-plugin/turn-signal-tracker.ts +100 -24
  163. package/telegram-plugin/uat/SETUP.md +210 -35
  164. package/telegram-plugin/uat/assertions.ts +264 -37
  165. package/telegram-plugin/uat/driver-info.ts +57 -0
  166. package/telegram-plugin/uat/driver.ts +590 -51
  167. package/telegram-plugin/uat/harness.ts +140 -94
  168. package/telegram-plugin/uat/load-env.test.ts +72 -0
  169. package/telegram-plugin/uat/load-env.ts +48 -0
  170. package/telegram-plugin/uat/login.ts +96 -53
  171. package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
  172. package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
  173. package/telegram-plugin/uat/runners/report.ts +150 -0
  174. package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
  175. package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
  176. package/telegram-plugin/uat/runners/scorer.ts +106 -0
  177. package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
  178. package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
  179. package/telegram-plugin/uat/scenarios/ask-user-button-tap-dm.test.ts +141 -0
  180. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +191 -0
  181. package/telegram-plugin/uat/scenarios/fuzz-extended-dm.test.ts +255 -0
  182. package/telegram-plugin/uat/scenarios/fuzz-human-style-dm.test.ts +275 -0
  183. package/telegram-plugin/uat/scenarios/fuzz-random-prompts-dm.test.ts +146 -0
  184. package/telegram-plugin/uat/scenarios/fuzz-status-ask-dm.test.ts +486 -0
  185. package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +67 -0
  186. package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +100 -0
  187. package/telegram-plugin/uat/scenarios/jtbd-soft-commit-dm.test.ts +67 -0
  188. package/telegram-plugin/uat/scenarios/jtbd-status-query-dm.test.ts +49 -0
  189. package/telegram-plugin/uat/scenarios/location-inbound-dm.test.ts +65 -0
  190. package/telegram-plugin/uat/scenarios/midturn-silent-dm.test.ts +175 -0
  191. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +142 -0
  192. package/telegram-plugin/uat/scenarios/reactions-trigger-turn-dm.test.ts +96 -0
  193. package/telegram-plugin/uat/scenarios/secret-redaction-deletes-original-dm.test.ts +123 -0
  194. package/telegram-plugin/uat/scenarios/secret-redaction-no-false-positive-dm.test.ts +87 -0
  195. package/telegram-plugin/uat/scenarios/silence-poke-soft-dm.test.ts +155 -0
  196. package/telegram-plugin/uat/scenarios/silent-end-recovery-dm.test.ts +95 -0
  197. package/telegram-plugin/uat/scenarios/smoke-dm-reply.test.ts +57 -0
  198. package/telegram-plugin/uat/scenarios/subagent-watcher-no-rerun-dm.test.ts +135 -0
  199. package/telegram-plugin/uat/scenarios/vault-approval-posture-telegram-id-dm.test.ts +191 -0
  200. package/telegram-plugin/uat/scenarios/vault-audit-allow-dm.test.ts +108 -0
  201. package/telegram-plugin/uat/scenarios/vault-grant-auto-resume-dm.test.ts +121 -0
  202. package/telegram-plugin/uat/scenarios/vault-request-access-concurrent-dm.test.ts +161 -0
  203. package/telegram-plugin/uat/scenarios/vault-request-access-end-to-end-dm.test.ts +158 -0
  204. package/telegram-plugin/uat/scenarios/voice-inbound-dm.test.ts +65 -0
  205. package/telegram-plugin/vault-approval-posture.ts +42 -0
  206. package/telegram-plugin/welcome-text.ts +1 -0
  207. package/telegram-plugin/active-pins-sweep.ts +0 -204
  208. package/telegram-plugin/active-pins.ts +0 -146
  209. package/telegram-plugin/auth-dashboard.ts +0 -1104
  210. package/telegram-plugin/auth-slot-parser.ts +0 -497
  211. package/telegram-plugin/card-event-log.ts +0 -138
  212. package/telegram-plugin/dist/foreman/foreman.js +0 -31106
  213. package/telegram-plugin/docs/multi-agent-card-design.md +0 -847
  214. package/telegram-plugin/docs/pinned-progress-card-reliability.md +0 -144
  215. package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
  216. package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
  217. package/telegram-plugin/foreman/foreman.ts +0 -1165
  218. package/telegram-plugin/foreman/setup-flow.ts +0 -345
  219. package/telegram-plugin/foreman/setup-state.ts +0 -239
  220. package/telegram-plugin/foreman/state.ts +0 -203
  221. package/telegram-plugin/pin-event-log.ts +0 -76
  222. package/telegram-plugin/progress-card-driver.ts +0 -2886
  223. package/telegram-plugin/progress-card-pin-manager.ts +0 -589
  224. package/telegram-plugin/progress-card-pin-watchdog.ts +0 -98
  225. package/telegram-plugin/progress-card.ts +0 -1409
  226. package/telegram-plugin/tests/HARNESS.md +0 -340
  227. package/telegram-plugin/tests/_progress-card-harness.ts +0 -109
  228. package/telegram-plugin/tests/active-pins-boot-reaper.test.ts +0 -211
  229. package/telegram-plugin/tests/active-pins-sweep.test.ts +0 -309
  230. package/telegram-plugin/tests/active-pins.test.ts +0 -187
  231. package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
  232. package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
  233. package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
  234. package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
  235. package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
  236. package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
  237. package/telegram-plugin/tests/bg-agent-progress-card-757.test.ts +0 -201
  238. package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
  239. package/telegram-plugin/tests/card-event-log.test.ts +0 -145
  240. package/telegram-plugin/tests/first-paint.test.ts +0 -257
  241. package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
  242. package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
  243. package/telegram-plugin/tests/foreman-state.test.ts +0 -164
  244. package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
  245. package/telegram-plugin/tests/harness-ordering-invariants.test.ts +0 -243
  246. package/telegram-plugin/tests/pin-event-log.test.ts +0 -124
  247. package/telegram-plugin/tests/progress-card-api-failure-during-deferred.test.ts +0 -73
  248. package/telegram-plugin/tests/progress-card-close-paths-converge.test.ts +0 -272
  249. package/telegram-plugin/tests/progress-card-cross-turn.test.ts +0 -258
  250. package/telegram-plugin/tests/progress-card-delay-842.test.ts +0 -160
  251. package/telegram-plugin/tests/progress-card-dispose-preservepending.test.ts +0 -81
  252. package/telegram-plugin/tests/progress-card-draft-flag.test.ts +0 -80
  253. package/telegram-plugin/tests/progress-card-driver-eviction.test.ts +0 -215
  254. package/telegram-plugin/tests/progress-card-driver-fleet-shadow.test.ts +0 -123
  255. package/telegram-plugin/tests/progress-card-driver-force-complete-parent-done.test.ts +0 -76
  256. package/telegram-plugin/tests/progress-card-edit-timestamps-budget.test.ts +0 -62
  257. package/telegram-plugin/tests/progress-card-memory-bounds.test.ts +0 -84
  258. package/telegram-plugin/tests/progress-card-pin-failure-paths.test.ts +0 -139
  259. package/telegram-plugin/tests/progress-card-pin-manager.test.ts +0 -773
  260. package/telegram-plugin/tests/progress-card-pin-race-fast-turn.test.ts +0 -66
  261. package/telegram-plugin/tests/progress-card-pin-sidecar-partial-write.test.ts +0 -64
  262. package/telegram-plugin/tests/progress-card-pin-watchdog.test.ts +0 -190
  263. package/telegram-plugin/tests/progress-card-sigterm-pin-flush.test.ts +0 -146
  264. package/telegram-plugin/tests/real-gateway-f1-ladder-integrity.test.ts +0 -123
  265. package/telegram-plugin/tests/real-gateway-f2-instant-draft.test.ts +0 -82
  266. package/telegram-plugin/tests/real-gateway-f3-late-card.test.ts +0 -114
  267. package/telegram-plugin/tests/real-gateway-harness.ts +0 -699
  268. package/telegram-plugin/tests/real-gateway-i6-turn-flush-replay-dedup.test.ts +0 -313
  269. package/telegram-plugin/tests/real-gateway-ipc-lifecycle.test.ts +0 -299
  270. package/telegram-plugin/tests/real-gateway-spec.test.ts +0 -487
  271. package/telegram-plugin/tests/real-gateway.smoke.test.ts +0 -101
  272. package/telegram-plugin/tests/setup-flow.test.ts +0 -510
  273. package/telegram-plugin/tests/setup-state.test.ts +0 -146
  274. package/telegram-plugin/tests/sync-chat-running-subagents.test.ts +0 -116
  275. package/telegram-plugin/tests/turn-end-regressions.test.ts +0 -489
  276. package/telegram-plugin/tests/turn-flush-card-takeover.test.ts +0 -218
  277. package/telegram-plugin/tests/turn-flush-prose-recovery.test.ts +0 -78
  278. package/telegram-plugin/tests/two-zone-bg-carry-full-lifecycle.test.ts +0 -131
  279. package/telegram-plugin/tests/two-zone-bg-detection.test.ts +0 -120
  280. package/telegram-plugin/tests/two-zone-bg-done-when-all-terminal.test.ts +0 -116
  281. package/telegram-plugin/tests/two-zone-bg-early-turn-end.test.ts +0 -87
  282. package/telegram-plugin/tests/two-zone-bg-survives-next-turn.test.ts +0 -211
  283. package/telegram-plugin/tests/two-zone-card-cap.test.ts +0 -62
  284. package/telegram-plugin/tests/two-zone-card-fleet-row.test.ts +0 -101
  285. package/telegram-plugin/tests/two-zone-card-header-phases.test.ts +0 -78
  286. package/telegram-plugin/tests/two-zone-card-html-balance.test.ts +0 -110
  287. package/telegram-plugin/tests/two-zone-card-lifecycle.test.ts +0 -128
  288. package/telegram-plugin/tests/two-zone-card-sanitise.test.ts +0 -58
  289. package/telegram-plugin/tests/two-zone-card-snapshot.test.ts +0 -133
  290. package/telegram-plugin/tests/two-zone-concurrent-turns-isolation.test.ts +0 -155
  291. package/telegram-plugin/tests/two-zone-phasefor-precedence.test.ts +0 -117
  292. package/telegram-plugin/tests/two-zone-snapshot-extras.test.ts +0 -187
  293. package/telegram-plugin/tests/two-zone-stuck-edit-throttle.test.ts +0 -149
  294. package/telegram-plugin/tests/two-zone-stuck-header-escalation.test.ts +0 -101
  295. package/telegram-plugin/tests/two-zone-stuck-per-member.test.ts +0 -114
  296. package/telegram-plugin/tests/two-zone-stuck-recovery.test.ts +0 -105
  297. package/telegram-plugin/tests/waiting-ux-harness.ts +0 -381
  298. package/telegram-plugin/tests/waiting-ux.e2e.test.ts +0 -233
  299. package/telegram-plugin/turn-flush-prose-recovery.ts +0 -40
  300. package/telegram-plugin/two-zone-card.ts +0 -269
  301. package/telegram-plugin/uat/scenarios/smoke-clerk-reply.test.ts +0 -61
@@ -1,46 +1,82 @@
1
1
  /**
2
- * Per-turn silent-gap tracker for streaming observability.
2
+ * Per-turn signal + outbound tracker for streaming observability.
3
3
  *
4
- * Tracks the longest contiguous interval within a turn where no user-visible
5
- * signal was sent. Signals include: progress-card edits, status-reaction
6
- * transitions, answer-lane updates, and fresh sendMessage calls.
4
+ * Tracks TWO things, keyed by chatId+threadId:
5
+ *
6
+ * 1. **Signal gap** — longest contiguous interval where no user-visible
7
+ * signal of ANY kind was sent (progress-card edits, status-reaction
8
+ * transitions, answer-lane updates, fresh sendMessage calls). The
9
+ * original use case from #203.
10
+ *
11
+ * 2. **Outbound messages** (added 2026-05 for the conversational-turn-
12
+ * UX redesign, issue #1122) — strictly user-visible MESSAGES that
13
+ * the agent sent: `reply`, `stream_reply` first-emits, progress
14
+ * card flushes that produce a fresh sendMessage. Status reactions
15
+ * and message edits don't count here — they don't ping the device
16
+ * and aren't what "outbound silence" means for the KPI.
7
17
  *
8
18
  * Keyed by chatId+threadId so concurrent turns in different chats don't
9
- * collide. Designed to be fully standalone (no grammy/bot dependency) so
10
- * it's testable with deterministic time injection via vi.useFakeTimers().
19
+ * collide. Fully standalone — no grammy/bot dependency, deterministic
20
+ * time injection via vi.useFakeTimers().
11
21
  *
12
22
  * Usage:
13
- * signalTracker.reset(key, now) // at turn start
14
- * signalTracker.noteSignal(key, now) // on every user-visible signal
15
- * signalTracker.getLongestGap(key) // at turn_end
16
- * signalTracker.clear(key) // after emitting (cleanup)
23
+ * signalTracker.reset(key, now) // at turn start
24
+ * signalTracker.noteSignal(key, now) // any signal (legacy)
25
+ * signalTracker.noteOutbound(key, now) // outbound message only
26
+ * signalTracker.getLongestGap(key) // at turn_end (signal)
27
+ * signalTracker.getOutboundMetrics(key) // at turn_end (KPIs)
28
+ * signalTracker.clear(key) // after emitting
17
29
  */
18
30
 
19
31
  export interface TurnSignalState {
20
- /** The time the current gap started (i.e., the last signal time). */
32
+ /** The time the turn began. Used to compute TTFO. */
33
+ turnStartedAt: number
34
+ /** Time the current signal gap started (last signal time). */
21
35
  lastSignalAt: number
22
- /** The longest gap observed so far (ms). */
36
+ /** Longest signal-gap (any signal) observed so far (ms). */
23
37
  longestGapMs: number
38
+ /** First outbound message timestamp this turn, or null if none yet. */
39
+ firstOutboundAt: number | null
40
+ /** Most recent outbound message timestamp, or null. */
41
+ lastOutboundAt: number | null
42
+ /** Total outbound messages sent this turn. */
43
+ outboundCount: number
44
+ /** Longest gap between consecutive outbound messages (ms). */
45
+ longestOutboundGapMs: number
46
+ }
47
+
48
+ export interface OutboundMetrics {
49
+ /** ms between turn start and first outbound message; null if none sent. */
50
+ ttfoMs: number | null
51
+ /** Total outbound messages this turn. */
52
+ outboundCount: number
53
+ /** Longest gap between outbound messages — i.e. the "silent stretch"
54
+ * metric for the conversational-pacing KPI. 0 if <2 messages. */
55
+ longestOutboundGapMs: number
24
56
  }
25
57
 
26
- /**
27
- * Module-scoped map: `"chatId:threadId"` → state. Using a module-level map
28
- * keeps the tracker lightweight and avoids passing state through every
29
- * call-site while remaining mockable in tests via the exported functions.
30
- */
31
58
  const state = new Map<string, TurnSignalState>()
32
59
 
33
60
  /**
34
61
  * Begin tracking a new turn. Records `now` as the initial signal time and
35
- * resets the gap accumulator. Call at the start of each fresh turn.
62
+ * resets the gap accumulator + outbound state. Call at the start of each
63
+ * fresh turn.
36
64
  */
37
65
  export function reset(key: string, now: number): void {
38
- state.set(key, { lastSignalAt: now, longestGapMs: 0 })
66
+ state.set(key, {
67
+ turnStartedAt: now,
68
+ lastSignalAt: now,
69
+ longestGapMs: 0,
70
+ firstOutboundAt: null,
71
+ lastOutboundAt: null,
72
+ outboundCount: 0,
73
+ longestOutboundGapMs: 0,
74
+ })
39
75
  }
40
76
 
41
77
  /**
42
- * Record a user-visible signal. Measures the gap since the last signal and
43
- * updates `longestGapMs` if this gap is larger.
78
+ * Record a user-visible signal (any kind: reaction, edit, send). Measures
79
+ * the gap since the last signal and updates `longestGapMs` if larger.
44
80
  */
45
81
  export function noteSignal(key: string, now: number): void {
46
82
  const entry = state.get(key)
@@ -51,8 +87,31 @@ export function noteSignal(key: string, now: number): void {
51
87
  }
52
88
 
53
89
  /**
54
- * Returns the longest gap observed during the current turn (ms).
55
- * Returns 0 if no tracking state exists for this key.
90
+ * Record a fresh outbound MESSAGE (reply, stream_reply first-emit, or
91
+ * card flush that produced a new sendMessage). Updates the
92
+ * outbound-specific metrics: TTFO on first call, outbound-gap on
93
+ * subsequent calls.
94
+ *
95
+ * Does not double-update the signal-gap stream — callers that note an
96
+ * outbound message should ALSO call `noteSignal()` to keep the legacy
97
+ * signal-gap accurate.
98
+ */
99
+ export function noteOutbound(key: string, now: number): void {
100
+ const entry = state.get(key)
101
+ if (entry == null) return
102
+ if (entry.firstOutboundAt == null) {
103
+ entry.firstOutboundAt = now
104
+ } else if (entry.lastOutboundAt != null) {
105
+ const gap = now - entry.lastOutboundAt
106
+ if (gap > entry.longestOutboundGapMs) entry.longestOutboundGapMs = gap
107
+ }
108
+ entry.lastOutboundAt = now
109
+ entry.outboundCount += 1
110
+ }
111
+
112
+ /**
113
+ * Returns the longest gap observed during the current turn (ms) — legacy
114
+ * "any signal" metric. Returns 0 if no tracking state exists for this key.
56
115
  */
57
116
  export function getLongestGap(key: string): number {
58
117
  return state.get(key)?.longestGapMs ?? 0
@@ -67,8 +126,25 @@ export function getLastSignalAt(key: string): number | undefined {
67
126
  }
68
127
 
69
128
  /**
70
- * Remove state for this key. Call after emitting the turn_signal_gap metric.
129
+ * Returns the outbound-message KPI bundle for the conversational-pacing
130
+ * redesign. Zeroed-out if no tracking state exists.
71
131
  */
132
+ export function getOutboundMetrics(key: string): OutboundMetrics {
133
+ const entry = state.get(key)
134
+ if (entry == null) {
135
+ return { ttfoMs: null, outboundCount: 0, longestOutboundGapMs: 0 }
136
+ }
137
+ const ttfoMs = entry.firstOutboundAt != null
138
+ ? entry.firstOutboundAt - entry.turnStartedAt
139
+ : null
140
+ return {
141
+ ttfoMs,
142
+ outboundCount: entry.outboundCount,
143
+ longestOutboundGapMs: entry.longestOutboundGapMs,
144
+ }
145
+ }
146
+
147
+ /** Remove state for this key. Call after emitting the turn-end metrics. */
72
148
  export function clear(key: string): void {
73
149
  state.delete(key)
74
150
  }
@@ -112,49 +112,224 @@ If the driver account is locked entirely (e.g. SPAM_WAIT), only the
112
112
  account owner can resolve it via support@telegram.org. The harness has
113
113
  no recourse.
114
114
 
115
- ## 5. Worktree-based agent install (NOT `switchroom agent add`)
115
+ ## 5. The `test-harness` agent (Phase 2a DM focus)
116
116
 
117
- The UAT harness does **not** persistently install the test-harness
118
- agent through `switchroom agent add` (which writes a systemd unit + a
119
- persistent state dir — wrong shape for hermetic test runs). Instead,
120
- the harness `exec`s the agent as a child process per scenario with:
117
+ Phase 2a tests run against a **persistent** `test-harness` agent
118
+ created once via `switchroom agent add`. This pivots from the epic's
119
+ original child-process-per-scenario plan (written before the Docker
120
+ runtime landed) — the standard runtime now gets us most of the
121
+ hermeticity we want without re-inventing the agent lifecycle. Forum
122
+ topic + per-scenario STATE_DIR isolation rolls in with Phase 2b.
121
123
 
122
- - `STATE_DIR=$(mktemp -d)` — ephemeral; teardown rm-rfs it.
123
- - A unique `TELEGRAM_GATEWAY_PORT` (see port allocator note below).
124
- - `SWITCHROOM_AGENT_NAME=test-harness`.
125
- - The test bot token loaded from `telegram-test-bot-token`.
124
+ ### One-shot agent creation
126
125
 
127
- The Phase 1 scaffold stubs this out in `harness.ts`; Phase 2 wires it
128
- end-to-end.
126
+ ```bash
127
+ # Resolve the driver's user_id once via mtcute (the helper prints
128
+ # only the integer id to stdout; the session string never appears):
129
+ cd ~/code/switchroom/telegram-plugin
130
+ read -sp "Vault passphrase: " SWITCHROOM_VAULT_PASSPHRASE; echo
131
+ export SWITCHROOM_VAULT_PASSPHRASE
132
+ DRIVER_UID=$(bun uat/driver-info.ts)
133
+ echo "Driver user_id: $DRIVER_UID"
134
+
135
+ # Then create the agent. `--topology dm --allow-from $DRIVER_UID`
136
+ # bypasses the @BotFather DM-pair flow and writes the driver's
137
+ # user_id directly into allowFrom — so the bot will respond only
138
+ # to DMs from the driver, never from arbitrary Telegram users
139
+ # (important: the test bot's token is in vault scoped to
140
+ # `test-harness` only, but the bot itself is publicly reachable
141
+ # on Telegram).
142
+ SWITCHROOM_BOT_TOKEN=$(switchroom vault get --no-broker telegram-test-bot-token) \
143
+ switchroom agent add test-harness \
144
+ --profile default \
145
+ --topology dm \
146
+ --bot-username meken_switchroom_test_bot \
147
+ --allow-from "$DRIVER_UID"
148
+ unset SWITCHROOM_BOT_TOKEN SWITCHROOM_VAULT_PASSPHRASE
149
+
150
+ # Verify the agent is up:
151
+ switchroom agent status test-harness
152
+ ```
153
+
154
+ `agent add` runs the n+1 wizard: scaffolds the per-agent dir under
155
+ `~/.switchroom/agents/test-harness/`, refreshes the compose file,
156
+ boots the container, runs a preflight. On success the agent is
157
+ running and will reply to DMs from the driver user account.
158
+
159
+ > **Hosts upgraded from before #1009.** If you set up the
160
+ > `test-harness` agent on an older CLI build, its
161
+ > `access.json` may carry the two pre-fix shapes — numeric
162
+ > `allowFrom` (silently rejected by the gateway, #1001) and a
163
+ > placeholder `groups: {"-100…"}` entry (404 boot-probe noise,
164
+ > #1002). Both writers were corrected in #1009, but existing
165
+ > scaffolds aren't auto-rewritten. To rebuild a clean access.json
166
+ > on a host that hit the old shapes:
167
+ >
168
+ > ```bash
169
+ > switchroom agent stop test-harness
170
+ > rm ~/.switchroom/agents/test-harness/telegram/access.json
171
+ > switchroom apply # rewrites access.json via the fixed buildAccessJson
172
+ > switchroom agent start test-harness
173
+ > ```
174
+ >
175
+ > Fresh agent-add invocations on current main don't need this.
176
+
177
+ ### When this agent should be running
178
+
179
+ - During UAT runs: yes. Scenarios fail with `expectMessage` timeouts
180
+ if the agent isn't responding.
181
+ - Idle: harmless to leave running. It consumes one Claude turn only
182
+ when DMed by the driver — no scheduled work, no MCP polls.
183
+
184
+ ### Resetting state between runs
185
+
186
+ Phase 2a accepts mild state pollution across scenarios (the agent's
187
+ history accumulates). To reset hard:
129
188
 
130
- ## 6. Port allocator vs unix sockets
189
+ ```bash
190
+ switchroom agent stop test-harness
191
+ rm -rf ~/.switchroom/agents/test-harness/state
192
+ switchroom agent start test-harness
193
+ ```
194
+
195
+ Phase 2b adds per-scenario state-dir scoping so this becomes
196
+ automatic.
197
+
198
+ ### Optional: force progress-card on every turn (Phase 2c+ card scenarios)
199
+
200
+ The gateway's `progress_card.delay_ms` defaults to 45 s, so short DM
201
+ turns (most of UAT) never trigger the pinned card and the card-
202
+ lifecycle scenarios (`progress-card-dm.test.ts`) skip themselves.
203
+ To unskip — and validate `expectPinnedCard` / `waitForCardPhase`
204
+ against real Telegram — override the delay on `test-harness` only:
205
+
206
+ Edit `~/.switchroom/switchroom.yaml`, find the `test-harness:`
207
+ block, and add the highlighted lines:
208
+
209
+ ```yaml
210
+ test-harness:
211
+ extends: default
212
+ topic_name: Test Harness
213
+ channels:
214
+ telegram:
215
+ progress_card:
216
+ delay_ms: 1000 # short — make every turn flash a card
217
+ ```
218
+
219
+ Then apply + restart:
220
+
221
+ ```bash
222
+ switchroom apply
223
+ switchroom agent restart test-harness
224
+ ```
131
225
 
132
- Phase 1 commits to a **process-wide port allocator** (see
133
- `uat/port-allocator.ts`) rather than unix sockets. Rationale:
226
+ Production agents keep the 45 s default; this override is test-only.
227
+ Once configured, unskip the card scenario by changing
228
+ `describe.skip(...)` → `describe(...)` in
229
+ `scenarios/progress-card-dm.test.ts`.
134
230
 
135
- - The gateway already speaks IP loopback to the bridge; switching to
136
- unix sockets is a code change in `gateway/` we don't want bundled
137
- with the UAT scaffold work.
138
- - Tests only ever run from one harness process, so a node-local
139
- monotonic counter starting at a high ephemeral port (default 47000)
140
- is enough to avoid collisions with the system + with sibling
141
- scenarios in the same run.
142
- - The allocator also `bind()`s a probe socket and releases it before
143
- returning, which catches "port already in use by another process"
144
- before the agent boots and produces a confusing crash.
231
+ ## 6. Running scenarios — env setup
145
232
 
146
- If we ever want concurrent harness runs from CI, swap to unix sockets;
147
- the harness API takes a `transport` shape so it's a one-line change.
233
+ The harness reads four env vars at `spinUp()` time. The recommended
234
+ workflow is to materialise them once into `.env`
235
+ — the harness loads that file automatically on import (see
236
+ `load-env.ts`). The file is gitignored repo-wide (`.env*` in
237
+ `/.gitignore`); never commit a populated copy.
238
+
239
+ Vault file perms (root:root 0600) mean the operator can't read
240
+ `vault.enc` directly. Sourcing through the `test-harness` agent
241
+ container — which already has these keys in its ACL — is the
242
+ cleanest path:
243
+
244
+ ```bash
245
+ cd ~/code/switchroom
246
+
247
+ read -sp "Vault passphrase: " SWITCHROOM_VAULT_PASSPHRASE; echo
248
+ export SWITCHROOM_VAULT_PASSPHRASE
249
+
250
+ ( umask 077 && {
251
+ echo "TELEGRAM_API_ID=$(docker exec switchroom-test-harness switchroom vault get telegram-uat-api-id)"
252
+ echo "TELEGRAM_API_HASH=$(docker exec switchroom-test-harness switchroom vault get telegram-uat-api-hash)"
253
+ echo "TELEGRAM_UAT_DRIVER_SESSION=$(docker exec switchroom-test-harness switchroom vault get telegram-uat-driver-session)"
254
+ echo "TELEGRAM_TEST_BOT_USERNAME=meken_switchroom_test_bot"
255
+ } > .env )
256
+
257
+ unset SWITCHROOM_VAULT_PASSPHRASE
258
+ ```
259
+
260
+ > `umask 077` in the subshell makes the redirection create `.env`
261
+ > with mode 0600, so a newly created file is never group- or
262
+ > world-readable; there is no separate chmod step.
263
+
264
+ > The `docker exec` path requires `test-harness` to have the three
265
+ > `telegram-uat-*` keys in its `schedule[*].secrets` ACL (see
266
+ > `~/.switchroom/switchroom.yaml`). If `vault get` returns
267
+ > `VAULT-BROKER-DENIED`, add them and `switchroom apply`. The legacy
268
+ > `vault get --no-broker` path no longer works for non-root operators
269
+ > because the vault file is owned by the broker container's root user.
270
+
271
+ After the `.env` is in place, just run the suite — no per-shell
272
+ export dance:
273
+
274
+ ```bash
275
+ bun test telegram-plugin/uat/scenarios/
276
+ ```
277
+
278
+ To rotate or refresh the file, repeat the block above. The harness
279
+ prefers existing `process.env` entries over `.env` values, so a
280
+ one-off env override still works (`TELEGRAM_API_ID=99999 bun test ...`).
281
+
282
+ The vault passphrase is unset before the test run so a misbehaving
283
+ scenario can't smuggle it into a chat message. The session string in
284
+ `.env` is bearer-equivalent to the driver account — treat the file
285
+ as a long-lived secret.
148
286
 
149
287
  ## 7. Verification checklist before running scenarios
150
288
 
151
- - [ ] `switchroom vault get telegram-test-bot-token` returns a token.
152
- - [ ] `switchroom vault get telegram-uat-driver-session` returns a
153
- session string (the command output may be redacted by the
154
- vault — that's fine, you only need exit code 0).
155
- - [ ] `$SWITCHROOM_UAT_CHAT_ID` exported and is a negative int.
156
- - [ ] Test bot is admin in the supergroup.
157
- - [ ] Driver user is admin in the supergroup.
158
- - [ ] Topics enabled in the supergroup.
289
+ - [ ] `switchroom vault list` shows `telegram-test-bot-token`,
290
+ `telegram-uat-api-id`, `telegram-uat-api-hash`,
291
+ `telegram-uat-driver-session` (and `telegram-uat-chat-id` for
292
+ Phase 2b).
293
+ - [ ] `switchroom agent status test-harness` reports the agent active.
294
+ - [ ] Driver user can DM `@meken_switchroom_test_bot` from Telegram
295
+ and get a reply (manual sanity check before automating).
296
+
297
+ When all three are checked, the env block above + `bun run test:uat`
298
+ is safe to run.
299
+
300
+ ## 8. CI gate — `:robot: UAT fuzz` Buildkite step
301
+
302
+ Since the Buildkite gate landed, the fuzz subset of scenarios
303
+ (`fuzz-random-prompts-dm.test.ts`, `fuzz-extended-dm.test.ts`,
304
+ `fuzz-human-style-dm.test.ts`) runs automatically on every PR that
305
+ touches `telegram-plugin/`, `src/agents/`, or `telegram-plugin/uat/`.
306
+
307
+ The step runs on a self-hosted Buildkite agent tagged
308
+ `queue=uat-host` that lives on the same box as the `test-harness`
309
+ agent. Secrets come from the Buildkite cluster secret store, not
310
+ from local vault. See `.buildkite/README.md` § "UAT fuzz step" for
311
+ agent setup + secret rotation.
312
+
313
+ **Scope (CI):**
314
+
315
+ | Scenario | In CI? | Why |
316
+ |---|---|---|
317
+ | `fuzz-random-prompts-dm` | ✅ gates PRs | JTBD-floor invariants; PR #1132. |
318
+ | `fuzz-extended-dm` | ✅ gates PRs | Second-pass categories; PR #1134. |
319
+ | `fuzz-human-style-dm` | ✅ gates PRs | Human-shape inbounds + meaningful-reply floor. |
320
+ | `silent-end-recovery-dm` | ❌ local only | Passes, but the 5-min worst-case budget makes it costly to run every PR. Run nightly + ad-hoc. |
321
+ | `jtbd-status-query-dm` | ❌ local only | Passes; defer to a follow-up that batches the cheap JTBD scenarios. |
322
+ | `jtbd-soft-commit-dm` | ❌ local only | Already budget-tuned but real-Telegram timing flake risk; defer until we have flake telemetry. |
323
+ | `jtbd-interrupt-marker-dm` | ❌ `describe.skip` | Suspected real bug per #1132 overnight. Investigate before unskipping. |
324
+ | `jtbd-rapid-followup-dm` | ❌ `describe.skip` | Suspected real classification bug per #1132 overnight. Investigate before unskipping. |
325
+ | vault / secret-redaction / voice / location / reactions / progress-card | ❌ local only | Need specific surfaces / config overrides not wired into the gate yet. |
326
+
327
+ A local `bun run test:uat` runs the full include glob minus the two
328
+ `describe.skip`'d JTBDs.
329
+
330
+ ## 9. Port allocator vs unix sockets (Phase 1 scaffold note)
159
331
 
160
- When all six are checked, `bun run test:uat` is safe to run.
332
+ The Phase 1 `port-allocator.ts` is held in reserve for Phase 2b's
333
+ child-process flow — Phase 2a (standard-runtime agent) doesn't need
334
+ it. Kept rather than deleted because the allocator's bind-probe is
335
+ the right shape for what 2b will need.