switchroom 0.7.15 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (301) hide show
  1. package/README.md +51 -59
  2. package/bin/run-hook.sh +27 -11
  3. package/bin/timezone-hook.sh +9 -7
  4. package/dist/agent-scheduler/index.js +410 -133
  5. package/dist/auth-broker/index.js +13932 -0
  6. package/dist/cli/switchroom.js +26937 -5601
  7. package/dist/host-control/main.js +12702 -0
  8. package/dist/vault/approvals/kernel-server.js +467 -184
  9. package/dist/vault/broker/server.js +1430 -724
  10. package/examples/minimal.yaml +63 -0
  11. package/examples/personal-google-workspace-mcp/.env.example +34 -0
  12. package/examples/personal-google-workspace-mcp/README.md +194 -0
  13. package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
  14. package/examples/switchroom.yaml +220 -0
  15. package/package.json +7 -4
  16. package/profiles/_base/settings.json.hbs +20 -5
  17. package/profiles/_base/start.sh.hbs +16 -3
  18. package/profiles/_shared/agent-self-service.md.hbs +126 -0
  19. package/profiles/_shared/telegram-style.md.hbs +20 -90
  20. package/profiles/_shared/vault-protocol.md.hbs +68 -0
  21. package/profiles/default/CLAUDE.md +50 -96
  22. package/profiles/default/CLAUDE.md.hbs +36 -6
  23. package/profiles/default/workspace/SOUL.md.hbs +12 -5
  24. package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
  25. package/skills/buildkite-agent-runtime/SKILL.md +44 -11
  26. package/skills/buildkite-api/SKILL.md +31 -8
  27. package/skills/buildkite-cli/SKILL.md +27 -9
  28. package/skills/buildkite-migration/SKILL.md +22 -9
  29. package/skills/buildkite-pipelines/SKILL.md +26 -9
  30. package/skills/buildkite-secure-delivery/SKILL.md +23 -9
  31. package/skills/buildkite-test-engine/SKILL.md +25 -8
  32. package/skills/docx/SKILL.md +1 -1
  33. package/skills/docx/scripts/office/validators/__pycache__/__init__.cpython-313.pyc +0 -0
  34. package/skills/docx/scripts/office/validators/__pycache__/base.cpython-313.pyc +0 -0
  35. package/skills/file-bug/SKILL.md +34 -6
  36. package/skills/humanizer/SKILL.md +15 -0
  37. package/skills/humanizer-calibrate/SKILL.md +7 -1
  38. package/skills/mcp-builder/SKILL.md +1 -1
  39. package/skills/pdf/SKILL.md +1 -1
  40. package/skills/pptx/SKILL.md +1 -1
  41. package/skills/skill-creator/SKILL.md +21 -1
  42. package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
  43. package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
  44. package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
  45. package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
  46. package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
  47. package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
  48. package/skills/switchroom-cli/SKILL.md +63 -64
  49. package/skills/switchroom-health/SKILL.md +23 -10
  50. package/skills/switchroom-install/SKILL.md +3 -3
  51. package/skills/switchroom-manage/SKILL.md +26 -19
  52. package/skills/switchroom-runtime/SKILL.md +191 -0
  53. package/skills/switchroom-status/SKILL.md +27 -2
  54. package/skills/telegram-test-harness/SKILL.md +3 -0
  55. package/skills/token-helpers/SKILL.md +24 -1
  56. package/skills/webapp-testing/SKILL.md +31 -1
  57. package/skills/xlsx/SKILL.md +1 -1
  58. package/telegram-plugin/admin-commands/index.ts +7 -5
  59. package/telegram-plugin/analytics-posthog.ts +191 -0
  60. package/telegram-plugin/bridge/bridge.ts +69 -0
  61. package/telegram-plugin/bridge/ipc-client.ts +4 -1
  62. package/telegram-plugin/dist/bridge/bridge.js +194 -119
  63. package/telegram-plugin/dist/gateway/gateway.js +23611 -19671
  64. package/telegram-plugin/dist/server.js +245 -189
  65. package/telegram-plugin/first-paint.ts +3 -24
  66. package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
  67. package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
  68. package/telegram-plugin/gateway/auth-command.ts +794 -0
  69. package/telegram-plugin/gateway/auth-line.ts +123 -0
  70. package/telegram-plugin/gateway/boot-card.ts +169 -40
  71. package/telegram-plugin/gateway/boot-issue-cache.ts +308 -0
  72. package/telegram-plugin/gateway/boot-probes.ts +166 -123
  73. package/telegram-plugin/gateway/boot-reason.ts +41 -7
  74. package/telegram-plugin/gateway/boot-version.ts +66 -0
  75. package/telegram-plugin/gateway/gateway.ts +3499 -1885
  76. package/telegram-plugin/gateway/hostd-dispatch.ts +117 -0
  77. package/telegram-plugin/gateway/ipc-protocol.ts +18 -0
  78. package/telegram-plugin/gateway/pending-inbound-buffer.ts +106 -0
  79. package/telegram-plugin/gateway/quarantine.ts +69 -0
  80. package/telegram-plugin/gateway/quota-cache.ts +9 -4
  81. package/telegram-plugin/gateway/reaction-trigger.ts +401 -0
  82. package/telegram-plugin/gateway/recent-denials.test.ts +103 -0
  83. package/telegram-plugin/gateway/recent-denials.ts +77 -0
  84. package/telegram-plugin/gateway/startup-network-retry.ts +109 -31
  85. package/telegram-plugin/gateway/vault-grant-inbound-builders.ts +125 -0
  86. package/telegram-plugin/history.ts +91 -0
  87. package/telegram-plugin/hooks/hooks.json +10 -0
  88. package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +130 -0
  89. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +19 -2
  90. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +22 -2
  91. package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
  92. package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
  93. package/telegram-plugin/inbound-classifier.ts +50 -0
  94. package/telegram-plugin/inline-keyboard-callbacks.ts +136 -0
  95. package/telegram-plugin/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json +1 -0
  96. package/telegram-plugin/package.json +4 -2
  97. package/telegram-plugin/permission-rule.ts +51 -0
  98. package/telegram-plugin/permission-title.ts +56 -0
  99. package/telegram-plugin/quota-check.ts +19 -41
  100. package/telegram-plugin/registry/reaper.ts +223 -0
  101. package/telegram-plugin/retry-api-call.ts +80 -0
  102. package/telegram-plugin/runtime-metrics.ts +177 -0
  103. package/telegram-plugin/scripts/build.mjs +0 -1
  104. package/telegram-plugin/secret-detect/index.ts +24 -0
  105. package/telegram-plugin/secret-detect/vault-error.test.ts +64 -12
  106. package/telegram-plugin/secret-detect/vault-error.ts +78 -11
  107. package/telegram-plugin/secret-detect/vault-write.ts +14 -2
  108. package/telegram-plugin/server.js +41795 -0
  109. package/telegram-plugin/session-tail.ts +6 -1
  110. package/telegram-plugin/shared/bot-runtime.ts +5 -4
  111. package/telegram-plugin/silence-poke.ts +420 -0
  112. package/telegram-plugin/silent-end.ts +174 -0
  113. package/telegram-plugin/stream-controller.ts +13 -0
  114. package/telegram-plugin/stream-reply-handler.ts +7 -0
  115. package/telegram-plugin/subagent-watcher.ts +213 -4
  116. package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
  117. package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
  118. package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
  119. package/telegram-plugin/tests/boot-card-issue-dedup.test.ts +247 -0
  120. package/telegram-plugin/tests/boot-card-reason-to-render.test.ts +182 -0
  121. package/telegram-plugin/tests/boot-card-reason.test.ts +65 -2
  122. package/telegram-plugin/tests/boot-card-render.test.ts +146 -0
  123. package/telegram-plugin/tests/boot-card-silent-on-operator.test.ts +103 -0
  124. package/telegram-plugin/tests/boot-probes.test.ts +216 -10
  125. package/telegram-plugin/tests/boot-version-string.test.ts +0 -0
  126. package/telegram-plugin/tests/finalize-callback.test.ts +190 -0
  127. package/telegram-plugin/tests/gateway-message-validator.test.ts +26 -0
  128. package/telegram-plugin/tests/gateway-secret-detect.test.ts +12 -3
  129. package/telegram-plugin/tests/gateway-startup-network-retry.test.ts +104 -0
  130. package/telegram-plugin/tests/history-reaper.test.ts +378 -0
  131. package/telegram-plugin/tests/hostd-dispatch.test.ts +129 -0
  132. package/telegram-plugin/tests/inbound-classifier.test.ts +76 -0
  133. package/telegram-plugin/tests/inbound-message-types.test.ts +267 -0
  134. package/telegram-plugin/tests/issues-card.test.ts +49 -0
  135. package/telegram-plugin/tests/pending-inbound-buffer.test.ts +132 -0
  136. package/telegram-plugin/tests/permission-rule.test.ts +80 -1
  137. package/telegram-plugin/tests/permission-title.test.ts +31 -0
  138. package/telegram-plugin/tests/quota-check.test.ts +5 -35
  139. package/telegram-plugin/tests/races.test.ts +179 -0
  140. package/telegram-plugin/tests/reaction-trigger-flow.test.ts +353 -0
  141. package/telegram-plugin/tests/reaction-trigger.test.ts +397 -0
  142. package/telegram-plugin/tests/retry-api-call.test.ts +152 -1
  143. package/telegram-plugin/tests/runtime-metrics.test.ts +145 -0
  144. package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +155 -0
  145. package/telegram-plugin/tests/secret-detect-delete-must-surface-failures.test.ts +133 -0
  146. package/telegram-plugin/tests/secret-detect-false-positives.test.ts +137 -0
  147. package/telegram-plugin/tests/silence-poke.test.ts +493 -0
  148. package/telegram-plugin/tests/silent-end.test.ts +206 -0
  149. package/telegram-plugin/tests/subagent-tracker-hooks.test.ts +107 -0
  150. package/telegram-plugin/tests/subagent-watcher-env-thresholds.test.ts +224 -0
  151. package/telegram-plugin/tests/subagent-watcher-stall-terminal.test.ts +316 -0
  152. package/telegram-plugin/tests/subagent-watcher.test.ts +263 -0
  153. package/telegram-plugin/tests/turn-signal-tracker.test.ts +81 -0
  154. package/telegram-plugin/tests/vault-approval-posture.test.ts +256 -0
  155. package/telegram-plugin/tests/vault-grant-auto-resume.test.ts +73 -0
  156. package/telegram-plugin/tests/vault-grant-inbound-builders.test.ts +226 -0
  157. package/telegram-plugin/tests/vault-grant-union.test.ts +130 -0
  158. package/telegram-plugin/tests/vault-key-regex-allows-slash.test.ts +140 -0
  159. package/telegram-plugin/tests/vault-posture-quarantine.test.ts +104 -0
  160. package/telegram-plugin/tests/vault-request-access-tool.test.ts +114 -0
  161. package/telegram-plugin/tests/vault-request-access-unlock-resume.test.ts +106 -0
  162. package/telegram-plugin/turn-signal-tracker.ts +100 -24
  163. package/telegram-plugin/uat/SETUP.md +210 -35
  164. package/telegram-plugin/uat/assertions.ts +264 -37
  165. package/telegram-plugin/uat/driver-info.ts +57 -0
  166. package/telegram-plugin/uat/driver.ts +590 -51
  167. package/telegram-plugin/uat/harness.ts +140 -94
  168. package/telegram-plugin/uat/load-env.test.ts +72 -0
  169. package/telegram-plugin/uat/load-env.ts +48 -0
  170. package/telegram-plugin/uat/login.ts +96 -53
  171. package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
  172. package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
  173. package/telegram-plugin/uat/runners/report.ts +150 -0
  174. package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
  175. package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
  176. package/telegram-plugin/uat/runners/scorer.ts +106 -0
  177. package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
  178. package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
  179. package/telegram-plugin/uat/scenarios/ask-user-button-tap-dm.test.ts +141 -0
  180. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +191 -0
  181. package/telegram-plugin/uat/scenarios/fuzz-extended-dm.test.ts +255 -0
  182. package/telegram-plugin/uat/scenarios/fuzz-human-style-dm.test.ts +275 -0
  183. package/telegram-plugin/uat/scenarios/fuzz-random-prompts-dm.test.ts +146 -0
  184. package/telegram-plugin/uat/scenarios/fuzz-status-ask-dm.test.ts +486 -0
  185. package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +67 -0
  186. package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +100 -0
  187. package/telegram-plugin/uat/scenarios/jtbd-soft-commit-dm.test.ts +67 -0
  188. package/telegram-plugin/uat/scenarios/jtbd-status-query-dm.test.ts +49 -0
  189. package/telegram-plugin/uat/scenarios/location-inbound-dm.test.ts +65 -0
  190. package/telegram-plugin/uat/scenarios/midturn-silent-dm.test.ts +175 -0
  191. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +142 -0
  192. package/telegram-plugin/uat/scenarios/reactions-trigger-turn-dm.test.ts +96 -0
  193. package/telegram-plugin/uat/scenarios/secret-redaction-deletes-original-dm.test.ts +123 -0
  194. package/telegram-plugin/uat/scenarios/secret-redaction-no-false-positive-dm.test.ts +87 -0
  195. package/telegram-plugin/uat/scenarios/silence-poke-soft-dm.test.ts +155 -0
  196. package/telegram-plugin/uat/scenarios/silent-end-recovery-dm.test.ts +95 -0
  197. package/telegram-plugin/uat/scenarios/smoke-dm-reply.test.ts +57 -0
  198. package/telegram-plugin/uat/scenarios/subagent-watcher-no-rerun-dm.test.ts +135 -0
  199. package/telegram-plugin/uat/scenarios/vault-approval-posture-telegram-id-dm.test.ts +191 -0
  200. package/telegram-plugin/uat/scenarios/vault-audit-allow-dm.test.ts +108 -0
  201. package/telegram-plugin/uat/scenarios/vault-grant-auto-resume-dm.test.ts +121 -0
  202. package/telegram-plugin/uat/scenarios/vault-request-access-concurrent-dm.test.ts +161 -0
  203. package/telegram-plugin/uat/scenarios/vault-request-access-end-to-end-dm.test.ts +158 -0
  204. package/telegram-plugin/uat/scenarios/voice-inbound-dm.test.ts +65 -0
  205. package/telegram-plugin/vault-approval-posture.ts +42 -0
  206. package/telegram-plugin/welcome-text.ts +1 -0
  207. package/telegram-plugin/active-pins-sweep.ts +0 -204
  208. package/telegram-plugin/active-pins.ts +0 -146
  209. package/telegram-plugin/auth-dashboard.ts +0 -1104
  210. package/telegram-plugin/auth-slot-parser.ts +0 -497
  211. package/telegram-plugin/card-event-log.ts +0 -138
  212. package/telegram-plugin/dist/foreman/foreman.js +0 -31106
  213. package/telegram-plugin/docs/multi-agent-card-design.md +0 -847
  214. package/telegram-plugin/docs/pinned-progress-card-reliability.md +0 -144
  215. package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
  216. package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
  217. package/telegram-plugin/foreman/foreman.ts +0 -1165
  218. package/telegram-plugin/foreman/setup-flow.ts +0 -345
  219. package/telegram-plugin/foreman/setup-state.ts +0 -239
  220. package/telegram-plugin/foreman/state.ts +0 -203
  221. package/telegram-plugin/pin-event-log.ts +0 -76
  222. package/telegram-plugin/progress-card-driver.ts +0 -2886
  223. package/telegram-plugin/progress-card-pin-manager.ts +0 -589
  224. package/telegram-plugin/progress-card-pin-watchdog.ts +0 -98
  225. package/telegram-plugin/progress-card.ts +0 -1409
  226. package/telegram-plugin/tests/HARNESS.md +0 -340
  227. package/telegram-plugin/tests/_progress-card-harness.ts +0 -109
  228. package/telegram-plugin/tests/active-pins-boot-reaper.test.ts +0 -211
  229. package/telegram-plugin/tests/active-pins-sweep.test.ts +0 -309
  230. package/telegram-plugin/tests/active-pins.test.ts +0 -187
  231. package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
  232. package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
  233. package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
  234. package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
  235. package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
  236. package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
  237. package/telegram-plugin/tests/bg-agent-progress-card-757.test.ts +0 -201
  238. package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
  239. package/telegram-plugin/tests/card-event-log.test.ts +0 -145
  240. package/telegram-plugin/tests/first-paint.test.ts +0 -257
  241. package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
  242. package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
  243. package/telegram-plugin/tests/foreman-state.test.ts +0 -164
  244. package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
  245. package/telegram-plugin/tests/harness-ordering-invariants.test.ts +0 -243
  246. package/telegram-plugin/tests/pin-event-log.test.ts +0 -124
  247. package/telegram-plugin/tests/progress-card-api-failure-during-deferred.test.ts +0 -73
  248. package/telegram-plugin/tests/progress-card-close-paths-converge.test.ts +0 -272
  249. package/telegram-plugin/tests/progress-card-cross-turn.test.ts +0 -258
  250. package/telegram-plugin/tests/progress-card-delay-842.test.ts +0 -160
  251. package/telegram-plugin/tests/progress-card-dispose-preservepending.test.ts +0 -81
  252. package/telegram-plugin/tests/progress-card-draft-flag.test.ts +0 -80
  253. package/telegram-plugin/tests/progress-card-driver-eviction.test.ts +0 -215
  254. package/telegram-plugin/tests/progress-card-driver-fleet-shadow.test.ts +0 -123
  255. package/telegram-plugin/tests/progress-card-driver-force-complete-parent-done.test.ts +0 -76
  256. package/telegram-plugin/tests/progress-card-edit-timestamps-budget.test.ts +0 -62
  257. package/telegram-plugin/tests/progress-card-memory-bounds.test.ts +0 -84
  258. package/telegram-plugin/tests/progress-card-pin-failure-paths.test.ts +0 -139
  259. package/telegram-plugin/tests/progress-card-pin-manager.test.ts +0 -773
  260. package/telegram-plugin/tests/progress-card-pin-race-fast-turn.test.ts +0 -66
  261. package/telegram-plugin/tests/progress-card-pin-sidecar-partial-write.test.ts +0 -64
  262. package/telegram-plugin/tests/progress-card-pin-watchdog.test.ts +0 -190
  263. package/telegram-plugin/tests/progress-card-sigterm-pin-flush.test.ts +0 -146
  264. package/telegram-plugin/tests/real-gateway-f1-ladder-integrity.test.ts +0 -123
  265. package/telegram-plugin/tests/real-gateway-f2-instant-draft.test.ts +0 -82
  266. package/telegram-plugin/tests/real-gateway-f3-late-card.test.ts +0 -114
  267. package/telegram-plugin/tests/real-gateway-harness.ts +0 -699
  268. package/telegram-plugin/tests/real-gateway-i6-turn-flush-replay-dedup.test.ts +0 -313
  269. package/telegram-plugin/tests/real-gateway-ipc-lifecycle.test.ts +0 -299
  270. package/telegram-plugin/tests/real-gateway-spec.test.ts +0 -487
  271. package/telegram-plugin/tests/real-gateway.smoke.test.ts +0 -101
  272. package/telegram-plugin/tests/setup-flow.test.ts +0 -510
  273. package/telegram-plugin/tests/setup-state.test.ts +0 -146
  274. package/telegram-plugin/tests/sync-chat-running-subagents.test.ts +0 -116
  275. package/telegram-plugin/tests/turn-end-regressions.test.ts +0 -489
  276. package/telegram-plugin/tests/turn-flush-card-takeover.test.ts +0 -218
  277. package/telegram-plugin/tests/turn-flush-prose-recovery.test.ts +0 -78
  278. package/telegram-plugin/tests/two-zone-bg-carry-full-lifecycle.test.ts +0 -131
  279. package/telegram-plugin/tests/two-zone-bg-detection.test.ts +0 -120
  280. package/telegram-plugin/tests/two-zone-bg-done-when-all-terminal.test.ts +0 -116
  281. package/telegram-plugin/tests/two-zone-bg-early-turn-end.test.ts +0 -87
  282. package/telegram-plugin/tests/two-zone-bg-survives-next-turn.test.ts +0 -211
  283. package/telegram-plugin/tests/two-zone-card-cap.test.ts +0 -62
  284. package/telegram-plugin/tests/two-zone-card-fleet-row.test.ts +0 -101
  285. package/telegram-plugin/tests/two-zone-card-header-phases.test.ts +0 -78
  286. package/telegram-plugin/tests/two-zone-card-html-balance.test.ts +0 -110
  287. package/telegram-plugin/tests/two-zone-card-lifecycle.test.ts +0 -128
  288. package/telegram-plugin/tests/two-zone-card-sanitise.test.ts +0 -58
  289. package/telegram-plugin/tests/two-zone-card-snapshot.test.ts +0 -133
  290. package/telegram-plugin/tests/two-zone-concurrent-turns-isolation.test.ts +0 -155
  291. package/telegram-plugin/tests/two-zone-phasefor-precedence.test.ts +0 -117
  292. package/telegram-plugin/tests/two-zone-snapshot-extras.test.ts +0 -187
  293. package/telegram-plugin/tests/two-zone-stuck-edit-throttle.test.ts +0 -149
  294. package/telegram-plugin/tests/two-zone-stuck-header-escalation.test.ts +0 -101
  295. package/telegram-plugin/tests/two-zone-stuck-per-member.test.ts +0 -114
  296. package/telegram-plugin/tests/two-zone-stuck-recovery.test.ts +0 -105
  297. package/telegram-plugin/tests/waiting-ux-harness.ts +0 -381
  298. package/telegram-plugin/tests/waiting-ux.e2e.test.ts +0 -233
  299. package/telegram-plugin/turn-flush-prose-recovery.ts +0 -40
  300. package/telegram-plugin/two-zone-card.ts +0 -269
  301. package/telegram-plugin/uat/scenarios/smoke-clerk-reply.test.ts +0 -61
@@ -0,0 +1,67 @@
1
+ /**
2
+ * JTBD scenario — soft-commit for slow turns.
3
+ *
4
+ * The new conversational-pacing prompt (#1122) instructs the agent
5
+ * to send a one-liner "let me check, back in a few" before slow
6
+ * work. This UAT exercises that behaviour: send a prompt that
7
+ * obviously needs >15s, expect the FIRST outbound to be a short
8
+ * soft-commit message, with the final answer landing later.
9
+ *
10
+ * Not strict — the agent's allowed to skip the soft-commit if it
11
+ * judges the work is fast enough. The assertion is "the user does
12
+ * NOT see a long silent gap before the first sign of life": either
13
+ * a soft-commit OR the actual reply lands within ~20s (enforced below as a 30s budget to absorb jitter).
14
+ */
15
+
16
+ import { describe, it, expect } from "vitest";
17
+ import { spinUp } from "../harness.js";
18
+
19
+ // A prompt that needs real work (file reads / web search-ish / some
20
+ // thinking) so the model is incentivised to soft-commit.
21
+ const SLOW_PROMPT = (
22
+ "Read /etc/hostname and /etc/os-release, then summarise this "
23
+ + "machine in a single sentence (what OS family, what hostname). "
24
+ + "Take your time."
25
+ );
26
+
27
+ describe("uat: soft-commit pacing", () => {
28
+ it(
29
+ "user asks slow question → first reply lands within 20s",
30
+ async () => {
31
+ const sc = await spinUp({ agent: "test-harness" });
32
+ try {
33
+ const sendStart = Date.now();
34
+ await sc.sendDM(SLOW_PROMPT);
35
+
36
+ // 30s wall-clock budget gives mtcute polling jitter + the
37
+ // agent's first tool call enough headroom that a "near-miss
38
+ // soft commit" (model thinks for 25s then sends) still passes.
39
+ // Previous 25s/22s pair sat exactly in the model's natural
40
+ // think-then-respond window and produced flake unrelated to
41
+ // any real bug.
42
+ const firstReply = await sc.expectMessage(/\S/, {
43
+ from: "bot",
44
+ timeout: 30_000,
45
+ });
46
+ const ttfo = Date.now() - sendStart;
47
+
48
+ expect(firstReply.text.length).toBeGreaterThan(0);
49
+ expect(ttfo).toBeLessThan(30_000);
50
+
51
+ // If the first reply looks like the final answer (long, >200 chars),
52
+ // the model skipped soft-commit ceremony — fine, just note.
53
+ if (firstReply.text.length > 200) {
54
+ console.log(
55
+ `[soft-commit] model produced a long final answer as the `
56
+ + `first message (${firstReply.text.length} chars, ${ttfo}ms). `
57
+ + `Conversational pacing prompt would prefer a soft-commit `
58
+ + `first — but this is a soft preference, not a contract.`,
59
+ );
60
+ }
61
+ } finally {
62
+ await sc.tearDown();
63
+ }
64
+ },
65
+ 50_000,
66
+ );
67
+ });
@@ -0,0 +1,49 @@
1
+ /**
2
+ * JTBD scenario — `status?` inbound classifier.
3
+ *
4
+ * The conversational-pacing redesign (#1122 PR1) wired a primary
5
+ * lagging KPI: `inbound_status_query`, the count of users typing
6
+ * "status?", "still there?", "?", etc — every fire is a JTBD
7
+ * failure. We assert the classifier triggers AND the agent
8
+ * gracefully responds (i.e. doesn't crash, doesn't ignore, doesn't
9
+ * loop on it).
10
+ *
11
+ * Note: the classifier is fire-and-forget — it emits a runtime
12
+ * metric event but doesn't change routing. So all we can assert
13
+ * from the driver side is "the agent still replies sensibly" —
14
+ * the metric emission is verified by the unit tests in
15
+ * `tests/inbound-classifier.test.ts`. This UAT exists for
16
+ * end-to-end safety: "sending status? doesn't break anything."
17
+ */
18
+
19
+ import { describe, it, expect } from "vitest";
20
+ import { spinUp } from "../harness.js";
21
+
22
+ const STATUS_QUERIES = ["status?", "still there?", "any update?", "?"];
23
+
24
+ describe("uat: status-query inbound", () => {
25
+ for (const query of STATUS_QUERIES) {
26
+ it(
27
+ `user sends ${JSON.stringify(query)} → agent replies sensibly`,
28
+ async () => {
29
+ const sc = await spinUp({ agent: "test-harness" });
30
+ try {
31
+ await sc.sendDM(query);
32
+
33
+ // Any non-empty reply within 60s is acceptable. The
34
+ // interesting thing is the classifier metric fired —
35
+ // verified at the unit-test level. Here we just want
36
+ // "no crash, no silent-end, sensible reply."
37
+ const reply = await sc.expectMessage(/\S/, {
38
+ from: "bot",
39
+ timeout: 60_000,
40
+ });
41
+ expect(reply.text.length).toBeGreaterThan(0);
42
+ } finally {
43
+ await sc.tearDown();
44
+ }
45
+ },
46
+ 90_000,
47
+ );
48
+ }
49
+ });
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Location-inbound scenario — driver shares a geolocation with the
3
+ * test bot. Exercises the new `message:location` handler from #1077
4
+ * end-to-end: gateway parses the lat/lon, builds a `(location: …)`
5
+ * envelope, forwards to the agent, agent replies.
6
+ *
7
+ * Requires the same env as `smoke-dm-reply.test.ts` (see
8
+ * `uat/SETUP.md` §6).
9
+ *
10
+ * Coordinates are intentionally a well-known landmark (Sydney Opera
11
+ * House) so a failure trace makes "what was shared" obvious — and so
12
+ * a chatbot persona has something semantically grounded to respond to,
13
+ * which makes the bot's reply check more meaningful than asserting
14
+ * `.+`. We still tolerate ANY reply text — the goal is to prove the
15
+ * gateway forwarded the location, not to grade the agent's geography.
16
+ *
17
+ * Other 12 inbound types from #1077 are covered structurally in
18
+ * `tests/inbound-message-types.test.ts`. End-to-end UAT for them
19
+ * (contact, venue, dice, poll, web_app_data, users_shared,
20
+ * chat_shared, game, story, paid_media, successful_payment,
21
+ * passport_data) is deferred — most require either a custom bot
22
+ * setup (mini-app, payments provider) or a Telegram client gesture
23
+ * (story share, dice roll) that the mtcute driver does not script
24
+ * cleanly enough to be worth the brittleness.
25
+ */
26
+
27
+ import { describe, it, expect } from "vitest";
28
+ import { spinUp } from "../harness.js";
29
+
30
+ // Sydney Opera House — recognizable, non-sensitive, stable across runs.
31
+ const SYDNEY_OPERA_HOUSE_LAT = -33.8568;
32
+ const SYDNEY_OPERA_HOUSE_LON = 151.2153;
33
+
34
+ describe("uat: location-inbound DM round-trip", () => {
35
+ it(
36
+ "driver shares a geolocation, bot replies within 90s",
37
+ async () => {
38
+ const sc = await spinUp({ agent: "test-harness" });
39
+
40
+ try {
41
+ await sc.driver.sendLocation(
42
+ sc.botUserId,
43
+ SYDNEY_OPERA_HOUSE_LAT,
44
+ SYDNEY_OPERA_HOUSE_LON,
45
+ );
46
+
47
+ // Same budget as smoke-dm-reply: 90s tolerates the gateway's
48
+ // coalescing window + one normal Claude turn. A healthy agent
49
+ // replies in <20s.
50
+ const reply = await sc.expectMessage(/.+/, {
51
+ from: "bot",
52
+ timeout: 90_000,
53
+ });
54
+
55
+ expect(reply.text.length).toBeGreaterThan(0);
56
+ expect(reply.senderUserId).toBe(sc.botUserId);
57
+ } finally {
58
+ await sc.tearDown();
59
+ }
60
+ },
61
+ // Mirrors smoke-dm-reply's 110s outer budget — must exceed the
62
+ // 90s inner deadline plus spinUp overhead.
63
+ 110_000,
64
+ );
65
+ });
@@ -0,0 +1,175 @@
1
+ /**
2
+ * Mid-turn `disable_notification` scenario.
3
+ *
4
+ * Goal context: cause class CC-2 in `docs/status-ask-cause-classes.md`
5
+ * — the L2 conversational layer. The conversational-pacing prompt
6
+ * (`profiles/_shared/telegram-style.md.hbs:10`) instructs the model to
7
+ * pass `disable_notification: true` on mid-turn `reply` calls so the
8
+ * user only gets a device ping on the FINAL answer. If that contract
9
+ * silently degrades — model regression, prompt drift, or a gateway
10
+ * code path that drops the flag — every mid-turn reply pings. Users
11
+ * mute the bot. They then can't tell working from done. They ask
12
+ * "are you alive?" — `inbound_status_query` ticks.
13
+ *
14
+ * The flag is observable on the receiving side via mtcute's
15
+ * `message.isSilent` getter (corresponds to Telegram's
16
+ * `message.silent` flag, set by sender's `disable_notification` Bot
17
+ * API param). The driver was extended in this PR to surface it on
18
+ * `ObservedMessage.silent`.
19
+ *
20
+ * ## What the scenario asserts
21
+ *
22
+ * 1. Send a prompt that should produce multiple bot outbounds (a
23
+ * soft commit + mid-turn updates + a final answer). The prompt
24
+ * is explicit about wanting paced updates so the model doesn't
25
+ * optimize to a single reply.
26
+ * 2. Collect every bot message in the turn (waits for quiescence:
27
+ * no fresh bot message for `QUIESCENCE_MS`).
28
+ * 3. Assert: every bot message EXCEPT THE LAST has `silent === true`.
29
+ * 4. Assert: the LAST bot message has `silent === false` (the final
30
+ * answer should ping).
31
+ *
32
+ * ## Tolerances
33
+ *
34
+ * - If the turn has only one bot message (model judged the work fast
35
+ * enough to skip pacing), the mid-turn assertion is vacuous and we
36
+ * only check that the single final message is NOT silent. The
37
+ * prompt is engineered to be slow enough that this is unlikely,
38
+ * but we don't fail on it.
39
+ * - Quiescence window is 12s — long enough that a paused model isn't
40
+ * mistaken for "done", short enough that test wall-clock stays
41
+ * reasonable.
42
+ * - Edits don't count as fresh messages — we observe `edited === false`
43
+ * only. This matches the production semantic: an edit doesn't push
44
+ * a notification.
45
+ *
46
+ * ## Failure shapes
47
+ *
48
+ * 1. Mid-turn ping degrade — at least one non-last message has
49
+ * `silent === false`. The error message names the offending
50
+ * message index + text preview.
51
+ * 2. Final-answer silent — the last message has `silent === true`.
52
+ * Means the final answer doesn't ping; user might miss the
53
+ * reply landing.
54
+ * 3. No bot messages within timeout — distinct failure: agent
55
+ * isn't responding at all.
56
+ *
57
+ * Requires the same env as `smoke-dm-reply.test.ts` (see
58
+ * `uat/SETUP.md` §6).
59
+ */
60
+
61
+ import { describe, expect, it } from "vitest";
62
+ import { spinUp } from "../harness.js";
63
+ import type { ObservedMessage } from "../driver.js";
64
+
65
+ const QUIESCENCE_MS = 12_000;
66
+ const OVERALL_DEADLINE_MS = 120_000;
67
+
68
+ // Multi-step prompt with explicit pacing expectations. Engineered so
69
+ // a well-behaved model produces:
70
+ // 1. soft commit ("on it" / "let me check")
71
+ // 2. mid-turn update after each file (with disable_notification: true)
72
+ // 3. final answer
73
+ //
74
+ // The work itself is two trivial file reads + a one-sentence
75
+ // summary. If the model collapses this to a single reply, the test
76
+ // still asserts the disable_notification contract on what it does
77
+ // emit; the vacuous-mid-turn path is allowed.
78
+ const PACED_PROMPT =
79
+ "Please follow this exact pacing protocol for this turn:\n" +
80
+ " 1. First send a brief 'on it' reply so I know you started.\n" +
81
+ " 2. Read /etc/hostname, then send a brief mid-turn update saying " +
82
+ "what the hostname is. Use disable_notification:true on that update.\n" +
83
+ " 3. Read /etc/os-release, then send a brief mid-turn update saying " +
84
+ "what the OS family is. Use disable_notification:true on that update.\n" +
85
+ " 4. Finally send a single-sentence summary as your final answer " +
86
+ "(no disable_notification flag — this one should ping me).\n" +
87
+ "Keep each message short.";
88
+
89
+ describe("uat: mid-turn replies pass disable_notification (CC-2)", () => {
90
+ it(
91
+ "every mid-turn bot reply is silent; only the final answer pings",
92
+ async () => {
93
+ const sc = await spinUp({ agent: "test-harness" });
94
+ try {
95
+ await sc.sendDM(PACED_PROMPT);
96
+
97
+ const collected: ObservedMessage[] = [];
98
+ const overallDeadline = Date.now() + OVERALL_DEADLINE_MS;
99
+ let quiescenceDeadline = Date.now() + 30_000; // first message
100
+ // gets a bigger budget
101
+
102
+ // Drain bot messages until QUIESCENCE_MS passes with no
103
+ // fresh non-edit observation, or the overall deadline hits.
104
+ while (Date.now() < overallDeadline) {
105
+ const remaining = Math.min(
106
+ quiescenceDeadline - Date.now(),
107
+ overallDeadline - Date.now(),
108
+ );
109
+ if (remaining <= 0) break;
110
+ try {
111
+ const msg = await sc.expectMessage(
112
+ (m: ObservedMessage) => m.fromBot && !m.edited,
113
+ { from: "bot", timeout: remaining },
114
+ );
115
+ collected.push(msg);
116
+ quiescenceDeadline = Date.now() + QUIESCENCE_MS;
117
+ } catch {
118
+ // Timed out — that's the quiescence signal we wanted.
119
+ break;
120
+ }
121
+ }
122
+
123
+ expect(
124
+ collected.length,
125
+ `no bot messages observed within ${OVERALL_DEADLINE_MS}ms — ` +
126
+ `agent isn't responding at all (distinct failure from CC-2).`,
127
+ ).toBeGreaterThan(0);
128
+
129
+ const trail = collected
130
+ .map(
131
+ (m, i) =>
132
+ ` [${i}] silent=${m.silent} text=${JSON.stringify(
133
+ m.text.slice(0, 80),
134
+ )}`,
135
+ )
136
+ .join("\n");
137
+
138
+ // Final answer should ping.
139
+ const last = collected[collected.length - 1];
140
+ expect(
141
+ last.silent,
142
+ `final answer (message ${collected.length - 1}) was marked ` +
143
+ `silent — the user won't get pinged when the turn finishes. ` +
144
+ `Trail:\n${trail}`,
145
+ ).toBe(false);
146
+
147
+ // Mid-turn updates should NOT ping. Vacuous when the model
148
+ // emitted only the final answer; meaningful when paced.
149
+ const midTurn = collected.slice(0, -1);
150
+ const loudMidTurn = midTurn.filter((m) => !m.silent);
151
+ expect(
152
+ loudMidTurn.length,
153
+ `${loudMidTurn.length} mid-turn message(s) were NOT silent — ` +
154
+ `each one pings the user's device. Conversational pacing ` +
155
+ `requires disable_notification:true on mid-turn replies. ` +
156
+ `Trail:\n${trail}`,
157
+ ).toBe(0);
158
+
159
+ if (midTurn.length === 0) {
160
+ console.warn(
161
+ `[midturn-silent] model produced only 1 bot reply — the ` +
162
+ `mid-turn assertion was vacuous. Prompt may not be ` +
163
+ `slow enough to force pacing, or the model is ignoring ` +
164
+ `the explicit step-by-step instructions. This is not a ` +
165
+ `failure of CC-2, but the scenario didn't cover its ` +
166
+ `intended ground.`,
167
+ );
168
+ }
169
+ } finally {
170
+ await sc.tearDown();
171
+ }
172
+ },
173
+ OVERALL_DEADLINE_MS + 30_000,
174
+ );
175
+ });
@@ -0,0 +1,142 @@
1
+ /**
2
+ * Reaction lifecycle scenario — driver DMs the test bot, bot reacts
3
+ * to the inbound message through the lifecycle and lands a terminal
4
+ * emoji once the reply ships.
5
+ *
6
+ * Part of: https://github.com/switchroom/switchroom/issues/866
7
+ * Goal context: cause class CC-1 / CC-6 in
8
+ * `docs/status-ask-cause-classes.md` (the L1 ambient layer should
9
+ * deliver a definitively-done terminal emoji within a few seconds
10
+ * of the bot's final reply — otherwise the user looks at their
11
+ * inbound, sees it still wearing 🤔, and asks "you done?").
12
+ *
13
+ * History: this scenario was previously `describe.skip` with a
14
+ * rationale that the pinned progress card "renders INSTEAD of
15
+ * reactions". The card was retired in #1126; the card-vs-reaction
16
+ * branch in the gateway is dead. We can now exercise the full
17
+ * lifecycle end-to-end without the two-agent split.
18
+ *
19
+ * What we assert (in priority order):
20
+ *
21
+ * 1. Within the turn, the driver sees AT LEAST ONE `+` reaction
22
+ * op (the L1 "I'm alive" signal). Fast turns may collapse
23
+ * intermediate states, so we only require *one* add, not a
24
+ * specific emoji.
25
+ * 2. By the time the bot has sent a final reply (+ a short tail
26
+ * for Telegram to deliver the terminal-emoji replace), the
27
+ * LAST observed `+` op is in the `done` set (`👍 / 💯 / 🎉`).
28
+ *
29
+ * Why "last `+` op wins" rather than `expectReaction(['👍'])` with
30
+ * a literal sequence: `setMessageReaction` REPLACES the prior emoji
31
+ * atomically. mtcute's update stream can deliver the replace as a
32
+ * `-prev` followed by a `+next`, or as a single coalesced event,
33
+ * depending on server batching. The "last add wins" shape matches
34
+ * the production semantics — whatever's *currently* on the message
35
+ * is what the user actually sees.
36
+ *
37
+ * The observer must be attached BEFORE the reply lands so we
38
+ * capture the queued / working reactions, not just the terminal
39
+ * one. Pattern: `observeReactions` immediately after `sendDM`
40
+ * returns the messageId, drain into a trail array while we wait
41
+ * for the reply, then run a short tail to catch the terminal
42
+ * after the reply.
43
+ *
44
+ * Requires the same env as `smoke-dm-reply.test.ts` (see
45
+ * `uat/SETUP.md` §6).
46
+ */
47
+
48
+ import { describe, expect, it } from "vitest";
49
+ import { spinUp } from "../harness.js";
50
+
51
+ const TERMINAL_DONE_EMOJI = new Set(["👍", "💯", "🎉"]);
52
+ const TAIL_AFTER_REPLY_MS = 8_000;
53
+
54
+ const INBOUND = (): string => `uat-reactions ${new Date().toISOString()}`;
55
+
56
+ interface ObservedOp {
57
+ emoji: string;
58
+ op: "+" | "-";
59
+ at: number;
60
+ }
61
+
62
+ describe("uat: reaction lifecycle on driver DM", () => {
63
+ it(
64
+ "driver sees an alive reaction, then a terminal-done emoji by reply tail",
65
+ async () => {
66
+ const sc = await spinUp({ agent: "test-harness" });
67
+ try {
68
+ const sent = await sc.sendDM(INBOUND());
69
+
70
+ // Attach the observer immediately so the queued (👀) and
71
+ // working reactions don't fire before the listener exists.
72
+ const trail: ObservedOp[] = [];
73
+ const iter = sc.driver
74
+ .observeReactions(sc.botUserId, { messageId: sent.messageId })
75
+ [Symbol.asyncIterator]();
76
+ let pump: Promise<void> | null = null;
77
+ let stopPump = false;
78
+ pump = (async () => {
79
+ while (!stopPump) {
80
+ const next = await iter.next();
81
+ if (next.done === true) return;
82
+ trail.push({
83
+ emoji: next.value.emoji,
84
+ op: next.value.op,
85
+ at: Date.now(),
86
+ });
87
+ }
88
+ })();
89
+
90
+ try {
91
+ // Wait for the bot's reply (any content). Gives the L1
92
+ // lifecycle time to traverse queued → working → done.
93
+ const reply = await sc.expectMessage(/\S/, {
94
+ from: "bot",
95
+ timeout: 60_000,
96
+ });
97
+ expect(reply.text.length).toBeGreaterThan(0);
98
+
99
+ // Tail after the reply for Telegram to deliver the
100
+ // terminal-emoji replace. In practice <1s on a healthy bot;
101
+ // 8s ceiling absorbs server batching jitter.
102
+ await new Promise((resolve) =>
103
+ setTimeout(resolve, TAIL_AFTER_REPLY_MS),
104
+ );
105
+ } finally {
106
+ stopPump = true;
107
+ await iter.return?.();
108
+ if (pump) {
109
+ await pump.catch(() => {
110
+ /* generator return triggers rejection on pending iter.next() — ignore */
111
+ });
112
+ }
113
+ }
114
+
115
+ // L1 alive signal: at least one `+` op landed during the turn.
116
+ const adds = trail.filter((o) => o.op === "+");
117
+ expect(
118
+ adds.length,
119
+ `expected at least one reaction-add during the turn, got 0. ` +
120
+ `Full trail: ${trail.map((o) => `${o.op}${o.emoji}`).join(" ") || "(empty)"}`,
121
+ ).toBeGreaterThan(0);
122
+
123
+ // L1 terminal: the LAST `+` op should be a terminal-done emoji.
124
+ // Extra `-` ops after the final `+` are tolerated (Telegram
125
+ // sometimes emits a bare clean-up `-`); the last `+` is what
126
+ // the user actually sees.
127
+ const lastAdd = adds[adds.length - 1];
128
+ expect(
129
+ TERMINAL_DONE_EMOJI.has(lastAdd.emoji),
130
+ `expected last reaction-add to be one of ${[
131
+ ...TERMINAL_DONE_EMOJI,
132
+ ].join(", ")}, got ${lastAdd.emoji}. Full trail: ${trail
133
+ .map((o) => `${o.op}${o.emoji}`)
134
+ .join(" ")}`,
135
+ ).toBe(true);
136
+ } finally {
137
+ await sc.tearDown();
138
+ }
139
+ },
140
+ 90_000,
141
+ );
142
+ });
@@ -0,0 +1,96 @@
1
+ /**
2
+ * UAT scenario — driver reacts to a bot DM with a trigger emoji and
3
+ * observes the agent process a synthetic inbound turn (#1074).
4
+ *
5
+ * Flow:
6
+ * 1. Driver sends a DM that will provoke a bot reply.
7
+ * 2. Bot replies — driver observes the reply message id.
8
+ * 3. Driver places a 👎 reaction on the bot's reply.
9
+ * 4. Assert: the agent emits a subsequent action (another outbound
10
+ * message). The reaction-trigger pipeline synthesizes a new
11
+ * `<channel source="reaction">` inbound turn, which the agent's
12
+ * Claude session treats as a normal turn and (per profile
13
+ * guidance) acknowledges or course-corrects.
14
+ *
15
+ * Negative:
16
+ * - Driver also places a ❤️ reaction (not in the default
17
+ * allowlist) on a separate bot message.
18
+ * - Assert: NO new agent action within the negative-budget window.
19
+ *
20
+ * Requires the same env as `smoke-dm-reply.test.ts` — see
21
+ * `uat/SETUP.md` §6.
22
+ *
23
+ * NOTE: this scenario depends on the test-harness agent having the
24
+ * default `reactions:` config (allowlist includes 👎). If an operator
25
+ * has narrowed the allowlist this case will fail-with-message.
26
+ */
27
+
28
+ import { describe, it, expect } from "vitest";
29
+ import { spinUp } from "../harness.js";
30
+
31
+ const TRIGGER_INBOUND = `uat-reaction-trigger ${new Date().toISOString()}`;
32
+ const NEGATIVE_INBOUND = `uat-reaction-trigger-negative ${new Date().toISOString()}`;
33
+
34
+ describe("uat: bot reaction triggers synthetic agent turn (#1074)", () => {
35
+ it(
36
+ "👎 on a bot reply dispatches a new agent turn; ❤️ does not",
37
+ async () => {
38
+ const sc = await spinUp({ agent: "test-harness" });
39
+ try {
40
+ // 1. Drive the first bot reply we'll react to.
41
+ await sc.sendDM(TRIGGER_INBOUND);
42
+ const firstReply = await sc.expectMessage(/.+/, {
43
+ from: "bot",
44
+ timeout: 90_000,
45
+ });
46
+ expect(firstReply.senderUserId).toBe(sc.botUserId);
47
+
48
+ // 2. React 👎 to the bot's reply. Default allowlist includes 👎,
49
+ // so the gateway should dispatch a synthetic inbound after
50
+ // the debounce window elapses.
51
+ await sc.driver.sendReaction(sc.botUserId, firstReply.messageId, "👎");
52
+
53
+ // 3. Wait for the agent to emit ANY subsequent message. The
54
+ // debounce window is 30s by default, plus a Claude turn —
55
+ // budget 120s to be safe.
56
+ const triggeredReply = await sc.expectMessage(/.+/, {
57
+ from: "bot",
58
+ timeout: 120_000,
59
+ });
60
+ expect(triggeredReply.messageId).not.toBe(firstReply.messageId);
61
+ expect(triggeredReply.senderUserId).toBe(sc.botUserId);
62
+
63
+ // ── Negative case ───────────────────────────────────────────────
64
+ await sc.sendDM(NEGATIVE_INBOUND);
65
+ const secondReply = await sc.expectMessage(/.+/, {
66
+ from: "bot",
67
+ timeout: 90_000,
68
+ });
69
+ await sc.driver.sendReaction(sc.botUserId, secondReply.messageId, "❤️");
70
+
71
+ // Wait the full debounce window + a generous Claude budget. If
72
+ // a new turn fires within this window, the negative case has
73
+ // failed (the allowlist leaked).
74
+ const NEGATIVE_BUDGET_MS = 45_000;
75
+ let leaked = false;
76
+ try {
77
+ await sc.expectMessage(/.+/, {
78
+ from: "bot",
79
+ timeout: NEGATIVE_BUDGET_MS,
80
+ });
81
+ leaked = true;
82
+ } catch {
83
+ // Expected — no new message within the negative window.
84
+ }
85
+ expect(leaked).toBe(false);
86
+ } finally {
87
+ await sc.tearDown();
88
+ }
89
+ },
90
+ // Per-test budget — must cover trigger turn + debounce + agent
91
+ // reply + negative-budget window + spinUp overhead. 5 minutes is
92
+ // generous but on the order of `progress-card-dm.test.ts` which
93
+ // also has multi-phase waits (worst-case inner timeouts sum to 345s).
94
+ 300_000,
95
+ );
96
+ });