switchroom 0.7.15 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (301) hide show
  1. package/README.md +51 -59
  2. package/bin/run-hook.sh +27 -11
  3. package/bin/timezone-hook.sh +9 -7
  4. package/dist/agent-scheduler/index.js +410 -133
  5. package/dist/auth-broker/index.js +13932 -0
  6. package/dist/cli/switchroom.js +26937 -5601
  7. package/dist/host-control/main.js +12702 -0
  8. package/dist/vault/approvals/kernel-server.js +467 -184
  9. package/dist/vault/broker/server.js +1430 -724
  10. package/examples/minimal.yaml +63 -0
  11. package/examples/personal-google-workspace-mcp/.env.example +34 -0
  12. package/examples/personal-google-workspace-mcp/README.md +194 -0
  13. package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
  14. package/examples/switchroom.yaml +220 -0
  15. package/package.json +7 -4
  16. package/profiles/_base/settings.json.hbs +20 -5
  17. package/profiles/_base/start.sh.hbs +16 -3
  18. package/profiles/_shared/agent-self-service.md.hbs +126 -0
  19. package/profiles/_shared/telegram-style.md.hbs +20 -90
  20. package/profiles/_shared/vault-protocol.md.hbs +68 -0
  21. package/profiles/default/CLAUDE.md +50 -96
  22. package/profiles/default/CLAUDE.md.hbs +36 -6
  23. package/profiles/default/workspace/SOUL.md.hbs +12 -5
  24. package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
  25. package/skills/buildkite-agent-runtime/SKILL.md +44 -11
  26. package/skills/buildkite-api/SKILL.md +31 -8
  27. package/skills/buildkite-cli/SKILL.md +27 -9
  28. package/skills/buildkite-migration/SKILL.md +22 -9
  29. package/skills/buildkite-pipelines/SKILL.md +26 -9
  30. package/skills/buildkite-secure-delivery/SKILL.md +23 -9
  31. package/skills/buildkite-test-engine/SKILL.md +25 -8
  32. package/skills/docx/SKILL.md +1 -1
  33. package/skills/docx/scripts/office/validators/__pycache__/__init__.cpython-313.pyc +0 -0
  34. package/skills/docx/scripts/office/validators/__pycache__/base.cpython-313.pyc +0 -0
  35. package/skills/file-bug/SKILL.md +34 -6
  36. package/skills/humanizer/SKILL.md +15 -0
  37. package/skills/humanizer-calibrate/SKILL.md +7 -1
  38. package/skills/mcp-builder/SKILL.md +1 -1
  39. package/skills/pdf/SKILL.md +1 -1
  40. package/skills/pptx/SKILL.md +1 -1
  41. package/skills/skill-creator/SKILL.md +21 -1
  42. package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
  43. package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
  44. package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
  45. package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
  46. package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
  47. package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
  48. package/skills/switchroom-cli/SKILL.md +63 -64
  49. package/skills/switchroom-health/SKILL.md +23 -10
  50. package/skills/switchroom-install/SKILL.md +3 -3
  51. package/skills/switchroom-manage/SKILL.md +26 -19
  52. package/skills/switchroom-runtime/SKILL.md +191 -0
  53. package/skills/switchroom-status/SKILL.md +27 -2
  54. package/skills/telegram-test-harness/SKILL.md +3 -0
  55. package/skills/token-helpers/SKILL.md +24 -1
  56. package/skills/webapp-testing/SKILL.md +31 -1
  57. package/skills/xlsx/SKILL.md +1 -1
  58. package/telegram-plugin/admin-commands/index.ts +7 -5
  59. package/telegram-plugin/analytics-posthog.ts +191 -0
  60. package/telegram-plugin/bridge/bridge.ts +69 -0
  61. package/telegram-plugin/bridge/ipc-client.ts +4 -1
  62. package/telegram-plugin/dist/bridge/bridge.js +194 -119
  63. package/telegram-plugin/dist/gateway/gateway.js +23611 -19671
  64. package/telegram-plugin/dist/server.js +245 -189
  65. package/telegram-plugin/first-paint.ts +3 -24
  66. package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
  67. package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
  68. package/telegram-plugin/gateway/auth-command.ts +794 -0
  69. package/telegram-plugin/gateway/auth-line.ts +123 -0
  70. package/telegram-plugin/gateway/boot-card.ts +169 -40
  71. package/telegram-plugin/gateway/boot-issue-cache.ts +308 -0
  72. package/telegram-plugin/gateway/boot-probes.ts +166 -123
  73. package/telegram-plugin/gateway/boot-reason.ts +41 -7
  74. package/telegram-plugin/gateway/boot-version.ts +66 -0
  75. package/telegram-plugin/gateway/gateway.ts +3499 -1885
  76. package/telegram-plugin/gateway/hostd-dispatch.ts +117 -0
  77. package/telegram-plugin/gateway/ipc-protocol.ts +18 -0
  78. package/telegram-plugin/gateway/pending-inbound-buffer.ts +106 -0
  79. package/telegram-plugin/gateway/quarantine.ts +69 -0
  80. package/telegram-plugin/gateway/quota-cache.ts +9 -4
  81. package/telegram-plugin/gateway/reaction-trigger.ts +401 -0
  82. package/telegram-plugin/gateway/recent-denials.test.ts +103 -0
  83. package/telegram-plugin/gateway/recent-denials.ts +77 -0
  84. package/telegram-plugin/gateway/startup-network-retry.ts +109 -31
  85. package/telegram-plugin/gateway/vault-grant-inbound-builders.ts +125 -0
  86. package/telegram-plugin/history.ts +91 -0
  87. package/telegram-plugin/hooks/hooks.json +10 -0
  88. package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +130 -0
  89. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +19 -2
  90. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +22 -2
  91. package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
  92. package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
  93. package/telegram-plugin/inbound-classifier.ts +50 -0
  94. package/telegram-plugin/inline-keyboard-callbacks.ts +136 -0
  95. package/telegram-plugin/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json +1 -0
  96. package/telegram-plugin/package.json +4 -2
  97. package/telegram-plugin/permission-rule.ts +51 -0
  98. package/telegram-plugin/permission-title.ts +56 -0
  99. package/telegram-plugin/quota-check.ts +19 -41
  100. package/telegram-plugin/registry/reaper.ts +223 -0
  101. package/telegram-plugin/retry-api-call.ts +80 -0
  102. package/telegram-plugin/runtime-metrics.ts +177 -0
  103. package/telegram-plugin/scripts/build.mjs +0 -1
  104. package/telegram-plugin/secret-detect/index.ts +24 -0
  105. package/telegram-plugin/secret-detect/vault-error.test.ts +64 -12
  106. package/telegram-plugin/secret-detect/vault-error.ts +78 -11
  107. package/telegram-plugin/secret-detect/vault-write.ts +14 -2
  108. package/telegram-plugin/server.js +41795 -0
  109. package/telegram-plugin/session-tail.ts +6 -1
  110. package/telegram-plugin/shared/bot-runtime.ts +5 -4
  111. package/telegram-plugin/silence-poke.ts +420 -0
  112. package/telegram-plugin/silent-end.ts +174 -0
  113. package/telegram-plugin/stream-controller.ts +13 -0
  114. package/telegram-plugin/stream-reply-handler.ts +7 -0
  115. package/telegram-plugin/subagent-watcher.ts +213 -4
  116. package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
  117. package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
  118. package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
  119. package/telegram-plugin/tests/boot-card-issue-dedup.test.ts +247 -0
  120. package/telegram-plugin/tests/boot-card-reason-to-render.test.ts +182 -0
  121. package/telegram-plugin/tests/boot-card-reason.test.ts +65 -2
  122. package/telegram-plugin/tests/boot-card-render.test.ts +146 -0
  123. package/telegram-plugin/tests/boot-card-silent-on-operator.test.ts +103 -0
  124. package/telegram-plugin/tests/boot-probes.test.ts +216 -10
  125. package/telegram-plugin/tests/boot-version-string.test.ts +0 -0
  126. package/telegram-plugin/tests/finalize-callback.test.ts +190 -0
  127. package/telegram-plugin/tests/gateway-message-validator.test.ts +26 -0
  128. package/telegram-plugin/tests/gateway-secret-detect.test.ts +12 -3
  129. package/telegram-plugin/tests/gateway-startup-network-retry.test.ts +104 -0
  130. package/telegram-plugin/tests/history-reaper.test.ts +378 -0
  131. package/telegram-plugin/tests/hostd-dispatch.test.ts +129 -0
  132. package/telegram-plugin/tests/inbound-classifier.test.ts +76 -0
  133. package/telegram-plugin/tests/inbound-message-types.test.ts +267 -0
  134. package/telegram-plugin/tests/issues-card.test.ts +49 -0
  135. package/telegram-plugin/tests/pending-inbound-buffer.test.ts +132 -0
  136. package/telegram-plugin/tests/permission-rule.test.ts +80 -1
  137. package/telegram-plugin/tests/permission-title.test.ts +31 -0
  138. package/telegram-plugin/tests/quota-check.test.ts +5 -35
  139. package/telegram-plugin/tests/races.test.ts +179 -0
  140. package/telegram-plugin/tests/reaction-trigger-flow.test.ts +353 -0
  141. package/telegram-plugin/tests/reaction-trigger.test.ts +397 -0
  142. package/telegram-plugin/tests/retry-api-call.test.ts +152 -1
  143. package/telegram-plugin/tests/runtime-metrics.test.ts +145 -0
  144. package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +155 -0
  145. package/telegram-plugin/tests/secret-detect-delete-must-surface-failures.test.ts +133 -0
  146. package/telegram-plugin/tests/secret-detect-false-positives.test.ts +137 -0
  147. package/telegram-plugin/tests/silence-poke.test.ts +493 -0
  148. package/telegram-plugin/tests/silent-end.test.ts +206 -0
  149. package/telegram-plugin/tests/subagent-tracker-hooks.test.ts +107 -0
  150. package/telegram-plugin/tests/subagent-watcher-env-thresholds.test.ts +224 -0
  151. package/telegram-plugin/tests/subagent-watcher-stall-terminal.test.ts +316 -0
  152. package/telegram-plugin/tests/subagent-watcher.test.ts +263 -0
  153. package/telegram-plugin/tests/turn-signal-tracker.test.ts +81 -0
  154. package/telegram-plugin/tests/vault-approval-posture.test.ts +256 -0
  155. package/telegram-plugin/tests/vault-grant-auto-resume.test.ts +73 -0
  156. package/telegram-plugin/tests/vault-grant-inbound-builders.test.ts +226 -0
  157. package/telegram-plugin/tests/vault-grant-union.test.ts +130 -0
  158. package/telegram-plugin/tests/vault-key-regex-allows-slash.test.ts +140 -0
  159. package/telegram-plugin/tests/vault-posture-quarantine.test.ts +104 -0
  160. package/telegram-plugin/tests/vault-request-access-tool.test.ts +114 -0
  161. package/telegram-plugin/tests/vault-request-access-unlock-resume.test.ts +106 -0
  162. package/telegram-plugin/turn-signal-tracker.ts +100 -24
  163. package/telegram-plugin/uat/SETUP.md +210 -35
  164. package/telegram-plugin/uat/assertions.ts +264 -37
  165. package/telegram-plugin/uat/driver-info.ts +57 -0
  166. package/telegram-plugin/uat/driver.ts +590 -51
  167. package/telegram-plugin/uat/harness.ts +140 -94
  168. package/telegram-plugin/uat/load-env.test.ts +72 -0
  169. package/telegram-plugin/uat/load-env.ts +48 -0
  170. package/telegram-plugin/uat/login.ts +96 -53
  171. package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
  172. package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
  173. package/telegram-plugin/uat/runners/report.ts +150 -0
  174. package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
  175. package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
  176. package/telegram-plugin/uat/runners/scorer.ts +106 -0
  177. package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
  178. package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
  179. package/telegram-plugin/uat/scenarios/ask-user-button-tap-dm.test.ts +141 -0
  180. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +191 -0
  181. package/telegram-plugin/uat/scenarios/fuzz-extended-dm.test.ts +255 -0
  182. package/telegram-plugin/uat/scenarios/fuzz-human-style-dm.test.ts +275 -0
  183. package/telegram-plugin/uat/scenarios/fuzz-random-prompts-dm.test.ts +146 -0
  184. package/telegram-plugin/uat/scenarios/fuzz-status-ask-dm.test.ts +486 -0
  185. package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +67 -0
  186. package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +100 -0
  187. package/telegram-plugin/uat/scenarios/jtbd-soft-commit-dm.test.ts +67 -0
  188. package/telegram-plugin/uat/scenarios/jtbd-status-query-dm.test.ts +49 -0
  189. package/telegram-plugin/uat/scenarios/location-inbound-dm.test.ts +65 -0
  190. package/telegram-plugin/uat/scenarios/midturn-silent-dm.test.ts +175 -0
  191. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +142 -0
  192. package/telegram-plugin/uat/scenarios/reactions-trigger-turn-dm.test.ts +96 -0
  193. package/telegram-plugin/uat/scenarios/secret-redaction-deletes-original-dm.test.ts +123 -0
  194. package/telegram-plugin/uat/scenarios/secret-redaction-no-false-positive-dm.test.ts +87 -0
  195. package/telegram-plugin/uat/scenarios/silence-poke-soft-dm.test.ts +155 -0
  196. package/telegram-plugin/uat/scenarios/silent-end-recovery-dm.test.ts +95 -0
  197. package/telegram-plugin/uat/scenarios/smoke-dm-reply.test.ts +57 -0
  198. package/telegram-plugin/uat/scenarios/subagent-watcher-no-rerun-dm.test.ts +135 -0
  199. package/telegram-plugin/uat/scenarios/vault-approval-posture-telegram-id-dm.test.ts +191 -0
  200. package/telegram-plugin/uat/scenarios/vault-audit-allow-dm.test.ts +108 -0
  201. package/telegram-plugin/uat/scenarios/vault-grant-auto-resume-dm.test.ts +121 -0
  202. package/telegram-plugin/uat/scenarios/vault-request-access-concurrent-dm.test.ts +161 -0
  203. package/telegram-plugin/uat/scenarios/vault-request-access-end-to-end-dm.test.ts +158 -0
  204. package/telegram-plugin/uat/scenarios/voice-inbound-dm.test.ts +65 -0
  205. package/telegram-plugin/vault-approval-posture.ts +42 -0
  206. package/telegram-plugin/welcome-text.ts +1 -0
  207. package/telegram-plugin/active-pins-sweep.ts +0 -204
  208. package/telegram-plugin/active-pins.ts +0 -146
  209. package/telegram-plugin/auth-dashboard.ts +0 -1104
  210. package/telegram-plugin/auth-slot-parser.ts +0 -497
  211. package/telegram-plugin/card-event-log.ts +0 -138
  212. package/telegram-plugin/dist/foreman/foreman.js +0 -31106
  213. package/telegram-plugin/docs/multi-agent-card-design.md +0 -847
  214. package/telegram-plugin/docs/pinned-progress-card-reliability.md +0 -144
  215. package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
  216. package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
  217. package/telegram-plugin/foreman/foreman.ts +0 -1165
  218. package/telegram-plugin/foreman/setup-flow.ts +0 -345
  219. package/telegram-plugin/foreman/setup-state.ts +0 -239
  220. package/telegram-plugin/foreman/state.ts +0 -203
  221. package/telegram-plugin/pin-event-log.ts +0 -76
  222. package/telegram-plugin/progress-card-driver.ts +0 -2886
  223. package/telegram-plugin/progress-card-pin-manager.ts +0 -589
  224. package/telegram-plugin/progress-card-pin-watchdog.ts +0 -98
  225. package/telegram-plugin/progress-card.ts +0 -1409
  226. package/telegram-plugin/tests/HARNESS.md +0 -340
  227. package/telegram-plugin/tests/_progress-card-harness.ts +0 -109
  228. package/telegram-plugin/tests/active-pins-boot-reaper.test.ts +0 -211
  229. package/telegram-plugin/tests/active-pins-sweep.test.ts +0 -309
  230. package/telegram-plugin/tests/active-pins.test.ts +0 -187
  231. package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
  232. package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
  233. package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
  234. package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
  235. package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
  236. package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
  237. package/telegram-plugin/tests/bg-agent-progress-card-757.test.ts +0 -201
  238. package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
  239. package/telegram-plugin/tests/card-event-log.test.ts +0 -145
  240. package/telegram-plugin/tests/first-paint.test.ts +0 -257
  241. package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
  242. package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
  243. package/telegram-plugin/tests/foreman-state.test.ts +0 -164
  244. package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
  245. package/telegram-plugin/tests/harness-ordering-invariants.test.ts +0 -243
  246. package/telegram-plugin/tests/pin-event-log.test.ts +0 -124
  247. package/telegram-plugin/tests/progress-card-api-failure-during-deferred.test.ts +0 -73
  248. package/telegram-plugin/tests/progress-card-close-paths-converge.test.ts +0 -272
  249. package/telegram-plugin/tests/progress-card-cross-turn.test.ts +0 -258
  250. package/telegram-plugin/tests/progress-card-delay-842.test.ts +0 -160
  251. package/telegram-plugin/tests/progress-card-dispose-preservepending.test.ts +0 -81
  252. package/telegram-plugin/tests/progress-card-draft-flag.test.ts +0 -80
  253. package/telegram-plugin/tests/progress-card-driver-eviction.test.ts +0 -215
  254. package/telegram-plugin/tests/progress-card-driver-fleet-shadow.test.ts +0 -123
  255. package/telegram-plugin/tests/progress-card-driver-force-complete-parent-done.test.ts +0 -76
  256. package/telegram-plugin/tests/progress-card-edit-timestamps-budget.test.ts +0 -62
  257. package/telegram-plugin/tests/progress-card-memory-bounds.test.ts +0 -84
  258. package/telegram-plugin/tests/progress-card-pin-failure-paths.test.ts +0 -139
  259. package/telegram-plugin/tests/progress-card-pin-manager.test.ts +0 -773
  260. package/telegram-plugin/tests/progress-card-pin-race-fast-turn.test.ts +0 -66
  261. package/telegram-plugin/tests/progress-card-pin-sidecar-partial-write.test.ts +0 -64
  262. package/telegram-plugin/tests/progress-card-pin-watchdog.test.ts +0 -190
  263. package/telegram-plugin/tests/progress-card-sigterm-pin-flush.test.ts +0 -146
  264. package/telegram-plugin/tests/real-gateway-f1-ladder-integrity.test.ts +0 -123
  265. package/telegram-plugin/tests/real-gateway-f2-instant-draft.test.ts +0 -82
  266. package/telegram-plugin/tests/real-gateway-f3-late-card.test.ts +0 -114
  267. package/telegram-plugin/tests/real-gateway-harness.ts +0 -699
  268. package/telegram-plugin/tests/real-gateway-i6-turn-flush-replay-dedup.test.ts +0 -313
  269. package/telegram-plugin/tests/real-gateway-ipc-lifecycle.test.ts +0 -299
  270. package/telegram-plugin/tests/real-gateway-spec.test.ts +0 -487
  271. package/telegram-plugin/tests/real-gateway.smoke.test.ts +0 -101
  272. package/telegram-plugin/tests/setup-flow.test.ts +0 -510
  273. package/telegram-plugin/tests/setup-state.test.ts +0 -146
  274. package/telegram-plugin/tests/sync-chat-running-subagents.test.ts +0 -116
  275. package/telegram-plugin/tests/turn-end-regressions.test.ts +0 -489
  276. package/telegram-plugin/tests/turn-flush-card-takeover.test.ts +0 -218
  277. package/telegram-plugin/tests/turn-flush-prose-recovery.test.ts +0 -78
  278. package/telegram-plugin/tests/two-zone-bg-carry-full-lifecycle.test.ts +0 -131
  279. package/telegram-plugin/tests/two-zone-bg-detection.test.ts +0 -120
  280. package/telegram-plugin/tests/two-zone-bg-done-when-all-terminal.test.ts +0 -116
  281. package/telegram-plugin/tests/two-zone-bg-early-turn-end.test.ts +0 -87
  282. package/telegram-plugin/tests/two-zone-bg-survives-next-turn.test.ts +0 -211
  283. package/telegram-plugin/tests/two-zone-card-cap.test.ts +0 -62
  284. package/telegram-plugin/tests/two-zone-card-fleet-row.test.ts +0 -101
  285. package/telegram-plugin/tests/two-zone-card-header-phases.test.ts +0 -78
  286. package/telegram-plugin/tests/two-zone-card-html-balance.test.ts +0 -110
  287. package/telegram-plugin/tests/two-zone-card-lifecycle.test.ts +0 -128
  288. package/telegram-plugin/tests/two-zone-card-sanitise.test.ts +0 -58
  289. package/telegram-plugin/tests/two-zone-card-snapshot.test.ts +0 -133
  290. package/telegram-plugin/tests/two-zone-concurrent-turns-isolation.test.ts +0 -155
  291. package/telegram-plugin/tests/two-zone-phasefor-precedence.test.ts +0 -117
  292. package/telegram-plugin/tests/two-zone-snapshot-extras.test.ts +0 -187
  293. package/telegram-plugin/tests/two-zone-stuck-edit-throttle.test.ts +0 -149
  294. package/telegram-plugin/tests/two-zone-stuck-header-escalation.test.ts +0 -101
  295. package/telegram-plugin/tests/two-zone-stuck-per-member.test.ts +0 -114
  296. package/telegram-plugin/tests/two-zone-stuck-recovery.test.ts +0 -105
  297. package/telegram-plugin/tests/waiting-ux-harness.ts +0 -381
  298. package/telegram-plugin/tests/waiting-ux.e2e.test.ts +0 -233
  299. package/telegram-plugin/turn-flush-prose-recovery.ts +0 -40
  300. package/telegram-plugin/two-zone-card.ts +0 -269
  301. package/telegram-plugin/uat/scenarios/smoke-clerk-reply.test.ts +0 -61
@@ -1,487 +0,0 @@
1
- /**
2
- * Waiting-UX v2 spec — RED tests pinning the new three-class contract.
3
- *
4
- * This is PR 1 of the #553 series. All `describe` blocks here are
5
- * `describe.skip`'d on purpose — these tests author the contract for
6
- * the rewrite, but the production fixes that turn them green land in
7
- * subsequent PRs (2 through 5). Each block carries a `// TODO(#553-PR-N)`
8
- * marker for which PR un-skips it.
9
- *
10
- * Spec contract — three turn classes, gated on tools and elapsed time:
11
- *
12
- * Class A — instant (<2s, NO tools):
13
- * 👀 reaction → answer text. No placeholder. No progress card.
14
- *
15
- * Class B — short (2–60s, tools, NO sub-agents):
16
- * 👀 → ladder reactions (🤔, 🔥, etc.) → answer text streams.
17
- * No placeholder. No progress card.
18
- *
19
- * Class C — long-running (>60s OR sub-agents/background workers):
20
- * 👀 → ladder → progress card appears once
21
- * `(elapsed >= 60s) OR (any sub-agent has appeared)`. Card stays
22
- * pinned-feel until ALL work terminal.
23
- *
24
- * Key invariants:
25
- * - A "background worker" ≡ a sub-agent dispatched with
26
- * `Agent({ run_in_background: true })` — there is no separate concept.
27
- * - The card is gated on `(elapsed >= 60s) OR (any sub-agent appeared)`.
28
- * Tool-use alone NEVER triggers the card.
29
- * - The placeholder strings (`🔵 thinking`, `📚 recalling memories`,
30
- * `💭 thinking`) are removed entirely in PR 5 — none should appear
31
- * in any payload, ever.
32
- * - First-answer-text deadline: <800ms for Class A, TBD by PR 3 for
33
- * Class B/C.
34
- * - Sub-agent header count must equal rendered-list-length (no drift).
35
- *
36
- * RED-state intent: each `it(...)` is authored so that, when un-skipped
37
- * against current main, it FAILS. That failure is the bug. PRs 2–5
38
- * make the failure go away.
39
- *
40
- * PR 2 — kill instant-draft placeholder + early 👀 path
41
- * → un-skips Class A and the ladder/no-placeholder bits of B
42
- * PR 3 — first-answer-text deadline (Class B/C TBD value)
43
- * → un-skips the answer-text-deadline assertions
44
- * PR 4 — card-gate rewrite: `(>=60s) OR (sub-agent appeared)`
45
- * → un-skips Class C card-gate tests + Class B "no card" test
46
- * PR 5 — remove placeholder strings entirely + sub-agent header
47
- * count = list length
48
- * → un-skips the "no placeholder" assertions repo-wide and
49
- * the sub-agent count = list length test
50
- *
51
- * Tracking: #553 (parent series), waiting-ux-spec.md (contract source).
52
- */
53
-
54
- import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
55
- import { createRealGatewayHarness } from './real-gateway-harness.js'
56
-
57
- const CHAT = '8248703757'
58
- const INBOUND_MSG = 100
59
-
60
- // First-answer-text deadlines per spec. Class A is pinned at 1500ms
61
- // (covers the 800ms 👀 deadline + token-stream first chunk). Class
62
- // B/C is pinned at 3000ms in #553 PR 3 — budget = 500ms inbound
63
- // coalesce + ~1s minInitialChars-driven first send + ~1.5s model
64
- // TTFT for short replies.
65
- const CLASS_A_ANSWER_TEXT_DEADLINE_MS = 1500
66
- const CLASS_BC_ANSWER_TEXT_DEADLINE_MS = 3_000
67
-
68
- beforeEach(() => { vi.useFakeTimers() })
69
- afterEach(() => { vi.useRealTimers() })
70
-
71
- // ─── PR 3 — first-answer-text deadlines (Class A & B) ─────────────────────
72
- //
73
- // These two tests are extracted from the Class A / Class B describe.skip
74
- // blocks below and un-skipped in #553 PR 3. The other tests in those
75
- // blocks (no-placeholder, no-card, ladder integrity) remain skipped
76
- // pending PRs 4 & 5. Once those land, the duplicates here can be
77
- // folded back into the parent describes.
78
- describe('v2 spec — PR 3: first-answer-text deadlines', () => {
79
- it(`Class A — first answer text lands within ${CLASS_A_ANSWER_TEXT_DEADLINE_MS}ms of inbound`, async () => {
80
- const h = createRealGatewayHarness({ gapMs: 0 })
81
- const inboundAt = h.clock.now()
82
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
83
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'hi' })
84
- await h.clock.advance(200)
85
- h.feedSessionEvent({ kind: 'thinking' })
86
- await h.clock.advance(300)
87
- await h.streamReply({ chat_id: CHAT, text: 'hello back', done: true })
88
- await h.clock.advance(50)
89
-
90
- const answerAt = h.firstAnswerTextMs(CHAT)
91
- expect(answerAt, 'no answer text recorded').not.toBeNull()
92
- expect((answerAt ?? Infinity) - inboundAt).toBeLessThan(CLASS_A_ANSWER_TEXT_DEADLINE_MS)
93
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 600 })
94
- await h.clock.advance(500)
95
- h.finalize()
96
- })
97
-
98
- it(`Class B — first answer text lands within ${CLASS_BC_ANSWER_TEXT_DEADLINE_MS}ms of inbound`, async () => {
99
- // Use the production default gapMs (500ms after PR 3) so the
100
- // deadline reflects what real users see, not a coalesce-disabled
101
- // best-case.
102
- const h = createRealGatewayHarness({ gapMs: 500 })
103
- const inboundAt = h.clock.now()
104
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'short tool' })
105
- // Coalesce window flush.
106
- await h.clock.advance(500)
107
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'short tool' })
108
- await h.clock.advance(200)
109
- h.feedSessionEvent({ kind: 'thinking' })
110
- await h.clock.advance(300)
111
- h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash', toolUseId: 't1' })
112
- await h.clock.advance(1_000)
113
- h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Bash' })
114
- // Answer text begins streaming as soon as the model resumes.
115
- await h.streamReply({ chat_id: CHAT, text: 'partial...', done: false })
116
- await h.clock.advance(50)
117
-
118
- const answerAt = h.firstAnswerTextMs(CHAT)
119
- expect(answerAt, 'no answer text recorded').not.toBeNull()
120
- expect((answerAt ?? Infinity) - inboundAt).toBeLessThan(CLASS_BC_ANSWER_TEXT_DEADLINE_MS)
121
-
122
- await h.streamReply({ chat_id: CHAT, text: 'partial... done', done: true })
123
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 3_000 })
124
- await h.clock.advance(500)
125
- h.finalize()
126
- })
127
- })
128
-
129
- // ─── PR 4 — card-gate rewrite (Class B no-card; Class C card-gate) ────────
130
- //
131
- // Extracted from the Class B / Class C describe.skip blocks below and
132
- // un-skipped in #553 PR 4. The other tests in those blocks (no-placeholder,
133
- // ladder integrity, sub-agent header count) remain skipped pending PR 5.
134
- // Once PR 5 lands, the duplicates here can be folded back into the parent
135
- // describes.
136
- describe('v2 spec — PR 4: card gate (>=60s) OR (sub-agent appeared)', () => {
137
- it('Class B — emits NO progress card (turn under 60s, no sub-agents)', async () => {
138
- const h = createRealGatewayHarness({ gapMs: 0 })
139
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'short tool turn' })
140
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'short tool turn' })
141
- await h.clock.advance(200)
142
- h.feedSessionEvent({ kind: 'thinking' })
143
- await h.clock.advance(300)
144
- // Two tools, total turn ~10s — well under 60s, no sub-agents.
145
- h.feedSessionEvent({ kind: 'tool_use', toolName: 'Read', toolUseId: 't1' })
146
- await h.clock.advance(3_000)
147
- h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Read' })
148
- h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash', toolUseId: 't2' })
149
- await h.clock.advance(5_000)
150
- h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't2', toolName: 'Bash' })
151
- await h.streamReply({ chat_id: CHAT, text: 'done', done: true })
152
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 9_000 })
153
- await h.clock.advance(500)
154
-
155
- expect(h.expectNoCardSent(CHAT)).toBeNull()
156
- h.finalize()
157
- })
158
-
159
- it('Class C — progress card appears when a sub-agent dispatches (regardless of elapsed time)', async () => {
160
- const h = createRealGatewayHarness({ gapMs: 0 })
161
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'spawn a worker' })
162
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'spawn a worker' })
163
- await h.clock.advance(200)
164
- h.feedSessionEvent({ kind: 'thinking' })
165
- await h.clock.advance(300)
166
- // Sub-agent appears well under the 60s elapsed threshold — the
167
- // card MUST still render because of the sub-agent gate.
168
- h.feedSessionEvent({ kind: 'sub_agent_started', agentId: 'a1', firstPromptText: 'do work' })
169
- await h.clock.advance(2_000)
170
- h.feedSessionEvent({ kind: 'sub_agent_turn_end', agentId: 'a1' })
171
- await h.clock.advance(500)
172
-
173
- expect(h.expectNoCardSent(CHAT), 'card MUST render when a sub-agent dispatches').not.toBeNull()
174
-
175
- await h.streamReply({ chat_id: CHAT, text: 'done', done: true })
176
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 3_000 })
177
- await h.clock.advance(500)
178
- h.finalize()
179
- })
180
-
181
- it('Class C — progress card appears when elapsed >= 60s even without a sub-agent', async () => {
182
- const h = createRealGatewayHarness({ gapMs: 0 })
183
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'long single tool' })
184
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'long single tool' })
185
- await h.clock.advance(200)
186
- h.feedSessionEvent({ kind: 'thinking' })
187
- await h.clock.advance(300)
188
- h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash', toolUseId: 't1' })
189
- // Cross the 60s threshold.
190
- await h.clock.advance(61_000)
191
- h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Bash' })
192
- await h.clock.advance(500)
193
-
194
- expect(h.expectNoCardSent(CHAT), 'card MUST render after 60s elapsed').not.toBeNull()
195
-
196
- await h.streamReply({ chat_id: CHAT, text: 'done', done: true })
197
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 62_000 })
198
- await h.clock.advance(500)
199
- h.finalize()
200
- })
201
- })
202
-
203
- // ─── Class A — instant (<2s, NO tools) ───────────────────────────────────
204
- //
205
- // Un-skipped in #553 PR 5 — the no-placeholder assertions go green
206
- // after PR 5 deletes the production code that emitted placeholder
207
- // strings.
208
- describe('v2 spec — Class A (instant, <2s, no tools)', () => {
209
- it('emits NO placeholder text edits at any point', async () => {
210
- const h = createRealGatewayHarness({ gapMs: 0 })
211
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
212
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'hi' })
213
- await h.clock.advance(200)
214
- h.feedSessionEvent({ kind: 'thinking' })
215
- await h.clock.advance(300)
216
- await h.streamReply({ chat_id: CHAT, text: 'hello back', done: true })
217
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 600 })
218
- await h.clock.advance(500)
219
-
220
- expect(h.expectNoPlaceholderEdits(CHAT)).toEqual([])
221
- h.finalize()
222
- })
223
-
224
- it('emits NO progress card', async () => {
225
- const h = createRealGatewayHarness({ gapMs: 0 })
226
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
227
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'hi' })
228
- await h.clock.advance(200)
229
- h.feedSessionEvent({ kind: 'thinking' })
230
- await h.clock.advance(300)
231
- await h.streamReply({ chat_id: CHAT, text: 'hello back', done: true })
232
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 600 })
233
- await h.clock.advance(500)
234
-
235
- expect(h.expectNoCardSent(CHAT)).toBeNull()
236
- h.finalize()
237
- })
238
-
239
- it('👀 reaction lands within 800ms of inbound', async () => {
240
- const h = createRealGatewayHarness({ gapMs: 1500 })
241
- const inboundAt = h.clock.now()
242
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
243
- await h.clock.advance(800)
244
-
245
- const firstReactionMs = h.recorder.firstReactionMs(CHAT)
246
- expect(firstReactionMs).not.toBeNull()
247
- expect((firstReactionMs ?? Infinity) - inboundAt).toBeLessThan(800)
248
- h.finalize()
249
- })
250
-
251
- it(`first answer text lands within ${CLASS_A_ANSWER_TEXT_DEADLINE_MS}ms of inbound`, async () => {
252
- const h = createRealGatewayHarness({ gapMs: 0 })
253
- const inboundAt = h.clock.now()
254
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
255
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'hi' })
256
- await h.clock.advance(200)
257
- h.feedSessionEvent({ kind: 'thinking' })
258
- await h.clock.advance(300)
259
- await h.streamReply({ chat_id: CHAT, text: 'hello back', done: true })
260
- await h.clock.advance(50)
261
-
262
- const answerAt = h.firstAnswerTextMs(CHAT)
263
- expect(answerAt, 'no answer text recorded').not.toBeNull()
264
- expect((answerAt ?? Infinity) - inboundAt).toBeLessThan(CLASS_A_ANSWER_TEXT_DEADLINE_MS)
265
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 600 })
266
- await h.clock.advance(500)
267
- h.finalize()
268
- })
269
-
270
- it('emits NO `sendMessageDraft`-style placeholder draft sends', async () => {
271
- // Currently the production "instant draft" flow can `sendMessage`
272
- // a placeholder body that gets edited later. The v2 contract
273
- // bans that — the first sendMessage to the user MUST be real
274
- // answer text. We assert this by re-using the placeholder
275
- // helper: any placeholder sendMessage is a draft send.
276
- const h = createRealGatewayHarness({ gapMs: 0 })
277
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
278
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'hi' })
279
- await h.clock.advance(200)
280
- h.feedSessionEvent({ kind: 'thinking' })
281
- await h.clock.advance(300)
282
- await h.streamReply({ chat_id: CHAT, text: 'hello back', done: true })
283
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 600 })
284
- await h.clock.advance(500)
285
-
286
- const draftSends = h
287
- .expectNoPlaceholderEdits(CHAT)
288
- .filter((c) => c.kind === 'sendMessage')
289
- expect(draftSends).toEqual([])
290
- h.finalize()
291
- })
292
- })
293
-
294
- // ─── Class B — short (2–60s, tools, no sub-agents) ───────────────────────
295
- //
296
- // Un-skipped in #553 PR 5. The no-placeholder + ladder-integrity
297
- // assertions go green once the placeholder code is deleted (PR 5);
298
- // no-card and answer-text-deadline assertions were already covered
299
- // by the PR-3 / PR-4 describe blocks above.
300
- describe('v2 spec — Class B (short, 2–60s, tools, no sub-agents)', () => {
301
- it('emits NO placeholder text edits', async () => {
302
- const h = createRealGatewayHarness({ gapMs: 0 })
303
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'do a thing' })
304
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'do a thing' })
305
- await h.clock.advance(200)
306
- h.feedSessionEvent({ kind: 'thinking' })
307
- await h.clock.advance(300)
308
- h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash', toolUseId: 't1' })
309
- await h.clock.advance(3_000)
310
- h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Bash' })
311
- await h.streamReply({ chat_id: CHAT, text: 'all done', done: true })
312
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 4_000 })
313
- await h.clock.advance(500)
314
-
315
- expect(h.expectNoPlaceholderEdits(CHAT)).toEqual([])
316
- h.finalize()
317
- })
318
-
319
- it('emits NO progress card (turn under 60s, no sub-agents)', async () => {
320
- const h = createRealGatewayHarness({ gapMs: 0 })
321
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'short tool turn' })
322
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'short tool turn' })
323
- await h.clock.advance(200)
324
- h.feedSessionEvent({ kind: 'thinking' })
325
- await h.clock.advance(300)
326
- // Two tools, total turn ~10s — well under 60s, no sub-agents.
327
- h.feedSessionEvent({ kind: 'tool_use', toolName: 'Read', toolUseId: 't1' })
328
- await h.clock.advance(3_000)
329
- h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Read' })
330
- h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash', toolUseId: 't2' })
331
- await h.clock.advance(5_000)
332
- h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't2', toolName: 'Bash' })
333
- await h.streamReply({ chat_id: CHAT, text: 'done', done: true })
334
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 9_000 })
335
- await h.clock.advance(500)
336
-
337
- expect(h.expectNoCardSent(CHAT)).toBeNull()
338
- h.finalize()
339
- })
340
-
341
- it('ladder integrity: 👀 → at least one tool reaction → 👍 (no straight-to-👍 collapse)', async () => {
342
- const h = createRealGatewayHarness({ gapMs: 0 })
343
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'ladder' })
344
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'ladder' })
345
- await h.clock.advance(200)
346
- h.feedSessionEvent({ kind: 'thinking' })
347
- await h.clock.advance(300)
348
- h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash', toolUseId: 't1' })
349
- await h.clock.advance(3_000)
350
- h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Bash' })
351
- await h.streamReply({ chat_id: CHAT, text: 'done', done: true })
352
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 4_000 })
353
- await h.clock.advance(1_500)
354
-
355
- const seq = h.recorder.reactionSequence()
356
- // Dedupe consecutive duplicates (early-ack + setQueued both emit 👀).
357
- const ladder: string[] = []
358
- for (const e of seq) if (ladder[ladder.length - 1] !== e) ladder.push(e)
359
- expect(ladder[0]).toBe('👀')
360
- expect(ladder[ladder.length - 1]).toBe('👍')
361
- expect(ladder.length).toBeGreaterThanOrEqual(3)
362
- h.finalize()
363
- })
364
-
365
- it(`first answer text lands within ${CLASS_BC_ANSWER_TEXT_DEADLINE_MS}ms of inbound`, async () => {
366
- const h = createRealGatewayHarness({ gapMs: 0 })
367
- const inboundAt = h.clock.now()
368
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'short tool' })
369
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'short tool' })
370
- await h.clock.advance(200)
371
- h.feedSessionEvent({ kind: 'thinking' })
372
- await h.clock.advance(300)
373
- h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash', toolUseId: 't1' })
374
- await h.clock.advance(2_000)
375
- h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Bash' })
376
- // Answer text begins streaming as soon as the model resumes — pin
377
- // the deadline to the spec value (TBD: PR 3 may tighten).
378
- await h.streamReply({ chat_id: CHAT, text: 'partial...', done: false })
379
- await h.clock.advance(50)
380
-
381
- const answerAt = h.firstAnswerTextMs(CHAT)
382
- expect(answerAt, 'no answer text recorded').not.toBeNull()
383
- expect((answerAt ?? Infinity) - inboundAt).toBeLessThan(CLASS_BC_ANSWER_TEXT_DEADLINE_MS)
384
-
385
- await h.streamReply({ chat_id: CHAT, text: 'partial... done', done: true })
386
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 3_000 })
387
- await h.clock.advance(500)
388
- h.finalize()
389
- })
390
- })
391
-
392
- // ─── Class C — long-running (>60s OR sub-agents/background workers) ───────
393
- //
394
- // Un-skipped in #553 PR 5. No-placeholder assertions go green via the
395
- // PR-5 deletion of placeholder code; sub-agent header == list length
396
- // goes green via the PR-2 sub-agent count fix (#580).
397
- describe('v2 spec — Class C (long-running OR sub-agents)', () => {
398
- it('progress card appears when a sub-agent dispatches (regardless of elapsed time)', async () => {
399
- const h = createRealGatewayHarness({ gapMs: 0 })
400
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'spawn a worker' })
401
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'spawn a worker' })
402
- await h.clock.advance(200)
403
- h.feedSessionEvent({ kind: 'thinking' })
404
- await h.clock.advance(300)
405
- // Sub-agent appears well under the 60s elapsed threshold — the
406
- // card MUST still render because of the sub-agent gate.
407
- h.feedSessionEvent({ kind: 'sub_agent_started', agentId: 'a1', firstPromptText: 'do work' })
408
- await h.clock.advance(2_000)
409
- h.feedSessionEvent({ kind: 'sub_agent_turn_end', agentId: 'a1' })
410
- await h.clock.advance(500)
411
-
412
- expect(h.expectNoCardSent(CHAT), 'card MUST render when a sub-agent dispatches').not.toBeNull()
413
-
414
- await h.streamReply({ chat_id: CHAT, text: 'done', done: true })
415
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 3_000 })
416
- await h.clock.advance(500)
417
- h.finalize()
418
- })
419
-
420
- it('progress card appears when elapsed >= 60s even without a sub-agent', async () => {
421
- const h = createRealGatewayHarness({ gapMs: 0 })
422
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'long single tool' })
423
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'long single tool' })
424
- await h.clock.advance(200)
425
- h.feedSessionEvent({ kind: 'thinking' })
426
- await h.clock.advance(300)
427
- h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash', toolUseId: 't1' })
428
- // Cross the 60s threshold.
429
- await h.clock.advance(61_000)
430
- h.feedSessionEvent({ kind: 'tool_result', toolUseId: 't1', toolName: 'Bash' })
431
- await h.clock.advance(500)
432
-
433
- expect(h.expectNoCardSent(CHAT), 'card MUST render after 60s elapsed').not.toBeNull()
434
-
435
- await h.streamReply({ chat_id: CHAT, text: 'done', done: true })
436
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 62_000 })
437
- await h.clock.advance(500)
438
- h.finalize()
439
- })
440
-
441
- it('card stays pinned-feel: not marked Done while any sub-agent is in flight', async () => {
442
- const h = createRealGatewayHarness({ gapMs: 0 })
443
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'fanout' })
444
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'fanout' })
445
- await h.clock.advance(200)
446
- h.feedSessionEvent({ kind: 'thinking' })
447
- await h.clock.advance(300)
448
- h.feedSessionEvent({ kind: 'sub_agent_started', agentId: 'a1', firstPromptText: 'first' })
449
- h.feedSessionEvent({ kind: 'sub_agent_started', agentId: 'a2', firstPromptText: 'second' })
450
- await h.clock.advance(2_000)
451
- h.feedSessionEvent({ kind: 'sub_agent_turn_end', agentId: 'a1' })
452
- // a2 still in flight — the card must NOT show Done yet, even though
453
- // the parent turn could complete.
454
- await h.clock.advance(500)
455
- const editsBeforeA2Done = h.recorder.edits(CHAT).map((e) => e.payload ?? '')
456
- const sawPrematureDone = editsBeforeA2Done.some((p) => /done/i.test(p) && !/working/i.test(p))
457
- expect(sawPrematureDone, 'card marked Done while a sub-agent was still running').toBe(false)
458
-
459
- h.feedSessionEvent({ kind: 'sub_agent_turn_end', agentId: 'a2' })
460
- await h.streamReply({ chat_id: CHAT, text: 'all done', done: true })
461
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 3_000 })
462
- await h.clock.advance(500)
463
- h.finalize()
464
- })
465
-
466
- it('emits NO placeholder text edits across the full turn', async () => {
467
- const h = createRealGatewayHarness({ gapMs: 0 })
468
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'long with workers' })
469
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'long with workers' })
470
- await h.clock.advance(200)
471
- h.feedSessionEvent({ kind: 'thinking' })
472
- await h.clock.advance(300)
473
- h.feedSessionEvent({ kind: 'sub_agent_started', agentId: 'a1', firstPromptText: 'work' })
474
- await h.clock.advance(2_000)
475
- h.feedSessionEvent({ kind: 'sub_agent_turn_end', agentId: 'a1' })
476
- await h.streamReply({ chat_id: CHAT, text: 'done', done: true })
477
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 3_000 })
478
- await h.clock.advance(500)
479
-
480
- expect(h.expectNoPlaceholderEdits(CHAT)).toEqual([])
481
- h.finalize()
482
- })
483
-
484
- // P4 cutover (#662): legacy "<blockquote expandable>" row-count test deleted.
485
- // The two-zone v2 renderer's row-count invariant is covered by
486
- // tests/two-zone-card-cap.test.ts and tests/two-zone-card-snapshot.test.ts.
487
- })
@@ -1,101 +0,0 @@
1
- /**
2
- * Real-gateway harness — smoke tests.
3
- *
4
- * Pin the wiring of `real-gateway-harness.ts` works end-to-end before
5
- * the F1–F4 tests build on it. These tests assert behaviour the harness
6
- * MUST exhibit for the F-tests to be meaningful:
7
- *
8
- * 1. inbound() routes through the real coalescer (👀 fires only after
9
- * the gap window, not synchronously).
10
- * 2. gapMs=0 bypasses the buffer (👀 fires immediately).
11
- * 3. Multiple inbounds within the gap merge into a single flush.
12
- * 4. Controller + driver still work for session-event feeds (Phase 1
13
- * contract still holds).
14
- *
15
- * Same fake-timers + recorder pattern as `waiting-ux.e2e.test.ts`.
16
- */
17
-
18
- import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
19
- import { createRealGatewayHarness } from './real-gateway-harness.js'
20
-
21
- const CHAT = '8248703757'
22
- const INBOUND_MSG = 100
23
-
24
- beforeEach(() => { vi.useFakeTimers() })
25
- afterEach(() => { vi.useRealTimers() })
26
-
27
- describe('real-gateway harness — smoke', () => {
28
- it('inbound() fires 👀 immediately on raw arrival (F2 early-ack), even with coalesce wait pending', async () => {
29
- const h = createRealGatewayHarness({ gapMs: 1500 })
30
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
31
- // Microtask flush only — no real time has passed beyond the void
32
- // setMessageReaction Promise resolving on the next microtask.
33
- await h.clock.advance(0)
34
- expect(h.recorder.firstReactionMs(CHAT)).not.toBeNull()
35
- expect(h.recorder.reactionSequence()[0]).toBe('👀')
36
- // Coalesce buffer still holds the message — only the reaction fired
37
- // early; the actual handleInbound dispatch waits for the gap.
38
- expect(h.coalesceBufferSize()).toBe(1)
39
- h.finalize()
40
- })
41
-
42
- it('after gapMs elapses, the flush fires controller.setQueued (Telegram dedupes the duplicate 👀)', async () => {
43
- const h = createRealGatewayHarness({ gapMs: 1500 })
44
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
45
- await h.clock.advance(1500)
46
- expect(h.recorder.firstReactionMs(CHAT)).not.toBeNull()
47
- // Reaction sequence carries TWO 👀: the early-ack + the controller's
48
- // post-flush setQueued(). Real Telegram dedupes (same emoji = no
49
- // visible change). Tests asserting ladder integrity should dedupe
50
- // consecutive duplicates before checking the sequence.
51
- expect(h.recorder.reactionSequence()[0]).toBe('👀')
52
- expect(h.coalesceBufferSize()).toBe(0)
53
- h.finalize()
54
- })
55
-
56
- it('gapMs=0 bypasses the buffer (👀 fires immediately on first paint)', async () => {
57
- const h = createRealGatewayHarness({ gapMs: 0 })
58
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
59
- await h.clock.advance(0)
60
- expect(h.recorder.firstReactionMs(CHAT)).not.toBeNull()
61
- expect(h.coalesceBufferSize()).toBe(0)
62
- h.finalize()
63
- })
64
-
65
- it('multiple inbounds within the gap window merge into one flush (sliding timer resets)', async () => {
66
- const h = createRealGatewayHarness({ gapMs: 1500 })
67
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'one' })
68
- await h.clock.advance(1000)
69
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG + 1, text: 'two' })
70
- // First inbound's early-ack already fired 👀 by here — that's the F2 win.
71
- expect(h.recorder.firstReactionMs(CHAT)).not.toBeNull()
72
- await h.clock.advance(1000) // 1s after 'two' — still buffered
73
- expect(h.coalesceBufferSize()).toBe(1)
74
- await h.clock.advance(500) // 1.5s after 'two' — flush
75
- expect(h.coalesceBufferSize()).toBe(0)
76
- // The mid-turn 'two' inbound is suppressed by the activeTurns gate
77
- // (turn started on flush of 'one'... but here the flush is at the
78
- // END so 'one' alone never had a flush; both are coalesced into one
79
- // turn). So only the FIRST inbound's early-ack fires; 'two' lands
80
- // before any turn started, but the early-ack still counts it as a
81
- // fresh-turn ack on the same key. Only one 👀 emoji per coalesce
82
- // turn after the controller dedupes. Test simplifies to: at least one
83
- // 👀 fired, but multiple are tolerated (Telegram dedupes by emoji).
84
- expect(h.recorder.reactionSequence().filter((e) => e === '👀').length).toBeGreaterThanOrEqual(1)
85
- h.finalize()
86
- })
87
-
88
- it('Phase 1 contract still holds — feedSessionEvent drives controller transitions', async () => {
89
- const h = createRealGatewayHarness({ gapMs: 0 }) // bypass coalesce for this isolation test
90
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
91
- await h.clock.advance(0)
92
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'hi' })
93
- await h.clock.advance(50)
94
- h.feedSessionEvent({ kind: 'thinking' })
95
- await h.clock.advance(50)
96
- // Status reaction debounce (default 700ms) must elapse for transitions to land.
97
- await h.clock.advance(800)
98
- expect(h.recorder.reactionSequence()).toContain('👀')
99
- h.finalize()
100
- })
101
- })