switchroom 0.7.15 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (301) hide show
  1. package/README.md +51 -59
  2. package/bin/run-hook.sh +27 -11
  3. package/bin/timezone-hook.sh +9 -7
  4. package/dist/agent-scheduler/index.js +410 -133
  5. package/dist/auth-broker/index.js +13932 -0
  6. package/dist/cli/switchroom.js +26937 -5601
  7. package/dist/host-control/main.js +12702 -0
  8. package/dist/vault/approvals/kernel-server.js +467 -184
  9. package/dist/vault/broker/server.js +1430 -724
  10. package/examples/minimal.yaml +63 -0
  11. package/examples/personal-google-workspace-mcp/.env.example +34 -0
  12. package/examples/personal-google-workspace-mcp/README.md +194 -0
  13. package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
  14. package/examples/switchroom.yaml +220 -0
  15. package/package.json +7 -4
  16. package/profiles/_base/settings.json.hbs +20 -5
  17. package/profiles/_base/start.sh.hbs +16 -3
  18. package/profiles/_shared/agent-self-service.md.hbs +126 -0
  19. package/profiles/_shared/telegram-style.md.hbs +20 -90
  20. package/profiles/_shared/vault-protocol.md.hbs +68 -0
  21. package/profiles/default/CLAUDE.md +50 -96
  22. package/profiles/default/CLAUDE.md.hbs +36 -6
  23. package/profiles/default/workspace/SOUL.md.hbs +12 -5
  24. package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
  25. package/skills/buildkite-agent-runtime/SKILL.md +44 -11
  26. package/skills/buildkite-api/SKILL.md +31 -8
  27. package/skills/buildkite-cli/SKILL.md +27 -9
  28. package/skills/buildkite-migration/SKILL.md +22 -9
  29. package/skills/buildkite-pipelines/SKILL.md +26 -9
  30. package/skills/buildkite-secure-delivery/SKILL.md +23 -9
  31. package/skills/buildkite-test-engine/SKILL.md +25 -8
  32. package/skills/docx/SKILL.md +1 -1
  33. package/skills/docx/scripts/office/validators/__pycache__/__init__.cpython-313.pyc +0 -0
  34. package/skills/docx/scripts/office/validators/__pycache__/base.cpython-313.pyc +0 -0
  35. package/skills/file-bug/SKILL.md +34 -6
  36. package/skills/humanizer/SKILL.md +15 -0
  37. package/skills/humanizer-calibrate/SKILL.md +7 -1
  38. package/skills/mcp-builder/SKILL.md +1 -1
  39. package/skills/pdf/SKILL.md +1 -1
  40. package/skills/pptx/SKILL.md +1 -1
  41. package/skills/skill-creator/SKILL.md +21 -1
  42. package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
  43. package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
  44. package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
  45. package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
  46. package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
  47. package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
  48. package/skills/switchroom-cli/SKILL.md +63 -64
  49. package/skills/switchroom-health/SKILL.md +23 -10
  50. package/skills/switchroom-install/SKILL.md +3 -3
  51. package/skills/switchroom-manage/SKILL.md +26 -19
  52. package/skills/switchroom-runtime/SKILL.md +191 -0
  53. package/skills/switchroom-status/SKILL.md +27 -2
  54. package/skills/telegram-test-harness/SKILL.md +3 -0
  55. package/skills/token-helpers/SKILL.md +24 -1
  56. package/skills/webapp-testing/SKILL.md +31 -1
  57. package/skills/xlsx/SKILL.md +1 -1
  58. package/telegram-plugin/admin-commands/index.ts +7 -5
  59. package/telegram-plugin/analytics-posthog.ts +191 -0
  60. package/telegram-plugin/bridge/bridge.ts +69 -0
  61. package/telegram-plugin/bridge/ipc-client.ts +4 -1
  62. package/telegram-plugin/dist/bridge/bridge.js +194 -119
  63. package/telegram-plugin/dist/gateway/gateway.js +23611 -19671
  64. package/telegram-plugin/dist/server.js +245 -189
  65. package/telegram-plugin/first-paint.ts +3 -24
  66. package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
  67. package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
  68. package/telegram-plugin/gateway/auth-command.ts +794 -0
  69. package/telegram-plugin/gateway/auth-line.ts +123 -0
  70. package/telegram-plugin/gateway/boot-card.ts +169 -40
  71. package/telegram-plugin/gateway/boot-issue-cache.ts +308 -0
  72. package/telegram-plugin/gateway/boot-probes.ts +166 -123
  73. package/telegram-plugin/gateway/boot-reason.ts +41 -7
  74. package/telegram-plugin/gateway/boot-version.ts +66 -0
  75. package/telegram-plugin/gateway/gateway.ts +3499 -1885
  76. package/telegram-plugin/gateway/hostd-dispatch.ts +117 -0
  77. package/telegram-plugin/gateway/ipc-protocol.ts +18 -0
  78. package/telegram-plugin/gateway/pending-inbound-buffer.ts +106 -0
  79. package/telegram-plugin/gateway/quarantine.ts +69 -0
  80. package/telegram-plugin/gateway/quota-cache.ts +9 -4
  81. package/telegram-plugin/gateway/reaction-trigger.ts +401 -0
  82. package/telegram-plugin/gateway/recent-denials.test.ts +103 -0
  83. package/telegram-plugin/gateway/recent-denials.ts +77 -0
  84. package/telegram-plugin/gateway/startup-network-retry.ts +109 -31
  85. package/telegram-plugin/gateway/vault-grant-inbound-builders.ts +125 -0
  86. package/telegram-plugin/history.ts +91 -0
  87. package/telegram-plugin/hooks/hooks.json +10 -0
  88. package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +130 -0
  89. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +19 -2
  90. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +22 -2
  91. package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
  92. package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
  93. package/telegram-plugin/inbound-classifier.ts +50 -0
  94. package/telegram-plugin/inline-keyboard-callbacks.ts +136 -0
  95. package/telegram-plugin/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json +1 -0
  96. package/telegram-plugin/package.json +4 -2
  97. package/telegram-plugin/permission-rule.ts +51 -0
  98. package/telegram-plugin/permission-title.ts +56 -0
  99. package/telegram-plugin/quota-check.ts +19 -41
  100. package/telegram-plugin/registry/reaper.ts +223 -0
  101. package/telegram-plugin/retry-api-call.ts +80 -0
  102. package/telegram-plugin/runtime-metrics.ts +177 -0
  103. package/telegram-plugin/scripts/build.mjs +0 -1
  104. package/telegram-plugin/secret-detect/index.ts +24 -0
  105. package/telegram-plugin/secret-detect/vault-error.test.ts +64 -12
  106. package/telegram-plugin/secret-detect/vault-error.ts +78 -11
  107. package/telegram-plugin/secret-detect/vault-write.ts +14 -2
  108. package/telegram-plugin/server.js +41795 -0
  109. package/telegram-plugin/session-tail.ts +6 -1
  110. package/telegram-plugin/shared/bot-runtime.ts +5 -4
  111. package/telegram-plugin/silence-poke.ts +420 -0
  112. package/telegram-plugin/silent-end.ts +174 -0
  113. package/telegram-plugin/stream-controller.ts +13 -0
  114. package/telegram-plugin/stream-reply-handler.ts +7 -0
  115. package/telegram-plugin/subagent-watcher.ts +213 -4
  116. package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
  117. package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
  118. package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
  119. package/telegram-plugin/tests/boot-card-issue-dedup.test.ts +247 -0
  120. package/telegram-plugin/tests/boot-card-reason-to-render.test.ts +182 -0
  121. package/telegram-plugin/tests/boot-card-reason.test.ts +65 -2
  122. package/telegram-plugin/tests/boot-card-render.test.ts +146 -0
  123. package/telegram-plugin/tests/boot-card-silent-on-operator.test.ts +103 -0
  124. package/telegram-plugin/tests/boot-probes.test.ts +216 -10
  125. package/telegram-plugin/tests/boot-version-string.test.ts +0 -0
  126. package/telegram-plugin/tests/finalize-callback.test.ts +190 -0
  127. package/telegram-plugin/tests/gateway-message-validator.test.ts +26 -0
  128. package/telegram-plugin/tests/gateway-secret-detect.test.ts +12 -3
  129. package/telegram-plugin/tests/gateway-startup-network-retry.test.ts +104 -0
  130. package/telegram-plugin/tests/history-reaper.test.ts +378 -0
  131. package/telegram-plugin/tests/hostd-dispatch.test.ts +129 -0
  132. package/telegram-plugin/tests/inbound-classifier.test.ts +76 -0
  133. package/telegram-plugin/tests/inbound-message-types.test.ts +267 -0
  134. package/telegram-plugin/tests/issues-card.test.ts +49 -0
  135. package/telegram-plugin/tests/pending-inbound-buffer.test.ts +132 -0
  136. package/telegram-plugin/tests/permission-rule.test.ts +80 -1
  137. package/telegram-plugin/tests/permission-title.test.ts +31 -0
  138. package/telegram-plugin/tests/quota-check.test.ts +5 -35
  139. package/telegram-plugin/tests/races.test.ts +179 -0
  140. package/telegram-plugin/tests/reaction-trigger-flow.test.ts +353 -0
  141. package/telegram-plugin/tests/reaction-trigger.test.ts +397 -0
  142. package/telegram-plugin/tests/retry-api-call.test.ts +152 -1
  143. package/telegram-plugin/tests/runtime-metrics.test.ts +145 -0
  144. package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +155 -0
  145. package/telegram-plugin/tests/secret-detect-delete-must-surface-failures.test.ts +133 -0
  146. package/telegram-plugin/tests/secret-detect-false-positives.test.ts +137 -0
  147. package/telegram-plugin/tests/silence-poke.test.ts +493 -0
  148. package/telegram-plugin/tests/silent-end.test.ts +206 -0
  149. package/telegram-plugin/tests/subagent-tracker-hooks.test.ts +107 -0
  150. package/telegram-plugin/tests/subagent-watcher-env-thresholds.test.ts +224 -0
  151. package/telegram-plugin/tests/subagent-watcher-stall-terminal.test.ts +316 -0
  152. package/telegram-plugin/tests/subagent-watcher.test.ts +263 -0
  153. package/telegram-plugin/tests/turn-signal-tracker.test.ts +81 -0
  154. package/telegram-plugin/tests/vault-approval-posture.test.ts +256 -0
  155. package/telegram-plugin/tests/vault-grant-auto-resume.test.ts +73 -0
  156. package/telegram-plugin/tests/vault-grant-inbound-builders.test.ts +226 -0
  157. package/telegram-plugin/tests/vault-grant-union.test.ts +130 -0
  158. package/telegram-plugin/tests/vault-key-regex-allows-slash.test.ts +140 -0
  159. package/telegram-plugin/tests/vault-posture-quarantine.test.ts +104 -0
  160. package/telegram-plugin/tests/vault-request-access-tool.test.ts +114 -0
  161. package/telegram-plugin/tests/vault-request-access-unlock-resume.test.ts +106 -0
  162. package/telegram-plugin/turn-signal-tracker.ts +100 -24
  163. package/telegram-plugin/uat/SETUP.md +210 -35
  164. package/telegram-plugin/uat/assertions.ts +264 -37
  165. package/telegram-plugin/uat/driver-info.ts +57 -0
  166. package/telegram-plugin/uat/driver.ts +590 -51
  167. package/telegram-plugin/uat/harness.ts +140 -94
  168. package/telegram-plugin/uat/load-env.test.ts +72 -0
  169. package/telegram-plugin/uat/load-env.ts +48 -0
  170. package/telegram-plugin/uat/login.ts +96 -53
  171. package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
  172. package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
  173. package/telegram-plugin/uat/runners/report.ts +150 -0
  174. package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
  175. package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
  176. package/telegram-plugin/uat/runners/scorer.ts +106 -0
  177. package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
  178. package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
  179. package/telegram-plugin/uat/scenarios/ask-user-button-tap-dm.test.ts +141 -0
  180. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +191 -0
  181. package/telegram-plugin/uat/scenarios/fuzz-extended-dm.test.ts +255 -0
  182. package/telegram-plugin/uat/scenarios/fuzz-human-style-dm.test.ts +275 -0
  183. package/telegram-plugin/uat/scenarios/fuzz-random-prompts-dm.test.ts +146 -0
  184. package/telegram-plugin/uat/scenarios/fuzz-status-ask-dm.test.ts +486 -0
  185. package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +67 -0
  186. package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +100 -0
  187. package/telegram-plugin/uat/scenarios/jtbd-soft-commit-dm.test.ts +67 -0
  188. package/telegram-plugin/uat/scenarios/jtbd-status-query-dm.test.ts +49 -0
  189. package/telegram-plugin/uat/scenarios/location-inbound-dm.test.ts +65 -0
  190. package/telegram-plugin/uat/scenarios/midturn-silent-dm.test.ts +175 -0
  191. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +142 -0
  192. package/telegram-plugin/uat/scenarios/reactions-trigger-turn-dm.test.ts +96 -0
  193. package/telegram-plugin/uat/scenarios/secret-redaction-deletes-original-dm.test.ts +123 -0
  194. package/telegram-plugin/uat/scenarios/secret-redaction-no-false-positive-dm.test.ts +87 -0
  195. package/telegram-plugin/uat/scenarios/silence-poke-soft-dm.test.ts +155 -0
  196. package/telegram-plugin/uat/scenarios/silent-end-recovery-dm.test.ts +95 -0
  197. package/telegram-plugin/uat/scenarios/smoke-dm-reply.test.ts +57 -0
  198. package/telegram-plugin/uat/scenarios/subagent-watcher-no-rerun-dm.test.ts +135 -0
  199. package/telegram-plugin/uat/scenarios/vault-approval-posture-telegram-id-dm.test.ts +191 -0
  200. package/telegram-plugin/uat/scenarios/vault-audit-allow-dm.test.ts +108 -0
  201. package/telegram-plugin/uat/scenarios/vault-grant-auto-resume-dm.test.ts +121 -0
  202. package/telegram-plugin/uat/scenarios/vault-request-access-concurrent-dm.test.ts +161 -0
  203. package/telegram-plugin/uat/scenarios/vault-request-access-end-to-end-dm.test.ts +158 -0
  204. package/telegram-plugin/uat/scenarios/voice-inbound-dm.test.ts +65 -0
  205. package/telegram-plugin/vault-approval-posture.ts +42 -0
  206. package/telegram-plugin/welcome-text.ts +1 -0
  207. package/telegram-plugin/active-pins-sweep.ts +0 -204
  208. package/telegram-plugin/active-pins.ts +0 -146
  209. package/telegram-plugin/auth-dashboard.ts +0 -1104
  210. package/telegram-plugin/auth-slot-parser.ts +0 -497
  211. package/telegram-plugin/card-event-log.ts +0 -138
  212. package/telegram-plugin/dist/foreman/foreman.js +0 -31106
  213. package/telegram-plugin/docs/multi-agent-card-design.md +0 -847
  214. package/telegram-plugin/docs/pinned-progress-card-reliability.md +0 -144
  215. package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
  216. package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
  217. package/telegram-plugin/foreman/foreman.ts +0 -1165
  218. package/telegram-plugin/foreman/setup-flow.ts +0 -345
  219. package/telegram-plugin/foreman/setup-state.ts +0 -239
  220. package/telegram-plugin/foreman/state.ts +0 -203
  221. package/telegram-plugin/pin-event-log.ts +0 -76
  222. package/telegram-plugin/progress-card-driver.ts +0 -2886
  223. package/telegram-plugin/progress-card-pin-manager.ts +0 -589
  224. package/telegram-plugin/progress-card-pin-watchdog.ts +0 -98
  225. package/telegram-plugin/progress-card.ts +0 -1409
  226. package/telegram-plugin/tests/HARNESS.md +0 -340
  227. package/telegram-plugin/tests/_progress-card-harness.ts +0 -109
  228. package/telegram-plugin/tests/active-pins-boot-reaper.test.ts +0 -211
  229. package/telegram-plugin/tests/active-pins-sweep.test.ts +0 -309
  230. package/telegram-plugin/tests/active-pins.test.ts +0 -187
  231. package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
  232. package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
  233. package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
  234. package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
  235. package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
  236. package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
  237. package/telegram-plugin/tests/bg-agent-progress-card-757.test.ts +0 -201
  238. package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
  239. package/telegram-plugin/tests/card-event-log.test.ts +0 -145
  240. package/telegram-plugin/tests/first-paint.test.ts +0 -257
  241. package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
  242. package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
  243. package/telegram-plugin/tests/foreman-state.test.ts +0 -164
  244. package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
  245. package/telegram-plugin/tests/harness-ordering-invariants.test.ts +0 -243
  246. package/telegram-plugin/tests/pin-event-log.test.ts +0 -124
  247. package/telegram-plugin/tests/progress-card-api-failure-during-deferred.test.ts +0 -73
  248. package/telegram-plugin/tests/progress-card-close-paths-converge.test.ts +0 -272
  249. package/telegram-plugin/tests/progress-card-cross-turn.test.ts +0 -258
  250. package/telegram-plugin/tests/progress-card-delay-842.test.ts +0 -160
  251. package/telegram-plugin/tests/progress-card-dispose-preservepending.test.ts +0 -81
  252. package/telegram-plugin/tests/progress-card-draft-flag.test.ts +0 -80
  253. package/telegram-plugin/tests/progress-card-driver-eviction.test.ts +0 -215
  254. package/telegram-plugin/tests/progress-card-driver-fleet-shadow.test.ts +0 -123
  255. package/telegram-plugin/tests/progress-card-driver-force-complete-parent-done.test.ts +0 -76
  256. package/telegram-plugin/tests/progress-card-edit-timestamps-budget.test.ts +0 -62
  257. package/telegram-plugin/tests/progress-card-memory-bounds.test.ts +0 -84
  258. package/telegram-plugin/tests/progress-card-pin-failure-paths.test.ts +0 -139
  259. package/telegram-plugin/tests/progress-card-pin-manager.test.ts +0 -773
  260. package/telegram-plugin/tests/progress-card-pin-race-fast-turn.test.ts +0 -66
  261. package/telegram-plugin/tests/progress-card-pin-sidecar-partial-write.test.ts +0 -64
  262. package/telegram-plugin/tests/progress-card-pin-watchdog.test.ts +0 -190
  263. package/telegram-plugin/tests/progress-card-sigterm-pin-flush.test.ts +0 -146
  264. package/telegram-plugin/tests/real-gateway-f1-ladder-integrity.test.ts +0 -123
  265. package/telegram-plugin/tests/real-gateway-f2-instant-draft.test.ts +0 -82
  266. package/telegram-plugin/tests/real-gateway-f3-late-card.test.ts +0 -114
  267. package/telegram-plugin/tests/real-gateway-harness.ts +0 -699
  268. package/telegram-plugin/tests/real-gateway-i6-turn-flush-replay-dedup.test.ts +0 -313
  269. package/telegram-plugin/tests/real-gateway-ipc-lifecycle.test.ts +0 -299
  270. package/telegram-plugin/tests/real-gateway-spec.test.ts +0 -487
  271. package/telegram-plugin/tests/real-gateway.smoke.test.ts +0 -101
  272. package/telegram-plugin/tests/setup-flow.test.ts +0 -510
  273. package/telegram-plugin/tests/setup-state.test.ts +0 -146
  274. package/telegram-plugin/tests/sync-chat-running-subagents.test.ts +0 -116
  275. package/telegram-plugin/tests/turn-end-regressions.test.ts +0 -489
  276. package/telegram-plugin/tests/turn-flush-card-takeover.test.ts +0 -218
  277. package/telegram-plugin/tests/turn-flush-prose-recovery.test.ts +0 -78
  278. package/telegram-plugin/tests/two-zone-bg-carry-full-lifecycle.test.ts +0 -131
  279. package/telegram-plugin/tests/two-zone-bg-detection.test.ts +0 -120
  280. package/telegram-plugin/tests/two-zone-bg-done-when-all-terminal.test.ts +0 -116
  281. package/telegram-plugin/tests/two-zone-bg-early-turn-end.test.ts +0 -87
  282. package/telegram-plugin/tests/two-zone-bg-survives-next-turn.test.ts +0 -211
  283. package/telegram-plugin/tests/two-zone-card-cap.test.ts +0 -62
  284. package/telegram-plugin/tests/two-zone-card-fleet-row.test.ts +0 -101
  285. package/telegram-plugin/tests/two-zone-card-header-phases.test.ts +0 -78
  286. package/telegram-plugin/tests/two-zone-card-html-balance.test.ts +0 -110
  287. package/telegram-plugin/tests/two-zone-card-lifecycle.test.ts +0 -128
  288. package/telegram-plugin/tests/two-zone-card-sanitise.test.ts +0 -58
  289. package/telegram-plugin/tests/two-zone-card-snapshot.test.ts +0 -133
  290. package/telegram-plugin/tests/two-zone-concurrent-turns-isolation.test.ts +0 -155
  291. package/telegram-plugin/tests/two-zone-phasefor-precedence.test.ts +0 -117
  292. package/telegram-plugin/tests/two-zone-snapshot-extras.test.ts +0 -187
  293. package/telegram-plugin/tests/two-zone-stuck-edit-throttle.test.ts +0 -149
  294. package/telegram-plugin/tests/two-zone-stuck-header-escalation.test.ts +0 -101
  295. package/telegram-plugin/tests/two-zone-stuck-per-member.test.ts +0 -114
  296. package/telegram-plugin/tests/two-zone-stuck-recovery.test.ts +0 -105
  297. package/telegram-plugin/tests/waiting-ux-harness.ts +0 -381
  298. package/telegram-plugin/tests/waiting-ux.e2e.test.ts +0 -233
  299. package/telegram-plugin/turn-flush-prose-recovery.ts +0 -40
  300. package/telegram-plugin/two-zone-card.ts +0 -269
  301. package/telegram-plugin/uat/scenarios/smoke-clerk-reply.test.ts +0 -61
@@ -1,66 +0,0 @@
1
- /**
2
- * PR-C2 — fast-turn race between considerPin's deferred timer and a
3
- * subsequent completeTurn.
4
- *
5
- * considerPin schedules a pendingPin timer with `pinDelayMs`. If the
6
- * turn completes BEFORE the timer fires, completeTurn must:
7
- *
8
- * - Cancel the timer (no Telegram pin issued).
9
- * - Drop the entry from pendingPins (no orphan).
10
- *
11
- * fails when: completeTurn's pending-pin cancellation branch is removed
12
- * or the entry isn't deleted from `pendingPins` after `timer.cancel()`
13
- * (which would let a follow-up considerPin for the same composite get
14
- * silently no-op'd by the `pendingPins.has(key)` guard).
15
- */
16
- import { describe, it, expect, vi } from 'vitest'
17
- import { createPinManager, type TimerHandle } from '../progress-card-pin-manager.js'
18
-
19
- interface T { fn: () => void; cancelled: boolean; fired: boolean }
20
-
21
- describe('PR-C2: fast-turn pin-race — completeTurn before timer fires', () => {
22
- it('cancels the pending pin timer and clears pendingPins; no pin API call ever issued', async () => {
23
- const timers: T[] = []
24
- const pin = vi.fn(async () => true)
25
- const unpin = vi.fn(async () => true)
26
-
27
- const mgr = createPinManager({
28
- pin, unpin,
29
- log: () => {},
30
- now: () => 1000,
31
- pinDelayMs: 100, // non-zero so we have a race window
32
- scheduleTimer: (fn): TimerHandle => {
33
- const t: T = { fn, cancelled: false, fired: false }
34
- timers.push(t)
35
- return { cancel: () => { t.cancelled = true } }
36
- },
37
- })
38
-
39
- mgr.considerPin({
40
- chatId: 'c', threadId: '0', turnKey: 'c:0:1', messageId: 500, isFirstEmit: true,
41
- })
42
-
43
- // Timer scheduled but NOT fired.
44
- expect(timers).toHaveLength(1)
45
- expect(timers[0].cancelled).toBe(false)
46
- expect(timers[0].fired).toBe(false)
47
-
48
- // Fast turn completes before timer fires.
49
- mgr.completeTurn({ chatId: 'c', turnKey: 'c:0:1' })
50
- await mgr.drainInFlight()
51
-
52
- expect(timers[0].cancelled).toBe(true)
53
- expect(pin).not.toHaveBeenCalled()
54
- expect(unpin).not.toHaveBeenCalled()
55
- expect(mgr.pinnedTurnKeys()).toEqual([])
56
-
57
- // No orphan: a fresh considerPin under the same composite must be
58
- // able to schedule a new timer (would no-op if pendingPins still
59
- // had the stale entry).
60
- mgr.considerPin({
61
- chatId: 'c', threadId: '0', turnKey: 'c:0:1', messageId: 500, isFirstEmit: true,
62
- })
63
- expect(timers).toHaveLength(2)
64
- expect(timers[1].cancelled).toBe(false)
65
- })
66
- })
@@ -1,64 +0,0 @@
1
- /**
2
- * PR-C2 — sweepActivePins must recover gracefully from a malformed
3
- * sidecar file written by a prior crash mid-write.
4
- *
5
- * Two scenarios:
6
- * (a) JSON-truncated file — readActivePins falls back to [] and the
7
- * sweep is a no-op without throwing.
8
- * (b) Mixed valid/invalid entries inside a parseable JSON array —
9
- * readActivePins drops the invalid entries and processes the
10
- * valid ones.
11
- *
12
- * fails when: readActivePins is changed to throw on malformed JSON,
13
- * OR the per-entry validator is loosened so a malformed object slips
14
- * through and crashes the unpin loop.
15
- */
16
- import { describe, it, expect, beforeEach, afterEach } from 'vitest'
17
- import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'
18
- import { tmpdir } from 'node:os'
19
- import { join } from 'node:path'
20
- import { ACTIVE_PINS_FILENAME } from '../active-pins.js'
21
- import { sweepActivePins } from '../active-pins-sweep.js'
22
-
23
- describe('PR-C2: sweepActivePins recovers from a malformed sidecar', () => {
24
- let dir: string
25
- beforeEach(() => { dir = mkdtempSync(join(tmpdir(), 'pin-sidecar-partial-')) })
26
- afterEach(() => { rmSync(dir, { recursive: true, force: true }) })
27
-
28
- it('truncated JSON array: sweep is a clean no-op (no throw, no calls)', async () => {
29
- // Simulate a crash mid-write: a JSON array prefix that never closed.
30
- writeFileSync(
31
- join(dir, ACTIVE_PINS_FILENAME),
32
- '[{"chatId":"A","messageId":1,"turnKey":"A:0:1","pinnedAt":17',
33
- )
34
-
35
- const calls: Array<[string, number]> = []
36
- const logs: string[] = []
37
- const result = await sweepActivePins(
38
- dir,
39
- async (chatId, messageId) => { calls.push([chatId, messageId]) },
40
- { log: (m) => logs.push(m) },
41
- )
42
- expect(calls).toEqual([])
43
- expect(result.swept).toEqual([])
44
- expect(result.timedOut).toBe(false)
45
- })
46
-
47
- it('mixed valid/invalid entries: valid ones still get processed', async () => {
48
- const blob = JSON.stringify([
49
- { chatId: 'A', messageId: 1, turnKey: 'A:0:1', pinnedAt: 1700000000000 }, // valid
50
- { chatId: 42, messageId: 'oops', turnKey: 'B:0:1', pinnedAt: 0 }, // invalid (wrong types)
51
- null, // invalid
52
- { chatId: 'C', messageId: 3, turnKey: 'C:0:1', pinnedAt: 1700000000001 }, // valid
53
- 'garbage', // invalid
54
- ])
55
- writeFileSync(join(dir, ACTIVE_PINS_FILENAME), blob)
56
-
57
- const calls: Array<[string, number]> = []
58
- await sweepActivePins(dir, async (c, m) => { calls.push([c, m]) })
59
- expect(calls.sort()).toEqual([
60
- ['A', 1],
61
- ['C', 3],
62
- ])
63
- })
64
- })
@@ -1,190 +0,0 @@
1
- /**
2
- * Unit tests for the progress-card pin watchdog.
3
- *
4
- * The watchdog is called from the gateway on every progress-card
5
- * heartbeat emit. It verifies Telegram's pinned-message id matches
6
- * what we think is pinned; if not, it re-pins. This suite exercises
7
- * the rate-limiter, re-pin path, and error isolation.
8
- */
9
-
10
- import { describe, it, expect, vi } from 'vitest'
11
- import { createPinWatchdog, type PinWatchdogDeps } from '../progress-card-pin-watchdog.js'
12
-
13
- interface Harness {
14
- wd: ReturnType<typeof createPinWatchdog>
15
- deps: {
16
- getCurrentPinned: ReturnType<typeof vi.fn>
17
- pin: ReturnType<typeof vi.fn>
18
- log: ReturnType<typeof vi.fn>
19
- }
20
- /** Mutable clock. Tests advance this to exercise the rate-limiter. */
21
- clock: { t: number }
22
- }
23
-
24
- function mkHarness(overrides: Partial<PinWatchdogDeps> = {}): Harness {
25
- const clock = { t: 1_000_000 }
26
- const deps = {
27
- getCurrentPinned: vi.fn(async () => 42),
28
- pin: vi.fn(async () => true),
29
- log: vi.fn(),
30
- }
31
- const wd = createPinWatchdog({
32
- getCurrentPinned: deps.getCurrentPinned,
33
- pin: deps.pin,
34
- log: deps.log,
35
- now: () => clock.t,
36
- intervalMs: 30_000,
37
- ...overrides,
38
- })
39
- return { wd, deps, clock }
40
- }
41
-
42
- describe('progress-card pin watchdog', () => {
43
- it('is a no-op when Telegram already shows the expected pin', async () => {
44
- const h = mkHarness()
45
- h.deps.getCurrentPinned.mockResolvedValueOnce(42)
46
-
47
- await h.wd.verify({ chatId: '100', turnKey: '100:1', expectedMessageId: 42 })
48
-
49
- expect(h.deps.getCurrentPinned).toHaveBeenCalledTimes(1)
50
- expect(h.deps.pin).not.toHaveBeenCalled()
51
- expect(h.deps.log).not.toHaveBeenCalled()
52
- })
53
-
54
- it('re-pins when Telegram shows a different message pinned', async () => {
55
- const h = mkHarness()
56
- h.deps.getCurrentPinned.mockResolvedValueOnce(999) // some other pin
57
-
58
- await h.wd.verify({ chatId: '100', turnKey: '100:1', expectedMessageId: 42 })
59
-
60
- expect(h.deps.getCurrentPinned).toHaveBeenCalledWith('100')
61
- expect(h.deps.pin).toHaveBeenCalledWith('100', 42, { disable_notification: true })
62
- })
63
-
64
- it('re-pins when Telegram reports nothing pinned', async () => {
65
- const h = mkHarness()
66
- h.deps.getCurrentPinned.mockResolvedValueOnce(undefined)
67
-
68
- await h.wd.verify({ chatId: '100', turnKey: '100:1', expectedMessageId: 42 })
69
-
70
- expect(h.deps.pin).toHaveBeenCalledWith('100', 42, { disable_notification: true })
71
- })
72
-
73
- it('rate-limits probes per turnKey within intervalMs', async () => {
74
- const h = mkHarness()
75
- h.deps.getCurrentPinned.mockResolvedValue(42)
76
-
77
- // Burst of 5 verify calls within 1s of each other.
78
- for (let i = 0; i < 5; i++) {
79
- await h.wd.verify({ chatId: '100', turnKey: '100:1', expectedMessageId: 42 })
80
- h.clock.t += 1_000
81
- }
82
-
83
- expect(h.deps.getCurrentPinned).toHaveBeenCalledTimes(1)
84
- })
85
-
86
- it('probes again after the rate-limit window elapses', async () => {
87
- const h = mkHarness()
88
- h.deps.getCurrentPinned.mockResolvedValue(42)
89
-
90
- await h.wd.verify({ chatId: '100', turnKey: '100:1', expectedMessageId: 42 })
91
- h.clock.t += 31_000 // past 30s interval
92
- await h.wd.verify({ chatId: '100', turnKey: '100:1', expectedMessageId: 42 })
93
-
94
- expect(h.deps.getCurrentPinned).toHaveBeenCalledTimes(2)
95
- })
96
-
97
- it('rate-limiter is keyed by turnKey — different keys don\'t share budget', async () => {
98
- const h = mkHarness()
99
- h.deps.getCurrentPinned.mockResolvedValue(42)
100
-
101
- await h.wd.verify({ chatId: '100', turnKey: '100:1', expectedMessageId: 42 })
102
- await h.wd.verify({ chatId: '100', turnKey: '100:2', expectedMessageId: 43 })
103
- await h.wd.verify({ chatId: '100', turnKey: '100:3', expectedMessageId: 44 })
104
-
105
- expect(h.deps.getCurrentPinned).toHaveBeenCalledTimes(3)
106
- })
107
-
108
- it('swallows and logs getChat errors without throwing', async () => {
109
- const h = mkHarness()
110
- h.deps.getCurrentPinned.mockRejectedValueOnce(new Error('Bad Request: chat not found'))
111
-
112
- await expect(
113
- h.wd.verify({ chatId: '100', turnKey: '100:1', expectedMessageId: 42 }),
114
- ).resolves.toBeUndefined()
115
-
116
- expect(h.deps.pin).not.toHaveBeenCalled()
117
- expect(h.deps.log).toHaveBeenCalledOnce()
118
- expect(h.deps.log.mock.calls[0]![0]).toMatch(/watchdog failed.*chat not found/)
119
- })
120
-
121
- it('swallows and logs pin errors without throwing', async () => {
122
- const h = mkHarness()
123
- h.deps.getCurrentPinned.mockResolvedValueOnce(999) // mismatch → try to re-pin
124
- h.deps.pin.mockRejectedValueOnce(new Error('Forbidden: not enough rights'))
125
-
126
- await expect(
127
- h.wd.verify({ chatId: '100', turnKey: '100:1', expectedMessageId: 42 }),
128
- ).resolves.toBeUndefined()
129
-
130
- expect(h.deps.log).toHaveBeenCalledOnce()
131
- expect(h.deps.log.mock.calls[0]![0]).toMatch(/watchdog failed.*not enough rights/)
132
- })
133
-
134
- it('clear() resets the rate-limit so a subsequent verify probes immediately', async () => {
135
- const h = mkHarness()
136
- h.deps.getCurrentPinned.mockResolvedValue(42)
137
-
138
- await h.wd.verify({ chatId: '100', turnKey: '100:1', expectedMessageId: 42 })
139
- expect(h.deps.getCurrentPinned).toHaveBeenCalledTimes(1)
140
-
141
- h.wd.clear('100:1')
142
- // No time advance — only the clear should unlock the next probe.
143
- await h.wd.verify({ chatId: '100', turnKey: '100:1', expectedMessageId: 42 })
144
-
145
- expect(h.deps.getCurrentPinned).toHaveBeenCalledTimes(2)
146
- })
147
-
148
- it('clear() on an unknown turnKey is a safe no-op', () => {
149
- const h = mkHarness()
150
- expect(() => h.wd.clear('never-seen')).not.toThrow()
151
- })
152
-
153
- it('defaults intervalMs to 30_000 when not provided', async () => {
154
- // Construct without an explicit intervalMs override.
155
- const clock = { t: 0 }
156
- const getCurrentPinned = vi.fn(async () => 42)
157
- const pin = vi.fn(async () => true)
158
- const wd = createPinWatchdog({
159
- getCurrentPinned,
160
- pin,
161
- now: () => clock.t,
162
- })
163
-
164
- await wd.verify({ chatId: '100', turnKey: '100:1', expectedMessageId: 42 })
165
- // Just under 30s — should still be gated.
166
- clock.t = 29_999
167
- await wd.verify({ chatId: '100', turnKey: '100:1', expectedMessageId: 42 })
168
- expect(getCurrentPinned).toHaveBeenCalledTimes(1)
169
- // Past 30s — should probe again.
170
- clock.t = 30_001
171
- await wd.verify({ chatId: '100', turnKey: '100:1', expectedMessageId: 42 })
172
- expect(getCurrentPinned).toHaveBeenCalledTimes(2)
173
- })
174
-
175
- it('first verify for a turnKey always probes, even at t=0', async () => {
176
- const clock = { t: 0 }
177
- const deps = {
178
- getCurrentPinned: vi.fn(async () => 42),
179
- pin: vi.fn(async () => true),
180
- }
181
- const wd = createPinWatchdog({
182
- getCurrentPinned: deps.getCurrentPinned,
183
- pin: deps.pin,
184
- now: () => clock.t,
185
- })
186
-
187
- await wd.verify({ chatId: '100', turnKey: '100:1', expectedMessageId: 42 })
188
- expect(deps.getCurrentPinned).toHaveBeenCalledTimes(1)
189
- })
190
- })
@@ -1,146 +0,0 @@
1
- /**
2
- * Regression test for #689 — SIGTERM mid-turn must flush pinned progress
3
- * cards with a "Restart interrupted" banner and unpin them, instead of
4
- * leaving them frozen on "Working…" forever.
5
- *
6
- * The full SIGTERM flush logic in gateway.ts is built around a closure
7
- * that needs a complete grammY bot harness, so this test exercises the
8
- * pieces it composes: `pinManager.pinnedEntries()` (the new
9
- * shutdown-introspection API) and `pinManager.unpinForChat()` (the
10
- * synchronous unpin path). The gateway's shutdown closure is a trivial
11
- * map over `pinnedEntries()` calling `editMessageText` + `unpinForChat`
12
- * — covering those two primitives covers the regression.
13
- */
14
- import { describe, it, expect, vi } from 'vitest'
15
- import {
16
- createPinManager,
17
- type PinManagerDeps,
18
- type TimerHandle,
19
- } from '../progress-card-pin-manager.js'
20
-
21
- interface PendingTimer { fn: () => void; cancelled: boolean; fired: boolean }
22
-
23
- function mkHarness(overrides: Partial<PinManagerDeps> = {}) {
24
- const timers: PendingTimer[] = []
25
- const deps = {
26
- pin: vi.fn(async () => true),
27
- unpin: vi.fn(async () => true),
28
- deleteMessage: vi.fn(async () => true),
29
- addPin: vi.fn(),
30
- removePin: vi.fn(),
31
- log: vi.fn(),
32
- }
33
- const scheduleTimer = (fn: () => void): TimerHandle => {
34
- const entry: PendingTimer = { fn, cancelled: false, fired: false }
35
- timers.push(entry)
36
- return { cancel() { entry.cancelled = true } }
37
- }
38
- const mgr = createPinManager({ ...deps, now: () => 10_000, scheduleTimer, ...overrides })
39
- const fireTimers = (): void => {
40
- for (const t of [...timers]) {
41
- if (t.cancelled || t.fired) continue
42
- t.fired = true
43
- t.fn()
44
- }
45
- }
46
- return { mgr, deps, fireTimers }
47
- }
48
-
49
- describe('SIGTERM mid-turn progress-card flush (#689)', () => {
50
- it('pinnedEntries() reports chatId + threadId + messageId for every live pin', async () => {
51
- const h = mkHarness()
52
- h.mgr.considerPin({
53
- chatId: 'chat-A', threadId: '7', turnKey: 'chat-A:7:1',
54
- messageId: 101, isFirstEmit: true,
55
- })
56
- h.mgr.considerPin({
57
- chatId: 'chat-B', turnKey: 'chat-B:1',
58
- messageId: 202, isFirstEmit: true,
59
- })
60
- h.fireTimers()
61
- await h.mgr.drainInFlight()
62
-
63
- const entries = h.mgr.pinnedEntries()
64
- expect(entries).toHaveLength(2)
65
- const sorted = [...entries].sort((a, b) => a.messageId - b.messageId)
66
- expect(sorted[0]).toMatchObject({
67
- chatId: 'chat-A', threadId: '7', turnKey: 'chat-A:7:1',
68
- messageId: 101, agentId: '__parent__',
69
- })
70
- expect(sorted[1]).toMatchObject({
71
- chatId: 'chat-B', turnKey: 'chat-B:1',
72
- messageId: 202, agentId: '__parent__',
73
- })
74
- // Threadless pins must not invent a threadId field — the gateway
75
- // shutdown closure skips passing message_thread_id when undefined.
76
- expect(sorted[1].threadId).toBeUndefined()
77
- })
78
-
79
- it('simulated SIGTERM: edit-then-unpin every pinned card with the banner', async () => {
80
- const h = mkHarness()
81
- h.mgr.considerPin({
82
- chatId: 'chat-A', threadId: '7', turnKey: 'chat-A:7:1',
83
- messageId: 101, isFirstEmit: true,
84
- })
85
- h.mgr.considerPin({
86
- chatId: 'chat-B', turnKey: 'chat-B:1',
87
- messageId: 202, isFirstEmit: true,
88
- })
89
- h.fireTimers()
90
- await h.mgr.drainInFlight()
91
-
92
- // Stand-in for `lockedBot.api.editMessageText`.
93
- const editMessageText = vi.fn(async () => true)
94
- const banner = '⚠️ <b>Restart interrupted this work</b>\n<i>SIGTERM: update: pulled X</i>'
95
-
96
- const entries = h.mgr.pinnedEntries()
97
- const ops = entries.map(({ chatId, threadId, messageId }) =>
98
- editMessageText(chatId, messageId, banner, { parse_mode: 'HTML' })
99
- .finally(() => {
100
- h.mgr.unpinForChat(chatId, threadId != null ? Number(threadId) : undefined)
101
- }),
102
- )
103
- await Promise.allSettled(ops)
104
- await h.mgr.drainInFlight()
105
-
106
- // Both cards saw the interrupted-banner edit.
107
- expect(editMessageText).toHaveBeenCalledTimes(2)
108
- expect(editMessageText).toHaveBeenCalledWith('chat-A', 101, banner, { parse_mode: 'HTML' })
109
- expect(editMessageText).toHaveBeenCalledWith('chat-B', 202, banner, { parse_mode: 'HTML' })
110
-
111
- // And both cards were unpinned afterwards.
112
- expect(h.deps.unpin).toHaveBeenCalledWith('chat-A', 101)
113
- expect(h.deps.unpin).toHaveBeenCalledWith('chat-B', 202)
114
- expect(h.mgr.pinnedEntries()).toEqual([])
115
- expect(h.mgr.pinnedTurnKeys()).toEqual([])
116
- })
117
-
118
- it('unpins even when the banner edit fails (frozen card is worse than no card)', async () => {
119
- const h = mkHarness()
120
- h.mgr.considerPin({
121
- chatId: 'chat-A', threadId: '7', turnKey: 'chat-A:7:1',
122
- messageId: 101, isFirstEmit: true,
123
- })
124
- h.fireTimers()
125
- await h.mgr.drainInFlight()
126
-
127
- const editMessageText = vi.fn(async () => {
128
- throw new Error('Bad Request: message to edit not found')
129
- })
130
- const banner = '⚠️ <b>Restart interrupted this work</b>\n<i>SIGTERM</i>'
131
-
132
- const entries = h.mgr.pinnedEntries()
133
- const ops = entries.map(({ chatId, threadId, messageId }) =>
134
- editMessageText(chatId, messageId, banner, { parse_mode: 'HTML' })
135
- .catch(() => {})
136
- .finally(() => {
137
- h.mgr.unpinForChat(chatId, threadId != null ? Number(threadId) : undefined)
138
- }),
139
- )
140
- await Promise.allSettled(ops)
141
- await h.mgr.drainInFlight()
142
-
143
- expect(h.deps.unpin).toHaveBeenCalledWith('chat-A', 101)
144
- expect(h.mgr.pinnedEntries()).toEqual([])
145
- })
146
- })
@@ -1,123 +0,0 @@
1
- /**
2
- * F1 — "ladder collapse" — regression test against real-gateway harness.
3
- *
4
- * Symptom from #545: on a Class B turn (1–3 tool calls, < ~15s), the
5
- * status reaction jumps straight from 👀 to 👍, skipping the
6
- * intermediate 🤔 (thinking) and 🔥 (tool work) states. User loses the
7
- * "agent is doing things" signal.
8
- *
9
- * Root cause: `StatusReactionController.scheduleState()` debounces
10
- * non-immediate transitions (default `debounceMs=700`). When a turn
11
- * completes faster than the debounce window, intermediate states never
12
- * cross the timer — `setDone()` calls `finishWithState()` which
13
- * `clearDebounceTimer()`s and emits 👍 directly, dropping the pending
14
- * 🤔/🔥.
15
- *
16
- * Spec contract from `waiting-ux-spec.md`:
17
- *
18
- * F1: ladder integrity — for Class B turns, recorded reaction
19
- * sequence MUST contain 👀 followed by at least one
20
- * intermediate state (🤔 / 🔥 / a tool-specific reaction)
21
- * BEFORE 👍. No straight-to-👍 collapse.
22
- *
23
- * The fix flushes any pending non-terminal reaction before the
24
- * terminal 👍 emits. Tracking: #545 (parent), #553 (Phase 3).
25
- */
26
-
27
- import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
28
- import { createRealGatewayHarness } from './real-gateway-harness.js'
29
-
30
- const CHAT = '8248703757'
31
- const INBOUND_MSG = 100
32
-
33
- beforeEach(() => { vi.useFakeTimers() })
34
- afterEach(() => { vi.useRealTimers() })
35
-
36
- /**
37
- * Dedupe consecutive duplicate reactions in the recorded sequence.
38
- * The post-F2 harness fires 👀 twice (early-ack + controller setQueued);
39
- * Telegram dedupes by emoji so consecutive duplicates are visually one
40
- * step. Tests asserting ladder integrity should ignore them.
41
- */
42
- function uniqueLadder(seq: string[]): string[] {
43
- const out: string[] = []
44
- for (const e of seq) {
45
- if (out[out.length - 1] !== e) out.push(e)
46
- }
47
- return out
48
- }
49
-
50
- describe('F1 — ladder integrity (no straight-to-👍 collapse)', () => {
51
- it('Class B sub-debounce turn (~500ms): pending tool reaction MUST emit before 👍', async () => {
52
- // The exact failure case from the live demo: a turn that completes
53
- // faster than the controller's 700ms debounce window. Pre-fix, the
54
- // 🔥 reaction was scheduled but cancelled when setDone() cleared
55
- // the debounce timer. User saw 👀 → 👍 with no intermediate state.
56
- const h = createRealGatewayHarness({ gapMs: 0 })
57
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'quick task' })
58
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'quick task' })
59
- await h.clock.advance(50)
60
- h.feedSessionEvent({ kind: 'thinking' })
61
- await h.clock.advance(50)
62
- h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash' })
63
- await h.clock.advance(400) // tool runs ~400ms, total turn ~500ms — under 700ms debounce
64
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 500 })
65
- await h.clock.advance(1500) // settle, well past debounce window
66
-
67
- const ladder = uniqueLadder(h.recorder.reactionSequence())
68
- expect(ladder[0]).toBe('👀')
69
- expect(ladder[ladder.length - 1]).toBe('👍')
70
- // Must contain at least one intermediate — no straight 👀 → 👍 collapse.
71
- expect(ladder.length).toBeGreaterThanOrEqual(3)
72
- h.finalize()
73
- })
74
-
75
- it('Class B medium turn (~2s, single tool): ladder shows 👀 → tool reaction → 👍', async () => {
76
- // Slower turn (single 2s tool) — works correctly even pre-fix because
77
- // 2000ms > 700ms debounce. Pin so the fix doesn't regress the working case.
78
- const h = createRealGatewayHarness({ gapMs: 0 })
79
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'medium task' })
80
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'medium task' })
81
- await h.clock.advance(50)
82
- h.feedSessionEvent({ kind: 'thinking' })
83
- await h.clock.advance(200)
84
- h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash' })
85
- await h.clock.advance(2000)
86
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 2300 })
87
- await h.clock.advance(1000)
88
-
89
- const ladder = uniqueLadder(h.recorder.reactionSequence())
90
- expect(ladder[0]).toBe('👀')
91
- expect(ladder[ladder.length - 1]).toBe('👍')
92
- expect(ladder.length).toBeGreaterThanOrEqual(3)
93
- h.finalize()
94
- })
95
-
96
- it('Class B 3-tool series at sub-debounce intervals: each transition shows', async () => {
97
- // Three rapid tool transitions inside a single debounce window.
98
- // Pre-fix, only the LAST one would survive (the others got
99
- // overwritten by the next setTool). We don't strictly require all
100
- // three to appear (the controller can collapse same-emoji adjacent
101
- // calls) — but the FINAL pending state before 👍 must emit.
102
- const h = createRealGatewayHarness({ gapMs: 0 })
103
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'rapid tools' })
104
- h.feedSessionEvent({ kind: 'enqueue', chatId: CHAT, messageId: '1', threadId: null, rawContent: 'rapid tools' })
105
- await h.clock.advance(50)
106
- h.feedSessionEvent({ kind: 'thinking' })
107
- await h.clock.advance(50)
108
- h.feedSessionEvent({ kind: 'tool_use', toolName: 'Read' })
109
- await h.clock.advance(100)
110
- h.feedSessionEvent({ kind: 'tool_use', toolName: 'Bash' })
111
- await h.clock.advance(100)
112
- h.feedSessionEvent({ kind: 'tool_use', toolName: 'Edit' })
113
- await h.clock.advance(200)
114
- h.feedSessionEvent({ kind: 'turn_end', durationMs: 500 })
115
- await h.clock.advance(1500)
116
-
117
- const ladder = uniqueLadder(h.recorder.reactionSequence())
118
- expect(ladder[0]).toBe('👀')
119
- expect(ladder[ladder.length - 1]).toBe('👍')
120
- expect(ladder.length).toBeGreaterThanOrEqual(3)
121
- h.finalize()
122
- })
123
- })
@@ -1,82 +0,0 @@
1
- /**
2
- * F2 — "no instant draft / typing signal" — regression test.
3
- *
4
- * Spec contract from `waiting-ux-spec.md`:
5
- *
6
- * F2 deadline: firstReactionAt - inboundAt < 800ms for ALL turn classes.
7
- *
8
- * Pre-fix history: Phase 1's harness (#547) called `setQueued()` synchronously
9
- * inside its `inbound()` helper, so F2 passed trivially — the harness was
10
- * lying about the inbound flow. The Phase 3 real-gateway harness (#553 PR 1)
11
- * wired the production `InboundCoalescer` BEFORE first-paint, exposing that
12
- * 👀 only fired AFTER the coalesce window (default 1500ms) — ~700ms over deadline.
13
- *
14
- * Fix (#553 PR 2): `gateway.ts handleInboundCoalesced` now fires the 👀
15
- * reaction directly on raw arrival via `bot.api.setMessageReaction`,
16
- * BEFORE the coalesce buffer. Eligibility: paired DM users on a fresh
17
- * turn (mid-turn messages preserve the current 🔥/🤔 state). The
18
- * controller's later `setQueued()` runs as before; Telegram dedupes
19
- * the duplicate 👀 emit.
20
- *
21
- * These tests pin the post-fix contract so the gap can never re-open.
22
- *
23
- * Tracking: #545 (parent), #553 (Phase 3 harness + fixes).
24
- */
25
-
26
- import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
27
- import { createRealGatewayHarness } from './real-gateway-harness.js'
28
-
29
- const CHAT = '8248703757'
30
- const INBOUND_MSG = 100
31
-
32
- beforeEach(() => { vi.useFakeTimers() })
33
- afterEach(() => { vi.useRealTimers() })
34
-
35
- describe('F2 — first-paint deadline (👀 within 800ms of inbound)', () => {
36
- it('Class A — instant reply: 👀 reaction within 800ms', async () => {
37
- const h = createRealGatewayHarness({ gapMs: 1500 }) // production default
38
- const inboundAt = h.clock.now()
39
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
40
- // Allow up to 800ms for the deadline; do NOT advance through the
41
- // full coalesce window — the deadline says 👀 lands BEFORE the
42
- // coalesce flush would.
43
- await h.clock.advance(800)
44
- const firstReactionMs = h.recorder.firstReactionMs(CHAT)
45
- expect(firstReactionMs).not.toBeNull()
46
- expect((firstReactionMs ?? Infinity) - inboundAt).toBeLessThan(800)
47
- h.finalize()
48
- })
49
-
50
- it('Class B — short turn: 👀 reaction within 800ms even with later tool calls', async () => {
51
- const h = createRealGatewayHarness({ gapMs: 1500 })
52
- const inboundAt = h.clock.now()
53
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'do a thing' })
54
- await h.clock.advance(800)
55
- const firstReactionMs = h.recorder.firstReactionMs(CHAT)
56
- expect(firstReactionMs).not.toBeNull()
57
- expect((firstReactionMs ?? Infinity) - inboundAt).toBeLessThan(800)
58
- h.finalize()
59
- })
60
-
61
- it('Class C — long / multi-agent: 👀 reaction within 800ms regardless of total turn duration', async () => {
62
- const h = createRealGatewayHarness({ gapMs: 1500 })
63
- const inboundAt = h.clock.now()
64
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'big task' })
65
- await h.clock.advance(800)
66
- const firstReactionMs = h.recorder.firstReactionMs(CHAT)
67
- expect(firstReactionMs).not.toBeNull()
68
- expect((firstReactionMs ?? Infinity) - inboundAt).toBeLessThan(800)
69
- h.finalize()
70
- })
71
-
72
- it('still meets deadline when an operator tunes coalescingGapMs lower', async () => {
73
- const h = createRealGatewayHarness({ gapMs: 500 })
74
- const inboundAt = h.clock.now()
75
- h.inbound({ chatId: CHAT, messageId: INBOUND_MSG, text: 'hi' })
76
- await h.clock.advance(800)
77
- const firstReactionMs = h.recorder.firstReactionMs(CHAT)
78
- expect(firstReactionMs).not.toBeNull()
79
- expect((firstReactionMs ?? Infinity) - inboundAt).toBeLessThan(800)
80
- h.finalize()
81
- })
82
- })