switchroom 0.7.15 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (301)
  1. package/README.md +51 -59
  2. package/bin/run-hook.sh +27 -11
  3. package/bin/timezone-hook.sh +9 -7
  4. package/dist/agent-scheduler/index.js +410 -133
  5. package/dist/auth-broker/index.js +13932 -0
  6. package/dist/cli/switchroom.js +26937 -5601
  7. package/dist/host-control/main.js +12702 -0
  8. package/dist/vault/approvals/kernel-server.js +467 -184
  9. package/dist/vault/broker/server.js +1430 -724
  10. package/examples/minimal.yaml +63 -0
  11. package/examples/personal-google-workspace-mcp/.env.example +34 -0
  12. package/examples/personal-google-workspace-mcp/README.md +194 -0
  13. package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
  14. package/examples/switchroom.yaml +220 -0
  15. package/package.json +7 -4
  16. package/profiles/_base/settings.json.hbs +20 -5
  17. package/profiles/_base/start.sh.hbs +16 -3
  18. package/profiles/_shared/agent-self-service.md.hbs +126 -0
  19. package/profiles/_shared/telegram-style.md.hbs +20 -90
  20. package/profiles/_shared/vault-protocol.md.hbs +68 -0
  21. package/profiles/default/CLAUDE.md +50 -96
  22. package/profiles/default/CLAUDE.md.hbs +36 -6
  23. package/profiles/default/workspace/SOUL.md.hbs +12 -5
  24. package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
  25. package/skills/buildkite-agent-runtime/SKILL.md +44 -11
  26. package/skills/buildkite-api/SKILL.md +31 -8
  27. package/skills/buildkite-cli/SKILL.md +27 -9
  28. package/skills/buildkite-migration/SKILL.md +22 -9
  29. package/skills/buildkite-pipelines/SKILL.md +26 -9
  30. package/skills/buildkite-secure-delivery/SKILL.md +23 -9
  31. package/skills/buildkite-test-engine/SKILL.md +25 -8
  32. package/skills/docx/SKILL.md +1 -1
  33. package/skills/docx/scripts/office/validators/__pycache__/__init__.cpython-313.pyc +0 -0
  34. package/skills/docx/scripts/office/validators/__pycache__/base.cpython-313.pyc +0 -0
  35. package/skills/file-bug/SKILL.md +34 -6
  36. package/skills/humanizer/SKILL.md +15 -0
  37. package/skills/humanizer-calibrate/SKILL.md +7 -1
  38. package/skills/mcp-builder/SKILL.md +1 -1
  39. package/skills/pdf/SKILL.md +1 -1
  40. package/skills/pptx/SKILL.md +1 -1
  41. package/skills/skill-creator/SKILL.md +21 -1
  42. package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
  43. package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
  44. package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
  45. package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
  46. package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
  47. package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
  48. package/skills/switchroom-cli/SKILL.md +63 -64
  49. package/skills/switchroom-health/SKILL.md +23 -10
  50. package/skills/switchroom-install/SKILL.md +3 -3
  51. package/skills/switchroom-manage/SKILL.md +26 -19
  52. package/skills/switchroom-runtime/SKILL.md +191 -0
  53. package/skills/switchroom-status/SKILL.md +27 -2
  54. package/skills/telegram-test-harness/SKILL.md +3 -0
  55. package/skills/token-helpers/SKILL.md +24 -1
  56. package/skills/webapp-testing/SKILL.md +31 -1
  57. package/skills/xlsx/SKILL.md +1 -1
  58. package/telegram-plugin/admin-commands/index.ts +7 -5
  59. package/telegram-plugin/analytics-posthog.ts +191 -0
  60. package/telegram-plugin/bridge/bridge.ts +69 -0
  61. package/telegram-plugin/bridge/ipc-client.ts +4 -1
  62. package/telegram-plugin/dist/bridge/bridge.js +194 -119
  63. package/telegram-plugin/dist/gateway/gateway.js +23611 -19671
  64. package/telegram-plugin/dist/server.js +245 -189
  65. package/telegram-plugin/first-paint.ts +3 -24
  66. package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
  67. package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
  68. package/telegram-plugin/gateway/auth-command.ts +794 -0
  69. package/telegram-plugin/gateway/auth-line.ts +123 -0
  70. package/telegram-plugin/gateway/boot-card.ts +169 -40
  71. package/telegram-plugin/gateway/boot-issue-cache.ts +308 -0
  72. package/telegram-plugin/gateway/boot-probes.ts +166 -123
  73. package/telegram-plugin/gateway/boot-reason.ts +41 -7
  74. package/telegram-plugin/gateway/boot-version.ts +66 -0
  75. package/telegram-plugin/gateway/gateway.ts +3499 -1885
  76. package/telegram-plugin/gateway/hostd-dispatch.ts +117 -0
  77. package/telegram-plugin/gateway/ipc-protocol.ts +18 -0
  78. package/telegram-plugin/gateway/pending-inbound-buffer.ts +106 -0
  79. package/telegram-plugin/gateway/quarantine.ts +69 -0
  80. package/telegram-plugin/gateway/quota-cache.ts +9 -4
  81. package/telegram-plugin/gateway/reaction-trigger.ts +401 -0
  82. package/telegram-plugin/gateway/recent-denials.test.ts +103 -0
  83. package/telegram-plugin/gateway/recent-denials.ts +77 -0
  84. package/telegram-plugin/gateway/startup-network-retry.ts +109 -31
  85. package/telegram-plugin/gateway/vault-grant-inbound-builders.ts +125 -0
  86. package/telegram-plugin/history.ts +91 -0
  87. package/telegram-plugin/hooks/hooks.json +10 -0
  88. package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +130 -0
  89. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +19 -2
  90. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +22 -2
  91. package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
  92. package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
  93. package/telegram-plugin/inbound-classifier.ts +50 -0
  94. package/telegram-plugin/inline-keyboard-callbacks.ts +136 -0
  95. package/telegram-plugin/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json +1 -0
  96. package/telegram-plugin/package.json +4 -2
  97. package/telegram-plugin/permission-rule.ts +51 -0
  98. package/telegram-plugin/permission-title.ts +56 -0
  99. package/telegram-plugin/quota-check.ts +19 -41
  100. package/telegram-plugin/registry/reaper.ts +223 -0
  101. package/telegram-plugin/retry-api-call.ts +80 -0
  102. package/telegram-plugin/runtime-metrics.ts +177 -0
  103. package/telegram-plugin/scripts/build.mjs +0 -1
  104. package/telegram-plugin/secret-detect/index.ts +24 -0
  105. package/telegram-plugin/secret-detect/vault-error.test.ts +64 -12
  106. package/telegram-plugin/secret-detect/vault-error.ts +78 -11
  107. package/telegram-plugin/secret-detect/vault-write.ts +14 -2
  108. package/telegram-plugin/server.js +41795 -0
  109. package/telegram-plugin/session-tail.ts +6 -1
  110. package/telegram-plugin/shared/bot-runtime.ts +5 -4
  111. package/telegram-plugin/silence-poke.ts +420 -0
  112. package/telegram-plugin/silent-end.ts +174 -0
  113. package/telegram-plugin/stream-controller.ts +13 -0
  114. package/telegram-plugin/stream-reply-handler.ts +7 -0
  115. package/telegram-plugin/subagent-watcher.ts +213 -4
  116. package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
  117. package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
  118. package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
  119. package/telegram-plugin/tests/boot-card-issue-dedup.test.ts +247 -0
  120. package/telegram-plugin/tests/boot-card-reason-to-render.test.ts +182 -0
  121. package/telegram-plugin/tests/boot-card-reason.test.ts +65 -2
  122. package/telegram-plugin/tests/boot-card-render.test.ts +146 -0
  123. package/telegram-plugin/tests/boot-card-silent-on-operator.test.ts +103 -0
  124. package/telegram-plugin/tests/boot-probes.test.ts +216 -10
  125. package/telegram-plugin/tests/boot-version-string.test.ts +0 -0
  126. package/telegram-plugin/tests/finalize-callback.test.ts +190 -0
  127. package/telegram-plugin/tests/gateway-message-validator.test.ts +26 -0
  128. package/telegram-plugin/tests/gateway-secret-detect.test.ts +12 -3
  129. package/telegram-plugin/tests/gateway-startup-network-retry.test.ts +104 -0
  130. package/telegram-plugin/tests/history-reaper.test.ts +378 -0
  131. package/telegram-plugin/tests/hostd-dispatch.test.ts +129 -0
  132. package/telegram-plugin/tests/inbound-classifier.test.ts +76 -0
  133. package/telegram-plugin/tests/inbound-message-types.test.ts +267 -0
  134. package/telegram-plugin/tests/issues-card.test.ts +49 -0
  135. package/telegram-plugin/tests/pending-inbound-buffer.test.ts +132 -0
  136. package/telegram-plugin/tests/permission-rule.test.ts +80 -1
  137. package/telegram-plugin/tests/permission-title.test.ts +31 -0
  138. package/telegram-plugin/tests/quota-check.test.ts +5 -35
  139. package/telegram-plugin/tests/races.test.ts +179 -0
  140. package/telegram-plugin/tests/reaction-trigger-flow.test.ts +353 -0
  141. package/telegram-plugin/tests/reaction-trigger.test.ts +397 -0
  142. package/telegram-plugin/tests/retry-api-call.test.ts +152 -1
  143. package/telegram-plugin/tests/runtime-metrics.test.ts +145 -0
  144. package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +155 -0
  145. package/telegram-plugin/tests/secret-detect-delete-must-surface-failures.test.ts +133 -0
  146. package/telegram-plugin/tests/secret-detect-false-positives.test.ts +137 -0
  147. package/telegram-plugin/tests/silence-poke.test.ts +493 -0
  148. package/telegram-plugin/tests/silent-end.test.ts +206 -0
  149. package/telegram-plugin/tests/subagent-tracker-hooks.test.ts +107 -0
  150. package/telegram-plugin/tests/subagent-watcher-env-thresholds.test.ts +224 -0
  151. package/telegram-plugin/tests/subagent-watcher-stall-terminal.test.ts +316 -0
  152. package/telegram-plugin/tests/subagent-watcher.test.ts +263 -0
  153. package/telegram-plugin/tests/turn-signal-tracker.test.ts +81 -0
  154. package/telegram-plugin/tests/vault-approval-posture.test.ts +256 -0
  155. package/telegram-plugin/tests/vault-grant-auto-resume.test.ts +73 -0
  156. package/telegram-plugin/tests/vault-grant-inbound-builders.test.ts +226 -0
  157. package/telegram-plugin/tests/vault-grant-union.test.ts +130 -0
  158. package/telegram-plugin/tests/vault-key-regex-allows-slash.test.ts +140 -0
  159. package/telegram-plugin/tests/vault-posture-quarantine.test.ts +104 -0
  160. package/telegram-plugin/tests/vault-request-access-tool.test.ts +114 -0
  161. package/telegram-plugin/tests/vault-request-access-unlock-resume.test.ts +106 -0
  162. package/telegram-plugin/turn-signal-tracker.ts +100 -24
  163. package/telegram-plugin/uat/SETUP.md +210 -35
  164. package/telegram-plugin/uat/assertions.ts +264 -37
  165. package/telegram-plugin/uat/driver-info.ts +57 -0
  166. package/telegram-plugin/uat/driver.ts +590 -51
  167. package/telegram-plugin/uat/harness.ts +140 -94
  168. package/telegram-plugin/uat/load-env.test.ts +72 -0
  169. package/telegram-plugin/uat/load-env.ts +48 -0
  170. package/telegram-plugin/uat/login.ts +96 -53
  171. package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
  172. package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
  173. package/telegram-plugin/uat/runners/report.ts +150 -0
  174. package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
  175. package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
  176. package/telegram-plugin/uat/runners/scorer.ts +106 -0
  177. package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
  178. package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
  179. package/telegram-plugin/uat/scenarios/ask-user-button-tap-dm.test.ts +141 -0
  180. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +191 -0
  181. package/telegram-plugin/uat/scenarios/fuzz-extended-dm.test.ts +255 -0
  182. package/telegram-plugin/uat/scenarios/fuzz-human-style-dm.test.ts +275 -0
  183. package/telegram-plugin/uat/scenarios/fuzz-random-prompts-dm.test.ts +146 -0
  184. package/telegram-plugin/uat/scenarios/fuzz-status-ask-dm.test.ts +486 -0
  185. package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +67 -0
  186. package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +100 -0
  187. package/telegram-plugin/uat/scenarios/jtbd-soft-commit-dm.test.ts +67 -0
  188. package/telegram-plugin/uat/scenarios/jtbd-status-query-dm.test.ts +49 -0
  189. package/telegram-plugin/uat/scenarios/location-inbound-dm.test.ts +65 -0
  190. package/telegram-plugin/uat/scenarios/midturn-silent-dm.test.ts +175 -0
  191. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +142 -0
  192. package/telegram-plugin/uat/scenarios/reactions-trigger-turn-dm.test.ts +96 -0
  193. package/telegram-plugin/uat/scenarios/secret-redaction-deletes-original-dm.test.ts +123 -0
  194. package/telegram-plugin/uat/scenarios/secret-redaction-no-false-positive-dm.test.ts +87 -0
  195. package/telegram-plugin/uat/scenarios/silence-poke-soft-dm.test.ts +155 -0
  196. package/telegram-plugin/uat/scenarios/silent-end-recovery-dm.test.ts +95 -0
  197. package/telegram-plugin/uat/scenarios/smoke-dm-reply.test.ts +57 -0
  198. package/telegram-plugin/uat/scenarios/subagent-watcher-no-rerun-dm.test.ts +135 -0
  199. package/telegram-plugin/uat/scenarios/vault-approval-posture-telegram-id-dm.test.ts +191 -0
  200. package/telegram-plugin/uat/scenarios/vault-audit-allow-dm.test.ts +108 -0
  201. package/telegram-plugin/uat/scenarios/vault-grant-auto-resume-dm.test.ts +121 -0
  202. package/telegram-plugin/uat/scenarios/vault-request-access-concurrent-dm.test.ts +161 -0
  203. package/telegram-plugin/uat/scenarios/vault-request-access-end-to-end-dm.test.ts +158 -0
  204. package/telegram-plugin/uat/scenarios/voice-inbound-dm.test.ts +65 -0
  205. package/telegram-plugin/vault-approval-posture.ts +42 -0
  206. package/telegram-plugin/welcome-text.ts +1 -0
  207. package/telegram-plugin/active-pins-sweep.ts +0 -204
  208. package/telegram-plugin/active-pins.ts +0 -146
  209. package/telegram-plugin/auth-dashboard.ts +0 -1104
  210. package/telegram-plugin/auth-slot-parser.ts +0 -497
  211. package/telegram-plugin/card-event-log.ts +0 -138
  212. package/telegram-plugin/dist/foreman/foreman.js +0 -31106
  213. package/telegram-plugin/docs/multi-agent-card-design.md +0 -847
  214. package/telegram-plugin/docs/pinned-progress-card-reliability.md +0 -144
  215. package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
  216. package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
  217. package/telegram-plugin/foreman/foreman.ts +0 -1165
  218. package/telegram-plugin/foreman/setup-flow.ts +0 -345
  219. package/telegram-plugin/foreman/setup-state.ts +0 -239
  220. package/telegram-plugin/foreman/state.ts +0 -203
  221. package/telegram-plugin/pin-event-log.ts +0 -76
  222. package/telegram-plugin/progress-card-driver.ts +0 -2886
  223. package/telegram-plugin/progress-card-pin-manager.ts +0 -589
  224. package/telegram-plugin/progress-card-pin-watchdog.ts +0 -98
  225. package/telegram-plugin/progress-card.ts +0 -1409
  226. package/telegram-plugin/tests/HARNESS.md +0 -340
  227. package/telegram-plugin/tests/_progress-card-harness.ts +0 -109
  228. package/telegram-plugin/tests/active-pins-boot-reaper.test.ts +0 -211
  229. package/telegram-plugin/tests/active-pins-sweep.test.ts +0 -309
  230. package/telegram-plugin/tests/active-pins.test.ts +0 -187
  231. package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
  232. package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
  233. package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
  234. package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
  235. package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
  236. package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
  237. package/telegram-plugin/tests/bg-agent-progress-card-757.test.ts +0 -201
  238. package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
  239. package/telegram-plugin/tests/card-event-log.test.ts +0 -145
  240. package/telegram-plugin/tests/first-paint.test.ts +0 -257
  241. package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
  242. package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
  243. package/telegram-plugin/tests/foreman-state.test.ts +0 -164
  244. package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
  245. package/telegram-plugin/tests/harness-ordering-invariants.test.ts +0 -243
  246. package/telegram-plugin/tests/pin-event-log.test.ts +0 -124
  247. package/telegram-plugin/tests/progress-card-api-failure-during-deferred.test.ts +0 -73
  248. package/telegram-plugin/tests/progress-card-close-paths-converge.test.ts +0 -272
  249. package/telegram-plugin/tests/progress-card-cross-turn.test.ts +0 -258
  250. package/telegram-plugin/tests/progress-card-delay-842.test.ts +0 -160
  251. package/telegram-plugin/tests/progress-card-dispose-preservepending.test.ts +0 -81
  252. package/telegram-plugin/tests/progress-card-draft-flag.test.ts +0 -80
  253. package/telegram-plugin/tests/progress-card-driver-eviction.test.ts +0 -215
  254. package/telegram-plugin/tests/progress-card-driver-fleet-shadow.test.ts +0 -123
  255. package/telegram-plugin/tests/progress-card-driver-force-complete-parent-done.test.ts +0 -76
  256. package/telegram-plugin/tests/progress-card-edit-timestamps-budget.test.ts +0 -62
  257. package/telegram-plugin/tests/progress-card-memory-bounds.test.ts +0 -84
  258. package/telegram-plugin/tests/progress-card-pin-failure-paths.test.ts +0 -139
  259. package/telegram-plugin/tests/progress-card-pin-manager.test.ts +0 -773
  260. package/telegram-plugin/tests/progress-card-pin-race-fast-turn.test.ts +0 -66
  261. package/telegram-plugin/tests/progress-card-pin-sidecar-partial-write.test.ts +0 -64
  262. package/telegram-plugin/tests/progress-card-pin-watchdog.test.ts +0 -190
  263. package/telegram-plugin/tests/progress-card-sigterm-pin-flush.test.ts +0 -146
  264. package/telegram-plugin/tests/real-gateway-f1-ladder-integrity.test.ts +0 -123
  265. package/telegram-plugin/tests/real-gateway-f2-instant-draft.test.ts +0 -82
  266. package/telegram-plugin/tests/real-gateway-f3-late-card.test.ts +0 -114
  267. package/telegram-plugin/tests/real-gateway-harness.ts +0 -699
  268. package/telegram-plugin/tests/real-gateway-i6-turn-flush-replay-dedup.test.ts +0 -313
  269. package/telegram-plugin/tests/real-gateway-ipc-lifecycle.test.ts +0 -299
  270. package/telegram-plugin/tests/real-gateway-spec.test.ts +0 -487
  271. package/telegram-plugin/tests/real-gateway.smoke.test.ts +0 -101
  272. package/telegram-plugin/tests/setup-flow.test.ts +0 -510
  273. package/telegram-plugin/tests/setup-state.test.ts +0 -146
  274. package/telegram-plugin/tests/sync-chat-running-subagents.test.ts +0 -116
  275. package/telegram-plugin/tests/turn-end-regressions.test.ts +0 -489
  276. package/telegram-plugin/tests/turn-flush-card-takeover.test.ts +0 -218
  277. package/telegram-plugin/tests/turn-flush-prose-recovery.test.ts +0 -78
  278. package/telegram-plugin/tests/two-zone-bg-carry-full-lifecycle.test.ts +0 -131
  279. package/telegram-plugin/tests/two-zone-bg-detection.test.ts +0 -120
  280. package/telegram-plugin/tests/two-zone-bg-done-when-all-terminal.test.ts +0 -116
  281. package/telegram-plugin/tests/two-zone-bg-early-turn-end.test.ts +0 -87
  282. package/telegram-plugin/tests/two-zone-bg-survives-next-turn.test.ts +0 -211
  283. package/telegram-plugin/tests/two-zone-card-cap.test.ts +0 -62
  284. package/telegram-plugin/tests/two-zone-card-fleet-row.test.ts +0 -101
  285. package/telegram-plugin/tests/two-zone-card-header-phases.test.ts +0 -78
  286. package/telegram-plugin/tests/two-zone-card-html-balance.test.ts +0 -110
  287. package/telegram-plugin/tests/two-zone-card-lifecycle.test.ts +0 -128
  288. package/telegram-plugin/tests/two-zone-card-sanitise.test.ts +0 -58
  289. package/telegram-plugin/tests/two-zone-card-snapshot.test.ts +0 -133
  290. package/telegram-plugin/tests/two-zone-concurrent-turns-isolation.test.ts +0 -155
  291. package/telegram-plugin/tests/two-zone-phasefor-precedence.test.ts +0 -117
  292. package/telegram-plugin/tests/two-zone-snapshot-extras.test.ts +0 -187
  293. package/telegram-plugin/tests/two-zone-stuck-edit-throttle.test.ts +0 -149
  294. package/telegram-plugin/tests/two-zone-stuck-header-escalation.test.ts +0 -101
  295. package/telegram-plugin/tests/two-zone-stuck-per-member.test.ts +0 -114
  296. package/telegram-plugin/tests/two-zone-stuck-recovery.test.ts +0 -105
  297. package/telegram-plugin/tests/waiting-ux-harness.ts +0 -381
  298. package/telegram-plugin/tests/waiting-ux.e2e.test.ts +0 -233
  299. package/telegram-plugin/turn-flush-prose-recovery.ts +0 -40
  300. package/telegram-plugin/two-zone-card.ts +0 -269
  301. package/telegram-plugin/uat/scenarios/smoke-clerk-reply.test.ts +0 -61
@@ -1,144 +0,0 @@
1
- # Pinned progress card — reliability spec
2
-
3
- Status: **documenting existing system + closing gaps.** The pin/unpin machinery already exists (see §3). This spec formalizes the invariants it must hold, enumerates failure modes, specifies the test matrix, and lists the residual gaps that still need closing to hit "insanely reliable UX."
4
-
5
- ## 1. Goal
6
-
7
- When the agent is working on a turn, the user always sees a **single, live-updating, pinned** status message for that turn. When the turn ends, that message is either marked `✅ Done` and unpinned, or (for very fast turns) never shown at all. No stale pins, no orphan pins, no duplicate pins, no silent failures.
8
-
9
- "Insanely reliable" = every one of the invariants in §4 holds under crash, kill, restart, rate-limit, race, and parallel-turn conditions, with automated tests covering each.
10
-
11
- ## 2. Non-goals
12
-
13
- - Per-tool progress granularity beyond what the event-driven card already renders.
14
- - Pinning arbitrary bot messages (only the per-turn progress card).
15
- - Fighting the user for pin real-estate: a user-pinned message is always a barrier (see I8).
16
- - Pinning in group chats with many admins (Telegram restricts `pinChatMessage` to bots with `can_pin_messages`; failure is logged and swallowed, card still updates in place).
17
-
18
- ## 3. Existing implementation (ground truth)
19
-
20
- Files and their load-bearing roles:
21
-
22
- | File | Role |
23
- |---|---|
24
- | `progress-card.ts` | Pure reducer + renderer. Turn-scoped state; event → HTML. |
25
- | `progress-card-driver.ts` | Cadence controller. Coalesce + min-interval + heartbeat + zombie ceiling. Fires `emit` with `isFirstEmit` flag and `onTurnComplete` with `turnKey`. |
26
- | `active-pins.ts` | Sidecar (`$AGENT_DIR/.active-pins.json`) — add/remove/read/write. Atomic rename on write. Shape-validated reads. |
27
- | `active-pins-sweep.ts` | Two sweeps: (a) `sweepActivePins` drains the sidecar and unpins each; (b) `sweepBotAuthoredPins` walks `getChat().pinned_message` and unpins anything authored by our bot, stopping at the first user-authored pin. |
28
- | `server.ts` (streamMode='checklist' block) | Wires driver → Telegram API. Owns `progressPinnedMsgIds`, `unpinnedTurnKeys`, and the idempotent `unpinProgressCard` closure. Boot-time + pre-restart sweeps wired. |
29
-
30
- Current lifecycle:
31
-
32
- ```
33
- enqueue / startTurn
34
- └─► driver allocates turnKey (chatId:threadId:seq)
35
- └─► render(state) → emit(isFirstEmit=true)
36
- └─► handleStreamReply creates message → messageId
37
- ├─► addActivePin(sidecar) ← WRITE BEFORE API CALL
38
- └─► pinChatMessage(disable_notification=true)
39
- └─► on failure: removeActivePin (roll back)
40
-
41
- ...live edits via coalesced flush (400ms) + heartbeat (5s)...
42
-
43
- turn_end OR stream_reply(done=true) OR reply(final=true)
44
- └─► unpinProgressCard(turnKey) — idempotent via unpinnedTurnKeys
45
- └─► unpinChatMessage
46
- └─► finally: removeActivePin (regardless of outcome)
47
- ```
48
-
49
- Crash / kill paths:
50
- - `SIGKILL` mid-turn → sidecar retains entry → next boot's `sweepActivePins` unpins.
51
- - `/restart`, `/update`, `/reconcile --restart` → proactive pre-SIGTERM sweep (see server.ts:2841).
52
- - Sidecar lost but pin still on Telegram → next boot's `sweepBotAuthoredPins` walks `getChat` and removes bot-authored pins until a user-authored pin acts as barrier.
53
-
54
- ## 4. Invariants
55
-
56
- All must be tested (see §7). `I*` numbers are referenced elsewhere in this spec.
57
-
58
- | ID | Invariant |
59
- |---|---|
60
- | **I1** | Every `pinChatMessage` call is preceded by a successful `addActivePin` sidecar write. |
61
- | **I2** | Every successful pin produces exactly one `unpinChatMessage` call over the card's lifetime (across in-session unpin + restart sweep). |
62
- | **I3** | `unpinProgressCard(turnKey, …)` is idempotent — first call fires the API, all subsequent calls for the same `turnKey` are no-ops. |
63
- | **I4** | On process start, any sidecar entry from a prior session is consumed (attempt unpin, then clear) before new traffic is accepted. |
64
- | **I5** | The final render of the card (before unpin) shows `stage === 'done'` → `✅ Done` header. |
65
- | **I6** | Turns that complete faster than `initialDelayMs` (default 30s) produce **no** pin and **no** card — suppressed, not deferred-then-cancelled. |
66
- | **I7** | Parallel active turns on the same `(chatId, threadId)` each have independent `turnKey`, `pin`, `unpin`, and `sidecar` entries. The second `enqueue` force-closes the first (including its unpin) before creating the new pin. |
67
- | **I8** | `sweepBotAuthoredPins` stops at the first non-bot pinned message for a chat — never unpins a user's pin. |
68
- | **I9** | Zombie ceiling: a card whose `lastEventAt` is older than `maxIdleMs` (5 min) is force-closed via the same path as `turn_end` — unpin + `onTurnComplete` + state delete. |
69
- | **I10** | `pinChatMessage` failure does not leave the sidecar polluted — `removeActivePin` is called in the failure branch. |
70
- | **I11** | Completion notification (`✅ Done — <summary>` top-level message) only sent in forum-topic turns (`threadId != null`); never in plain DMs. |
71
-
72
- ## 5. Failure modes & mitigations
73
-
74
- | # | Failure mode | Current mitigation | Residual gap |
75
- |---|---|---|---|
76
- | F1 | SIGKILL between `addActivePin` and `pinChatMessage` | Boot-time `sweepActivePins` drains the stale sidecar entry (no pin exists on Telegram, so the unpin attempt is a harmless no-op) | None. |
77
- | F2 | SIGKILL between `pinChatMessage` success and `turn_end` | Boot-time `sweepActivePins` | None. |
78
- | F3 | SIGKILL while `unpinChatMessage` is in flight, before `removeActivePin` runs | Next boot re-attempts unpin; Telegram unpin is idempotent (400 is harmless) | **Gap:** no structured telemetry distinguishes "real stale pin swept" from "redundant sweep on already-unpinned message" — both log a generic failure. |
79
- | F4 | `pinChatMessage` 429 (rate limit) | Error logged + sidecar rolled back; card continues to live-update in place (unpinned) | **Gap:** no retry with backoff. User silently loses the pin for that turn. Should at least surface status-reaction signal differently. |
80
- | F5 | `unpinChatMessage` 429 | Error logged + sidecar cleared via `.finally()`; next boot will see stale pin via `sweepBotAuthoredPins` | **Gap:** mid-session stale pin persists until next restart. Consider best-effort retry (1 retry after 1s) before giving up. |
81
- | F6 | Bot lacks `can_pin_messages` in group | Error logged and swallowed; card still live-updates inline | None — graceful degradation. |
82
- | F7 | Session restart while turn still live | Pre-restart sweep unpins; new process's startup sweep is redundant but harmless; the resumed turn creates a fresh pin via the `--continue` path | **Gap:** user sees "Done → unpin → new pin" flicker during ~1–3s restart. Consider deferring pre-restart unpin until the new process confirms it has taken over (out of scope for this spec; needs handoff-protocol work). |
83
- | F8 | Two parallel turns on same `chatId:threadId` | `turnKey` allocator + `isSync` guards in enqueue handler | None (tests cover). |
84
- | F9 | Duplicate enqueue echoes from session-tail (JSONL rotation, reconnect) | `seenEnqueueMsgIds` 60s dedup + `pendingSyncEchoes` sync marker | None — well tested. |
85
- | F10 | Heartbeat keeps ticking a card whose `turn_end` was dropped | `maxIdleMs` zombie ceiling (5 min) force-closes | **Gap:** 5 min is long. Surface a warning in the card header after ~2 min of no events: `⚠️ No events for 2m — likely stuck.` |
86
- | F11 | User manually unpins the card mid-turn | The card is not re-pinned — `pinChatMessage` is one-shot per turn; the sidecar holds a stale entry until `onTurnComplete` fires `removeActivePin` after a harmless `unpinChatMessage` 400 | **Gap:** the card is no longer pinned, but the user has no visual indication that it is still live. Low priority — if they unpinned it, they chose to. |
87
- | F12 | Two bots in the chat both managing pins | `sweepBotAuthoredPins` filters by `botUserId` | None. |
88
- | F13 | `getChat().pinned_message` returns only the top pin, so a stack of bot pins requires iteration | `sweepBotAuthoredPins` loops up to `maxPerChat=32` | None. |
89
-
90
- ## 6. Observability requirements
91
-
92
- Current state: `process.stderr.write` lines for pin/unpin failure. That's insufficient for "insanely reliable."
93
-
94
- Required:
95
-
96
- 1. **Structured log event per pin/unpin**, one line JSON on stderr with prefix `pin-event:`. Fields: `event` (`pin|unpin|sweep-pin|sweep-auth`), `chatId`, `messageId`, `turnKey`, `outcome` (`ok|fail|rate-limited|forbidden`), `error?`, `durationMs`.
97
- 2. **`/pins-status` admin command** (or extend `/status`): report current sidecar entries + in-memory `progressPinnedMsgIds` + any divergence.
98
- 3. **Weekly self-audit** (or on boot): call `sweepBotAuthoredPins` in read-only mode across allowlisted chats and report count of bot pins not tracked in sidecar. Alarm if > 0 after a steady-state period.
99
- 4. **Metric: pin-to-first-edit latency** — time from `pinChatMessage` returning to the first subsequent `editMessageText`. Should stay under ~1s; breach indicates rate-limit pressure.
100
- 5. **Metric: orphan sweep frequency** — count of pins cleaned up by startup / bot-authored sweep per boot. Steady-state should be 0.
101
-
102
- ## 7. Test matrix
103
-
104
- Existing tests to keep (enumerate and reference in CI):
105
- - `active-pins.test.ts` — sidecar add/remove/read/write/idempotency/corruption.
106
- - `active-pins-sweep.test.ts` — timeout bounds, barrier semantics, max-per-chat loop.
107
- - `progress-card.test.ts` — reducer covers all `turn_end` paths, renderer produces `✅ Done`.
108
- - `progress-card-driver.test.ts` — `isFirstEmit` fires exactly once, `onTurnComplete` fires exactly once, `initialDelayMs` suppression, parallel-turn force-close.
109
-
110
- New tests required for this spec:
111
-
112
- | ID | Test | Covers |
113
- |---|---|---|
114
- | T1 | Integration: simulate pin API failure → assert `removeActivePin` called and no stale sidecar entry | I10 |
115
- | T2 | Integration: simulate unpin API failure → assert sidecar cleared in `.finally()`, assert next-boot sweep picks up Telegram-side stale pin via `sweepBotAuthoredPins` | F3, F5 |
116
- | T3 | Unit: `sweepBotAuthoredPins` stops on first user-authored pin (barrier) | I8 |
117
- | T4 | Integration: two parallel `startTurn` calls on same `chatId:threadId` → two distinct `turnKey`s, two pins, two unpins, no orphan sidecar entries at end | I7 |
118
- | T5 | Integration: `turn_end` before `initialDelayMs` → zero emits, zero pins, sidecar untouched | I6 |
119
- | T6 | Integration: heartbeat ticks 2 min past last event → header shows stuck-warning; 5 min → zombie close fires unpin | F10, I9 |
120
- | T7 | Integration: boot with non-empty sidecar → sweep runs before first inbound message is processed | I4 |
121
- | T8 | Integration: rate-limit simulation — 20 rapid turns → each gets pin + unpin, no 429 is visibly surfaced to the user; the degraded path logs a structured event | F4, F5, §6.1 |
122
- | T9 | Structured log assertion: every pin/unpin emits exactly one `pin-event:` JSON line with all required fields | §6.1 |
123
- | T10 | Self-audit: boot-time read-only sweep reports 0 orphan pins on a clean chat | §6.3 |
124
-
125
- ## 8. Implementation plan (gap-closing)
126
-
127
- Order of work, smallest-first:
128
-
129
- 1. **Structured pin-event logging** (~30 LOC in `server.ts` + one helper) — closes §6.1, enables T9.
130
- 2. **T1–T5 tests** — no production code changes; these just formalize existing behavior.
131
- 3. **Stuck-warning in card header** (~15 LOC in `progress-card.ts` renderer + driver signal) — closes the F10 gap (the ~2 min stuck-warning tier; the 5 min zombie ceiling already exists).
132
- 4. **Unpin retry (single attempt, 1s backoff)** (~20 LOC in `unpinProgressCard`) — closes F5.
133
- 5. **`/pins-status` admin command** (~40 LOC) — closes §6.2.
134
- 6. **Boot-time read-only audit + metric** (~30 LOC) — closes §6.3–6.5.
135
- 7. **T6–T10 tests.**
136
-
137
- Total estimate: ~150 LOC production + ~400 LOC tests. No schema changes. No config migration. Backwards compatible with existing sidecars.
138
-
139
- ## 9. Out of scope (future work)
140
-
141
- - Handoff-protocol for restart flicker (F7): needs new-process-confirms-takeover handshake. Large.
142
- - Multi-pin stacking UX (one pin per sub-agent task): current model is one pin per parent turn; changing it requires reworking `turnKey` allocation.
143
- - Pinning arbitrary user-selected bot messages.
144
- - Fallback to a "sticky last message" non-pin display when `can_pin_messages` is absent.
@@ -1,202 +0,0 @@
1
- /**
2
- * Pure create-agent flow state machine — extracted from foreman.ts for
3
- * testability. No grammY imports, no SQLite imports, no side effects.
4
- *
5
- * Each function takes current state + input and returns an Action
6
- * (what the foreman should do next). foreman.ts interprets actions
7
- * by calling the actual SQLite / grammY / orchestrator APIs.
8
- *
9
- * Steps:
10
- * start → asked-name (when no name given)
11
- * → asked-profile (when name provided inline)
12
- * asked-name + text → asked-profile (if valid name)
13
- * asked-profile + text → asked-bot-token (if valid profile)
14
- * asked-bot-token + text → asked-oauth-code (after createAgent())
15
- * asked-oauth-code + text → done (after completeCreation())
16
- */
17
-
18
- import type { CreateFlowState, CreateFlowStep } from './state.js'
19
-
20
- // ─── Action types ────────────────────────────────────────────────────────
21
-
22
/**
 * Instruction returned by the flow functions in this module. Each variant
 * names one thing the caller should do next and carries the data needed
 * to do it; nothing here performs the work itself.
 */
export type CreateFlowAction =
  // No name was supplied: prompt the user for one.
  | { kind: 'ask-name' }
  // A valid name is known: prompt for one of the listed profiles.
  | { kind: 'ask-profile'; profiles: string[] }
  // Name and profile are known: prompt for the bot token.
  | { kind: 'ask-bot-token'; name: string; profile: string }
  // The token passed the basic shape check: the caller should create the agent.
  | { kind: 'call-create-agent'; name: string; profile: string; botToken: string }
  // NOTE(review): not produced by any function in this module — presumably
  // emitted by the caller once agent creation yields a login URL; confirm.
  | { kind: 'ask-oauth-code'; loginUrl: string; name: string }
  // A code of acceptable length was received: the caller should complete creation.
  | { kind: 'call-complete-creation'; name: string; code: string }
  // NOTE(review): not produced by any function in this module — presumably
  // emitted by the caller after creation completes; confirm.
  | { kind: 'done'; name: string; botUsername: string | null }
  // Input was rejected. `stayInStep` is true when the user is asked to retry
  // the same step, false when the flow is rejected at entry.
  | { kind: 'error'; message: string; stayInStep: boolean }
  // The flow cannot continue; `reason` is a short machine-readable tag.
  | { kind: 'cancel'; reason: string }
32
-
33
- // ─── Name validation (mirrors assertSafeAgentName) ───────────────────────
34
-
35
/**
 * Report whether `name` is an acceptable agent name.
 *
 * Accepts 1–51 characters: the first must be a lowercase ASCII letter or a
 * digit, the remainder may also include `_` and `-`.
 *
 * @param name Candidate agent name, checked exactly as given (no trimming).
 * @returns `true` when the whole string matches the pattern.
 */
export function isValidAgentName(name: string): boolean {
  const agentNamePattern = /^[a-z0-9][a-z0-9_-]{0,50}$/
  return agentNamePattern.test(name)
}
38
-
39
- // ─── Flow entry point ────────────────────────────────────────────────────
40
-
41
/**
 * Start or resume a /create-agent flow.
 *
 * A rejected name is HTML-escaped before it is echoed back in the error
 * message: messages in this module carry HTML markup (e.g. `<code>`), so a
 * raw `<`, `>` or `&` typed by the user would otherwise be read as markup.
 *
 * @param inlineName Optional name from the command args (/create-agent gymbro)
 * @param profiles   Available profile names (from listAvailableProfiles())
 * @returns `ask-name` when no name was given; `error` with
 *          `stayInStep: false` when the name is invalid; otherwise
 *          `ask-profile` carrying `profiles`.
 */
export function startCreateFlow(
  inlineName: string | null,
  profiles: string[],
): CreateFlowAction {
  // Both null and the empty string mean "no name supplied".
  if (!inlineName) {
    return { kind: 'ask-name' }
  }

  if (!isValidAgentName(inlineName)) {
    // `&` must be replaced first so the entities produced for `<` / `>`
    // are not escaped a second time.
    const shownName = inlineName
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
    return {
      kind: 'error',
      message: `"${shownName}" is not a valid agent name. Names must be lowercase, alphanumeric, hyphens or underscores, max 51 chars.`,
      stayInStep: false,
    }
  }

  return { kind: 'ask-profile', profiles }
}
66
-
67
- // ─── Step transition: handle inbound text for current step ───────────────
68
-
69
/** Everything `handleFlowText` needs to decide the next action for one inbound message. */
export interface StepTransitionInput {
  /** Current persisted state (or null if no state yet). */
  state: CreateFlowState | null
  /** The text the user sent. */
  text: string
  /** Available profiles (for profile validation). */
  profiles: string[]
}
77
-
78
/** Escape `&`, `<` and `>` so user-supplied text is not read as HTML markup. */
function escapeHtmlText(raw: string): string {
  // `&` goes first so the entities produced for `<` / `>` are not re-escaped.
  return raw.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;')
}

/**
 * Given the current state and user text, compute the next action.
 * The caller (foreman.ts) is responsible for persisting state changes
 * and executing the returned action.
 *
 * The user's text is trimmed before validation. Wherever it is echoed back
 * in an error message it is HTML-escaped, because messages in this module
 * carry HTML markup (see the bot-token hint below).
 *
 * @param input Current flow state, the inbound text, and the known profiles.
 * @returns The action for the current step: the next prompt or call on
 *          valid input, `error` with `stayInStep: true` on invalid input,
 *          or `cancel` when there is no flow, the flow is already done, or
 *          the persisted state is missing fields the step depends on.
 */
export function handleFlowText(input: StepTransitionInput): CreateFlowAction {
  const { state, text, profiles } = input
  const trimmed = text.trim()

  if (!state) {
    // No active flow — ignore
    return { kind: 'cancel', reason: 'no-active-flow' }
  }

  switch (state.step) {
    case 'asked-name': {
      if (!isValidAgentName(trimmed)) {
        return {
          kind: 'error',
          message: `"${escapeHtmlText(trimmed)}" is not a valid agent name. Names must be lowercase, alphanumeric, hyphens or underscores, max 51 chars. Try again:`,
          stayInStep: true,
        }
      }
      return { kind: 'ask-profile', profiles }
    }

    case 'asked-profile': {
      if (!profiles.includes(trimmed)) {
        return {
          kind: 'error',
          message: `Unknown profile "${escapeHtmlText(trimmed)}". Choose one of: ${profiles.join(', ')}`,
          stayInStep: true,
        }
      }
      // Pre-#28 fix this fell back to `trimmed` (the profile name)
      // when state.name was missing — silently treating the profile
      // as the agent name. Now we cancel with missing-name instead,
      // matching the asked-bot-token step's behaviour on corrupt
      // state. The fallback wasn't exploitable (assertSafeAgentName
      // catches it downstream), but it's semantically wrong.
      if (!state.name) {
        return { kind: 'cancel', reason: 'missing-name' }
      }
      return { kind: 'ask-bot-token', name: state.name, profile: trimmed }
    }

    case 'asked-bot-token': {
      const name = state.name ?? ''
      const profile = state.profile ?? ''
      // Corrupt state: this step cannot proceed without both fields.
      if (!name || !profile) {
        return { kind: 'cancel', reason: 'missing-name-or-profile' }
      }
      // Basic bot token shape check (foreman.ts validates via Telegram API)
      if (!trimmed.includes(':') || trimmed.length < 20) {
        return {
          kind: 'error',
          message: "That doesn't look like a BotFather token. It should be in the form <code>1234567890:AAH...</code> — try again:",
          stayInStep: true,
        }
      }
      return { kind: 'call-create-agent', name, profile, botToken: trimmed }
    }

    case 'asked-oauth-code': {
      const name = state.name ?? ''
      if (!name) return { kind: 'cancel', reason: 'missing-name' }
      // Codes are typically 8+ alphanumeric chars; pass through for server validation
      if (trimmed.length < 4) {
        return {
          kind: 'error',
          message: 'That code looks too short. Paste the full code from the browser:',
          stayInStep: true,
        }
      }
      return { kind: 'call-complete-creation', name, code: trimmed }
    }

    case 'done':
      return { kind: 'cancel', reason: 'flow-already-done' }

    default: {
      // Compile-time exhaustiveness check; at runtime an unexpected step
      // value still yields a cancel rather than throwing.
      const _exhaustive: never = state.step
      return { kind: 'cancel', reason: `unknown-step:${_exhaustive}` }
    }
  }
}
164
-
165
- // ─── State factory helpers (for foreman.ts to build new state objects) ───
166
-
167
/**
 * Build the first state object for a chat's create-agent flow.
 *
 * @param chatId Chat the flow belongs to.
 * @param name   Agent name if one is already known, otherwise null.
 * @returns A state whose step is `asked-profile` when a name is present and
 *          `asked-name` otherwise, with every other collected field null and
 *          both timestamps set to the same `Date.now()` reading.
 */
export function makeInitialState(chatId: string, name: string | null): CreateFlowState {
  const createdAt = Date.now()
  // A name supplied up front means the name prompt is skipped.
  const step = name ? 'asked-profile' : 'asked-name'

  const fresh: CreateFlowState = {
    chatId,
    step,
    name,
    profile: null,
    botToken: null,
    authSessionName: null,
    loginUrl: null,
    startedAt: createdAt,
    updatedAt: createdAt,
  }
  return fresh
}
181
-
182
/**
 * Produce the next state by layering `updates` over `state`.
 *
 * The input object is not modified. `updatedAt` is always set to the current
 * `Date.now()`, overriding any `updatedAt` present in `updates`.
 *
 * @param state   State to derive from.
 * @param updates Fields to overwrite; `chatId` and `startedAt` are excluded by type.
 * @returns A new state object.
 */
export function advanceState(
  state: CreateFlowState,
  updates: Partial<Omit<CreateFlowState, 'chatId' | 'startedAt'>>,
): CreateFlowState {
  const merged = { ...state, ...updates }
  // Stamp last so the fresh timestamp wins over anything in `updates`.
  return { ...merged, updatedAt: Date.now() }
}
192
-
193
/** Human-readable label for each flow step, keyed by step id. */
const STEP_LABELS: Readonly<Record<CreateFlowStep, string>> = {
  'asked-name': 'waiting for agent name',
  'asked-profile': 'waiting for profile selection',
  'asked-bot-token': 'waiting for BotFather token',
  'asked-oauth-code': 'waiting for OAuth code',
  'done': 'done',
}

/**
 * Look up the human-readable label for a flow step (used in recovery messages).
 *
 * @param step Step to describe.
 * @returns The label text for that step.
 */
export function stepLabel(step: CreateFlowStep): string {
  return STEP_LABELS[step]
}
202
- }