switchroom 0.7.15 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (301) hide show
  1. package/README.md +51 -59
  2. package/bin/run-hook.sh +27 -11
  3. package/bin/timezone-hook.sh +9 -7
  4. package/dist/agent-scheduler/index.js +410 -133
  5. package/dist/auth-broker/index.js +13932 -0
  6. package/dist/cli/switchroom.js +26937 -5601
  7. package/dist/host-control/main.js +12702 -0
  8. package/dist/vault/approvals/kernel-server.js +467 -184
  9. package/dist/vault/broker/server.js +1430 -724
  10. package/examples/minimal.yaml +63 -0
  11. package/examples/personal-google-workspace-mcp/.env.example +34 -0
  12. package/examples/personal-google-workspace-mcp/README.md +194 -0
  13. package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
  14. package/examples/switchroom.yaml +220 -0
  15. package/package.json +7 -4
  16. package/profiles/_base/settings.json.hbs +20 -5
  17. package/profiles/_base/start.sh.hbs +16 -3
  18. package/profiles/_shared/agent-self-service.md.hbs +126 -0
  19. package/profiles/_shared/telegram-style.md.hbs +20 -90
  20. package/profiles/_shared/vault-protocol.md.hbs +68 -0
  21. package/profiles/default/CLAUDE.md +50 -96
  22. package/profiles/default/CLAUDE.md.hbs +36 -6
  23. package/profiles/default/workspace/SOUL.md.hbs +12 -5
  24. package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
  25. package/skills/buildkite-agent-runtime/SKILL.md +44 -11
  26. package/skills/buildkite-api/SKILL.md +31 -8
  27. package/skills/buildkite-cli/SKILL.md +27 -9
  28. package/skills/buildkite-migration/SKILL.md +22 -9
  29. package/skills/buildkite-pipelines/SKILL.md +26 -9
  30. package/skills/buildkite-secure-delivery/SKILL.md +23 -9
  31. package/skills/buildkite-test-engine/SKILL.md +25 -8
  32. package/skills/docx/SKILL.md +1 -1
  33. package/skills/docx/scripts/office/validators/__pycache__/__init__.cpython-313.pyc +0 -0
  34. package/skills/docx/scripts/office/validators/__pycache__/base.cpython-313.pyc +0 -0
  35. package/skills/file-bug/SKILL.md +34 -6
  36. package/skills/humanizer/SKILL.md +15 -0
  37. package/skills/humanizer-calibrate/SKILL.md +7 -1
  38. package/skills/mcp-builder/SKILL.md +1 -1
  39. package/skills/pdf/SKILL.md +1 -1
  40. package/skills/pptx/SKILL.md +1 -1
  41. package/skills/skill-creator/SKILL.md +21 -1
  42. package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
  43. package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
  44. package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
  45. package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
  46. package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
  47. package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
  48. package/skills/switchroom-cli/SKILL.md +63 -64
  49. package/skills/switchroom-health/SKILL.md +23 -10
  50. package/skills/switchroom-install/SKILL.md +3 -3
  51. package/skills/switchroom-manage/SKILL.md +26 -19
  52. package/skills/switchroom-runtime/SKILL.md +191 -0
  53. package/skills/switchroom-status/SKILL.md +27 -2
  54. package/skills/telegram-test-harness/SKILL.md +3 -0
  55. package/skills/token-helpers/SKILL.md +24 -1
  56. package/skills/webapp-testing/SKILL.md +31 -1
  57. package/skills/xlsx/SKILL.md +1 -1
  58. package/telegram-plugin/admin-commands/index.ts +7 -5
  59. package/telegram-plugin/analytics-posthog.ts +191 -0
  60. package/telegram-plugin/bridge/bridge.ts +69 -0
  61. package/telegram-plugin/bridge/ipc-client.ts +4 -1
  62. package/telegram-plugin/dist/bridge/bridge.js +194 -119
  63. package/telegram-plugin/dist/gateway/gateway.js +23611 -19671
  64. package/telegram-plugin/dist/server.js +245 -189
  65. package/telegram-plugin/first-paint.ts +3 -24
  66. package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
  67. package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
  68. package/telegram-plugin/gateway/auth-command.ts +794 -0
  69. package/telegram-plugin/gateway/auth-line.ts +123 -0
  70. package/telegram-plugin/gateway/boot-card.ts +169 -40
  71. package/telegram-plugin/gateway/boot-issue-cache.ts +308 -0
  72. package/telegram-plugin/gateway/boot-probes.ts +166 -123
  73. package/telegram-plugin/gateway/boot-reason.ts +41 -7
  74. package/telegram-plugin/gateway/boot-version.ts +66 -0
  75. package/telegram-plugin/gateway/gateway.ts +3499 -1885
  76. package/telegram-plugin/gateway/hostd-dispatch.ts +117 -0
  77. package/telegram-plugin/gateway/ipc-protocol.ts +18 -0
  78. package/telegram-plugin/gateway/pending-inbound-buffer.ts +106 -0
  79. package/telegram-plugin/gateway/quarantine.ts +69 -0
  80. package/telegram-plugin/gateway/quota-cache.ts +9 -4
  81. package/telegram-plugin/gateway/reaction-trigger.ts +401 -0
  82. package/telegram-plugin/gateway/recent-denials.test.ts +103 -0
  83. package/telegram-plugin/gateway/recent-denials.ts +77 -0
  84. package/telegram-plugin/gateway/startup-network-retry.ts +109 -31
  85. package/telegram-plugin/gateway/vault-grant-inbound-builders.ts +125 -0
  86. package/telegram-plugin/history.ts +91 -0
  87. package/telegram-plugin/hooks/hooks.json +10 -0
  88. package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +130 -0
  89. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +19 -2
  90. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +22 -2
  91. package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
  92. package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
  93. package/telegram-plugin/inbound-classifier.ts +50 -0
  94. package/telegram-plugin/inline-keyboard-callbacks.ts +136 -0
  95. package/telegram-plugin/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json +1 -0
  96. package/telegram-plugin/package.json +4 -2
  97. package/telegram-plugin/permission-rule.ts +51 -0
  98. package/telegram-plugin/permission-title.ts +56 -0
  99. package/telegram-plugin/quota-check.ts +19 -41
  100. package/telegram-plugin/registry/reaper.ts +223 -0
  101. package/telegram-plugin/retry-api-call.ts +80 -0
  102. package/telegram-plugin/runtime-metrics.ts +177 -0
  103. package/telegram-plugin/scripts/build.mjs +0 -1
  104. package/telegram-plugin/secret-detect/index.ts +24 -0
  105. package/telegram-plugin/secret-detect/vault-error.test.ts +64 -12
  106. package/telegram-plugin/secret-detect/vault-error.ts +78 -11
  107. package/telegram-plugin/secret-detect/vault-write.ts +14 -2
  108. package/telegram-plugin/server.js +41795 -0
  109. package/telegram-plugin/session-tail.ts +6 -1
  110. package/telegram-plugin/shared/bot-runtime.ts +5 -4
  111. package/telegram-plugin/silence-poke.ts +420 -0
  112. package/telegram-plugin/silent-end.ts +174 -0
  113. package/telegram-plugin/stream-controller.ts +13 -0
  114. package/telegram-plugin/stream-reply-handler.ts +7 -0
  115. package/telegram-plugin/subagent-watcher.ts +213 -4
  116. package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
  117. package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
  118. package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
  119. package/telegram-plugin/tests/boot-card-issue-dedup.test.ts +247 -0
  120. package/telegram-plugin/tests/boot-card-reason-to-render.test.ts +182 -0
  121. package/telegram-plugin/tests/boot-card-reason.test.ts +65 -2
  122. package/telegram-plugin/tests/boot-card-render.test.ts +146 -0
  123. package/telegram-plugin/tests/boot-card-silent-on-operator.test.ts +103 -0
  124. package/telegram-plugin/tests/boot-probes.test.ts +216 -10
  125. package/telegram-plugin/tests/boot-version-string.test.ts +0 -0
  126. package/telegram-plugin/tests/finalize-callback.test.ts +190 -0
  127. package/telegram-plugin/tests/gateway-message-validator.test.ts +26 -0
  128. package/telegram-plugin/tests/gateway-secret-detect.test.ts +12 -3
  129. package/telegram-plugin/tests/gateway-startup-network-retry.test.ts +104 -0
  130. package/telegram-plugin/tests/history-reaper.test.ts +378 -0
  131. package/telegram-plugin/tests/hostd-dispatch.test.ts +129 -0
  132. package/telegram-plugin/tests/inbound-classifier.test.ts +76 -0
  133. package/telegram-plugin/tests/inbound-message-types.test.ts +267 -0
  134. package/telegram-plugin/tests/issues-card.test.ts +49 -0
  135. package/telegram-plugin/tests/pending-inbound-buffer.test.ts +132 -0
  136. package/telegram-plugin/tests/permission-rule.test.ts +80 -1
  137. package/telegram-plugin/tests/permission-title.test.ts +31 -0
  138. package/telegram-plugin/tests/quota-check.test.ts +5 -35
  139. package/telegram-plugin/tests/races.test.ts +179 -0
  140. package/telegram-plugin/tests/reaction-trigger-flow.test.ts +353 -0
  141. package/telegram-plugin/tests/reaction-trigger.test.ts +397 -0
  142. package/telegram-plugin/tests/retry-api-call.test.ts +152 -1
  143. package/telegram-plugin/tests/runtime-metrics.test.ts +145 -0
  144. package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +155 -0
  145. package/telegram-plugin/tests/secret-detect-delete-must-surface-failures.test.ts +133 -0
  146. package/telegram-plugin/tests/secret-detect-false-positives.test.ts +137 -0
  147. package/telegram-plugin/tests/silence-poke.test.ts +493 -0
  148. package/telegram-plugin/tests/silent-end.test.ts +206 -0
  149. package/telegram-plugin/tests/subagent-tracker-hooks.test.ts +107 -0
  150. package/telegram-plugin/tests/subagent-watcher-env-thresholds.test.ts +224 -0
  151. package/telegram-plugin/tests/subagent-watcher-stall-terminal.test.ts +316 -0
  152. package/telegram-plugin/tests/subagent-watcher.test.ts +263 -0
  153. package/telegram-plugin/tests/turn-signal-tracker.test.ts +81 -0
  154. package/telegram-plugin/tests/vault-approval-posture.test.ts +256 -0
  155. package/telegram-plugin/tests/vault-grant-auto-resume.test.ts +73 -0
  156. package/telegram-plugin/tests/vault-grant-inbound-builders.test.ts +226 -0
  157. package/telegram-plugin/tests/vault-grant-union.test.ts +130 -0
  158. package/telegram-plugin/tests/vault-key-regex-allows-slash.test.ts +140 -0
  159. package/telegram-plugin/tests/vault-posture-quarantine.test.ts +104 -0
  160. package/telegram-plugin/tests/vault-request-access-tool.test.ts +114 -0
  161. package/telegram-plugin/tests/vault-request-access-unlock-resume.test.ts +106 -0
  162. package/telegram-plugin/turn-signal-tracker.ts +100 -24
  163. package/telegram-plugin/uat/SETUP.md +210 -35
  164. package/telegram-plugin/uat/assertions.ts +264 -37
  165. package/telegram-plugin/uat/driver-info.ts +57 -0
  166. package/telegram-plugin/uat/driver.ts +590 -51
  167. package/telegram-plugin/uat/harness.ts +140 -94
  168. package/telegram-plugin/uat/load-env.test.ts +72 -0
  169. package/telegram-plugin/uat/load-env.ts +48 -0
  170. package/telegram-plugin/uat/login.ts +96 -53
  171. package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
  172. package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
  173. package/telegram-plugin/uat/runners/report.ts +150 -0
  174. package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
  175. package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
  176. package/telegram-plugin/uat/runners/scorer.ts +106 -0
  177. package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
  178. package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
  179. package/telegram-plugin/uat/scenarios/ask-user-button-tap-dm.test.ts +141 -0
  180. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +191 -0
  181. package/telegram-plugin/uat/scenarios/fuzz-extended-dm.test.ts +255 -0
  182. package/telegram-plugin/uat/scenarios/fuzz-human-style-dm.test.ts +275 -0
  183. package/telegram-plugin/uat/scenarios/fuzz-random-prompts-dm.test.ts +146 -0
  184. package/telegram-plugin/uat/scenarios/fuzz-status-ask-dm.test.ts +486 -0
  185. package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +67 -0
  186. package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +100 -0
  187. package/telegram-plugin/uat/scenarios/jtbd-soft-commit-dm.test.ts +67 -0
  188. package/telegram-plugin/uat/scenarios/jtbd-status-query-dm.test.ts +49 -0
  189. package/telegram-plugin/uat/scenarios/location-inbound-dm.test.ts +65 -0
  190. package/telegram-plugin/uat/scenarios/midturn-silent-dm.test.ts +175 -0
  191. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +142 -0
  192. package/telegram-plugin/uat/scenarios/reactions-trigger-turn-dm.test.ts +96 -0
  193. package/telegram-plugin/uat/scenarios/secret-redaction-deletes-original-dm.test.ts +123 -0
  194. package/telegram-plugin/uat/scenarios/secret-redaction-no-false-positive-dm.test.ts +87 -0
  195. package/telegram-plugin/uat/scenarios/silence-poke-soft-dm.test.ts +155 -0
  196. package/telegram-plugin/uat/scenarios/silent-end-recovery-dm.test.ts +95 -0
  197. package/telegram-plugin/uat/scenarios/smoke-dm-reply.test.ts +57 -0
  198. package/telegram-plugin/uat/scenarios/subagent-watcher-no-rerun-dm.test.ts +135 -0
  199. package/telegram-plugin/uat/scenarios/vault-approval-posture-telegram-id-dm.test.ts +191 -0
  200. package/telegram-plugin/uat/scenarios/vault-audit-allow-dm.test.ts +108 -0
  201. package/telegram-plugin/uat/scenarios/vault-grant-auto-resume-dm.test.ts +121 -0
  202. package/telegram-plugin/uat/scenarios/vault-request-access-concurrent-dm.test.ts +161 -0
  203. package/telegram-plugin/uat/scenarios/vault-request-access-end-to-end-dm.test.ts +158 -0
  204. package/telegram-plugin/uat/scenarios/voice-inbound-dm.test.ts +65 -0
  205. package/telegram-plugin/vault-approval-posture.ts +42 -0
  206. package/telegram-plugin/welcome-text.ts +1 -0
  207. package/telegram-plugin/active-pins-sweep.ts +0 -204
  208. package/telegram-plugin/active-pins.ts +0 -146
  209. package/telegram-plugin/auth-dashboard.ts +0 -1104
  210. package/telegram-plugin/auth-slot-parser.ts +0 -497
  211. package/telegram-plugin/card-event-log.ts +0 -138
  212. package/telegram-plugin/dist/foreman/foreman.js +0 -31106
  213. package/telegram-plugin/docs/multi-agent-card-design.md +0 -847
  214. package/telegram-plugin/docs/pinned-progress-card-reliability.md +0 -144
  215. package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
  216. package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
  217. package/telegram-plugin/foreman/foreman.ts +0 -1165
  218. package/telegram-plugin/foreman/setup-flow.ts +0 -345
  219. package/telegram-plugin/foreman/setup-state.ts +0 -239
  220. package/telegram-plugin/foreman/state.ts +0 -203
  221. package/telegram-plugin/pin-event-log.ts +0 -76
  222. package/telegram-plugin/progress-card-driver.ts +0 -2886
  223. package/telegram-plugin/progress-card-pin-manager.ts +0 -589
  224. package/telegram-plugin/progress-card-pin-watchdog.ts +0 -98
  225. package/telegram-plugin/progress-card.ts +0 -1409
  226. package/telegram-plugin/tests/HARNESS.md +0 -340
  227. package/telegram-plugin/tests/_progress-card-harness.ts +0 -109
  228. package/telegram-plugin/tests/active-pins-boot-reaper.test.ts +0 -211
  229. package/telegram-plugin/tests/active-pins-sweep.test.ts +0 -309
  230. package/telegram-plugin/tests/active-pins.test.ts +0 -187
  231. package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
  232. package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
  233. package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
  234. package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
  235. package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
  236. package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
  237. package/telegram-plugin/tests/bg-agent-progress-card-757.test.ts +0 -201
  238. package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
  239. package/telegram-plugin/tests/card-event-log.test.ts +0 -145
  240. package/telegram-plugin/tests/first-paint.test.ts +0 -257
  241. package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
  242. package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
  243. package/telegram-plugin/tests/foreman-state.test.ts +0 -164
  244. package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
  245. package/telegram-plugin/tests/harness-ordering-invariants.test.ts +0 -243
  246. package/telegram-plugin/tests/pin-event-log.test.ts +0 -124
  247. package/telegram-plugin/tests/progress-card-api-failure-during-deferred.test.ts +0 -73
  248. package/telegram-plugin/tests/progress-card-close-paths-converge.test.ts +0 -272
  249. package/telegram-plugin/tests/progress-card-cross-turn.test.ts +0 -258
  250. package/telegram-plugin/tests/progress-card-delay-842.test.ts +0 -160
  251. package/telegram-plugin/tests/progress-card-dispose-preservepending.test.ts +0 -81
  252. package/telegram-plugin/tests/progress-card-draft-flag.test.ts +0 -80
  253. package/telegram-plugin/tests/progress-card-driver-eviction.test.ts +0 -215
  254. package/telegram-plugin/tests/progress-card-driver-fleet-shadow.test.ts +0 -123
  255. package/telegram-plugin/tests/progress-card-driver-force-complete-parent-done.test.ts +0 -76
  256. package/telegram-plugin/tests/progress-card-edit-timestamps-budget.test.ts +0 -62
  257. package/telegram-plugin/tests/progress-card-memory-bounds.test.ts +0 -84
  258. package/telegram-plugin/tests/progress-card-pin-failure-paths.test.ts +0 -139
  259. package/telegram-plugin/tests/progress-card-pin-manager.test.ts +0 -773
  260. package/telegram-plugin/tests/progress-card-pin-race-fast-turn.test.ts +0 -66
  261. package/telegram-plugin/tests/progress-card-pin-sidecar-partial-write.test.ts +0 -64
  262. package/telegram-plugin/tests/progress-card-pin-watchdog.test.ts +0 -190
  263. package/telegram-plugin/tests/progress-card-sigterm-pin-flush.test.ts +0 -146
  264. package/telegram-plugin/tests/real-gateway-f1-ladder-integrity.test.ts +0 -123
  265. package/telegram-plugin/tests/real-gateway-f2-instant-draft.test.ts +0 -82
  266. package/telegram-plugin/tests/real-gateway-f3-late-card.test.ts +0 -114
  267. package/telegram-plugin/tests/real-gateway-harness.ts +0 -699
  268. package/telegram-plugin/tests/real-gateway-i6-turn-flush-replay-dedup.test.ts +0 -313
  269. package/telegram-plugin/tests/real-gateway-ipc-lifecycle.test.ts +0 -299
  270. package/telegram-plugin/tests/real-gateway-spec.test.ts +0 -487
  271. package/telegram-plugin/tests/real-gateway.smoke.test.ts +0 -101
  272. package/telegram-plugin/tests/setup-flow.test.ts +0 -510
  273. package/telegram-plugin/tests/setup-state.test.ts +0 -146
  274. package/telegram-plugin/tests/sync-chat-running-subagents.test.ts +0 -116
  275. package/telegram-plugin/tests/turn-end-regressions.test.ts +0 -489
  276. package/telegram-plugin/tests/turn-flush-card-takeover.test.ts +0 -218
  277. package/telegram-plugin/tests/turn-flush-prose-recovery.test.ts +0 -78
  278. package/telegram-plugin/tests/two-zone-bg-carry-full-lifecycle.test.ts +0 -131
  279. package/telegram-plugin/tests/two-zone-bg-detection.test.ts +0 -120
  280. package/telegram-plugin/tests/two-zone-bg-done-when-all-terminal.test.ts +0 -116
  281. package/telegram-plugin/tests/two-zone-bg-early-turn-end.test.ts +0 -87
  282. package/telegram-plugin/tests/two-zone-bg-survives-next-turn.test.ts +0 -211
  283. package/telegram-plugin/tests/two-zone-card-cap.test.ts +0 -62
  284. package/telegram-plugin/tests/two-zone-card-fleet-row.test.ts +0 -101
  285. package/telegram-plugin/tests/two-zone-card-header-phases.test.ts +0 -78
  286. package/telegram-plugin/tests/two-zone-card-html-balance.test.ts +0 -110
  287. package/telegram-plugin/tests/two-zone-card-lifecycle.test.ts +0 -128
  288. package/telegram-plugin/tests/two-zone-card-sanitise.test.ts +0 -58
  289. package/telegram-plugin/tests/two-zone-card-snapshot.test.ts +0 -133
  290. package/telegram-plugin/tests/two-zone-concurrent-turns-isolation.test.ts +0 -155
  291. package/telegram-plugin/tests/two-zone-phasefor-precedence.test.ts +0 -117
  292. package/telegram-plugin/tests/two-zone-snapshot-extras.test.ts +0 -187
  293. package/telegram-plugin/tests/two-zone-stuck-edit-throttle.test.ts +0 -149
  294. package/telegram-plugin/tests/two-zone-stuck-header-escalation.test.ts +0 -101
  295. package/telegram-plugin/tests/two-zone-stuck-per-member.test.ts +0 -114
  296. package/telegram-plugin/tests/two-zone-stuck-recovery.test.ts +0 -105
  297. package/telegram-plugin/tests/waiting-ux-harness.ts +0 -381
  298. package/telegram-plugin/tests/waiting-ux.e2e.test.ts +0 -233
  299. package/telegram-plugin/turn-flush-prose-recovery.ts +0 -40
  300. package/telegram-plugin/two-zone-card.ts +0 -269
  301. package/telegram-plugin/uat/scenarios/smoke-clerk-reply.test.ts +0 -61
@@ -0,0 +1,620 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Skill-coverage UAT runner — drives a real Telegram user account
4
+ * against a switchroom agent's bot to validate that the right Claude
5
+ * Code skill fires for fuzzy NL phrasings.
6
+ *
7
+ * Sister to `tests/skill-coverage/cli.ts` (the inject_inbound-based
8
+ * runner that hit an agent-uid perms blocker). This one observes
9
+ * everything through Telegram itself, so no host-side JSONL access
10
+ * is required.
11
+ *
12
+ * **Skill detection.** The PreToolUse hook
13
+ * `telegram-plugin/hooks/tool-label-pretool.mjs` writes one JSONL
14
+ * row per tool invocation to
15
+ * `~/.switchroom/agents/<agent>/telegram/tool-labels-<session_id>.jsonl`.
16
+ * Skill rows have `tool_name === "Skill"` and a label of the form
17
+ * `"Running skill <slug>"`. The runner tails every sidecar file
18
+ * that mtime-changes during a probe window and pulls the slugs out.
19
+ *
20
+ * That sidecar dir is bind-mounted into the agent at
21
+ * `$TELEGRAM_STATE_DIR` AND lives at a host-readable path (owned by
22
+ * the agent UID but mode 0775; jsonl rows are 0644 from the hook).
23
+ * No gateway / progress-card dependency.
24
+ *
25
+ * Usage:
26
+ * bun telegram-plugin/uat/runners/skill-coverage.ts \
27
+ * --agent test-harness:@your_test_bot \
28
+ * --skills switchroom-cli,switchroom-status \
29
+ * --limit-per-skill 2 \
30
+ * --out tests/skill-coverage/out/skill-coverage
31
+ *
32
+ * Env equivalents (UAT-standard, fail loud):
33
+ * TELEGRAM_API_ID, TELEGRAM_API_HASH, TELEGRAM_UAT_DRIVER_SESSION
34
+ * SKILL_COVERAGE_AGENT="test-harness:@your_test_bot"
35
+ * SKILL_COVERAGE_SKILLS="a,b,c" (optional filter)
36
+ * SKILL_COVERAGE_LIMIT_PER_SKILL=N (optional)
37
+ * SKILL_COVERAGE_OUT="..." (default tests/skill-coverage/out/skill-coverage)
38
+ */
39
+
40
+ import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
41
+ import { dirname, join, resolve } from "node:path";
42
+ import { homedir } from "node:os";
43
+ import { fileURLToPath } from "node:url";
44
+ import { Driver, type ObservedMessage } from "../driver.js";
45
+ import { loadUatEnv } from "../load-env.js";
46
+
47
+ loadUatEnv(); // Runs at import time, before any code below reads process.env. NOTE(review): presumably populates the TELEGRAM_* / SKILL_COVERAGE_* variables — confirm in ../load-env.
48
+
49
+ // ─── Types — mirror tests/skill-coverage/{corpus,harness}/types.ts ────
50
+
51
/** Phrasing category of a probe; `"negative"` marks a negative control. */
type ProbeKind = "paraphrase" | "typo" | "slang" | "indirect" | "negative";

/** One corpus entry: a natural-language phrase sent to the bot. */
export interface Probe {
  /** Identifier of the probe within the corpus. */
  id: string;
  /** Skill the phrase is expected to trigger; `null` when no target skill applies. */
  targetSkill: string | null;
  /** Adjacent-skill expectation for negative controls. */
  expectedOtherSkill?: string;
  /** Category of phrasing this probe exercises. */
  kind: ProbeKind;
  /** Text that is sent to the bot. */
  phrase: string;
}

/** What was observed after sending a single {@link Probe}. */
export interface ProbeResult {
  /** The probe that was sent. */
  probe: Probe;
  /** Skill slugs seen firing for this probe. */
  skillsFired: string[];
  /** Reply text collected from the bot (empty when nothing was received). */
  replyText: string;
  /** Elapsed wall-clock time for the probe, in milliseconds. */
  durationMs: number;
  /** Whether the probe ended because the reply budget ran out. */
  timedOut: boolean;
  /** Set when the probe failed outright, e.g. the send itself threw. */
  errorMessage?: string;
}
68
+
69
+ // ─── Skill-label extraction ──────────────────────────────────────────
70
+
71
/**
 * Pattern for the label text that the PreToolUse hook
 * (`telegram-plugin/hooks/tool-label-pretool.mjs`) writes for a `Skill`
 * tool invocation. The captured slug is deliberately narrow: skill names
 * are kebab-case ASCII per `skills/<name>/SKILL.md` frontmatter.
 */
const SKILL_LABEL_RE = /running skill\s+([a-z0-9][a-z0-9-]*)/i;

/**
 * Pull the skill slug out of a tool label.
 *
 * @param label Label text such as `"Running skill switchroom-cli"`.
 * @returns The lower-cased slug, or `null` when the label does not
 *   contain the "running skill <slug>" phrase.
 */
export function extractSkillFromLabel(label: string): string | null {
  const slug = label.match(SKILL_LABEL_RE)?.[1];
  if (slug === undefined) return null;
  return slug.toLowerCase();
}
83
+
84
/** One JSONL row of a `tool-labels-*.jsonl` sidecar file. */
export interface SidecarRow {
  /** Row timestamp; compared against `sinceMs` (presumably epoch milliseconds). */
  ts: number;
  tool_use_id: string;
  agent_id: string | null;
  /** Human-readable label, e.g. `"Running skill <slug>"` for Skill rows. */
  label: string;
  /** Name of the invoked tool; Skill invocations carry `"Skill"`. */
  tool_name: string;
}

/**
 * Read every `tool-labels-*.jsonl` file in `dir` and return the rows
 * with `tool_name === "Skill"` and `ts >= sinceMs`.
 *
 * The reader is best-effort: an unreadable directory yields `[]`, an
 * unreadable file is skipped, and any line that is not a JSON object
 * with a numeric `ts` is ignored. That includes lines that are valid
 * JSON but not objects (`null`, numbers, strings), which would otherwise
 * throw when their fields are accessed.
 *
 * @param dir      Directory holding the sidecar files.
 * @param sinceMs  Inclusive lower bound for `row.ts`.
 * @param readdir  Lists the entry names of a directory (injected for tests).
 * @param readFile Returns a file's text content (injected for tests).
 * @returns Matching rows, in directory-listing then file order.
 */
export function readSkillRowsSince(
  dir: string,
  sinceMs: number,
  readdir: (p: string) => string[],
  readFile: (p: string) => string,
): SidecarRow[] {
  const out: SidecarRow[] = [];
  let entries: string[];
  try {
    entries = readdir(dir);
  } catch {
    // Directory missing or unreadable: nothing to report.
    return out;
  }
  for (const entry of entries) {
    if (!entry.startsWith("tool-labels-") || !entry.endsWith(".jsonl")) continue;
    let content: string;
    try {
      content = readFile(`${dir}/${entry}`);
    } catch {
      // File vanished or is unreadable; move on to the next one.
      continue;
    }
    for (const line of content.split("\n")) {
      if (!line.trim()) continue;
      let parsed: unknown;
      try {
        parsed = JSON.parse(line);
      } catch {
        // Truncated or otherwise malformed line.
        continue;
      }
      // `JSON.parse` happily returns null / primitives; only objects can be rows.
      if (parsed === null || typeof parsed !== "object") continue;
      const row = parsed as SidecarRow;
      if (typeof row.ts !== "number" || row.ts < sinceMs) continue;
      if (row.tool_name !== "Skill") continue;
      out.push(row);
    }
  }
  return out;
}
134
+
135
+ // ─── CLI parsing ─────────────────────────────────────────────────────
136
+
137
/** Fully resolved runner configuration (environment defaults overlaid with CLI flags). */
interface CliConfig {
  /** Agent name — the part of the `--agent` spec before the colon. */
  agentName: string;
  /** Bot username including the leading `@`. */
  botUsername: string;
  /** Skills to restrict the run to, or `null` for no filter. */
  skillFilter: string[] | null;
  /** Maximum probes per skill, or `null` for no cap. */
  limitPerSkill: number | null;
  /** Per-probe reply timeout in milliseconds. Default 90s. */
  replyTimeoutMs: number;
  /** Pause between probes in milliseconds. Default 6s, to stay under Telegram's rate cap. */
  settleMs: number;
  /**
   * How long to keep reading the sidecar after the reply is seen, in
   * milliseconds. The hook writes asynchronously, so a short hold avoids
   * missing the last Skill row of a turn. Default 3s.
   */
  sidecarDrainMs: number;
  /**
   * Host path of the agent's TELEGRAM_STATE_DIR, where the
   * `tool-labels-<session>.jsonl` files live. Defaults to
   * `~/.switchroom/agents/<name>/telegram/`.
   */
  agentStateDir: string;
  /** Base path for the run's output. */
  outBase: string;
}
156
+
157
/** Directory that contains this runner script. */
const HERE = dirname(fileURLToPath(import.meta.url));
/** Three directory levels above the runner — the root used for the default paths below. */
const REPO_ROOT = resolve(HERE, "../../..");
/** Default location of the probe corpus (`*.jsonl` files). */
const DEFAULT_CORPUS_DIR = join(REPO_ROOT, "tests", "skill-coverage", "corpus");
/** Default output base path for a run. */
const DEFAULT_OUT_BASE = join(REPO_ROOT, "tests", "skill-coverage", "out", "skill-coverage");
161
+
162
/**
 * Report a fatal usage/configuration error and terminate the process.
 *
 * @param msg Message to print; it is prefixed with the runner tag and
 *   written to stderr.
 * @returns Never — the process exits with status 2.
 */
function fail(msg: string): never {
  const line = "[skill-coverage-uat] " + msg + "\n";
  process.stderr.write(line);
  return process.exit(2);
}
166
+
167
/**
 * Build the runner configuration from environment variables and CLI flags.
 * Flags take precedence over their `SKILL_COVERAGE_*` env equivalents.
 *
 * Every numeric setting is validated: a value that does not parse to a
 * non-negative integer aborts the run instead of silently becoming `NaN`
 * (which would disable the per-skill cap and break timeout arithmetic).
 * An env variable that is set but empty is treated as unset.
 *
 * @param argv Flag tokens to parse (presumably `process.argv.slice(2)` — confirm at the call site).
 * @returns The resolved configuration.
 *
 * Does not return on invalid input: `fail` prints the problem and exits
 * with status 2. `-h` / `--help` prints usage and exits with status 0.
 */
function parseCli(argv: readonly string[]): CliConfig {
  const env = process.env;

  /** Parse a non-negative integer or abort, naming where the value came from. */
  const parseCount = (raw: string, source: string): number => {
    const n = Number.parseInt(raw, 10);
    if (!Number.isFinite(n) || n < 0) {
      fail(`${source}: expected a non-negative integer; got "${raw}"`);
    }
    return n;
  };
  /** Numeric env var, or `fallback` when it is unset or blank. */
  const envCount = <T>(name: string, fallback: T): number | T => {
    const raw = env[name];
    return raw === undefined || raw.trim() === "" ? fallback : parseCount(raw, name);
  };
  const splitList = (raw: string): string[] =>
    raw.split(",").map((s) => s.trim()).filter(Boolean);

  let agentSpec = env.SKILL_COVERAGE_AGENT ?? "";
  let skillFilter: string[] | null = env.SKILL_COVERAGE_SKILLS
    ? splitList(env.SKILL_COVERAGE_SKILLS)
    : null;
  let limitPerSkill: number | null = envCount<null>("SKILL_COVERAGE_LIMIT_PER_SKILL", null);
  let replyTimeoutMs: number = envCount("SKILL_COVERAGE_REPLY_TIMEOUT_MS", 90000);
  let settleMs: number = envCount("SKILL_COVERAGE_SETTLE_MS", 6000);
  let sidecarDrainMs: number = envCount("SKILL_COVERAGE_SIDECAR_DRAIN_MS", 3000);
  let agentStateDir = env.SKILL_COVERAGE_AGENT_STATE_DIR ?? "";
  let outBase = env.SKILL_COVERAGE_OUT ?? DEFAULT_OUT_BASE;

  for (let i = 0; i < argv.length; i++) {
    const tok = argv[i]!;
    // Consumes the token after the current flag as its value.
    const next = (): string => {
      const v = argv[++i];
      if (!v) fail(`${tok}: missing value`);
      return v;
    };
    switch (tok) {
      case "--agent":
        agentSpec = next();
        break;
      case "--skills":
        skillFilter = splitList(next());
        break;
      case "--limit-per-skill":
        limitPerSkill = parseCount(next(), tok);
        break;
      case "--reply-timeout-ms":
        replyTimeoutMs = parseCount(next(), tok);
        break;
      case "--settle-ms":
        settleMs = parseCount(next(), tok);
        break;
      case "--sidecar-drain-ms":
        sidecarDrainMs = parseCount(next(), tok);
        break;
      case "--agent-state-dir":
        agentStateDir = next();
        break;
      case "--out":
        outBase = resolve(next());
        break;
      case "-h":
      case "--help":
        printHelp();
        process.exit(0);
        break;
      default:
        // Unknown flags are fatal; bare positional tokens are ignored.
        if (tok.startsWith("--")) fail(`unknown flag: ${tok}`);
    }
  }

  if (!agentSpec) {
    fail(
      "no agent target. Pass --agent <name>:@<bot-username> or set SKILL_COVERAGE_AGENT.",
    );
  }
  const [agentName = "", botUsername = ""] = agentSpec.split(":").map((s) => s.trim());
  if (!agentName || !botUsername.startsWith("@")) {
    fail(`--agent expects "<name>:@<bot-username>"; got "${agentSpec}"`);
  }

  const resolvedAgentStateDir = agentStateDir
    ? resolve(agentStateDir)
    : join(homedir(), ".switchroom", "agents", agentName, "telegram");

  return {
    agentName,
    botUsername,
    skillFilter,
    limitPerSkill,
    replyTimeoutMs,
    settleMs,
    sidecarDrainMs,
    agentStateDir: resolvedAgentStateDir,
    outBase,
  };
}
249
+
250
+ function printHelp(): void { // Writes the usage text below to stdout; does not exit.
251
+ process.stdout.write(`skill-coverage UAT runner
252
+
253
+ Required env (fail loud if missing):
254
+ TELEGRAM_API_ID, TELEGRAM_API_HASH, TELEGRAM_UAT_DRIVER_SESSION
255
+
256
+ Flags:
257
+ --agent NAME:@BOT Agent + bot to target. Required.
258
+ --skills A,B,C Filter to these skills only.
259
+ --limit-per-skill N Cap probes per skill.
260
+ --reply-timeout-ms N Per-probe budget. Default 90000.
261
+ --settle-ms N Inter-probe settle. Default 6000.
262
+ --sidecar-drain-ms N Post-reply hold for the last hook write. Default 3000.
263
+ --agent-state-dir PATH Override sidecar location. Default ~/.switchroom/agents/<name>/telegram.
264
+ --out PATH Output base path. Default tests/skill-coverage/out/skill-coverage.
265
+ `);
266
+ }
267
+
268
+ // ─── Corpus loading ──────────────────────────────────────────────────
269
+
270
/**
 * Load probes from every `<skill>.jsonl` file in `dir`.
 *
 * Files are visited in sorted name order so a run is reproducible across
 * hosts (directory listing order is otherwise unspecified). Lines that
 * are not valid JSON, or that do not decode to an object with a string
 * `phrase`, are skipped: a bare `null` or number is valid JSON but not a
 * usable probe.
 *
 * @param dir         Corpus directory; the run aborts via `fail` if it does not exist.
 * @param skillFilter When non-null, only files whose base name (without
 *   `.jsonl`) is listed are read.
 * @returns All accepted probes, in file then line order.
 */
function loadCorpus(dir: string, skillFilter: string[] | null): Probe[] {
  if (!existsSync(dir)) {
    fail(`corpus dir not found: ${dir} — run \`bun tests/skill-coverage/corpus/generate-corpus.ts --seed=1\` first.`);
  }
  const files = readdirSync(dir)
    .filter((f) => f.endsWith(".jsonl"))
    .sort();
  const out: Probe[] = [];
  for (const f of files) {
    const skill = f.replace(/\.jsonl$/, "");
    if (skillFilter && !skillFilter.includes(skill)) continue;
    const content = readFileSync(join(dir, f), "utf-8");
    for (const line of content.split("\n")) {
      if (!line.trim()) continue;
      let parsed: unknown;
      try {
        parsed = JSON.parse(line);
      } catch {
        // skip malformed lines
        continue;
      }
      // Reject JSON that parses but cannot be a probe (null, primitives, no phrase).
      if (parsed === null || typeof parsed !== "object") continue;
      if (typeof (parsed as { phrase?: unknown }).phrase !== "string") continue;
      out.push(parsed as Probe);
    }
  }
  return out;
}
291
+
292
/**
 * Keep at most `limit` probes per target skill, preserving input order.
 * Probes without a target skill share a single `"<neg>"` bucket.
 *
 * @param probes Probes to thin out.
 * @param limit  Per-skill cap; `null` returns `probes` itself, untouched.
 * @returns The probes that fit under the cap.
 */
function trimPerSkill(probes: Probe[], limit: number | null): Probe[] {
  if (limit == null) return probes;
  const cap: number = limit;
  const taken = new Map<string, number>();
  return probes.filter((probe) => {
    const bucket = probe.targetSkill ?? "<neg>";
    const used = taken.get(bucket) ?? 0;
    if (used >= cap) return false;
    taken.set(bucket, used + 1);
    return true;
  });
}
305
+
306
+ // ─── Send + observe a single probe ───────────────────────────────────
307
+
308
/**
 * `next()` calls that were still outstanding when a pull timed out, keyed by
 * iterator. The following pull on the same iterator re-attaches to the stored
 * promise instead of issuing a second `next()`, so the item that eventually
 * settles the first call is delivered rather than dropped.
 */
const pendingPulls = new WeakMap<
  AsyncIterator<ObservedMessage>,
  Promise<IteratorResult<ObservedMessage>>
>();

/**
 * Pull the next item from `it`, giving up after `ms` milliseconds.
 *
 * @param it Iterator to read from.
 * @param ms Maximum time to wait for an item.
 * @returns The next item, or `"timeout"` when none arrived within `ms`.
 *          An exhausted iterator and a rejected `next()` are also reported as
 *          `"timeout"`, but only once `ms` has elapsed, so a caller that polls
 *          in a loop does not spin on a dead stream.
 */
async function pullOneWithTimeout(
  it: AsyncIterator<ObservedMessage>,
  ms: number,
): Promise<ObservedMessage | "timeout"> {
  let pull = pendingPulls.get(it);
  if (pull === undefined) {
    pull = it.next();
    pendingPulls.set(it, pull);
  }
  const inFlight = pull;

  return new Promise((resolveFn) => {
    let settled = false;
    const timer = setTimeout(() => {
      settled = true;
      resolveFn("timeout");
    }, ms);
    // Forget the cached call once this pull has consumed its outcome.
    const release = (): void => {
      if (pendingPulls.get(it) === inFlight) pendingPulls.delete(it);
    };
    inFlight.then(
      (r) => {
        // Already timed out: leave the result cached for the next pull.
        if (settled) return;
        release();
        // Exhausted stream: let the timer report the timeout.
        if (r.done === true) return;
        settled = true;
        clearTimeout(timer);
        resolveFn(r.value);
      },
      () => {
        // Iterator errors are deliberately treated as "nothing arrived";
        // the timer reports the timeout.
        if (settled) return;
        release();
      },
    );
  });
}
333
+
334
/**
 * Send one probe phrase to the bot, wait for its first non-empty reply, and
 * report which skills the sidecar recorded for that turn.
 *
 * @param driver       Telegram driver used to send the phrase and observe replies.
 * @param botUserId    User id of the bot under test.
 * @param driverUserId User id of the driver account; its own messages are ignored.
 * @param probe        Probe whose `phrase` is sent.
 * @param cfg          Supplies `replyTimeoutMs`, `sidecarDrainMs` and `agentStateDir`.
 * @returns A result with `timedOut: true` when no reply arrived before the
 *          deadline, with `errorMessage` set when sending failed, and otherwise
 *          with the reply text and the de-duplicated skills read from the sidecar.
 */
async function runProbe(
  driver: Driver,
  botUserId: number,
  driverUserId: number,
  probe: Probe,
  cfg: CliConfig,
): Promise<ProbeResult> {
  const startedAt = Date.now();
  // The iterator is obtained before the phrase is sent — presumably so a fast
  // reply cannot be missed; confirm observeMessages subscribes eagerly.
  const stream = driver.observeMessages(botUserId)[Symbol.asyncIterator]();

  // Best-effort close: a failing cleanup must not mask the probe result.
  const closeStream = async (): Promise<void> => {
    try {
      await stream.return?.(undefined);
    } catch {
      /* ignore */
    }
  };

  let sentMessageId: number;
  try {
    const sent = await driver.sendText(botUserId, probe.phrase);
    sentMessageId = sent.messageId;
  } catch (err) {
    await closeStream();
    return {
      probe,
      skillsFired: [],
      replyText: "",
      durationMs: Date.now() - startedAt,
      timedOut: false,
      // Not everything thrown is an Error; avoid reporting "undefined".
      errorMessage: `send failed: ${err instanceof Error ? err.message : String(err)}`,
    };
  }

  // Bot reply is the turn-completion signal — we stop reading the
  // stream once it lands. The sidecar-drain hold below absorbs any
  // late hook writes after the visible reply.
  const deadline = startedAt + cfg.replyTimeoutMs;
  let replyText: string | null = null;
  try {
    while (replyText === null && Date.now() < deadline) {
      const remaining = deadline - Date.now();
      // Poll in slices of at most 2s so the deadline is re-checked regularly.
      const slice = await pullOneWithTimeout(stream, Math.min(remaining, 2000));
      if (slice === "timeout") continue;
      if (slice.senderUserId === driverUserId) continue;
      // Ignore anything at or before the probe message itself.
      if (slice.messageId <= sentMessageId) continue;
      const text = (slice.text ?? "").trim();
      // First non-empty reply is enough — extra edits don't change
      // which Skill labels landed in the sidecar.
      if (text) replyText = text;
    }
  } finally {
    await closeStream();
  }

  if (replyText === null) {
    return {
      probe,
      skillsFired: [],
      replyText: "",
      durationMs: Date.now() - startedAt,
      timedOut: true,
    };
  }

  // Drain window: hook writes are async to the assistant message
  // landing. A small post-reply hold catches the last row.
  await new Promise((res) => setTimeout(res, cfg.sidecarDrainMs));

  const rows = readSkillRowsSince(
    cfg.agentStateDir,
    startedAt,
    (p) => readdirSync(p),
    (p) => readFileSync(p, "utf-8"),
  );
  const skills = new Set<string>();
  for (const r of rows) {
    const slug = extractSkillFromLabel(r.label);
    if (slug) skills.add(slug);
  }

  return {
    probe,
    skillsFired: [...skills],
    replyText,
    durationMs: Date.now() - startedAt,
    timedOut: false,
  };
}
434
+
435
+ // ─── Scoring ─────────────────────────────────────────────────────────
436
+
437
/** Per-skill metrics for one skill, as computed by `score()`. */
interface SkillRow {
  /** Skill slug this row describes. */
  skill: string;
  /** Number of probes whose `targetSkill` is this skill. */
  sampleSize: number;
  /** Probes targeting this skill on which the skill fired. */
  truePositives: number;
  /** Probes targeting this skill on which the skill did not fire. */
  falseNegatives: number;
  /** Probes not targeting this skill on which the skill fired anyway. */
  falsePositives: number;
  /** TP / (TP + FP), rounded to 3 decimals; 0 when the skill never fired. */
  precision: number;
  /** TP / (TP + FN), rounded to 3 decimals; 0 when no probe targets the skill. */
  recall: number;
  /** Harmonic mean of precision and recall, rounded to 3 decimals; 0 when both are 0. */
  f1: number;
  /**
   * Numeric, not boolean. As `score()` currently computes it, this is 1 when
   * the skill fired on at least one probe that targets it, and 0 otherwise.
   */
  execSuccess: number;
  /**
   * Fraction of negative-control probes (`targetSkill === null`) on which
   * this skill fired; 0 when there are no negative controls.
   */
  negativeControlFpRate: number;
}
450
+
451
/** Result of scoring one UAT run; serialised to JSON and rendered as markdown. */
interface Scorecard {
  /** ISO-8601 timestamp taken when the scorecard was built. */
  generatedAt: string;
  /** Name of the agent that was probed. */
  agentName: string;
  /** Number of probe results that were scored. */
  totalProbes: number;
  /** One row per skill that was targeted or that fired, sorted by skill name. */
  rows: SkillRow[];
  /** Run-wide summary derived from `rows`. */
  aggregate: {
    /** Median-style summary of the per-skill F1 values (see `score()` for the exact rule). */
    medianF1: number;
    /** Number of rows whose `f1` is below `f1Threshold`. */
    skillsBelowF1Threshold: number;
    /** Number of rows whose `execSuccess` is below `execThreshold`. */
    skillsBelowExecThreshold: number;
    /** F1 cut-off used for `skillsBelowF1Threshold`. */
    f1Threshold: number;
    /** Cut-off used for `skillsBelowExecThreshold`. */
    execThreshold: number;
  };
}
464
+
465
/**
 * Turn raw probe results into a per-skill scorecard.
 *
 * A row is produced for every skill that was either targeted by a probe or
 * observed firing. For each skill, a probe counts as:
 *  - true positive  — it targets the skill and the skill fired;
 *  - false negative — it targets the skill and the skill did not fire;
 *  - false positive — it does not target the skill but the skill fired.
 *
 * @param results   One entry per executed probe.
 * @param agentName Agent name copied into the scorecard.
 * @returns The scorecard, with rows sorted by skill name and ratios rounded to
 *          3 decimals. `aggregate.medianF1` is the true median of the row F1
 *          values: the middle value for an odd number of rows, the mean of the
 *          two middle values for an even number, and 0 when there are no rows.
 */
function score(results: ProbeResult[], agentName: string): Scorecard {
  const F1_THRESHOLD = 0.9;
  const EXEC_THRESHOLD = 0.95;

  const skills = new Set<string>();
  for (const r of results) {
    if (r.probe.targetSkill) skills.add(r.probe.targetSkill);
    for (const s of r.skillsFired) skills.add(s);
  }

  const rows: SkillRow[] = [];
  for (const s of [...skills].sort()) {
    let tp = 0, fn = 0, fp = 0;
    let sample = 0;
    let execTotal = 0, execHits = 0;
    let negTotal = 0, negFp = 0;
    for (const r of results) {
      const isTarget = r.probe.targetSkill === s;
      const fired = r.skillsFired.includes(s);
      if (isTarget) {
        sample++;
        if (fired) {
          tp++;
          // Both exec counters advance together, so execSuccess below is 1
          // when the skill fired on any of its own probes and 0 otherwise.
          execTotal++;
          execHits++;
        } else {
          fn++;
        }
      } else if (fired) {
        fp++;
      }
      if (r.probe.targetSkill === null) {
        negTotal++;
        if (fired) negFp++;
      }
    }
    // Every ratio falls back to 0 when its denominator is 0.
    const precision = tp + fp === 0 ? 0 : tp / (tp + fp);
    const recall = tp + fn === 0 ? 0 : tp / (tp + fn);
    const f1 = precision + recall === 0 ? 0 : (2 * precision * recall) / (precision + recall);
    rows.push({
      skill: s,
      sampleSize: sample,
      truePositives: tp,
      falseNegatives: fn,
      falsePositives: fp,
      precision: round3(precision),
      recall: round3(recall),
      f1: round3(f1),
      execSuccess: execTotal === 0 ? 0 : round3(execHits / execTotal),
      negativeControlFpRate: negTotal === 0 ? 0 : round3(negFp / negTotal),
    });
  }

  const f1s = rows.map((r) => r.f1).sort((a, b) => a - b);
  const mid = Math.floor(f1s.length / 2);
  let medianF1 = 0;
  if (f1s.length > 0) {
    // An even count has no single middle element; average the two central
    // values instead of reporting the upper one.
    medianF1 = f1s.length % 2 === 1 ? f1s[mid]! : (f1s[mid - 1]! + f1s[mid]!) / 2;
  }

  return {
    generatedAt: new Date().toISOString(),
    agentName,
    totalProbes: results.length,
    rows,
    aggregate: {
      medianF1: round3(medianF1),
      skillsBelowF1Threshold: rows.filter((r) => r.f1 < F1_THRESHOLD).length,
      skillsBelowExecThreshold: rows.filter((r) => r.execSuccess < EXEC_THRESHOLD).length,
      f1Threshold: F1_THRESHOLD,
      execThreshold: EXEC_THRESHOLD,
    },
  };
}

/** Round `n` to three decimal places. */
function round3(n: number): number {
  return Math.round(n * 1000) / 1000;
}
535
+
536
/**
 * Render a scorecard as a markdown document.
 *
 * The output is a title, a bullet list summarising the run, and one table row
 * per skill in `card.rows` order. It always ends with a single newline.
 *
 * @param card Scorecard to render.
 * @returns The markdown text.
 */
function renderMarkdown(card: Scorecard): string {
  const { aggregate } = card;

  const preamble: string[] = [
    `# Skill-coverage scorecard`,
    "",
    `- Generated: ${card.generatedAt}`,
    `- Agent: \`${card.agentName}\``,
    `- Probes: ${card.totalProbes}`,
    `- Median F1: ${aggregate.medianF1}`,
    `- Below F1 ≥ ${aggregate.f1Threshold}: ${aggregate.skillsBelowF1Threshold}`,
    `- Below execSuccess ≥ ${aggregate.execThreshold}: ${aggregate.skillsBelowExecThreshold}`,
    "",
    `| Skill | n | TP | FN | FP | Precision | Recall | F1 | Exec | NegFP |`,
    `|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|`,
  ];

  const tableRows = card.rows.map((row) => {
    // Cell order mirrors the header columns above.
    const cells: Array<string | number> = [
      `\`${row.skill}\``,
      row.sampleSize,
      row.truePositives,
      row.falseNegatives,
      row.falsePositives,
      row.precision,
      row.recall,
      row.f1,
      row.execSuccess,
      row.negativeControlFpRate,
    ];
    return `| ${cells.join(" | ")} |`;
  });

  return [...preamble, ...tableRows].join("\n") + "\n";
}
556
+
557
+ // ─── Main ────────────────────────────────────────────────────────────
558
+
559
/**
 * CLI entry point: run every selected probe against the target bot and write
 * the run log and scorecard files.
 *
 * Steps, in order:
 *  1. parse CLI flags and verify the required Telegram env vars;
 *  2. load the corpus and apply the per-skill limit;
 *  3. connect the driver and resolve the driver and bot user ids;
 *  4. run the probes one at a time, pausing `cfg.settleMs` between them;
 *  5. write `<outBase>.run.json`, `<outBase>.scorecard.json` and
 *     `<outBase>.scorecard.md`.
 *
 * The driver is disconnected even when a step after `connect()` throws.
 */
async function main(): Promise<void> {
  const cfg = parseCli(process.argv.slice(2));
  for (const v of ["TELEGRAM_API_ID", "TELEGRAM_API_HASH", "TELEGRAM_UAT_DRIVER_SESSION"]) {
    if (!process.env[v]) fail(`missing required env: ${v}`);
  }
  // A present but non-numeric id would otherwise reach the driver as NaN.
  const apiId = Number.parseInt(process.env.TELEGRAM_API_ID!, 10);
  if (Number.isNaN(apiId)) fail("invalid env: TELEGRAM_API_ID must be an integer");

  const corpusDir = DEFAULT_CORPUS_DIR;
  const probesAll = loadCorpus(corpusDir, cfg.skillFilter);
  const probes = trimPerSkill(probesAll, cfg.limitPerSkill);
  process.stderr.write(
    `[skill-coverage-uat] loaded ${probes.length} probes (from ${probesAll.length} in corpus)\n`,
  );

  const driver = new Driver({
    apiId,
    apiHash: process.env.TELEGRAM_API_HASH!,
    session: process.env.TELEGRAM_UAT_DRIVER_SESSION!,
  });
  await driver.connect();
  process.stderr.write(`[skill-coverage-uat] connected as driver user\n`);

  try {
    const driverUserId = await driver.getMyUserId();
    const botUserId = await driver.resolveBotUserId(cfg.botUsername);
    process.stderr.write(
      `[skill-coverage-uat] target ${cfg.agentName} via ${cfg.botUsername} (uid=${botUserId})\n`,
    );

    const results: ProbeResult[] = [];
    let i = 0;
    // Probes run strictly one after another, never in parallel.
    for (const p of probes) {
      i++;
      const r = await runProbe(driver, botUserId, driverUserId, p, cfg);
      results.push(r);
      const status = r.timedOut ? "TIMEOUT" : r.skillsFired.length ? r.skillsFired.join(",") : "<no-skill>";
      process.stderr.write(
        `[skill-coverage-uat] (${i}/${probes.length}) ${p.kind} target=${p.targetSkill ?? "<neg>"} → ${status} (${r.durationMs}ms)\n`,
      );
      // No settle pause is needed after the final probe.
      if (i < probes.length) {
        await new Promise((res) => setTimeout(res, cfg.settleMs));
      }
    }

    const card = score(results, cfg.agentName);
    mkdirSync(dirname(cfg.outBase), { recursive: true });
    writeFileSync(`${cfg.outBase}.run.json`, JSON.stringify({ cfg: { ...cfg }, results }, null, 2));
    writeFileSync(`${cfg.outBase}.scorecard.json`, JSON.stringify(card, null, 2));
    writeFileSync(`${cfg.outBase}.scorecard.md`, renderMarkdown(card));
    process.stderr.write(
      `[skill-coverage-uat] wrote ${cfg.outBase}.{run.json,scorecard.json,scorecard.md}\n`,
    );
  } finally {
    await driver.disconnect();
  }
}
614
+
615
// Run main() only when this file is the program entry point, not when it is
// imported. `import.meta.url` is a percent-encoded file URL, so its path is
// decoded before comparing with argv[1]; a raw string comparison never matches
// when the script path contains spaces, `#`, `%` or other encoded characters.
if (decodeURIComponent(new URL(import.meta.url).pathname) === process.argv[1]) {
  main().catch((err: unknown) => {
    // Rejections are not guaranteed to be Error objects (or even non-null).
    const detail = err instanceof Error ? err.stack ?? String(err) : String(err);
    process.stderr.write(`[skill-coverage-uat] FATAL: ${detail}\n`);
    process.exit(1);
  });
}