npm - switchroom - Versions diffs - 0.7.15 → 0.10.0 - Mend

switchroom 0.7.15 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (301) hide show

package/README.md +51 -59
package/bin/run-hook.sh +27 -11
package/bin/timezone-hook.sh +9 -7
package/dist/agent-scheduler/index.js +410 -133
package/dist/auth-broker/index.js +13932 -0
package/dist/cli/switchroom.js +26937 -5601
package/dist/host-control/main.js +12702 -0
package/dist/vault/approvals/kernel-server.js +467 -184
package/dist/vault/broker/server.js +1430 -724
package/examples/minimal.yaml +63 -0
package/examples/personal-google-workspace-mcp/.env.example +34 -0
package/examples/personal-google-workspace-mcp/README.md +194 -0
package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
package/examples/switchroom.yaml +220 -0
package/package.json +7 -4
package/profiles/_base/settings.json.hbs +20 -5
package/profiles/_base/start.sh.hbs +16 -3
package/profiles/_shared/agent-self-service.md.hbs +126 -0
package/profiles/_shared/telegram-style.md.hbs +20 -90
package/profiles/_shared/vault-protocol.md.hbs +68 -0
package/profiles/default/CLAUDE.md +50 -96
package/profiles/default/CLAUDE.md.hbs +36 -6
package/profiles/default/workspace/SOUL.md.hbs +12 -5
package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
package/skills/buildkite-agent-runtime/SKILL.md +44 -11
package/skills/buildkite-api/SKILL.md +31 -8
package/skills/buildkite-cli/SKILL.md +27 -9
package/skills/buildkite-migration/SKILL.md +22 -9
package/skills/buildkite-pipelines/SKILL.md +26 -9
package/skills/buildkite-secure-delivery/SKILL.md +23 -9
package/skills/buildkite-test-engine/SKILL.md +25 -8
package/skills/docx/SKILL.md +1 -1
package/skills/docx/scripts/office/validators/__pycache__/__init__.cpython-313.pyc +0 -0
package/skills/docx/scripts/office/validators/__pycache__/base.cpython-313.pyc +0 -0
package/skills/file-bug/SKILL.md +34 -6
package/skills/humanizer/SKILL.md +15 -0
package/skills/humanizer-calibrate/SKILL.md +7 -1
package/skills/mcp-builder/SKILL.md +1 -1
package/skills/pdf/SKILL.md +1 -1
package/skills/pptx/SKILL.md +1 -1
package/skills/skill-creator/SKILL.md +21 -1
package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
package/skills/switchroom-cli/SKILL.md +63 -64
package/skills/switchroom-health/SKILL.md +23 -10
package/skills/switchroom-install/SKILL.md +3 -3
package/skills/switchroom-manage/SKILL.md +26 -19
package/skills/switchroom-runtime/SKILL.md +191 -0
package/skills/switchroom-status/SKILL.md +27 -2
package/skills/telegram-test-harness/SKILL.md +3 -0
package/skills/token-helpers/SKILL.md +24 -1
package/skills/webapp-testing/SKILL.md +31 -1
package/skills/xlsx/SKILL.md +1 -1
package/telegram-plugin/admin-commands/index.ts +7 -5
package/telegram-plugin/analytics-posthog.ts +191 -0
package/telegram-plugin/bridge/bridge.ts +69 -0
package/telegram-plugin/bridge/ipc-client.ts +4 -1
package/telegram-plugin/dist/bridge/bridge.js +194 -119
package/telegram-plugin/dist/gateway/gateway.js +23611 -19671
package/telegram-plugin/dist/server.js +245 -189
package/telegram-plugin/first-paint.ts +3 -24
package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
package/telegram-plugin/gateway/auth-command.ts +794 -0
package/telegram-plugin/gateway/auth-line.ts +123 -0
package/telegram-plugin/gateway/boot-card.ts +169 -40
package/telegram-plugin/gateway/boot-issue-cache.ts +308 -0
package/telegram-plugin/gateway/boot-probes.ts +166 -123
package/telegram-plugin/gateway/boot-reason.ts +41 -7
package/telegram-plugin/gateway/boot-version.ts +66 -0
package/telegram-plugin/gateway/gateway.ts +3499 -1885
package/telegram-plugin/gateway/hostd-dispatch.ts +117 -0
package/telegram-plugin/gateway/ipc-protocol.ts +18 -0
package/telegram-plugin/gateway/pending-inbound-buffer.ts +106 -0
package/telegram-plugin/gateway/quarantine.ts +69 -0
package/telegram-plugin/gateway/quota-cache.ts +9 -4
package/telegram-plugin/gateway/reaction-trigger.ts +401 -0
package/telegram-plugin/gateway/recent-denials.test.ts +103 -0
package/telegram-plugin/gateway/recent-denials.ts +77 -0
package/telegram-plugin/gateway/startup-network-retry.ts +109 -31
package/telegram-plugin/gateway/vault-grant-inbound-builders.ts +125 -0
package/telegram-plugin/history.ts +91 -0
package/telegram-plugin/hooks/hooks.json +10 -0
package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +130 -0
package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +19 -2
package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +22 -2
package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
package/telegram-plugin/inbound-classifier.ts +50 -0
package/telegram-plugin/inline-keyboard-callbacks.ts +136 -0
package/telegram-plugin/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json +1 -0
package/telegram-plugin/package.json +4 -2
package/telegram-plugin/permission-rule.ts +51 -0
package/telegram-plugin/permission-title.ts +56 -0
package/telegram-plugin/quota-check.ts +19 -41
package/telegram-plugin/registry/reaper.ts +223 -0
package/telegram-plugin/retry-api-call.ts +80 -0
package/telegram-plugin/runtime-metrics.ts +177 -0
package/telegram-plugin/scripts/build.mjs +0 -1
package/telegram-plugin/secret-detect/index.ts +24 -0
package/telegram-plugin/secret-detect/vault-error.test.ts +64 -12
package/telegram-plugin/secret-detect/vault-error.ts +78 -11
package/telegram-plugin/secret-detect/vault-write.ts +14 -2
package/telegram-plugin/server.js +41795 -0
package/telegram-plugin/session-tail.ts +6 -1
package/telegram-plugin/shared/bot-runtime.ts +5 -4
package/telegram-plugin/silence-poke.ts +420 -0
package/telegram-plugin/silent-end.ts +174 -0
package/telegram-plugin/stream-controller.ts +13 -0
package/telegram-plugin/stream-reply-handler.ts +7 -0
package/telegram-plugin/subagent-watcher.ts +213 -4
package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
package/telegram-plugin/tests/boot-card-issue-dedup.test.ts +247 -0
package/telegram-plugin/tests/boot-card-reason-to-render.test.ts +182 -0
package/telegram-plugin/tests/boot-card-reason.test.ts +65 -2
package/telegram-plugin/tests/boot-card-render.test.ts +146 -0
package/telegram-plugin/tests/boot-card-silent-on-operator.test.ts +103 -0
package/telegram-plugin/tests/boot-probes.test.ts +216 -10
package/telegram-plugin/tests/boot-version-string.test.ts +0 -0
package/telegram-plugin/tests/finalize-callback.test.ts +190 -0
package/telegram-plugin/tests/gateway-message-validator.test.ts +26 -0
package/telegram-plugin/tests/gateway-secret-detect.test.ts +12 -3
package/telegram-plugin/tests/gateway-startup-network-retry.test.ts +104 -0
package/telegram-plugin/tests/history-reaper.test.ts +378 -0
package/telegram-plugin/tests/hostd-dispatch.test.ts +129 -0
package/telegram-plugin/tests/inbound-classifier.test.ts +76 -0
package/telegram-plugin/tests/inbound-message-types.test.ts +267 -0
package/telegram-plugin/tests/issues-card.test.ts +49 -0
package/telegram-plugin/tests/pending-inbound-buffer.test.ts +132 -0
package/telegram-plugin/tests/permission-rule.test.ts +80 -1
package/telegram-plugin/tests/permission-title.test.ts +31 -0
package/telegram-plugin/tests/quota-check.test.ts +5 -35
package/telegram-plugin/tests/races.test.ts +179 -0
package/telegram-plugin/tests/reaction-trigger-flow.test.ts +353 -0
package/telegram-plugin/tests/reaction-trigger.test.ts +397 -0
package/telegram-plugin/tests/retry-api-call.test.ts +152 -1
package/telegram-plugin/tests/runtime-metrics.test.ts +145 -0
package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +155 -0
package/telegram-plugin/tests/secret-detect-delete-must-surface-failures.test.ts +133 -0
package/telegram-plugin/tests/secret-detect-false-positives.test.ts +137 -0
package/telegram-plugin/tests/silence-poke.test.ts +493 -0
package/telegram-plugin/tests/silent-end.test.ts +206 -0
package/telegram-plugin/tests/subagent-tracker-hooks.test.ts +107 -0
package/telegram-plugin/tests/subagent-watcher-env-thresholds.test.ts +224 -0
package/telegram-plugin/tests/subagent-watcher-stall-terminal.test.ts +316 -0
package/telegram-plugin/tests/subagent-watcher.test.ts +263 -0
package/telegram-plugin/tests/turn-signal-tracker.test.ts +81 -0
package/telegram-plugin/tests/vault-approval-posture.test.ts +256 -0
package/telegram-plugin/tests/vault-grant-auto-resume.test.ts +73 -0
package/telegram-plugin/tests/vault-grant-inbound-builders.test.ts +226 -0
package/telegram-plugin/tests/vault-grant-union.test.ts +130 -0
package/telegram-plugin/tests/vault-key-regex-allows-slash.test.ts +140 -0
package/telegram-plugin/tests/vault-posture-quarantine.test.ts +104 -0
package/telegram-plugin/tests/vault-request-access-tool.test.ts +114 -0
package/telegram-plugin/tests/vault-request-access-unlock-resume.test.ts +106 -0
package/telegram-plugin/turn-signal-tracker.ts +100 -24
package/telegram-plugin/uat/SETUP.md +210 -35
package/telegram-plugin/uat/assertions.ts +264 -37
package/telegram-plugin/uat/driver-info.ts +57 -0
package/telegram-plugin/uat/driver.ts +590 -51
package/telegram-plugin/uat/harness.ts +140 -94
package/telegram-plugin/uat/load-env.test.ts +72 -0
package/telegram-plugin/uat/load-env.ts +48 -0
package/telegram-plugin/uat/login.ts +96 -53
package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
package/telegram-plugin/uat/runners/report.ts +150 -0
package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
package/telegram-plugin/uat/runners/scorer.ts +106 -0
package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
package/telegram-plugin/uat/scenarios/ask-user-button-tap-dm.test.ts +141 -0
package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +191 -0
package/telegram-plugin/uat/scenarios/fuzz-extended-dm.test.ts +255 -0
package/telegram-plugin/uat/scenarios/fuzz-human-style-dm.test.ts +275 -0
package/telegram-plugin/uat/scenarios/fuzz-random-prompts-dm.test.ts +146 -0
package/telegram-plugin/uat/scenarios/fuzz-status-ask-dm.test.ts +486 -0
package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +67 -0
package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +100 -0
package/telegram-plugin/uat/scenarios/jtbd-soft-commit-dm.test.ts +67 -0
package/telegram-plugin/uat/scenarios/jtbd-status-query-dm.test.ts +49 -0
package/telegram-plugin/uat/scenarios/location-inbound-dm.test.ts +65 -0
package/telegram-plugin/uat/scenarios/midturn-silent-dm.test.ts +175 -0
package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +142 -0
package/telegram-plugin/uat/scenarios/reactions-trigger-turn-dm.test.ts +96 -0
package/telegram-plugin/uat/scenarios/secret-redaction-deletes-original-dm.test.ts +123 -0
package/telegram-plugin/uat/scenarios/secret-redaction-no-false-positive-dm.test.ts +87 -0
package/telegram-plugin/uat/scenarios/silence-poke-soft-dm.test.ts +155 -0
package/telegram-plugin/uat/scenarios/silent-end-recovery-dm.test.ts +95 -0
package/telegram-plugin/uat/scenarios/smoke-dm-reply.test.ts +57 -0
package/telegram-plugin/uat/scenarios/subagent-watcher-no-rerun-dm.test.ts +135 -0
package/telegram-plugin/uat/scenarios/vault-approval-posture-telegram-id-dm.test.ts +191 -0
package/telegram-plugin/uat/scenarios/vault-audit-allow-dm.test.ts +108 -0
package/telegram-plugin/uat/scenarios/vault-grant-auto-resume-dm.test.ts +121 -0
package/telegram-plugin/uat/scenarios/vault-request-access-concurrent-dm.test.ts +161 -0
package/telegram-plugin/uat/scenarios/vault-request-access-end-to-end-dm.test.ts +158 -0
package/telegram-plugin/uat/scenarios/voice-inbound-dm.test.ts +65 -0
package/telegram-plugin/vault-approval-posture.ts +42 -0
package/telegram-plugin/welcome-text.ts +1 -0
package/telegram-plugin/active-pins-sweep.ts +0 -204
package/telegram-plugin/active-pins.ts +0 -146
package/telegram-plugin/auth-dashboard.ts +0 -1104
package/telegram-plugin/auth-slot-parser.ts +0 -497
package/telegram-plugin/card-event-log.ts +0 -138
package/telegram-plugin/dist/foreman/foreman.js +0 -31106
package/telegram-plugin/docs/multi-agent-card-design.md +0 -847
package/telegram-plugin/docs/pinned-progress-card-reliability.md +0 -144
package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
package/telegram-plugin/foreman/foreman.ts +0 -1165
package/telegram-plugin/foreman/setup-flow.ts +0 -345
package/telegram-plugin/foreman/setup-state.ts +0 -239
package/telegram-plugin/foreman/state.ts +0 -203
package/telegram-plugin/pin-event-log.ts +0 -76
package/telegram-plugin/progress-card-driver.ts +0 -2886
package/telegram-plugin/progress-card-pin-manager.ts +0 -589
package/telegram-plugin/progress-card-pin-watchdog.ts +0 -98
package/telegram-plugin/progress-card.ts +0 -1409
package/telegram-plugin/tests/HARNESS.md +0 -340
package/telegram-plugin/tests/_progress-card-harness.ts +0 -109
package/telegram-plugin/tests/active-pins-boot-reaper.test.ts +0 -211
package/telegram-plugin/tests/active-pins-sweep.test.ts +0 -309
package/telegram-plugin/tests/active-pins.test.ts +0 -187
package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
package/telegram-plugin/tests/bg-agent-progress-card-757.test.ts +0 -201
package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
package/telegram-plugin/tests/card-event-log.test.ts +0 -145
package/telegram-plugin/tests/first-paint.test.ts +0 -257
package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
package/telegram-plugin/tests/foreman-state.test.ts +0 -164
package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
package/telegram-plugin/tests/harness-ordering-invariants.test.ts +0 -243
package/telegram-plugin/tests/pin-event-log.test.ts +0 -124
package/telegram-plugin/tests/progress-card-api-failure-during-deferred.test.ts +0 -73
package/telegram-plugin/tests/progress-card-close-paths-converge.test.ts +0 -272
package/telegram-plugin/tests/progress-card-cross-turn.test.ts +0 -258
package/telegram-plugin/tests/progress-card-delay-842.test.ts +0 -160
package/telegram-plugin/tests/progress-card-dispose-preservepending.test.ts +0 -81
package/telegram-plugin/tests/progress-card-draft-flag.test.ts +0 -80
package/telegram-plugin/tests/progress-card-driver-eviction.test.ts +0 -215
package/telegram-plugin/tests/progress-card-driver-fleet-shadow.test.ts +0 -123
package/telegram-plugin/tests/progress-card-driver-force-complete-parent-done.test.ts +0 -76
package/telegram-plugin/tests/progress-card-edit-timestamps-budget.test.ts +0 -62
package/telegram-plugin/tests/progress-card-memory-bounds.test.ts +0 -84
package/telegram-plugin/tests/progress-card-pin-failure-paths.test.ts +0 -139
package/telegram-plugin/tests/progress-card-pin-manager.test.ts +0 -773
package/telegram-plugin/tests/progress-card-pin-race-fast-turn.test.ts +0 -66
package/telegram-plugin/tests/progress-card-pin-sidecar-partial-write.test.ts +0 -64
package/telegram-plugin/tests/progress-card-pin-watchdog.test.ts +0 -190
package/telegram-plugin/tests/progress-card-sigterm-pin-flush.test.ts +0 -146
package/telegram-plugin/tests/real-gateway-f1-ladder-integrity.test.ts +0 -123
package/telegram-plugin/tests/real-gateway-f2-instant-draft.test.ts +0 -82
package/telegram-plugin/tests/real-gateway-f3-late-card.test.ts +0 -114
package/telegram-plugin/tests/real-gateway-harness.ts +0 -699
package/telegram-plugin/tests/real-gateway-i6-turn-flush-replay-dedup.test.ts +0 -313
package/telegram-plugin/tests/real-gateway-ipc-lifecycle.test.ts +0 -299
package/telegram-plugin/tests/real-gateway-spec.test.ts +0 -487
package/telegram-plugin/tests/real-gateway.smoke.test.ts +0 -101
package/telegram-plugin/tests/setup-flow.test.ts +0 -510
package/telegram-plugin/tests/setup-state.test.ts +0 -146
package/telegram-plugin/tests/sync-chat-running-subagents.test.ts +0 -116
package/telegram-plugin/tests/turn-end-regressions.test.ts +0 -489
package/telegram-plugin/tests/turn-flush-card-takeover.test.ts +0 -218
package/telegram-plugin/tests/turn-flush-prose-recovery.test.ts +0 -78
package/telegram-plugin/tests/two-zone-bg-carry-full-lifecycle.test.ts +0 -131
package/telegram-plugin/tests/two-zone-bg-detection.test.ts +0 -120
package/telegram-plugin/tests/two-zone-bg-done-when-all-terminal.test.ts +0 -116
package/telegram-plugin/tests/two-zone-bg-early-turn-end.test.ts +0 -87
package/telegram-plugin/tests/two-zone-bg-survives-next-turn.test.ts +0 -211
package/telegram-plugin/tests/two-zone-card-cap.test.ts +0 -62
package/telegram-plugin/tests/two-zone-card-fleet-row.test.ts +0 -101
package/telegram-plugin/tests/two-zone-card-header-phases.test.ts +0 -78
package/telegram-plugin/tests/two-zone-card-html-balance.test.ts +0 -110
package/telegram-plugin/tests/two-zone-card-lifecycle.test.ts +0 -128
package/telegram-plugin/tests/two-zone-card-sanitise.test.ts +0 -58
package/telegram-plugin/tests/two-zone-card-snapshot.test.ts +0 -133
package/telegram-plugin/tests/two-zone-concurrent-turns-isolation.test.ts +0 -155
package/telegram-plugin/tests/two-zone-phasefor-precedence.test.ts +0 -117
package/telegram-plugin/tests/two-zone-snapshot-extras.test.ts +0 -187
package/telegram-plugin/tests/two-zone-stuck-edit-throttle.test.ts +0 -149
package/telegram-plugin/tests/two-zone-stuck-header-escalation.test.ts +0 -101
package/telegram-plugin/tests/two-zone-stuck-per-member.test.ts +0 -114
package/telegram-plugin/tests/two-zone-stuck-recovery.test.ts +0 -105
package/telegram-plugin/tests/waiting-ux-harness.ts +0 -381
package/telegram-plugin/tests/waiting-ux.e2e.test.ts +0 -233
package/telegram-plugin/turn-flush-prose-recovery.ts +0 -40
package/telegram-plugin/two-zone-card.ts +0 -269
package/telegram-plugin/uat/scenarios/smoke-clerk-reply.test.ts +0 -61

package/telegram-plugin/uat/runners/skill-coverage.ts ADDED Viewed

@@ -0,0 +1,620 @@
+#!/usr/bin/env bun
+/**
+ * Skill-coverage UAT runner — drives a real Telegram user account
+ * against a switchroom agent's bot to validate that the right Claude
+ * Code skill fires for fuzzy NL phrasings.
+ *
+ * Sister to `tests/skill-coverage/cli.ts` (the inject_inbound-based
+ * runner that hit an agent-uid perms blocker). This one observes
+ * everything through Telegram itself, so no host-side JSONL access
+ * is required.
+ *
+ * **Skill detection.** The PreToolUse hook
+ * `telegram-plugin/hooks/tool-label-pretool.mjs` writes one JSONL
+ * row per tool invocation to
+ * `~/.switchroom/agents/<agent>/telegram/tool-labels-<session_id>.jsonl`.
+ * Skill rows have `tool_name === "Skill"` and a label of the form
+ * `"Running skill <slug>"`. After each probe the runner re-reads every
+ * sidecar file in that directory and pulls the slugs out of the Skill
+ * rows whose `ts` is at or after the probe's start time.
+ *
+ * That sidecar dir is bind-mounted into the agent at
+ * `$TELEGRAM_STATE_DIR` AND lives at a host-readable path (owned by
+ * the agent UID but mode 0775; jsonl rows are 0644 from the hook).
+ * No gateway / progress-card dependency.
+ *
+ * Usage:
+ *   bun telegram-plugin/uat/runners/skill-coverage.ts \
+ *     --agent test-harness:@your_test_bot \
+ *     --skills switchroom-cli,switchroom-status \
+ *     --limit-per-skill 2 \
+ *     --out tests/skill-coverage/out/skill-coverage
+ *
+ * Env equivalents (UAT-standard, fail loud):
+ *   TELEGRAM_API_ID, TELEGRAM_API_HASH, TELEGRAM_UAT_DRIVER_SESSION
+ *   SKILL_COVERAGE_AGENT="test-harness:@your_test_bot"
+ *   SKILL_COVERAGE_SKILLS="a,b,c"             (optional filter)
+ *   SKILL_COVERAGE_LIMIT_PER_SKILL=N          (optional)
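+ *   SKILL_COVERAGE_REPLY_TIMEOUT_MS=N         (default 90000)
+ *   SKILL_COVERAGE_SETTLE_MS=N                (default 6000)
+ *   SKILL_COVERAGE_SIDECAR_DRAIN_MS=N         (default 3000)
+ *   SKILL_COVERAGE_AGENT_STATE_DIR="..."      (default ~/.switchroom/agents/<name>/telegram)
+ *   SKILL_COVERAGE_OUT="..."                  (default tests/skill-coverage/out/skill-coverage)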
+ */
+import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
+import { dirname, join, resolve } from "node:path";
+import { homedir } from "node:os";
+import { fileURLToPath } from "node:url";
+import { Driver, type ObservedMessage } from "../driver.js";
+import { loadUatEnv } from "../load-env.js";
+loadUatEnv();
+// ─── Types — mirror tests/skill-coverage/{corpus,harness}/types.ts ────
+export interface Probe { // One corpus entry: a phrase to send plus the skill it is expected to trigger.
+  id: string;
+  targetSkill: string | null; // null is bucketed as "<neg>" by trimPerSkill — presumably a negative control; confirm against the corpus generator.
+  /** Adjacent-skill expectation for negative controls. */
+  expectedOtherSkill?: string;
+  kind: "paraphrase" | "typo" | "slang" | "indirect" | "negative";
+  phrase: string; // Text that runProbe sends verbatim to the bot.
+}
+export interface ProbeResult { // Outcome of running one Probe through runProbe.
+  probe: Probe;
+  skillsFired: string[]; // De-duplicated skill slugs read from the sidecar; empty on send failure or timeout.
+  replyText: string; // Trimmed text of the observed bot reply; "" on send failure or timeout.
+  durationMs: number; // Wall-clock ms from probe start until the result object was built.
+  timedOut: boolean; // true when no non-empty reply was observed within the reply timeout.
+  errorMessage?: string; // Set only when sending the probe failed ("send failed: …").
+}
+// ─── Skill-label extraction ──────────────────────────────────────────
+/**
+ * Pattern for the label text the PreToolUse hook
+ * `telegram-plugin/hooks/tool-label-pretool.mjs` emits for a `Skill`
+ * tool call. The slug part is deliberately narrow: skill names are
+ * kebab-case ASCII per `skills/<name>/SKILL.md` frontmatter.
+ */
+const SKILL_LABEL_RE = /running skill\s+([a-z0-9][a-z0-9-]*)/i;
+/**
+ * Pull the skill slug out of a tool label.
+ *
+ * @param label Label text from a sidecar row.
+ * @returns The lower-cased slug, or `null` when the label does not
+ *   contain a "running skill <slug>" fragment.
+ */
+export function extractSkillFromLabel(label: string): string | null {
+  const hit = SKILL_LABEL_RE.exec(label);
+  if (hit === null) return null;
+  const [, slug] = hit;
+  return slug!.toLowerCase();
+}
+export interface SidecarRow { // One parsed JSONL row from a `tool-labels-*.jsonl` sidecar file.
+  ts: number; // Compared against a Date.now()-based cutoff, so presumably epoch milliseconds — confirm against the hook.
+  tool_use_id: string;
+  agent_id: string | null;
+  label: string; // Human-readable label; Skill rows are matched against SKILL_LABEL_RE.
+  tool_name: string; // readSkillRowsSince keeps only rows where this equals "Skill".
+}
+/**
+ * Read every `tool-labels-*.jsonl` file in `dir` and return rows
+ * with `tool_name === "Skill"` and `ts >= sinceMs`. The sidecar is
+ * append-only so partial-line tails are unlikely; we still defensively
+ * skip malformed lines — including lines that are valid JSON but not
+ * an object (`null`, numbers, strings), which would otherwise throw
+ * on property access.
+ *
+ * @param dir      Directory holding the sidecar files.
+ * @param sinceMs  Inclusive lower bound for a row's `ts`.
+ * @param readdir  Directory lister (injected so callers/tests control I/O).
+ * @param readFile File reader returning the file's text.
+ * @returns Matching rows in directory-listing order, then file order.
+ *   An unreadable directory yields `[]`; an unreadable file is skipped.
+ */
+export function readSkillRowsSince(
+  dir: string,
+  sinceMs: number,
+  readdir: (p: string) => string[],
+  readFile: (p: string) => string,
+): SidecarRow[] {
+  const out: SidecarRow[] = [];
+  let entries: string[];
+  try {
+    entries = readdir(dir);
+  } catch {
+    // Missing or unreadable state dir: nothing to report.
+    return out;
+  }
+  for (const e of entries) {
+    if (!e.startsWith("tool-labels-") || !e.endsWith(".jsonl")) continue;
+    let content: string;
+    try {
+      content = readFile(`${dir}/${e}`);
+    } catch {
+      // File vanished or is unreadable; other sidecars may still be fine.
+      continue;
+    }
+    for (const line of content.split("\n")) {
+      if (!line.trim()) continue;
+      let parsed: unknown;
+      try {
+        parsed = JSON.parse(line);
+      } catch {
+        continue;
+      }
+      // JSON.parse accepts `null` and scalars; only objects can be rows.
+      if (typeof parsed !== "object" || parsed === null) continue;
+      const row = parsed as SidecarRow;
+      if (typeof row.ts !== "number" || row.ts < sinceMs) continue;
+      if (row.tool_name !== "Skill") continue;
+      out.push(row);
+    }
+  }
+  return out;
+}
+// ─── CLI parsing ─────────────────────────────────────────────────────
+interface CliConfig { // Fully resolved runner settings produced by parseCli (env defaults, overridden by flags).
+  agentName: string; // Part of the --agent spec before ":".
+  botUsername: string; // Part of the --agent spec after ":"; parseCli requires it to start with "@".
+  skillFilter: string[] | null; // Skill names to keep when loading the corpus; null means no filter.
+  limitPerSkill: number | null; // Max probes kept per target skill; null means no cap.
+  /** Per-probe reply timeout, ms. Default 90s. */
+  replyTimeoutMs: number;
+  /** Inter-probe settle, ms. Default 6s to keep us under Telegram's rate cap. */
+  settleMs: number;
+  /** Sidecar-drain window after reply is seen, ms. The hook writes
+   *  asynchronously; a small post-reply hold avoids missing the last
+   *  Skill row of a turn. Default 3s. */
+  sidecarDrainMs: number;
+  /** Path to the agent's TELEGRAM_STATE_DIR on the host — where
+   *  `tool-labels-<session>.jsonl` files live. Defaults to
+   *  `~/.switchroom/agents/<name>/telegram/`. */
+  agentStateDir: string;
+  outBase: string; // Output base path; defaults to DEFAULT_OUT_BASE, and --out values are resolved to absolute paths.
+}
+const HERE = dirname(fileURLToPath(import.meta.url)); // Directory containing this module.
+const REPO_ROOT = resolve(HERE, "..", "..", ".."); // Three directory levels above this module.
+const DEFAULT_CORPUS_DIR = join(REPO_ROOT, "tests/skill-coverage/corpus"); // Presumably the default `dir` passed to loadCorpus — its use is not visible in this part of the file; confirm.
+const DEFAULT_OUT_BASE = join(REPO_ROOT, "tests/skill-coverage/out/skill-coverage"); // Fallback when neither --out nor SKILL_COVERAGE_OUT is given.
+/**
+ * Report a fatal usage/configuration error and stop the runner.
+ *
+ * Writes one line tagged `[skill-coverage-uat]` to stderr, then exits
+ * the process with status 2. Never returns.
+ */
+function fail(msg: string): never {
+  const line = "[skill-coverage-uat] " + msg + "\n";
+  process.stderr.write(line);
+  process.exit(2);
+}
+/**
+ * Build the runner configuration from environment variables and CLI flags.
+ *
+ * Environment variables supply the defaults; flags in `argv` override
+ * them. Tokens that are not recognised flags and do not start with
+ * `--` are ignored. Every invalid input goes through `fail`, which
+ * exits the process with status 2:
+ *   - a flag with no value, or an unknown `--flag`;
+ *   - a numeric setting that is not a non-negative integer;
+ *   - a missing or malformed agent spec (`<name>:@<bot-username>`).
+ * `-h` / `--help` prints usage and exits with status 0.
+ *
+ * @param argv Command-line tokens to scan for flags.
+ * @returns The resolved configuration.
+ */
+function parseCli(argv: readonly string[]): CliConfig {
+  // Numeric settings are validated up front. A NaN reply timeout would
+  // produce a NaN deadline (every probe reported as timed out at once),
+  // and a NaN per-skill limit would silently disable the cap.
+  const toInt = (raw: string, source: string): number => {
+    const n = Number.parseInt(raw, 10);
+    if (!Number.isFinite(n) || n < 0) {
+      fail(`${source}: expected a non-negative integer; got "${raw}"`);
+    }
+    return n;
+  };
+  // An unset or empty env var falls back to the built-in default.
+  const envInt = (name: string, fallback: number): number => {
+    const raw = process.env[name];
+    return raw ? toInt(raw, name) : fallback;
+  };
+  let agentSpec = process.env.SKILL_COVERAGE_AGENT ?? "";
+  let skillFilter = process.env.SKILL_COVERAGE_SKILLS
+    ? process.env.SKILL_COVERAGE_SKILLS.split(",").map((s) => s.trim()).filter(Boolean)
+    : null;
+  let limitPerSkill: number | null = process.env.SKILL_COVERAGE_LIMIT_PER_SKILL
+    ? toInt(process.env.SKILL_COVERAGE_LIMIT_PER_SKILL, "SKILL_COVERAGE_LIMIT_PER_SKILL")
+    : null;
+  let replyTimeoutMs = envInt("SKILL_COVERAGE_REPLY_TIMEOUT_MS", 90000);
+  let settleMs = envInt("SKILL_COVERAGE_SETTLE_MS", 6000);
+  let sidecarDrainMs = envInt("SKILL_COVERAGE_SIDECAR_DRAIN_MS", 3000);
+  let agentStateDir = process.env.SKILL_COVERAGE_AGENT_STATE_DIR ?? "";
+  let outBase = process.env.SKILL_COVERAGE_OUT ?? DEFAULT_OUT_BASE;
+  for (let i = 0; i < argv.length; i++) {
+    const tok = argv[i]!;
+    // Consume the token after the current flag as its value.
+    const next = (): string => {
+      const v = argv[++i];
+      if (!v) fail(`${tok}: missing value`);
+      return v;
+    };
+    switch (tok) {
+      case "--agent":
+        agentSpec = next();
+        break;
+      case "--skills":
+        skillFilter = next().split(",").map((s) => s.trim()).filter(Boolean);
+        break;
+      case "--limit-per-skill":
+        limitPerSkill = toInt(next(), tok);
+        break;
+      case "--reply-timeout-ms":
+        replyTimeoutMs = toInt(next(), tok);
+        break;
+      case "--settle-ms":
+        settleMs = toInt(next(), tok);
+        break;
+      case "--sidecar-drain-ms":
+        sidecarDrainMs = toInt(next(), tok);
+        break;
+      case "--agent-state-dir":
+        agentStateDir = next();
+        break;
+      case "--out":
+        outBase = resolve(next());
+        break;
+      case "-h":
+      case "--help":
+        printHelp();
+        process.exit(0);
+        break;
+      default:
+        if (tok.startsWith("--")) fail(`unknown flag: ${tok}`);
+    }
+  }
+  if (!agentSpec) {
+    fail(
+      "no agent target. Pass --agent <name>:@<bot-username> or set SKILL_COVERAGE_AGENT.",
+    );
+  }
+  const [agentName, botUsername] = agentSpec.split(":").map((s) => s.trim());
+  if (!agentName || !botUsername || !botUsername.startsWith("@")) {
+    fail(`--agent expects "<name>:@<bot-username>"; got "${agentSpec}"`);
+  }
+  // An explicit state dir wins; otherwise derive it from the agent name.
+  const resolvedAgentStateDir = agentStateDir
+    ? resolve(agentStateDir)
+    : join(homedir(), ".switchroom", "agents", agentName!, "telegram");
+  return {
+    agentName: agentName!,
+    botUsername: botUsername!,
+    skillFilter,
+    limitPerSkill,
+    replyTimeoutMs,
+    settleMs,
+    sidecarDrainMs,
+    agentStateDir: resolvedAgentStateDir,
+    outBase,
+  };
+}
+function printHelp(): void { // Write the usage summary (required env vars and supported flags) to stdout; called for -h/--help.
+  process.stdout.write(`skill-coverage UAT runner
+Required env (fail loud if missing):
+  TELEGRAM_API_ID, TELEGRAM_API_HASH, TELEGRAM_UAT_DRIVER_SESSION
+Flags:
+  --agent NAME:@BOT         Agent + bot to target. Required.
+  --skills A,B,C            Filter to these skills only.
+  --limit-per-skill N       Cap probes per skill.
+  --reply-timeout-ms N      Per-probe budget. Default 90000.
+  --settle-ms N             Inter-probe settle. Default 6000.
+  --sidecar-drain-ms N      Post-reply hold for the last hook write. Default 3000.
+  --agent-state-dir PATH    Override sidecar location. Default ~/.switchroom/agents/<name>/telegram.
+  --out PATH                Output base path. Default tests/skill-coverage/out/skill-coverage.
+`);
+}
+// ─── Corpus loading ──────────────────────────────────────────────────
+/**
+ * Load probes from every `<skill>.jsonl` file in `dir`.
+ *
+ * The file name (minus `.jsonl`) is treated as the skill name for
+ * filtering. Blank lines, lines that are not valid JSON, and JSON
+ * values that cannot be a probe (non-objects such as `null`, or
+ * objects without a string `phrase`) are skipped, so later stages can
+ * safely read `targetSkill` and send `phrase`.
+ *
+ * @param dir         Corpus directory; a missing directory is fatal
+ *   (reported through `fail`, which exits the process).
+ * @param skillFilter Skill names to load, or `null` to load all files.
+ * @returns Probes in directory-listing order, then file order.
+ */
+function loadCorpus(dir: string, skillFilter: string[] | null): Probe[] {
+  if (!existsSync(dir)) {
+    fail(`corpus dir not found: ${dir} — run \`bun tests/skill-coverage/corpus/generate-corpus.ts --seed=1\` first.`);
+  }
+  const wanted = skillFilter ? new Set(skillFilter) : null;
+  const out: Probe[] = [];
+  for (const f of readdirSync(dir)) {
+    if (!f.endsWith(".jsonl")) continue;
+    const skill = f.slice(0, -".jsonl".length);
+    if (wanted && !wanted.has(skill)) continue;
+    const content = readFileSync(join(dir, f), "utf-8");
+    for (const line of content.split("\n")) {
+      if (!line.trim()) continue;
+      let parsed: unknown;
+      try {
+        parsed = JSON.parse(line);
+      } catch {
+        // skip malformed lines
+        continue;
+      }
+      // JSON.parse also accepts `null` and scalars; those are not probes.
+      if (typeof parsed !== "object" || parsed === null) continue;
+      if (typeof (parsed as { phrase?: unknown }).phrase !== "string") continue;
+      out.push(parsed as Probe);
+    }
+  }
+  return out;
+}
+/**
+ * Keep at most `limit` probes per target skill, preserving input order.
+ *
+ * Probes whose `targetSkill` is null share a single "<neg>" bucket.
+ *
+ * @param probes Probes to thin out.
+ * @param limit  Per-skill cap; `null` returns `probes` itself untouched.
+ * @returns The retained probes (a new array whenever a limit is given).
+ */
+function trimPerSkill(probes: Probe[], limit: number | null): Probe[] {
+  if (limit == null) return probes;
+  const seen = new Map<string, number>();
+  return probes.filter((probe) => {
+    const bucket = probe.targetSkill ?? "<neg>";
+    const soFar = seen.get(bucket) ?? 0;
+    if (soFar >= limit) return false;
+    seen.set(bucket, soFar + 1);
+    return true;
+  });
+}
+// ─── Send + observe a single probe ───────────────────────────────────
+/**
+ * In-flight `next()` call per iterator. Kept across calls so that a
+ * slice timeout does not orphan the request (see pullOneWithTimeout).
+ */
+const pendingPulls = new WeakMap<
+  AsyncIterator<ObservedMessage>,
+  Promise<IteratorResult<ObservedMessage>>
+>();
+/**
+ * Wait up to `ms` for the next message from `it`.
+ *
+ * At most one `next()` call is outstanding per iterator. When the
+ * timer wins, that call is remembered and reused by the following
+ * invocation instead of issuing a fresh `next()`. This matters for
+ * iterators that answer `next()` calls in order (async generators
+ * do): a fresh call would queue behind the abandoned one, and the
+ * next message would be delivered to — and lost with — the abandoned
+ * call.
+ *
+ * @param it Message iterator to pull from.
+ * @param ms Maximum time to wait, in milliseconds.
+ * @returns The next message, or `"timeout"` when the timer fires
+ *   first, the iterator is exhausted, or `next()` rejects.
+ */
+async function pullOneWithTimeout(
+  it: AsyncIterator<ObservedMessage>,
+  ms: number,
+): Promise<ObservedMessage | "timeout"> {
+  let pending = pendingPulls.get(it);
+  if (!pending) {
+    pending = it.next();
+    pendingPulls.set(it, pending);
+  }
+  let timer: ReturnType<typeof setTimeout> | undefined;
+  const timedOut = new Promise<"timeout">((resolveFn) => {
+    timer = setTimeout(() => resolveFn("timeout"), ms);
+  });
+  try {
+    const winner = await Promise.race([pending, timedOut]);
+    // Timer won: leave `pending` registered so the next call resumes it.
+    if (winner === "timeout") return "timeout";
+    pendingPulls.delete(it);
+    // An exhausted stream is reported the same way as a timeout.
+    return winner.done === true ? "timeout" : winner.value;
+  } catch {
+    // next() rejected; drop it so a later call can try again.
+    pendingPulls.delete(it);
+    return "timeout";
+  } finally {
+    clearTimeout(timer);
+  }
+}
+async function runProbe( // Send one probe phrase to the bot, wait for its first non-empty reply, then collect the Skill rows the sidecar recorded since the probe started.
+  driver: Driver,
+  botUserId: number,
+  driverUserId: number,
+  probe: Probe,
+  cfg: CliConfig,
+): Promise<ProbeResult> {
+  const startedAt = Date.now();
+  const stream = driver.observeMessages(botUserId)[Symbol.asyncIterator](); // Iterator is created before the send — presumably so the reply cannot be missed; whether observation starts here or on the first next() depends on Driver (confirm).
+  const replyTexts = new Map<number, string>(); // messageId → trimmed reply text.
+  let sentMessageId: number;
+  try {
+    const sent = await driver.sendText(botUserId, probe.phrase);
+    sentMessageId = sent.messageId;
+  } catch (err) {
+    try {
+      await stream.return?.(undefined); // Best-effort close of the observer before reporting the send failure.
+    } catch {
+      /* ignore */
+    }
+    return {
+      probe,
+      skillsFired: [],
+      replyText: "",
+      durationMs: Date.now() - startedAt,
+      timedOut: false,
+      errorMessage: `send failed: ${(err as Error).message}`,
+    };
+  }
+  // Bot reply is the turn-completion signal — we stop reading the
+  // stream once it lands. The sidecar-drain hold below absorbs any
+  // late hook writes after the visible reply.
+  const deadline = startedAt + cfg.replyTimeoutMs;
+  let firstReplyAt = 0; // 0 means "no reply seen yet".
+  try {
+    while (Date.now() < deadline) {
+      const remaining = deadline - Date.now();
+      const slice = await pullOneWithTimeout(stream, Math.min(remaining, 2000)); // Poll in slices of at most 2s so the deadline is re-checked regularly.
+      if (slice === "timeout") {
+        if (firstReplyAt) break; // NOTE(review): firstReplyAt is only set right before the unconditional break below, so this branch looks unreachable.
+        continue;
+      }
+      if (slice.senderUserId === driverUserId) continue; // Skip the driver account's own messages.
+      if (slice.messageId <= sentMessageId) continue; // Skip anything not newer than the probe — assumes message ids increase over time in this chat; confirm.
+      const t = (slice.text ?? "").trim();
+      if (!t) continue;
+      replyTexts.set(slice.messageId, t);
+      if (!firstReplyAt) firstReplyAt = Date.now();
+      // First non-empty reply is enough — extra edits don't change
+      // which Skill labels landed in the sidecar.
+      break;
+    }
+  } finally {
+    try {
+      await stream.return?.(undefined);
+    } catch {
+      /* ignore */
+    }
+  }
+  if (!firstReplyAt) {
+    return {
+      probe,
+      skillsFired: [],
+      replyText: "",
+      durationMs: Date.now() - startedAt,
+      timedOut: true,
+    };
+  }
+  // Drain window: hook writes are async to the assistant message
+  // landing. A small post-reply hold catches the last row.
+  await new Promise((res) => setTimeout(res, cfg.sidecarDrainMs));
+  const rows = readSkillRowsSince(
+    cfg.agentStateDir,
+    startedAt, // Cutoff is this process's clock — assumes the hook stamps `ts` in epoch ms on a clock in sync with this host; confirm.
+    (p) => readdirSync(p),
+    (p) => readFileSync(p, "utf-8"),
+  );
+  const skills = new Set<string>(); // De-duplicates slugs that fired more than once.
+  for (const r of rows) {
+    const slug = extractSkillFromLabel(r.label);
+    if (slug) skills.add(slug);
+  }
+  const replyText = [...replyTexts.entries()]
+    .sort((a, b) => a[0] - b[0])
+    .map(([, t]) => t)
+    .join("\n---\n");
+  return {
+    probe,
+    skillsFired: [...skills],
+    replyText,
+    durationMs: Date.now() - startedAt,
+    timedOut: false,
+  };
+}
+// ─── Scoring ─────────────────────────────────────────────────────────
+/** One scorecard row: detection metrics for a single skill, as filled in by `score`. */
+interface SkillRow {
+  /** Skill identifier this row describes. */
+  skill: string;
+  /** Number of probes whose `targetSkill` is this skill. */
+  sampleSize: number;
+  /** Target probes on which the skill fired. */
+  truePositives: number;
+  /** Target probes on which the skill did not fire. */
+  falseNegatives: number;
+  /** Non-target probes (negative controls included) on which the skill fired. */
+  falsePositives: number;
+  /** TP / (TP + FP), rounded to 3 decimals; 0 when the skill never fired. */
+  precision: number;
+  /** TP / (TP + FN), rounded to 3 decimals; 0 when the skill was never targeted. */
+  recall: number;
+  /** Harmonic mean of precision and recall, rounded to 3 decimals; 0 when both are 0. */
+  f1: number;
+  /** 1 when the skill fired on at least one of its target probes, otherwise 0. */
+  execSuccess: number;
+  /** Share of probes with `targetSkill === null` on which this skill fired; 0 when there are none. */
+  negativeControlFpRate: number;
+}
+/** Full result of a scoring run: per-skill rows plus aggregate figures. */
+interface Scorecard {
+  /** ISO-8601 timestamp taken when the scorecard was built. */
+  generatedAt: string;
+  /** Name of the agent the probes were run against. */
+  agentName: string;
+  /** Number of probe results that were scored. */
+  totalProbes: number;
+  /** One row per skill that was targeted or fired, sorted by skill name. */
+  rows: SkillRow[];
+  aggregate: {
+    /** Median-style summary of the per-skill `f1` values, rounded to 3 decimals. */
+    medianF1: number;
+    /** Count of rows whose `f1` is below `f1Threshold`. */
+    skillsBelowF1Threshold: number;
+    /** Count of rows whose `execSuccess` is below `execThreshold`. */
+    skillsBelowExecThreshold: number;
+    /** F1 cut-off used for `skillsBelowF1Threshold`. */
+    f1Threshold: number;
+    /** execSuccess cut-off used for `skillsBelowExecThreshold`. */
+    execThreshold: number;
+  };
+}
+/**
+ * Builds the scorecard for a completed probe run.
+ *
+ * A row is produced for every skill that was either targeted by a probe or
+ * observed firing. For a skill `s`:
+ *  - a probe targeting `s` counts as TP when `s` fired, FN otherwise
+ *    (timed-out and failed probes have no fired skills, so they count as FN);
+ *  - any other probe on which `s` fired counts as FP;
+ *  - probes with `targetSkill === null` additionally feed the negative-control
+ *    false-positive rate.
+ *
+ * @param results   Outcome of every probe that was run.
+ * @param agentName Copied into the scorecard.
+ * @returns Scorecard with rows sorted by skill name and all ratios rounded to
+ *          3 decimals. `medianF1` is the true median: for an even number of
+ *          rows it is the mean of the two middle values.
+ */
+function score(results: ProbeResult[], agentName: string): Scorecard {
+  const F1_THRESHOLD = 0.9;
+  const EXEC_THRESHOLD = 0.95;
+  const skills = new Set<string>();
+  for (const r of results) {
+    if (r.probe.targetSkill) skills.add(r.probe.targetSkill);
+    for (const s of r.skillsFired) skills.add(s);
+  }
+  const rows: SkillRow[] = [];
+  for (const s of [...skills].sort()) {
+    let tp = 0, fn = 0, fp = 0;
+    let negTotal = 0, negFp = 0;
+    for (const r of results) {
+      const fired = r.skillsFired.includes(s);
+      if (r.probe.targetSkill === s) {
+        if (fired) tp++;
+        else fn++;
+      } else if (fired) {
+        fp++;
+      }
+      if (r.probe.targetSkill === null) {
+        negTotal++;
+        if (fired) negFp++;
+      }
+    }
+    const precision = tp + fp === 0 ? 0 : tp / (tp + fp);
+    const recall = tp + fn === 0 ? 0 : tp / (tp + fn);
+    const f1 = precision + recall === 0 ? 0 : (2 * precision * recall) / (precision + recall);
+    rows.push({
+      skill: s,
+      sampleSize: tp + fn,
+      truePositives: tp,
+      falseNegatives: fn,
+      falsePositives: fp,
+      precision: round3(precision),
+      recall: round3(recall),
+      f1: round3(f1),
+      // Only a firing on a target probe is observable here, so this is a
+      // 0/1 flag: did the skill fire on any of its own probes.
+      execSuccess: tp > 0 ? 1 : 0,
+      negativeControlFpRate: negTotal === 0 ? 0 : round3(negFp / negTotal),
+    });
+  }
+  const f1s = rows.map((r) => r.f1).sort((a, b) => a - b);
+  const mid = Math.floor(f1s.length / 2);
+  let medianF1 = 0;
+  if (f1s.length > 0) {
+    // Even count: average the two middle values instead of taking the upper one.
+    medianF1 = f1s.length % 2 === 1 ? f1s[mid]! : (f1s[mid - 1]! + f1s[mid]!) / 2;
+  }
+  return {
+    generatedAt: new Date().toISOString(),
+    agentName,
+    totalProbes: results.length,
+    rows,
+    aggregate: {
+      medianF1: round3(medianF1),
+      skillsBelowF1Threshold: rows.filter((r) => r.f1 < F1_THRESHOLD).length,
+      skillsBelowExecThreshold: rows.filter((r) => r.execSuccess < EXEC_THRESHOLD).length,
+      f1Threshold: F1_THRESHOLD,
+      execThreshold: EXEC_THRESHOLD,
+    },
+  };
+}
+/** Rounds `n` to three decimal places (nearest thousandth). */
+function round3(n: number): number {
+  const thousandths = Math.round(n * 1000);
+  return thousandths / 1000;
+}
+/**
+ * Renders a scorecard as Markdown: a title, a bullet summary of the run and
+ * its aggregates, then one table row per skill in `card.rows` order.
+ *
+ * @param card Scorecard to render.
+ * @returns Markdown text terminated by a single newline.
+ */
+function renderMarkdown(card: Scorecard): string {
+  const { aggregate } = card;
+  const preamble: string[] = [
+    `# Skill-coverage scorecard`,
+    "",
+    `- Generated: ${card.generatedAt}`,
+    `- Agent: \`${card.agentName}\``,
+    `- Probes: ${card.totalProbes}`,
+    `- Median F1: ${aggregate.medianF1}`,
+    `- Below F1 ≥ ${aggregate.f1Threshold}: ${aggregate.skillsBelowF1Threshold}`,
+    `- Below execSuccess ≥ ${aggregate.execThreshold}: ${aggregate.skillsBelowExecThreshold}`,
+    "",
+    `| Skill | n | TP | FN | FP | Precision | Recall | F1 | Exec | NegFP |`,
+    `|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|`,
+  ];
+  const tableRows = card.rows.map((row) => {
+    // Cell order must match the header row above.
+    const cells = [
+      `\`${row.skill}\``,
+      row.sampleSize,
+      row.truePositives,
+      row.falseNegatives,
+      row.falsePositives,
+      row.precision,
+      row.recall,
+      row.f1,
+      row.execSuccess,
+      row.negativeControlFpRate,
+    ];
+    return `| ${cells.map(String).join(" | ")} |`;
+  });
+  return [...preamble, ...tableRows].join("\n") + "\n";
+}
+// ─── Main ────────────────────────────────────────────────────────────
+/**
+ * CLI entry point: loads the probe corpus, runs every probe sequentially
+ * against the configured bot, scores the results and writes three files next
+ * to `cfg.outBase` (`.run.json`, `.scorecard.json`, `.scorecard.md`).
+ *
+ * Requires `TELEGRAM_API_ID`, `TELEGRAM_API_HASH` and
+ * `TELEGRAM_UAT_DRIVER_SESSION` in the environment. Progress is logged to
+ * stderr. The driver is disconnected even when a probe or a write throws.
+ */
+async function main(): Promise<void> {
+  const cfg = parseCli(process.argv.slice(2));
+  const requiredEnv = ["TELEGRAM_API_ID", "TELEGRAM_API_HASH", "TELEGRAM_UAT_DRIVER_SESSION"];
+  for (const name of requiredEnv) {
+    if (!process.env[name]) fail(`missing required env: ${name}`);
+  }
+  // A present-but-non-numeric id would otherwise reach the driver as NaN.
+  const apiId = Number.parseInt(process.env.TELEGRAM_API_ID!, 10);
+  if (Number.isNaN(apiId)) fail("TELEGRAM_API_ID must be an integer");
+  const corpusDir = DEFAULT_CORPUS_DIR;
+  const probesAll = loadCorpus(corpusDir, cfg.skillFilter);
+  const probes = trimPerSkill(probesAll, cfg.limitPerSkill);
+  process.stderr.write(
+    `[skill-coverage-uat] loaded ${probes.length} probes (from ${probesAll.length} in corpus)\n`,
+  );
+  const driver = new Driver({
+    apiId,
+    apiHash: process.env.TELEGRAM_API_HASH!,
+    session: process.env.TELEGRAM_UAT_DRIVER_SESSION!,
+  });
+  await driver.connect();
+  process.stderr.write(`[skill-coverage-uat] connected as driver user\n`);
+  try {
+    const driverUserId = await driver.getMyUserId();
+    const botUserId = await driver.resolveBotUserId(cfg.botUsername);
+    process.stderr.write(
+      `[skill-coverage-uat] target ${cfg.agentName} via ${cfg.botUsername} (uid=${botUserId})\n`,
+    );
+    const results: ProbeResult[] = [];
+    let i = 0;
+    for (const p of probes) {
+      i++;
+      const r = await runProbe(driver, botUserId, driverUserId, p, cfg);
+      results.push(r);
+      let status = "<no-skill>";
+      if (r.timedOut) status = "TIMEOUT";
+      else if (r.skillsFired.length) status = r.skillsFired.join(",");
+      process.stderr.write(
+        `[skill-coverage-uat] (${i}/${probes.length}) ${p.kind} target=${p.targetSkill ?? "<neg>"} → ${status} (${r.durationMs}ms)\n`,
+      );
+      // Pause between probes (not after the last one) so turns do not overlap.
+      if (i < probes.length) {
+        await new Promise((res) => setTimeout(res, cfg.settleMs));
+      }
+    }
+    const card = score(results, cfg.agentName);
+    mkdirSync(dirname(cfg.outBase), { recursive: true });
+    writeFileSync(`${cfg.outBase}.run.json`, JSON.stringify({ cfg: { ...cfg }, results }, null, 2));
+    writeFileSync(`${cfg.outBase}.scorecard.json`, JSON.stringify(card, null, 2));
+    writeFileSync(`${cfg.outBase}.scorecard.md`, renderMarkdown(card));
+    process.stderr.write(
+      `[skill-coverage-uat] wrote ${cfg.outBase}.{run.json,scorecard.json,scorecard.md}\n`,
+    );
+  } finally {
+    await driver.disconnect();
+  }
+}
+// Run main() only when process.argv[1] points at this module; any rejection
+// is reported on stderr and turned into exit code 1.
+if (`file://${process.argv[1]}` === import.meta.url) {
+  main().then(undefined, (err) => {
+    const detail = (err as Error).stack ?? err;
+    process.stderr.write(`[skill-coverage-uat] FATAL: ${detail}\n`);
+    process.exit(1);
+  });
+}