switchroom 0.8.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/README.md +54 -61
  2. package/bin/timezone-hook.sh +9 -7
  3. package/dist/agent-scheduler/index.js +285 -45
  4. package/dist/auth-broker/index.js +13932 -0
  5. package/dist/cli/drive-write-pretool.mjs +5418 -0
  6. package/dist/cli/switchroom.js +8890 -5560
  7. package/dist/host-control/main.js +582 -43
  8. package/dist/vault/approvals/kernel-server.js +276 -47
  9. package/dist/vault/broker/server.js +333 -69
  10. package/examples/minimal.yaml +63 -0
  11. package/examples/personal-google-workspace-mcp/.env.example +34 -0
  12. package/examples/personal-google-workspace-mcp/README.md +194 -0
  13. package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
  14. package/examples/switchroom.yaml +220 -0
  15. package/package.json +6 -4
  16. package/profiles/_base/start.sh.hbs +3 -3
  17. package/profiles/_shared/agent-self-service.md.hbs +126 -0
  18. package/profiles/default/CLAUDE.md +10 -0
  19. package/profiles/default/CLAUDE.md.hbs +16 -0
  20. package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
  21. package/skills/buildkite-agent-runtime/SKILL.md +44 -11
  22. package/skills/buildkite-api/SKILL.md +31 -8
  23. package/skills/buildkite-cli/SKILL.md +27 -9
  24. package/skills/buildkite-migration/SKILL.md +22 -9
  25. package/skills/buildkite-pipelines/SKILL.md +26 -9
  26. package/skills/buildkite-secure-delivery/SKILL.md +23 -9
  27. package/skills/buildkite-test-engine/SKILL.md +25 -8
  28. package/skills/docx/SKILL.md +1 -1
  29. package/skills/file-bug/SKILL.md +34 -6
  30. package/skills/humanizer/SKILL.md +15 -0
  31. package/skills/humanizer-calibrate/SKILL.md +7 -1
  32. package/skills/mcp-builder/SKILL.md +1 -1
  33. package/skills/pdf/SKILL.md +1 -1
  34. package/skills/pptx/SKILL.md +1 -1
  35. package/skills/skill-creator/SKILL.md +21 -1
  36. package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
  37. package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
  38. package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
  39. package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
  40. package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
  41. package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
  42. package/skills/switchroom-cli/SKILL.md +63 -64
  43. package/skills/switchroom-health/SKILL.md +23 -10
  44. package/skills/switchroom-install/SKILL.md +3 -3
  45. package/skills/switchroom-manage/SKILL.md +26 -19
  46. package/skills/switchroom-runtime/SKILL.md +67 -15
  47. package/skills/switchroom-status/SKILL.md +26 -1
  48. package/skills/telegram-test-harness/SKILL.md +3 -0
  49. package/skills/webapp-testing/SKILL.md +31 -1
  50. package/skills/xlsx/SKILL.md +1 -1
  51. package/telegram-plugin/admin-commands/dispatch.test.ts +1 -1
  52. package/telegram-plugin/admin-commands/index.ts +9 -5
  53. package/telegram-plugin/auth-snapshot-format.ts +612 -0
  54. package/telegram-plugin/auto-fallback-fleet.ts +215 -0
  55. package/telegram-plugin/auto-fallback.ts +28 -301
  56. package/telegram-plugin/dist/gateway/gateway.js +17453 -15100
  57. package/telegram-plugin/fleet-fallback-gate.ts +105 -0
  58. package/telegram-plugin/gateway/approval-callback.test.ts +104 -0
  59. package/telegram-plugin/gateway/approval-callback.ts +31 -3
  60. package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
  61. package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
  62. package/telegram-plugin/gateway/auth-command.ts +905 -0
  63. package/telegram-plugin/gateway/auth-line.ts +123 -0
  64. package/telegram-plugin/gateway/auth-status-adapter.ts +101 -0
  65. package/telegram-plugin/gateway/boot-card.ts +23 -37
  66. package/telegram-plugin/gateway/boot-probes.ts +9 -12
  67. package/telegram-plugin/gateway/diff-preview-card.test.ts +192 -0
  68. package/telegram-plugin/gateway/diff-preview-card.ts +170 -0
  69. package/telegram-plugin/gateway/drive-write-approval.test.ts +312 -0
  70. package/telegram-plugin/gateway/drive-write-approval.ts +243 -0
  71. package/telegram-plugin/gateway/folder-picker-handler.test.ts +314 -0
  72. package/telegram-plugin/gateway/folder-picker-handler.ts +348 -0
  73. package/telegram-plugin/gateway/gateway.ts +1156 -938
  74. package/telegram-plugin/gateway/hostd-dispatch.ts +244 -0
  75. package/telegram-plugin/gateway/ipc-protocol.ts +83 -2
  76. package/telegram-plugin/gateway/ipc-server.ts +69 -0
  77. package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +103 -12
  78. package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
  79. package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
  80. package/telegram-plugin/model-unavailable.ts +28 -12
  81. package/telegram-plugin/permission-title.ts +56 -0
  82. package/telegram-plugin/quota-check.ts +19 -41
  83. package/telegram-plugin/scripts/build.mjs +0 -1
  84. package/telegram-plugin/shared/bot-runtime.ts +5 -4
  85. package/telegram-plugin/silence-poke.ts +153 -1
  86. package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
  87. package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
  88. package/telegram-plugin/tests/auth-command-format2.test.ts +156 -0
  89. package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
  90. package/telegram-plugin/tests/auth-snapshot-format.test.ts +429 -0
  91. package/telegram-plugin/tests/auth-status-adapter.test.ts +129 -0
  92. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +211 -0
  93. package/telegram-plugin/tests/auto-fallback.test.ts +60 -358
  94. package/telegram-plugin/tests/boot-probes.test.ts +27 -22
  95. package/telegram-plugin/tests/fleet-fallback-gate.test.ts +197 -0
  96. package/telegram-plugin/tests/model-unavailable.test.ts +30 -5
  97. package/telegram-plugin/tests/permission-title.test.ts +31 -0
  98. package/telegram-plugin/tests/quota-check.test.ts +5 -35
  99. package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +212 -2
  100. package/telegram-plugin/tests/silence-poke.test.ts +237 -0
  101. package/telegram-plugin/tests/turn-flush-safety.test.ts +112 -0
  102. package/telegram-plugin/turn-flush-safety.ts +55 -1
  103. package/telegram-plugin/uat/SETUP.md +35 -1
  104. package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
  105. package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
  106. package/telegram-plugin/uat/runners/report.ts +150 -0
  107. package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
  108. package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
  109. package/telegram-plugin/uat/runners/scorer.ts +106 -0
  110. package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
  111. package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
  112. package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +7 -1
  113. package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +7 -1
  114. package/telegram-plugin/auth-dashboard.ts +0 -1104
  115. package/telegram-plugin/auth-slot-parser.ts +0 -497
  116. package/telegram-plugin/auto-fallback-dispatcher.ts +0 -68
  117. package/telegram-plugin/dist/foreman/foreman.js +0 -31358
  118. package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
  119. package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
  120. package/telegram-plugin/foreman/foreman.ts +0 -1165
  121. package/telegram-plugin/foreman/setup-flow.ts +0 -345
  122. package/telegram-plugin/foreman/setup-state.ts +0 -239
  123. package/telegram-plugin/foreman/state.ts +0 -203
  124. package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
  125. package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
  126. package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
  127. package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
  128. package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
  129. package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
  130. package/telegram-plugin/tests/auto-fallback-dispatcher.e2e.test.ts +0 -183
  131. package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
  132. package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
  133. package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
  134. package/telegram-plugin/tests/foreman-state.test.ts +0 -164
  135. package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
  136. package/telegram-plugin/tests/setup-flow.test.ts +0 -510
  137. package/telegram-plugin/tests/setup-state.test.ts +0 -146
@@ -0,0 +1,106 @@
1
+ /**
2
+ * Heuristic pass/fail scoring for the agent-self-sufficiency UAT.
3
+ *
4
+ * Each result also carries the verbatim reply so the report's triage
5
+ * table can show the operator exactly what the agent said. Scoring is
6
+ * deliberately permissive — we're testing whether the agent
7
+ * understood the *intent* (and reached for the right tool), not
8
+ * whether the reply matches a specific phrasing.
9
+ *
10
+ * Failure modes the runner needs to distinguish from "wrong answer":
11
+ *
12
+ * - timeout: agent never replied within the budget. Could mean
13
+ * the agent is wedged, the bot token's wrong, or
14
+ * Telegram is throttling. Reported separately so the
15
+ * operator doesn't conflate "didn't reply" with
16
+ * "replied wrong".
17
+ * - send_error: driver couldn't even deliver the inbound (bot
18
+ * username missing, mtcute connection died, etc.).
19
+ * These bubble up as `error` results, not `fail`.
20
+ */
21
+
22
+ import type { CriterionSpec, Paraphrase } from "./paraphrases.js";
23
+ import { patternFor } from "./paraphrases.js";
24
+
/**
 * Terminal state of a single UAT case.
 *
 * `pass` / `fail` come from scoring a reply; `timeout` means no reply
 * arrived within the budget; `error` means the driver could not deliver
 * the inbound message at all.
 */
export type Outcome =
  | "pass"
  | "fail"
  | "timeout"
  | "error";

/** Everything recorded about one paraphrase sent to one agent. */
export interface CaseResult {
  /** Agent key; `aggregate` groups results by this value. */
  agent: string;
  /** Id of the criterion the paraphrase belongs to. */
  criterion: CriterionSpec["id"];
  /** The paraphrase that was sent. */
  paraphrase: Paraphrase;
  /** How the case ended. */
  outcome: Outcome;
  /**
   * The reply exactly as received (trimmed, markdown left intact) so the
   * report can show what the user actually saw. Empty for timeout/error.
   */
  reply: string;
  /** Wall-clock milliseconds from sendDM to the first reply, or to the timeout. */
  durationMs: number;
  /** Error detail; only meaningful when `outcome` is `error`. */
  errorMessage?: string;
}
40
+
/**
 * Decide whether one reply satisfies a criterion.
 *
 * Timeouts and errors never reach this function: the runner assigns
 * those outcomes itself. Criteria whose passPattern contains
 * `__INJECTED_AGENT_NAME__` (for example `2b_your_name`) rely on
 * `injection.agentName` so the matcher can substitute the real name.
 *
 * @param spec      Criterion being scored.
 * @param reply     Raw reply text from the agent.
 * @param injection Values substituted into the criterion's pattern.
 * @returns `"fail"` for a blank reply; otherwise `"pass"` when the
 *          criterion's pattern matches the markdown-stripped, lowercased
 *          reply, and `"fail"` when it does not.
 */
export function scoreReply(
  spec: CriterionSpec,
  reply: string,
  injection: { agentName: string },
): Outcome {
  // A whitespace-only reply can never satisfy a criterion.
  if (reply.trim() === "") {
    return "fail";
  }

  const haystack = stripMarkdown(reply).toLowerCase();
  const matcher = patternFor(spec, injection);
  if (matcher.test(haystack)) {
    return "pass";
  }
  return "fail";
}
57
+
/**
 * Reduce a reply to plain words so the scorer's regex matches against
 * words rather than formatting. Permissive on purpose.
 *
 * - Fenced code blocks are removed entirely, content included.
 * - Inline code, `**bold**`, `__bold__`, `*italic*` and `_italic_` keep
 *   their inner text and lose the markers.
 * - Underscore emphasis is only recognised when the markers are not
 *   glued to a word character on the outside, so identifiers such as
 *   `switchroom_agent_list` or `mcp__switchroom__status` survive intact
 *   instead of having their underscores eaten.
 * - Runs of whitespace collapse to a single space and the result is
 *   trimmed.
 *
 * @param s Raw reply text.
 * @returns The reply with the formatting above removed.
 */
function stripMarkdown(s: string): string {
  return (
    s
      // Whole fenced block goes, not just the ``` markers.
      .replace(/```[\s\S]*?```/g, " ")
      .replace(/`([^`]+)`/g, "$1")
      .replace(/\*\*([^*]+)\*\*/g, "$1")
      // (?<!\w) / (?!\w): never treat intraword underscores as emphasis.
      .replace(/(?<!\w)__([^_]+)__(?!\w)/g, "$1")
      .replace(/\*([^*]+)\*/g, "$1")
      .replace(/(?<!\w)_([^_]+)_(?!\w)/g, "$1")
      .replace(/\s+/g, " ")
      .trim()
  );
}
74
+
/**
 * Outcome tallies produced by `aggregate`, grouped three ways. Each map
 * value counts how many cases for that key ended in each outcome.
 */
export interface Aggregate {
  /** Keyed by criterion id. */
  byCriterion: Map<string, { pass: number; fail: number; timeout: number; error: number }>;
  /** Keyed by agent. */
  byAgent: Map<string, { pass: number; fail: number; timeout: number; error: number }>;
  /** Keyed by paraphrase shape. */
  byShape: Map<string, { pass: number; fail: number; timeout: number; error: number }>;
}

/**
 * Count outcomes per criterion, per agent and per paraphrase shape.
 *
 * Pure function (no I/O, input untouched), which keeps it easy to test.
 *
 * @param results Scored cases; may be empty.
 * @returns Three maps of outcome counters. A key appears only if at
 *          least one result carried it.
 */
export function aggregate(results: readonly CaseResult[]): Aggregate {
  const byCriterion: Aggregate["byCriterion"] = new Map();
  const byAgent: Aggregate["byAgent"] = new Map();
  const byShape: Aggregate["byShape"] = new Map();

  // Increment the counter for `outcome` under `key`, creating a zeroed
  // row the first time the key is seen.
  function tally(
    table: Aggregate["byCriterion"],
    key: string,
    outcome: Outcome,
  ): void {
    let counts = table.get(key);
    if (counts == null) {
      counts = { pass: 0, fail: 0, timeout: 0, error: 0 };
    }
    counts[outcome] += 1;
    table.set(key, counts);
  }

  for (const { criterion, agent, paraphrase, outcome } of results) {
    tally(byCriterion, criterion, outcome);
    tally(byAgent, agent, outcome);
    tally(byShape, paraphrase.shape, outcome);
  }

  return { byCriterion, byAgent, byShape };
}
@@ -0,0 +1,100 @@
1
+ /**
2
+ * Unit tests for the skill-coverage UAT runner's pure pieces:
3
+ * label extractor + sidecar JSONL reader. Live driver/network paths
4
+ * are validated by operator-driven runs (see runbook).
5
+ */
6
+
7
+ import { describe, it, expect } from "vitest";
8
+ import {
9
+ extractSkillFromLabel,
10
+ readSkillRowsSince,
11
+ } from "./skill-coverage.js";
12
+
// Label-to-slug extraction: canonical label, case handling, and the
// inputs that must yield null.
describe("extractSkillFromLabel", () => {
  it("pulls the slug from the hook's canonical label", () => {
    const slug = extractSkillFromLabel("Running skill switchroom-cli");
    expect(slug).toBe("switchroom-cli");
  });

  it("is case-insensitive on the label but lowercases the slug", () => {
    const slug = extractSkillFromLabel("RUNNING SKILL BUILDKITE-API");
    expect(slug).toBe("buildkite-api");
  });

  it("returns null for non-Skill labels", () => {
    for (const label of ["Reading scaffold.ts", "Replying"]) {
      expect(extractSkillFromLabel(label)).toBeNull();
    }
  });

  it("returns null when the slug is missing or malformed", () => {
    for (const label of ["running skill", "running skill (and)"]) {
      expect(extractSkillFromLabel(label)).toBeNull();
    }
  });
});
36
+
// Sidecar JSONL reader, exercised through injected directory-listing and
// file-reading fakes so no real filesystem is touched.
describe("readSkillRowsSince", () => {
  // Serialise one full sidecar row; key order matches the fixtures' JSON.
  const sidecarRow = (
    ts: number,
    toolUseId: string,
    label: string,
    toolName: string,
  ): string =>
    JSON.stringify({ ts, tool_use_id: toolUseId, agent_id: "ag", label, tool_name: toolName });

  const labelsA = [
    // before sinceMs: ignored
    sidecarRow(100, "u1", "Running skill docx", "Skill"),
    // after sinceMs, Skill: kept
    sidecarRow(1500, "u2", "Running skill switchroom-cli", "Skill"),
    // after sinceMs, non-Skill: ignored
    sidecarRow(1600, "u3", "Reading foo.ts", "Read"),
  ];

  const labelsB = [
    sidecarRow(2000, "u4", "Running skill buildkite-cli", "Skill"),
    // malformed line: ignored
    "{not-json",
    "",
  ];

  const fixtures: Record<string, string> = {
    "tool-labels-A.jsonl": `${labelsA.join("\n")}\n`,
    "tool-labels-B.jsonl": `${labelsB.join("\n")}\n`,
    // Name does not match tool-labels-*.jsonl, so its Skill row must be ignored.
    "other.jsonl": JSON.stringify({ ts: 2500, tool_name: "Skill", label: "Running skill x" }),
  };

  // Directory listing fake: every fixture name, regardless of the path asked for.
  const listFixtures = (_dir: string): string[] => Object.keys(fixtures);

  // File read fake: resolves by basename and throws for unknown files.
  const readFixture = (path: string): string => {
    const base = path.split("/").pop()!;
    const body = fixtures[base];
    if (body === undefined) throw new Error("ENOENT");
    return body;
  };

  it("returns only Skill rows from tool-labels-*.jsonl with ts >= sinceMs", () => {
    const rows = readSkillRowsSince("/fake", 1000, listFixtures, readFixture);
    const seen = rows.map((row) => row.label);
    seen.sort();
    expect(seen).toEqual([
      "Running skill buildkite-cli",
      "Running skill switchroom-cli",
    ]);
  });

  it("returns [] when the dir read throws", () => {
    const failingReaddir = (): string[] => {
      throw new Error("EACCES");
    };
    const rows = readSkillRowsSince("/fake", 0, failingReaddir, readFixture);
    expect(rows).toEqual([]);
  });

  it("skips files that fail to read but keeps siblings", () => {
    const readExceptA = (path: string): string => {
      if (path.endsWith("tool-labels-A.jsonl")) throw new Error("EACCES");
      return readFixture(path);
    };
    const rows = readSkillRowsSince("/fake", 0, listFixtures, readExceptA);
    expect(rows.map((row) => row.label)).toEqual(["Running skill buildkite-cli"]);
  });

  it("ignores files that don't match the tool-labels-*.jsonl pattern", () => {
    const onlyOther: Record<string, string> = {
      "other.jsonl": JSON.stringify({ ts: 100, tool_name: "Skill", label: "x" }),
      "tool-labels-A.jsonl": "",
    };
    const listOnlyOther = (): string[] => Object.keys(onlyOther);
    const readOnlyOther = (path: string): string => onlyOther[path.split("/").pop()!]!;

    const rows = readSkillRowsSince("/fake", 0, listOnlyOther, readOnlyOther);
    expect(rows).toEqual([]);
  });
});