switchroom 0.8.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/README.md +49 -57
  2. package/bin/timezone-hook.sh +9 -7
  3. package/dist/agent-scheduler/index.js +285 -45
  4. package/dist/auth-broker/index.js +13932 -0
  5. package/dist/cli/switchroom.js +15931 -12778
  6. package/dist/host-control/main.js +582 -43
  7. package/dist/vault/approvals/kernel-server.js +276 -47
  8. package/dist/vault/broker/server.js +333 -69
  9. package/examples/minimal.yaml +63 -0
  10. package/examples/personal-google-workspace-mcp/.env.example +34 -0
  11. package/examples/personal-google-workspace-mcp/README.md +194 -0
  12. package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
  13. package/examples/switchroom.yaml +220 -0
  14. package/package.json +6 -4
  15. package/profiles/_base/start.sh.hbs +3 -3
  16. package/profiles/_shared/agent-self-service.md.hbs +126 -0
  17. package/profiles/default/CLAUDE.md +10 -0
  18. package/profiles/default/CLAUDE.md.hbs +16 -0
  19. package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
  20. package/skills/buildkite-agent-runtime/SKILL.md +44 -11
  21. package/skills/buildkite-api/SKILL.md +31 -8
  22. package/skills/buildkite-cli/SKILL.md +27 -9
  23. package/skills/buildkite-migration/SKILL.md +22 -9
  24. package/skills/buildkite-pipelines/SKILL.md +26 -9
  25. package/skills/buildkite-secure-delivery/SKILL.md +23 -9
  26. package/skills/buildkite-test-engine/SKILL.md +25 -8
  27. package/skills/docx/SKILL.md +1 -1
  28. package/skills/file-bug/SKILL.md +34 -6
  29. package/skills/humanizer/SKILL.md +15 -0
  30. package/skills/humanizer-calibrate/SKILL.md +7 -1
  31. package/skills/mcp-builder/SKILL.md +1 -1
  32. package/skills/pdf/SKILL.md +1 -1
  33. package/skills/pptx/SKILL.md +1 -1
  34. package/skills/skill-creator/SKILL.md +21 -1
  35. package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
  36. package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
  37. package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
  38. package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
  39. package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
  40. package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
  41. package/skills/switchroom-cli/SKILL.md +63 -64
  42. package/skills/switchroom-health/SKILL.md +23 -10
  43. package/skills/switchroom-install/SKILL.md +3 -3
  44. package/skills/switchroom-manage/SKILL.md +26 -19
  45. package/skills/switchroom-runtime/SKILL.md +67 -15
  46. package/skills/switchroom-status/SKILL.md +26 -1
  47. package/skills/telegram-test-harness/SKILL.md +3 -0
  48. package/skills/webapp-testing/SKILL.md +31 -1
  49. package/skills/xlsx/SKILL.md +1 -1
  50. package/telegram-plugin/admin-commands/index.ts +7 -5
  51. package/telegram-plugin/dist/gateway/gateway.js +13042 -12844
  52. package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
  53. package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
  54. package/telegram-plugin/gateway/auth-command.ts +794 -0
  55. package/telegram-plugin/gateway/auth-line.ts +123 -0
  56. package/telegram-plugin/gateway/boot-card.ts +22 -36
  57. package/telegram-plugin/gateway/boot-probes.ts +3 -3
  58. package/telegram-plugin/gateway/gateway.ts +313 -798
  59. package/telegram-plugin/gateway/hostd-dispatch.ts +117 -0
  60. package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
  61. package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
  62. package/telegram-plugin/permission-title.ts +56 -0
  63. package/telegram-plugin/quota-check.ts +19 -41
  64. package/telegram-plugin/scripts/build.mjs +0 -1
  65. package/telegram-plugin/shared/bot-runtime.ts +5 -4
  66. package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
  67. package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
  68. package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
  69. package/telegram-plugin/tests/boot-probes.test.ts +11 -4
  70. package/telegram-plugin/tests/hostd-dispatch.test.ts +129 -0
  71. package/telegram-plugin/tests/permission-title.test.ts +31 -0
  72. package/telegram-plugin/tests/quota-check.test.ts +5 -35
  73. package/telegram-plugin/uat/SETUP.md +31 -1
  74. package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
  75. package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
  76. package/telegram-plugin/uat/runners/report.ts +150 -0
  77. package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
  78. package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
  79. package/telegram-plugin/uat/runners/scorer.ts +106 -0
  80. package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
  81. package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
  82. package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +7 -1
  83. package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +7 -1
  84. package/telegram-plugin/auth-dashboard.ts +0 -1104
  85. package/telegram-plugin/auth-slot-parser.ts +0 -497
  86. package/telegram-plugin/dist/foreman/foreman.js +0 -31358
  87. package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
  88. package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
  89. package/telegram-plugin/foreman/foreman.ts +0 -1165
  90. package/telegram-plugin/foreman/setup-flow.ts +0 -345
  91. package/telegram-plugin/foreman/setup-state.ts +0 -239
  92. package/telegram-plugin/foreman/state.ts +0 -203
  93. package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
  94. package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
  95. package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
  96. package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
  97. package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
  98. package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
  99. package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
  100. package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
  101. package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
  102. package/telegram-plugin/tests/foreman-state.test.ts +0 -164
  103. package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
  104. package/telegram-plugin/tests/setup-flow.test.ts +0 -510
  105. package/telegram-plugin/tests/setup-state.test.ts +0 -146
@@ -0,0 +1,106 @@
1
+ /**
2
+ * Heuristic pass/fail scoring for the agent-self-sufficiency UAT.
3
+ *
4
+ * Each result also carries the verbatim reply so the report's triage
5
+ * table can show the operator exactly what the agent said. Scoring is
6
+ * deliberately permissive — we're testing whether the agent
7
+ * understood the *intent* (and reached for the right tool), not
8
+ * whether the reply matches a specific phrasing.
9
+ *
10
+ * Failure modes the runner needs to distinguish from "wrong answer":
11
+ *
12
+ * - timeout: agent never replied within the budget. Could mean
13
+ * the agent is wedged, the bot token's wrong, or
14
+ * Telegram is throttling. Reported separately so the
15
+ * operator doesn't conflate "didn't reply" with
16
+ * "replied wrong".
17
+ * - send_error: driver couldn't even deliver the inbound (bot
18
+ * username missing, mtcute connection died, etc.).
19
+ * These bubble up as `error` results, not `fail`.
20
+ */
21
+
22
+ import type { CriterionSpec, Paraphrase } from "./paraphrases.js";
23
+ import { patternFor } from "./paraphrases.js";
24
+
25
/**
 * Final state of one UAT case. `pass` / `fail` come from scoring a reply;
 * `timeout` means no reply arrived within the budget; `error` means the
 * inbound message could not be delivered at all.
 */
export type Outcome =
  | "pass"
  | "fail"
  | "timeout"
  | "error";
26
+
27
/** Result record for a single (agent, criterion, paraphrase) case. */
export interface CaseResult {
  /** Agent identifier; `aggregate` uses it as the `byAgent` grouping key. */
  agent: string;
  /** Id of the criterion this case was scored against. */
  criterion: CriterionSpec["id"];
  /** The paraphrase that was sent for this case. */
  paraphrase: Paraphrase;
  /** How the case ended. */
  outcome: Outcome;
  /**
   * The reply exactly as received, trimmed. Markdown is left in place so
   * the report shows what the user actually saw. Empty for timeout/error.
   */
  reply: string;
  /** Wall-clock milliseconds from sendDM to the first reply (or to timeout). */
  durationMs: number;
  /** Error message; only set for `error` outcomes. */
  errorMessage?: string;
}
40
+
41
/**
 * Decide whether one reply satisfies a criterion.
 *
 * Only meant for cases where a reply actually arrived — the runner sets
 * timeout and error outcomes itself without calling this. A blank reply is
 * always a `fail`. Otherwise the reply is markdown-stripped and lowercased,
 * then tested against the criterion's pattern. `injection.agentName` is
 * forwarded to `patternFor` so criteria whose passPattern contains
 * `__INJECTED_AGENT_NAME__` (e.g. `2b_your_name`) match the right name.
 *
 * @param spec - Criterion being scored.
 * @param reply - Raw reply text from the agent.
 * @param injection - Values substituted into the criterion's pattern.
 * @returns `"pass"` when the pattern matches, `"fail"` otherwise.
 */
export function scoreReply(
  spec: CriterionSpec,
  reply: string,
  injection: { agentName: string },
): Outcome {
  if (reply.trim().length === 0) {
    return "fail";
  }
  const haystack = stripMarkdown(reply).toLowerCase();
  const matcher = patternFor(spec, injection);
  if (matcher.test(haystack)) {
    return "pass";
  }
  return "fail";
}
57
+
58
/**
 * Reduce a markdown reply to plain words so the scorer's regex matches
 * against content rather than formatting. Permissive on purpose.
 *
 * - Fenced code blocks (triple backticks) are dropped entirely, content
 *   included, and replaced by a single space.
 * - Inline code, bold and italic delimiters are removed; the wrapped text
 *   is kept.
 * - Underscore emphasis is only recognised when the delimiters are not
 *   glued to a letter, digit or (for single `_`) another underscore. This
 *   follows CommonMark, where intraword underscores are not emphasis, and
 *   keeps identifiers such as `my_agent_name` or `mcp__server__tool`
 *   intact instead of silently deleting their underscores.
 * - Runs of whitespace collapse to one space and both ends are trimmed.
 *
 * @param s - Raw reply text, possibly containing markdown.
 * @returns The reply with the formatting above removed.
 */
function stripMarkdown(s: string): string {
  return s
    .replace(/```[\s\S]*?```/g, " ")
    .replace(/`([^`]+)`/g, "$1")
    .replace(/\*\*([^*]+)\*\*/g, "$1")
    // `__bold__`, but not the `__` separators inside an identifier.
    .replace(/(?<![\p{L}\p{N}])__([^_]+)__(?![\p{L}\p{N}])/gu, "$1")
    .replace(/\*([^*]+)\*/g, "$1")
    // `_italic_`, but not snake_case or the halves of a `__` separator.
    .replace(/(?<![\p{L}\p{N}_])_([^_]+)_(?![\p{L}\p{N}_])/gu, "$1")
    .replace(/\s+/g, " ")
    .trim();
}
74
+
75
/**
 * Outcome tallies grouped three ways, as returned by `aggregate`. Each map
 * goes from a grouping key (criterion id, agent, paraphrase shape) to a
 * counter with one slot per outcome.
 *
 * The counter is spelled `Record<Outcome, number>` rather than a repeated
 * object literal so it cannot drift out of sync with the `Outcome` union.
 */
export interface Aggregate {
  byCriterion: Map<string, Record<Outcome, number>>;
  byAgent: Map<string, Record<Outcome, number>>;
  byShape: Map<string, Record<Outcome, number>>;
}
84
+
85
/**
 * Tally outcomes per criterion, per agent and per paraphrase shape.
 *
 * Does not modify `results`; all three maps are freshly created on each
 * call. A key appears in a map the first time a result with that key is
 * seen, with every outcome counter starting at zero.
 *
 * @param results - Case results to summarise.
 * @returns The three grouped tallies.
 */
export function aggregate(results: readonly CaseResult[]): Aggregate {
  const byCriterion: Aggregate["byCriterion"] = new Map();
  const byAgent: Aggregate["byAgent"] = new Map();
  const byShape: Aggregate["byShape"] = new Map();

  // Increment the counter for `outcome` under `key`, creating the row on first use.
  function tally(
    table: Aggregate["byCriterion"],
    key: string,
    outcome: Outcome,
  ): void {
    let counts = table.get(key);
    if (counts === undefined) {
      counts = { pass: 0, fail: 0, timeout: 0, error: 0 };
      table.set(key, counts);
    }
    counts[outcome] += 1;
  }

  for (const { criterion, agent, paraphrase, outcome } of results) {
    tally(byCriterion, criterion, outcome);
    tally(byAgent, agent, outcome);
    tally(byShape, paraphrase.shape, outcome);
  }

  return { byCriterion, byAgent, byShape };
}
@@ -0,0 +1,100 @@
1
+ /**
2
+ * Unit tests for the skill-coverage UAT runner's pure pieces:
3
+ * label extractor + sidecar JSONL reader. Live driver/network paths
4
+ * are validated by operator-driven runs (see runbook).
5
+ */
6
+
7
+ import { describe, it, expect } from "vitest";
8
+ import {
9
+ extractSkillFromLabel,
10
+ readSkillRowsSince,
11
+ } from "./skill-coverage.js";
12
+
13
// Label → skill slug extraction.
describe("extractSkillFromLabel", () => {
  it("pulls the slug from the hook's canonical label", () => {
    const slug = extractSkillFromLabel("Running skill switchroom-cli");
    expect(slug).toBe("switchroom-cli");
  });

  it("is case-insensitive on the label but lowercases the slug", () => {
    const slug = extractSkillFromLabel("RUNNING SKILL BUILDKITE-API");
    expect(slug).toBe("buildkite-api");
  });

  it("returns null for non-Skill labels", () => {
    for (const label of ["Reading scaffold.ts", "Replying"]) {
      expect(extractSkillFromLabel(label)).toBeNull();
    }
  });

  it("returns null when the slug is missing or malformed", () => {
    for (const label of ["running skill", "running skill (and)"]) {
      expect(extractSkillFromLabel(label)).toBeNull();
    }
  });
});
36
+
37
// Sidecar JSONL reader, exercised through injected readdir/readFile fakes.
describe("readSkillRowsSince", () => {
  // Serialise one full sidecar row.
  const row = (ts: number, toolUseId: string, label: string, toolName: string): string =>
    JSON.stringify({ ts, tool_use_id: toolUseId, agent_id: "ag", label, tool_name: toolName });

  const linesA = [
    // before sinceMs: ignored
    row(100, "u1", "Running skill docx", "Skill"),
    // after sinceMs, Skill: kept
    row(1500, "u2", "Running skill switchroom-cli", "Skill"),
    // after sinceMs, non-Skill: ignored
    row(1600, "u3", "Reading foo.ts", "Read"),
  ];
  const linesB = [
    row(2000, "u4", "Running skill buildkite-cli", "Skill"),
    // malformed line: ignored
    "{not-json",
    "",
  ];

  const files: Record<string, string> = {
    "tool-labels-A.jsonl": `${linesA.join("\n")}\n`,
    "tool-labels-B.jsonl": `${linesB.join("\n")}\n`,
    "other.jsonl": JSON.stringify({ ts: 2500, tool_name: "Skill", label: "Running skill x" }),
  };

  // Directory listing fake: every fixture name, whatever path is asked for.
  function listFixtures(_dir: string): string[] {
    return Object.keys(files);
  }

  // File read fake: resolves by basename, throws for unknown names.
  function readFixture(path: string): string {
    const base = path.slice(path.lastIndexOf("/") + 1);
    const body = files[base];
    if (body === undefined) throw new Error("ENOENT");
    return body;
  }

  it("returns only Skill rows from tool-labels-*.jsonl with ts >= sinceMs", () => {
    const rows = readSkillRowsSince("/fake", 1000, listFixtures, readFixture);
    const labels = rows.map((r) => r.label);
    labels.sort();
    expect(labels).toEqual([
      "Running skill buildkite-cli",
      "Running skill switchroom-cli",
    ]);
  });

  it("returns [] when the dir read throws", () => {
    const throwingReaddir = (): string[] => {
      throw new Error("EACCES");
    };
    const rows = readSkillRowsSince("/fake", 0, throwingReaddir, readFixture);
    expect(rows).toEqual([]);
  });

  it("skips files that fail to read but keeps siblings", () => {
    const readExceptA = (path: string): string => {
      if (path.endsWith("tool-labels-A.jsonl")) throw new Error("EACCES");
      return readFixture(path);
    };
    const rows = readSkillRowsSince("/fake", 0, listFixtures, readExceptA);
    const labels = rows.map((r) => r.label);
    expect(labels).toEqual(["Running skill buildkite-cli"]);
  });

  it("ignores files that don't match the tool-labels-*.jsonl pattern", () => {
    const strayOnly: Record<string, string> = {
      "other.jsonl": JSON.stringify({ ts: 100, tool_name: "Skill", label: "x" }),
      "tool-labels-A.jsonl": "",
    };
    const rows = readSkillRowsSince(
      "/fake",
      0,
      () => Object.keys(strayOnly),
      (path) => strayOnly[path.slice(path.lastIndexOf("/") + 1)]!,
    );
    expect(rows).toEqual([]);
  });
});