switchroom 0.8.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/README.md +49 -57
  2. package/bin/timezone-hook.sh +9 -7
  3. package/dist/agent-scheduler/index.js +285 -45
  4. package/dist/auth-broker/index.js +13932 -0
  5. package/dist/cli/switchroom.js +15931 -12778
  6. package/dist/host-control/main.js +582 -43
  7. package/dist/vault/approvals/kernel-server.js +276 -47
  8. package/dist/vault/broker/server.js +333 -69
  9. package/examples/minimal.yaml +63 -0
  10. package/examples/personal-google-workspace-mcp/.env.example +34 -0
  11. package/examples/personal-google-workspace-mcp/README.md +194 -0
  12. package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
  13. package/examples/switchroom.yaml +220 -0
  14. package/package.json +6 -4
  15. package/profiles/_base/start.sh.hbs +3 -3
  16. package/profiles/_shared/agent-self-service.md.hbs +126 -0
  17. package/profiles/default/CLAUDE.md +10 -0
  18. package/profiles/default/CLAUDE.md.hbs +16 -0
  19. package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
  20. package/skills/buildkite-agent-runtime/SKILL.md +44 -11
  21. package/skills/buildkite-api/SKILL.md +31 -8
  22. package/skills/buildkite-cli/SKILL.md +27 -9
  23. package/skills/buildkite-migration/SKILL.md +22 -9
  24. package/skills/buildkite-pipelines/SKILL.md +26 -9
  25. package/skills/buildkite-secure-delivery/SKILL.md +23 -9
  26. package/skills/buildkite-test-engine/SKILL.md +25 -8
  27. package/skills/docx/SKILL.md +1 -1
  28. package/skills/file-bug/SKILL.md +34 -6
  29. package/skills/humanizer/SKILL.md +15 -0
  30. package/skills/humanizer-calibrate/SKILL.md +7 -1
  31. package/skills/mcp-builder/SKILL.md +1 -1
  32. package/skills/pdf/SKILL.md +1 -1
  33. package/skills/pptx/SKILL.md +1 -1
  34. package/skills/skill-creator/SKILL.md +21 -1
  35. package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
  36. package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
  37. package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
  38. package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
  39. package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
  40. package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
  41. package/skills/switchroom-cli/SKILL.md +63 -64
  42. package/skills/switchroom-health/SKILL.md +23 -10
  43. package/skills/switchroom-install/SKILL.md +3 -3
  44. package/skills/switchroom-manage/SKILL.md +26 -19
  45. package/skills/switchroom-runtime/SKILL.md +67 -15
  46. package/skills/switchroom-status/SKILL.md +26 -1
  47. package/skills/telegram-test-harness/SKILL.md +3 -0
  48. package/skills/webapp-testing/SKILL.md +31 -1
  49. package/skills/xlsx/SKILL.md +1 -1
  50. package/telegram-plugin/admin-commands/index.ts +7 -5
  51. package/telegram-plugin/dist/gateway/gateway.js +13042 -12844
  52. package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
  53. package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
  54. package/telegram-plugin/gateway/auth-command.ts +794 -0
  55. package/telegram-plugin/gateway/auth-line.ts +123 -0
  56. package/telegram-plugin/gateway/boot-card.ts +22 -36
  57. package/telegram-plugin/gateway/boot-probes.ts +3 -3
  58. package/telegram-plugin/gateway/gateway.ts +313 -798
  59. package/telegram-plugin/gateway/hostd-dispatch.ts +117 -0
  60. package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
  61. package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
  62. package/telegram-plugin/permission-title.ts +56 -0
  63. package/telegram-plugin/quota-check.ts +19 -41
  64. package/telegram-plugin/scripts/build.mjs +0 -1
  65. package/telegram-plugin/shared/bot-runtime.ts +5 -4
  66. package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
  67. package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
  68. package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
  69. package/telegram-plugin/tests/boot-probes.test.ts +11 -4
  70. package/telegram-plugin/tests/hostd-dispatch.test.ts +129 -0
  71. package/telegram-plugin/tests/permission-title.test.ts +31 -0
  72. package/telegram-plugin/tests/quota-check.test.ts +5 -35
  73. package/telegram-plugin/uat/SETUP.md +31 -1
  74. package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
  75. package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
  76. package/telegram-plugin/uat/runners/report.ts +150 -0
  77. package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
  78. package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
  79. package/telegram-plugin/uat/runners/scorer.ts +106 -0
  80. package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
  81. package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
  82. package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +7 -1
  83. package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +7 -1
  84. package/telegram-plugin/auth-dashboard.ts +0 -1104
  85. package/telegram-plugin/auth-slot-parser.ts +0 -497
  86. package/telegram-plugin/dist/foreman/foreman.js +0 -31358
  87. package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
  88. package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
  89. package/telegram-plugin/foreman/foreman.ts +0 -1165
  90. package/telegram-plugin/foreman/setup-flow.ts +0 -345
  91. package/telegram-plugin/foreman/setup-state.ts +0 -239
  92. package/telegram-plugin/foreman/state.ts +0 -203
  93. package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
  94. package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
  95. package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
  96. package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
  97. package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
  98. package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
  99. package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
  100. package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
  101. package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
  102. package/telegram-plugin/tests/foreman-state.test.ts +0 -164
  103. package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
  104. package/telegram-plugin/tests/setup-flow.test.ts +0 -510
  105. package/telegram-plugin/tests/setup-state.test.ts +0 -146
@@ -0,0 +1,117 @@
1
+ /**
2
+ * Hostd dispatch helpers for the gateway's self-restart slash-commands
3
+ * (#1175 RFC C, Phase 2). When the operator has opted into
4
+ * `host_control.enabled: true`, /restart, /new, /reset, and
5
+ * /update apply route through the per-agent hostd UDS instead of the
6
+ * in-container `spawnSwitchroomDetached` shellout.
7
+ *
8
+ * Rationale: in docker-mode (the v0.7+ default) the agent container
9
+ * has no docker binary and no `/var/run/docker.sock` — so the
10
+ * spawn-path verbs fail with exit-127 the moment they touch compose.
11
+ * Hostd runs on the host with the docker socket mounted, so the verbs
12
+ * actually work.
13
+ *
14
+ * Extracted from gateway.ts for unit-testability — gateway.ts itself
15
+ * has too many boot-time side-effects to import directly in a test.
16
+ */
17
+ import { existsSync } from "node:fs";
18
+ import { randomBytes } from "node:crypto";
19
+ import { hostdRequest } from "../../src/host-control/client.js";
20
+ import type {
21
+ HostdRequest,
22
+ HostdResponse,
23
+ } from "../../src/host-control/protocol.js";
24
+ import { loadConfig as loadSwitchroomConfig } from "../../src/config/loader.js";
25
+
26
+ let _hostdEnabled: boolean | undefined;
27
+
28
/**
 * Report whether `host_control.enabled` is switched on in the resolved
 * switchroom config.
 *
 * The answer is memoised in the module-level `_hostdEnabled` after the
 * first call, so the config is loaded at most once until the cache is
 * reset.
 *
 * Best-effort: any exception thrown while loading the config is treated
 * as "disabled", so the dispatch helper falls through to the legacy
 * spawn path instead of failing.
 *
 * @returns `true` only when `host_control.enabled` is strictly `true`.
 */
export function isHostdEnabled(): boolean {
  if (_hostdEnabled === undefined) {
    let enabled: boolean;
    try {
      enabled = loadSwitchroomConfig().host_control?.enabled === true;
    } catch {
      enabled = false;
    }
    _hostdEnabled = enabled;
  }
  return _hostdEnabled;
}
47
+
48
/**
 * @internal
 * Forget the memoised `host_control.enabled` answer so the next
 * `isHostdEnabled()` call re-reads the config. Exists so tests can swap
 * config and re-probe.
 */
export function _resetHostdEnabledCache(): void {
  _hostdEnabled = undefined;
}
52
+
53
/**
 * Build the filesystem path of the per-agent hostd socket.
 *
 * @param agentName - Agent whose socket path is wanted; inserted verbatim.
 * @returns `/run/switchroom/hostd/<agentName>/sock`.
 */
export function hostdSocketPath(agentName: string): string {
  const socketPath = "/run/switchroom/hostd/" + agentName + "/sock";
  return socketPath;
}
56
+
57
/**
 * Predict whether a dispatch for `agentName` would go through hostd.
 *
 * Both conditions must hold: host_control is enabled in config, and the
 * per-agent socket path exists on disk. This says nothing about whether
 * the wire call will succeed — that is only knowable after attempting it.
 *
 * Callers use the answer to decide whether to skip docker-availability
 * preflight guards, since hostd does not need in-container docker.
 *
 * @param agentName - Agent whose socket is probed.
 * @returns `true` when hostd is enabled and the socket path exists.
 */
export function hostdWillBeUsed(agentName: string): boolean {
  // Short-circuit keeps the filesystem probe off the disabled path.
  return isHostdEnabled() && existsSync(hostdSocketPath(agentName));
}
69
+
70
/**
 * Send one request to the per-agent hostd socket.
 *
 * @param agentName - Agent whose hostd socket should receive the request.
 * @param req - The request to send; its `request_id` and `op` are echoed
 *   into the stderr log line on failure.
 * @returns
 *   - `"not-configured"` — hostd is disabled in config OR the per-agent
 *     socket path does not exist. Callers should fall back to the legacy
 *     `spawnSwitchroomDetached` path.
 *   - `HostdResponse` — hostd was contacted. Callers branch on
 *     `resp.result`. Anything thrown by `hostdRequest` (e.g. a wire
 *     error) is logged to stderr and synthesized into a
 *     `result: "error"` response, so callers don't need their own
 *     try/catch. This function does not reject for request failures.
 *
 * Deliberately no silent fallback to spawn when hostd is configured-on
 * but returns error/denied: the operator opted in, so masking failures
 * would just confuse them about why the verb didn't actually run.
 */
export async function tryHostdDispatch(
  agentName: string,
  req: HostdRequest,
): Promise<HostdResponse | "not-configured"> {
  if (!isHostdEnabled()) return "not-configured";
  const sockPath = hostdSocketPath(agentName);
  if (!existsSync(sockPath)) return "not-configured";
  try {
    return await hostdRequest(
      { socketPath: sockPath, timeoutMs: 5000 },
      req,
    );
  } catch (err: unknown) {
    // A thrown value is not guaranteed to be an Error; narrow before
    // reading `.message` so non-Error throws don't surface as "undefined".
    const reason = err instanceof Error ? err.message : String(err);
    process.stderr.write(
      `telegram gateway: hostd dispatch failed ` +
        `(request_id=${req.request_id} op=${req.op}): ` +
        `${reason}\n`,
    );
    return {
      v: 1,
      request_id: req.request_id,
      result: "error",
      exit_code: null,
      duration_ms: 0,
      error: `hostd wire error: ${reason}`,
    };
  }
}
114
+
115
/**
 * Mint a request id of the form `<prefix>-<epoch-ms>-<8 hex chars>`.
 *
 * @param prefix - Leading label for the id (inserted verbatim).
 * @returns The prefix, the current `Date.now()` value, and 4 random
 *   bytes rendered as hex, joined with `-`.
 */
export function hostdRequestId(prefix: string): string {
  const stamp = Date.now();
  const nonce = randomBytes(4).toString("hex");
  return [prefix, stamp, nonce].join("-");
}
@@ -111,6 +111,17 @@ export function computeLabel(toolName, input) {
111
111
  case 'KillBash':
112
112
  case 'KillShell':
113
113
  return 'Stopping background process'
114
+ case 'Skill': {
115
+ // The Skill tool's input is `{ skill: "<slug>", args?: "..." }`.
116
+ // We emit `Running skill <slug>` so downstream observers
117
+ // (notably the skill-coverage UAT runner at
118
+ // telegram-plugin/uat/runners/skill-coverage.ts) can tail the
119
+ // sidecar JSONL and recover which skill fired per turn —
120
+ // the progress card path that used to surface this was retired
121
+ // when `progressDriver` was nulled out in #1122 PR3.
122
+ const slug = clip(String(i.skill ?? ''), 64)
123
+ return slug ? `Running skill ${slug}` : null
124
+ }
114
125
  }
115
126
 
116
127
  // MCP allowlist.
@@ -0,0 +1,303 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * PostToolUse hook — detect a wedged persistent-bash session.
4
+ *
5
+ * Claude Code's Bash tool uses a persistent `bash` subprocess for state
6
+ * continuity (so `cd /foo` in one call survives to the next). When that
7
+ * subprocess's IO state desyncs — typically after a long-running or
8
+ * interrupted command leaves stdin in mid-heredoc, or after sentinel
9
+ * parsing breaks — every subsequent Bash call returns exit-1 with empty
10
+ * stdout and empty stderr. Even `true` returns exit 1. The wedge is
11
+ * sticky for the session; `switchroom agent restart <self>` is the only
12
+ * reliable recovery (it spawns a fresh `claude` → fresh persistent bash).
13
+ *
14
+ * This hook watches PostToolUse events for the wedge signature and,
15
+ * after N consecutive matches, writes a sentinel + logs to stderr so
16
+ * the operator (via `docker logs`) or the gateway (via a future card)
17
+ * can prompt for restart. The hook itself can NEVER fix the wedge —
18
+ * PostToolUse fires after the tool already ran. It's a detection +
19
+ * surfacing surface, not a recovery surface.
20
+ *
21
+ * Claude Code PostToolUse protocol:
22
+ * stdin: JSON { tool_name, tool_use_id, tool_input, tool_response, ... }
23
+ * stdout: optional JSON (hookSpecificOutput.additionalContext for next
24
+ * turn). We use this to nudge the model toward KillBash +
25
+ * self-restart guidance once the wedge is detected.
26
+ * exit: 0 always. Hook failures must never block the tool flow.
27
+ *
28
+ * State:
29
+ * $TELEGRAM_STATE_DIR/wedge-counter.txt — integer, consecutive empty Bash
30
+ * results. Reset to 0 on any non-Bash event or any non-empty Bash
31
+ * result. Incremented on each empty Bash result.
32
+ * $TELEGRAM_STATE_DIR/wedge-detected.json — JSON sentinel written when
33
+ * counter reaches THRESHOLD. Contains { ts, session_id, agent,
34
+ * consecutive }. Gateway can poll for this and surface a card; for
35
+ * now its presence is informational only.
36
+ *
37
+ * Threshold: 5 (see THRESHOLD below; raised from the original 3).
38
+ * Picked to balance false positives (some real commands legitimately
39
+ * produce no output and exit non-zero, e.g. `test -f /nonexistent`)
40
+ * against latency-to-detect. Five in a row is rare outside genuine wedge.
41
+ *
42
+ * Detection is shape-based not exit-code-based because the tool_response
43
+ * shape varies by Claude Code version. We match on:
44
+ * - tool_name === "Bash"
45
+ * - stringified response contains BOTH empty stdout marker AND empty
46
+ * stderr marker. Marker patterns covered: empty <bash-stdout></bash-stdout>
47
+ * + <bash-stderr></bash-stderr> tags, "stdout":"" + "stderr":"", and the
48
+ * zero-info bodies `{}`, `""` and the empty string.
49
+ *
50
+ * If detection markers change in a future Claude Code release, this hook
51
+ * silently misses the wedge — that's the right failure mode (better than
52
+ * false-firing).
53
+ */
54
+
55
+ import { readFileSync, writeFileSync, existsSync, mkdirSync, rmSync } from 'node:fs'
56
+ import { join, dirname } from 'node:path'
57
+
58
+ // Higher than the original 3 to avoid false-firing on legitimate
59
+ // empty-output command sequences (a sed, then two greps with no matches,
60
+ // is a normal refactor pattern and shouldn't trigger). PR #1188 review
61
+ // found 3 was guaranteed-FP. 5 + the noOutputExpected /
62
+ // returnCodeInterpretation skip below should keep real wedges detectable
63
+ // while staying quiet during normal grep/find/sed chains.
64
+ const THRESHOLD = 5
65
+
66
+ // node:fs operations on the counter / sentinel files are read-modify-write
67
+ // without explicit locking. Safe because Claude Code serializes tool calls
68
+ // per session — there is at most one PostToolUse fire in flight per agent
69
+ // at any time. Documented so a future caller doesn't introduce parallelism
70
+ // and silently lose counts.
71
+
72
/**
 * Read everything available on stdin (fd 0) as UTF-8.
 *
 * @returns The stdin text, or '' when the read throws.
 */
function readStdin() {
  let text = ''
  try {
    text = readFileSync(0, 'utf8')
  } catch {
    // Unreadable stdin is treated the same as no input.
  }
  return text
}
79
+
80
/**
 * Resolve the directory that holds the hook's state files.
 *
 * @returns The value of `TELEGRAM_STATE_DIR`, or null when it is unset
 *   or empty.
 */
function stateDir() {
  const configured = process.env.TELEGRAM_STATE_DIR
  return configured ? configured : null
}
83
+
84
/**
 * Locate the consecutive-empty-result counter file.
 *
 * @returns `<state dir>/wedge-counter.txt`, or null when no state dir is
 *   configured.
 */
function counterPath() {
  const base = stateDir()
  if (!base) return null
  return join(base, 'wedge-counter.txt')
}
88
+
89
/**
 * Locate the wedge sentinel file.
 *
 * @returns `<state dir>/wedge-detected.json`, or null when no state dir
 *   is configured.
 */
function sentinelPath() {
  const base = stateDir()
  if (!base) return null
  return join(base, 'wedge-detected.json')
}
93
+
94
/**
 * Load the persisted count of consecutive empty Bash results.
 *
 * @returns The stored non-negative integer, or 0 when there is no state
 *   dir, the file is absent, the content does not parse to a finite
 *   non-negative number, or the read throws.
 */
function readCounter() {
  const file = counterPath()
  if (!file) return 0
  if (!existsSync(file)) return 0
  try {
    const parsed = Number.parseInt(readFileSync(file, 'utf8').trim(), 10)
    // NaN (unparseable) and negative values both fall back to zero.
    if (!Number.isFinite(parsed) || parsed < 0) return 0
    return parsed
  } catch {
    return 0
  }
}
105
+
106
/**
 * Persist the consecutive-empty-result count, creating the parent
 * directory if needed. Does nothing when no state dir is configured.
 *
 * @param n Value to store; written as `String(n)`.
 */
function writeCounter(n) {
  const file = counterPath()
  if (file) {
    try {
      mkdirSync(dirname(file), { recursive: true })
      writeFileSync(file, String(n), 'utf8')
    } catch {
      // Deliberately silent: a lost count only delays detection by a
      // couple of cycles.
    }
  }
}
116
+
117
/**
 * Write the wedge sentinel file as pretty-printed JSON (2-space indent),
 * creating the parent directory if needed. Does nothing when no state
 * dir is configured; write failures are swallowed.
 *
 * @param payload Value serialised into the sentinel file.
 */
function writeSentinel(payload) {
  const file = sentinelPath()
  if (file) {
    try {
      mkdirSync(dirname(file), { recursive: true })
      writeFileSync(file, JSON.stringify(payload, null, 2), 'utf8')
    } catch {
      // Deliberately silent.
    }
  }
}
127
+
128
/**
 * Remove the wedge sentinel file if it exists. Does nothing when no
 * state dir is configured; removal failures are swallowed.
 */
function clearSentinel() {
  const file = sentinelPath()
  if (file) {
    try {
      // `force: true` makes a missing file a no-op rather than an error.
      rmSync(file, { force: true })
    } catch {
      // Deliberately silent.
    }
  }
}
137
+
138
/**
 * Return to the healthy state: zero the counter, then remove the
 * sentinel.
 *
 * The sentinel is cleared together with the counter so that anything
 * polling for `wedge-detected.json` does not see stale state from a
 * wedge that has since cleared (PR #1188 review B2).
 */
function resetCounter() {
  writeCounter(0)
  clearSentinel()
}
146
+
147
/**
 * Test whether a Bash tool_response matches the wedge signature.
 *
 * The wedge produces: empty stdout AND empty stderr AND no
 * Claude-Code-supplied "no output is expected here" annotation AND not
 * interrupted by the user.
 *
 * The benign empty-output cases that PR #1188 review B1 called out
 * (grep/find/sed/test with no matches or in-place mutation) are
 * disambiguated by:
 *   - `noOutputExpected: true` — the command is annotated as one that
 *     legitimately produces no output.
 *   - `returnCodeInterpretation: "..."` — a non-empty human-readable
 *     explanation of the exit code is present, so the empty result is
 *     understood, not a desync.
 *   - `interrupted: true` — user-initiated interruption, not a wedge.
 *
 * Defensive: the response shape varies (plain string vs structured
 * object). Each known marker is checked and anything else fails to
 * match. Arrays are treated as an unknown shape and never match: they
 * carry no `stdout`/`stderr` fields, so reading them as a structured
 * response would count every array — including ones holding real
 * output — as an empty result.
 *
 * @param toolResponse The `tool_response` value from the hook event.
 * @returns true when the response looks like an unexplained empty Bash
 *   result, false otherwise.
 */
function isEmptyBashResponse(toolResponse) {
  if (toolResponse == null) return false

  // Structured-object path. Most reliable — read the fields directly
  // and consult the annotations.
  if (typeof toolResponse === 'object') {
    // `typeof [] === 'object'`, but an array is not the structured Bash
    // shape. Fail-no-match rather than defaulting its missing
    // stdout/stderr to '' and false-firing.
    if (Array.isArray(toolResponse)) return false
    const r = toolResponse
    // Interruption is user-initiated, not a desync. Don't count.
    if (r.interrupted === true) return false
    // The command's empty output is already known to be expected.
    if (r.noOutputExpected === true) return false
    // A human-readable explanation exists — the empty result is
    // accounted for, not a parse failure.
    if (typeof r.returnCodeInterpretation === 'string' && r.returnCodeInterpretation.length > 0) {
      return false
    }
    // Real empty-result check. Both streams empty (or missing).
    const stdout = typeof r.stdout === 'string' ? r.stdout : ''
    const stderr = typeof r.stderr === 'string' ? r.stderr : ''
    return stdout === '' && stderr === ''
  }

  // String path — structured fields are not readable here, so rely on
  // substring shape and accept slightly higher false-positive risk
  // (mitigated by the consecutive-count threshold).
  let body
  try {
    body = String(toolResponse)
  } catch {
    return false
  }
  // Large bodies are not treated as empty results.
  if (body.length > 4096) return false

  // If the string form carries any of the "accounted-for" annotations,
  // treat it the same way the structured path does.
  if (/"noOutputExpected"\s*:\s*true/.test(body)) return false
  if (/"interrupted"\s*:\s*true/.test(body)) return false
  if (/"returnCodeInterpretation"\s*:\s*"[^"]+"/.test(body)) return false

  // XML-style tags: <bash-stdout></bash-stdout><bash-stderr></bash-stderr>
  const hasEmptyStdoutTag = /<bash-stdout>\s*<\/bash-stdout>/i.test(body)
  const hasEmptyStderrTag = /<bash-stderr>\s*<\/bash-stderr>/i.test(body)
  if (hasEmptyStdoutTag && hasEmptyStderrTag) return true

  // JSON-stringified shape.
  const hasEmptyStdoutJson = /"stdout"\s*:\s*""/.test(body)
  const hasEmptyStderrJson = /"stderr"\s*:\s*""/.test(body)
  if (hasEmptyStdoutJson && hasEmptyStderrJson) return true

  // Literal zero-info bodies.
  if (body === '{}' || body === '""' || body === '') return true

  return false
}
226
+
227
+ function emitWedgeContext(consecutive) {
228
+ // PostToolUse can prepend additionalContext to the model's next turn.
229
+ // Use it to surface a single-line nudge once the wedge is suspected
230
+ // so the agent knows to try recovery rather than retrying the same
231
+ // command in a loop.
232
+ const text =
233
+ `[wedge-detect] ${consecutive} consecutive empty-result Bash calls — ` +
234
+ `your persistent shell is likely wedged. Try \`KillBash\` to drop ` +
235
+ `the wedged session, OR ask the user for \`switchroom agent restart ${process.env.SWITCHROOM_AGENT_NAME || '<self>'}\` ` +
236
+ `if KillBash doesn't recover. Don't retry the same command.`
237
+ const payload = {
238
+ hookSpecificOutput: {
239
+ hookEventName: 'PostToolUse',
240
+ additionalContext: text,
241
+ },
242
+ }
243
+ try {
244
+ process.stdout.write(JSON.stringify(payload) + '\n')
245
+ } catch {
246
+ // fail-silent
247
+ }
248
+ }
249
+
250
/**
 * Hook entry point: read one PostToolUse event from stdin and update the
 * wedge counter and sentinel accordingly.
 *
 *   - Empty or unparseable stdin: do nothing.
 *   - Non-Bash event, or a Bash result that does not match the wedge
 *     signature: reset the counter (which also clears the sentinel).
 *   - Matching Bash result: increment and persist the counter; once it
 *     reaches THRESHOLD, write the sentinel, log to stderr, and emit
 *     additionalContext for the model's next turn.
 */
function main() {
  const raw = readStdin()
  if (!raw) return

  let evt
  try {
    evt = JSON.parse(raw)
  } catch {
    return
  }

  // A different tool firing means we are not in a tight loop of Bash
  // retries, and a Bash call with real output means the shell is alive.
  // Either way the streak is broken.
  const looksWedged = evt.tool_name === 'Bash' && isEmptyBashResponse(evt.tool_response)
  if (!looksWedged) {
    resetCounter()
    return
  }

  const consecutive = readCounter() + 1
  writeCounter(consecutive)
  if (consecutive < THRESHOLD) return

  writeSentinel({
    ts: new Date().toISOString(),
    session_id: evt.session_id || null,
    agent: process.env.SWITCHROOM_AGENT_NAME || null,
    consecutive,
    // Last tool_use_id lets an operator-side investigator pin which
    // tool calls triggered the threshold.
    last_tool_use_id: evt.tool_use_id || null,
  })
  process.stderr.write(
    `wedge-detect: ${consecutive} consecutive empty-result Bash calls; ` +
      `sentinel at ${sentinelPath()}; recommend KillBash or ` +
      `switchroom agent restart\n`,
  )
  emitWedgeContext(consecutive)
}
298
+
299
+ try {
300
+ main()
301
+ } catch {
302
+ // PostToolUse must never block the tool flow.
303
+ }
@@ -17,6 +17,45 @@ import { basename } from "node:path";
17
17
  const COMMAND_TITLE_MAX = 40;
18
18
  const PATH_TITLE_MAX = 40;
19
19
 
20
+ /**
21
+ * Human-friendly descriptions for switchroom-managed MCP tools. The
22
+ * raw `mcp__<server>__<tool>` name is operator-unfriendly — they shouldn't
23
+ * have to decode the namespace to understand what the agent is asking
24
+ * to do. Use this map to turn the code-level identifier into a verb
25
+ * phrase ("Read its own merged config" instead of
26
+ * "mcp__agent-config__config_get") for the approval card.
27
+ *
28
+ * Note: post-#1215 these tools are pre-allowed in scaffolded
29
+ * settings.permissions.allow, so the card should fire rarely.
30
+ * This map is for the fallback path — agents the operator
31
+ * narrowed the allowlist on, or tools added in future PRs that
32
+ * haven't shipped the allowlist bump yet.
33
+ */
34
+ const MCP_TOOL_DESCRIPTIONS: Record<string, string> = {
35
+ // agent-config — every agent's self-service surface (#1163, #1215)
36
+ "mcp__agent-config__config_get": "Read its own merged config",
37
+ "mcp__agent-config__cron_list": "List its own scheduled tasks",
38
+ "mcp__agent-config__skill_list": "List its own installed skills",
39
+ "mcp__agent-config__audit_tail": "Read its own recent tool-call audit log",
40
+ "mcp__agent-config__peers_list": "List the other agents on this instance",
41
+ "mcp__agent-config__schedule_add": "Add a scheduled task to its own cron",
42
+ "mcp__agent-config__schedule_remove": "Remove one of its own scheduled tasks",
43
+ "mcp__agent-config__skill_install": "Install a bundled skill onto itself",
44
+ "mcp__agent-config__skill_remove": "Remove one of its own installed skills",
45
+ // hostd — admin-flagged agents' fleet-management surface (#1175, #1215)
46
+ "mcp__hostd__agent_restart": "Restart an agent in the fleet",
47
+ "mcp__hostd__agent_start": "Start a stopped agent in the fleet",
48
+ "mcp__hostd__agent_stop": "Stop a running agent in the fleet",
49
+ "mcp__hostd__agent_logs": "Read another agent's container logs",
50
+ "mcp__hostd__agent_exec": "Run a read-only inspection inside another agent",
51
+ "mcp__hostd__update_check": "Check what a fleet-wide update would do",
52
+ "mcp__hostd__update_apply": "Apply a fleet-wide update (pull + recreate)",
53
+ // hindsight — memory
54
+ "mcp__hindsight__recall": "Recall relevant memories",
55
+ "mcp__hindsight__retain": "Retain a memory",
56
+ "mcp__hindsight__reflect": "Reflect across its memory bank",
57
+ };
58
+
20
59
  /**
21
60
  * Build a title fragment for a permission prompt. Returns the toolName
22
61
  * for any tool we don't recognise — the helper is intentionally
@@ -27,6 +66,23 @@ export function summarizeToolForTitle(
27
66
  toolName: string,
28
67
  inputPreview: string | undefined,
29
68
  ): string {
69
+ // MCP tools: `mcp__<server>__<verb>`. Prefer a curated human
70
+ // description (so the card reads "Read its own merged config"
71
+ // instead of "mcp__agent-config__config_get"). Fall through to a
72
+ // generic `<server>: <verb-with-spaces>` shape for unknown MCP
73
+ // tools and finally to the raw name when even that fails.
74
+ if (toolName.startsWith("mcp__")) {
75
+ const curated = MCP_TOOL_DESCRIPTIONS[toolName];
76
+ if (curated) return curated;
77
+ const parts = toolName.split("__");
78
+ if (parts.length >= 3) {
79
+ const server = parts[1]!;
80
+ const verb = parts.slice(2).join("__").replace(/_/g, " ");
81
+ return `${server}: ${verb}`;
82
+ }
83
+ return toolName;
84
+ }
85
+
30
86
  const input = parseInput(inputPreview);
31
87
  if (!input) return toolName;
32
88
 
@@ -17,11 +17,13 @@
17
17
 
18
18
  import { readFileSync, existsSync } from "fs";
19
19
  import { join } from "path";
20
- import {
21
- readAccountQuota,
22
- snapshotFromQuotaUtilization,
23
- writeAccountQuota,
24
- } from "../src/auth/account-quota-store.js";
20
+
21
+ // RFC H: per-account quota state moved to switchroom-auth-broker
22
+ // (state/auth-broker/quota.json). The gateway's in-process cache
23
+ // below is still useful for sub-second formatting, but the disk-
24
+ // persistence layer that account-quota-store provided is gone —
25
+ // the broker owns the canonical store and exposes it via
26
+ // `list-state`. Disk hydrate / disk persist below are no-ops.
25
27
 
26
28
  /**
27
29
  * OAuth beta flag — proves the request is coming from a subscription client.
@@ -350,20 +352,10 @@ export async function fetchAccountQuota(
350
352
  timeoutMs: opts.timeoutMs,
351
353
  });
352
354
  accountQuotaCache.set(label, { fetchedAt: now, result });
353
- // Persist the snapshot to disk so a future gateway restart can
354
- // re-hydrate its in-process cache without an API call. Best-effort
355
- // (write errors swallowed inside writeAccountQuota). Issue #708.
356
- if (result.ok) {
357
- try {
358
- writeAccountQuota(
359
- label,
360
- snapshotFromQuotaUtilization(result.data, new Date(now)),
361
- opts.home,
362
- );
363
- } catch {
364
- /* best-effort */
365
- }
366
- }
355
+ // Note: pre-RFC-H this also persisted to disk via writeAccountQuota
356
+ // (#708) so a gateway restart could re-hydrate without an API call.
357
+ // Post-RFC-H the broker holds canonical quota state and answers
358
+ // via `list-state`, so the gateway's in-process cache is enough.
367
359
  return result;
368
360
  }
369
361
 
@@ -381,29 +373,15 @@ export async function fetchAccountQuota(
381
373
  * prefetch will replace it on the next tap.
382
374
  */
383
375
  export function hydrateAccountQuotaCacheFromDisk(
384
- labels: ReadonlyArray<string>,
385
- home?: string,
376
+ _labels: ReadonlyArray<string>,
377
+ _home?: string,
386
378
  ): void {
387
- for (const label of labels) {
388
- if (accountQuotaCache.has(label)) continue;
389
- const snap = readAccountQuota(label, home);
390
- if (!snap) continue;
391
- const fetchedAt = Date.parse(snap.capturedAt);
392
- if (!Number.isFinite(fetchedAt)) continue;
393
- const result: QuotaResult = {
394
- ok: true,
395
- data: {
396
- fiveHourUtilizationPct: snap.fiveHourPct ?? 0,
397
- sevenDayUtilizationPct: snap.sevenDayPct ?? 0,
398
- fiveHourResetAt: snap.fiveHourResetAt ? new Date(snap.fiveHourResetAt) : null,
399
- sevenDayResetAt: snap.sevenDayResetAt ? new Date(snap.sevenDayResetAt) : null,
400
- representativeClaim: null,
401
- overageStatus: null,
402
- overageDisabledReason: null,
403
- },
404
- };
405
- accountQuotaCache.set(label, { fetchedAt, result });
406
- }
379
+ // No-op post-RFC-H. The disk-snapshot store this function used to
380
+ // re-hydrate from (per-account quota.json files under
381
+ // ~/.switchroom/accounts/<label>/) is gone — switchroom-auth-broker
382
+ // now owns canonical quota state. Boot-time hydration is the
383
+ // broker's `list-state` call instead. Signature preserved so
384
+ // existing call sites continue to compile while we phase them out.
407
385
  }
408
386
 
409
387
  /** Test/utility helper — wipe the per-account quota cache. The
@@ -24,7 +24,6 @@ const entries = [
24
24
  { src: "server.ts", out: "server.js", label: "server (legacy + dual-mode shim)" },
25
25
  { src: "gateway/gateway.ts", out: "gateway/gateway.js", label: "gateway (persistent service)" },
26
26
  { src: "bridge/bridge.ts", out: "bridge/bridge.js", label: "bridge (MCP proxy)" },
27
- { src: "foreman/foreman.ts", out: "foreman/foreman.js", label: "foreman (admin bot)" },
28
27
  ];
29
28
 
30
29
  for (const { src, out, label } of entries) {
@@ -1,7 +1,8 @@
1
1
  /**
2
- * Shared bot runtime helpers — extracted from gateway.ts so both the
3
- * per-agent gateway and the foreman bot can share the same core plumbing
4
- * without duplicating code.
2
+ * Shared bot runtime helpers — extracted from gateway.ts as a reusable
3
+ * core that callers can build on without duplicating the boilerplate.
4
+ * Used today by the per-agent gateway; historically also by the
5
+ * standalone foreman bot before its retirement.
5
6
  *
6
7
  * What lives here:
7
8
  * - `createRobustApiCall` — thin re-export of createRetryApiCall pre-wired
@@ -361,7 +362,7 @@ export async function runPollingLoop(
361
362
 
362
363
  /**
363
364
  * Returns true if the sender's user ID is in the allowFrom list.
364
- * Used by both gateway and foreman for auth gating.
365
+ * Used by the gateway for sender-allowlist auth gating.
365
366
  */
366
367
  export function isAllowedSender(ctx: Context, allowFrom: string[]): boolean {
367
368
  const from = ctx.from