switchroom 0.8.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -57
- package/bin/timezone-hook.sh +9 -7
- package/dist/agent-scheduler/index.js +285 -45
- package/dist/auth-broker/index.js +13932 -0
- package/dist/cli/switchroom.js +15931 -12778
- package/dist/host-control/main.js +582 -43
- package/dist/vault/approvals/kernel-server.js +276 -47
- package/dist/vault/broker/server.js +333 -69
- package/examples/minimal.yaml +63 -0
- package/examples/personal-google-workspace-mcp/.env.example +34 -0
- package/examples/personal-google-workspace-mcp/README.md +194 -0
- package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
- package/examples/switchroom.yaml +220 -0
- package/package.json +6 -4
- package/profiles/_base/start.sh.hbs +3 -3
- package/profiles/_shared/agent-self-service.md.hbs +126 -0
- package/profiles/default/CLAUDE.md +10 -0
- package/profiles/default/CLAUDE.md.hbs +16 -0
- package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
- package/skills/buildkite-agent-runtime/SKILL.md +44 -11
- package/skills/buildkite-api/SKILL.md +31 -8
- package/skills/buildkite-cli/SKILL.md +27 -9
- package/skills/buildkite-migration/SKILL.md +22 -9
- package/skills/buildkite-pipelines/SKILL.md +26 -9
- package/skills/buildkite-secure-delivery/SKILL.md +23 -9
- package/skills/buildkite-test-engine/SKILL.md +25 -8
- package/skills/docx/SKILL.md +1 -1
- package/skills/file-bug/SKILL.md +34 -6
- package/skills/humanizer/SKILL.md +15 -0
- package/skills/humanizer-calibrate/SKILL.md +7 -1
- package/skills/mcp-builder/SKILL.md +1 -1
- package/skills/pdf/SKILL.md +1 -1
- package/skills/pptx/SKILL.md +1 -1
- package/skills/skill-creator/SKILL.md +21 -1
- package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
- package/skills/switchroom-cli/SKILL.md +63 -64
- package/skills/switchroom-health/SKILL.md +23 -10
- package/skills/switchroom-install/SKILL.md +3 -3
- package/skills/switchroom-manage/SKILL.md +26 -19
- package/skills/switchroom-runtime/SKILL.md +67 -15
- package/skills/switchroom-status/SKILL.md +26 -1
- package/skills/telegram-test-harness/SKILL.md +3 -0
- package/skills/webapp-testing/SKILL.md +31 -1
- package/skills/xlsx/SKILL.md +1 -1
- package/telegram-plugin/admin-commands/index.ts +7 -5
- package/telegram-plugin/dist/gateway/gateway.js +13042 -12844
- package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
- package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
- package/telegram-plugin/gateway/auth-command.ts +794 -0
- package/telegram-plugin/gateway/auth-line.ts +123 -0
- package/telegram-plugin/gateway/boot-card.ts +22 -36
- package/telegram-plugin/gateway/boot-probes.ts +3 -3
- package/telegram-plugin/gateway/gateway.ts +313 -798
- package/telegram-plugin/gateway/hostd-dispatch.ts +117 -0
- package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
- package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
- package/telegram-plugin/permission-title.ts +56 -0
- package/telegram-plugin/quota-check.ts +19 -41
- package/telegram-plugin/scripts/build.mjs +0 -1
- package/telegram-plugin/shared/bot-runtime.ts +5 -4
- package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
- package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
- package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
- package/telegram-plugin/tests/boot-probes.test.ts +11 -4
- package/telegram-plugin/tests/hostd-dispatch.test.ts +129 -0
- package/telegram-plugin/tests/permission-title.test.ts +31 -0
- package/telegram-plugin/tests/quota-check.test.ts +5 -35
- package/telegram-plugin/uat/SETUP.md +31 -1
- package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
- package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
- package/telegram-plugin/uat/runners/report.ts +150 -0
- package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
- package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
- package/telegram-plugin/uat/runners/scorer.ts +106 -0
- package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
- package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
- package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +7 -1
- package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +7 -1
- package/telegram-plugin/auth-dashboard.ts +0 -1104
- package/telegram-plugin/auth-slot-parser.ts +0 -497
- package/telegram-plugin/dist/foreman/foreman.js +0 -31358
- package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
- package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
- package/telegram-plugin/foreman/foreman.ts +0 -1165
- package/telegram-plugin/foreman/setup-flow.ts +0 -345
- package/telegram-plugin/foreman/setup-state.ts +0 -239
- package/telegram-plugin/foreman/state.ts +0 -203
- package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
- package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
- package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
- package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
- package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
- package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
- package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
- package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
- package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
- package/telegram-plugin/tests/foreman-state.test.ts +0 -164
- package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
- package/telegram-plugin/tests/setup-flow.test.ts +0 -510
- package/telegram-plugin/tests/setup-state.test.ts +0 -146
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hostd dispatch helpers for the gateway's self-restart slash-commands
|
|
3
|
+
* (#1175 RFC C, Phase 2). When the operator has opted into
|
|
4
|
+
* `host_control.enabled: true`, /restart, /new, /reset, and
|
|
5
|
+
* /update apply route through the per-agent hostd UDS instead of the
|
|
6
|
+
* in-container `spawnSwitchroomDetached` shellout.
|
|
7
|
+
*
|
|
8
|
+
* Rationale: in docker-mode (the v0.7+ default) the agent container
|
|
9
|
+
* has no docker binary and no `/var/run/docker.sock` — so the
|
|
10
|
+
* spawn-path verbs fail with exit-127 the moment they touch compose.
|
|
11
|
+
* Hostd runs on the host with the docker socket mounted, so the verbs
|
|
12
|
+
* actually work.
|
|
13
|
+
*
|
|
14
|
+
* Extracted from gateway.ts for unit-testability — gateway.ts itself
|
|
15
|
+
* has too many boot-time side-effects to import directly in a test.
|
|
16
|
+
*/
|
|
17
|
+
import { existsSync } from "node:fs";
|
|
18
|
+
import { randomBytes } from "node:crypto";
|
|
19
|
+
import { hostdRequest } from "../../src/host-control/client.js";
|
|
20
|
+
import type {
|
|
21
|
+
HostdRequest,
|
|
22
|
+
HostdResponse,
|
|
23
|
+
} from "../../src/host-control/protocol.js";
|
|
24
|
+
import { loadConfig as loadSwitchroomConfig } from "../../src/config/loader.js";
|
|
25
|
+
|
|
26
|
+
let _hostdEnabled: boolean | undefined;
|
|
27
|
+
|
|
28
|
+
/**
 * Report whether `host_control.enabled` is set to `true` in the resolved
 * switchroom config.
 *
 * The answer is memoised in the module-level `_hostdEnabled` slot, so the
 * config is loaded at most once until the cache is reset.
 *
 * Best-effort: if loading the config throws, the result is `false`, which
 * makes the dispatch helper report "not-configured" so callers fall back
 * to the legacy spawn path.
 *
 * @returns `true` only when the config loaded and the flag is exactly `true`.
 */
export function isHostdEnabled(): boolean {
  if (_hostdEnabled === undefined) {
    let enabled = false;
    try {
      enabled = loadSwitchroomConfig().host_control?.enabled === true;
    } catch {
      // Config could not be loaded — treat hostd as disabled.
      enabled = false;
    }
    _hostdEnabled = enabled;
  }
  return _hostdEnabled;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Forget the memoised `host_control.enabled` answer so the next
 * `isHostdEnabled()` call re-reads the config.
 *
 * @internal Exists so tests can swap the config and re-probe.
 */
export function _resetHostdEnabledCache(): void {
  _hostdEnabled = undefined;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Build the filesystem path of the hostd socket for one agent.
 *
 * @param agentName Agent whose socket path is wanted; inserted verbatim.
 * @returns `/run/switchroom/hostd/<agentName>/sock`.
 */
export function hostdSocketPath(agentName: string): string {
  const segments = ["/run/switchroom/hostd", agentName, "sock"];
  return segments.join("/");
}
|
|
56
|
+
|
|
57
|
+
/**
 * Decide whether dispatch for this agent will go through hostd.
 *
 * Both conditions must hold: host_control is enabled in config, and the
 * per-agent socket path exists on disk. This says nothing about whether
 * the wire call will succeed — that is only knowable by attempting it.
 *
 * Callers use the answer to decide whether to skip docker-availability
 * preflight guards, since hostd doesn't need in-container docker.
 *
 * @param agentName Agent whose socket is probed.
 * @returns `true` when hostd is enabled and the socket path exists.
 */
export function hostdWillBeUsed(agentName: string): boolean {
  return isHostdEnabled() && existsSync(hostdSocketPath(agentName));
}
|
|
69
|
+
|
|
70
|
+
/**
 * Send one request to the per-agent hostd socket.
 *
 * Deliberately no silent fallback to spawn when hostd is configured-on
 * but returns error/denied: the operator opted in, so masking failures
 * would just confuse them about why the verb didn't actually run.
 *
 * @param agentName Agent whose hostd socket should receive the request.
 * @param req Request to send; its `request_id` and `op` are echoed in the
 *   failure log line and in the synthesized error response.
 * @returns
 *   - `"not-configured"` — hostd is disabled in config OR the per-agent
 *     socket isn't present. Callers should fall back to the legacy
 *     `spawnSwitchroomDetached` path.
 *   - `HostdResponse` — hostd was contacted. Callers branch on
 *     `resp.result`. Any failure thrown by the wire call is logged to
 *     stderr and synthesized into a `result: "error"` response, so callers
 *     don't need a separate try/catch around the failure.
 */
export async function tryHostdDispatch(
  agentName: string,
  req: HostdRequest,
): Promise<HostdResponse | "not-configured"> {
  if (!isHostdEnabled()) return "not-configured";
  const sockPath = hostdSocketPath(agentName);
  if (!existsSync(sockPath)) return "not-configured";
  try {
    return await hostdRequest(
      { socketPath: sockPath, timeoutMs: 5000 },
      req,
    );
  } catch (err) {
    // A rejection is not guaranteed to be an Error. Reading `.message` off
    // null/undefined would throw from inside this handler and break the
    // "never throws on wire errors" contract, so narrow before use.
    const reason = err instanceof Error ? err.message : String(err);
    process.stderr.write(
      `telegram gateway: hostd dispatch failed ` +
        `(request_id=${req.request_id} op=${req.op}): ` +
        `${reason}\n`,
    );
    return {
      v: 1,
      request_id: req.request_id,
      result: "error",
      exit_code: null,
      duration_ms: 0,
      error: `hostd wire error: ${reason}`,
    };
  }
}
|
|
114
|
+
|
|
115
|
+
/**
 * Mint a request id for a hostd call.
 *
 * @param prefix Caller-chosen label placed at the front of the id.
 * @returns `<prefix>-<epoch millis>-<8 hex chars>`, where the hex part
 *   comes from 4 random bytes.
 */
export function hostdRequestId(prefix: string): string {
  const timestamp = Date.now();
  const suffix = randomBytes(4).toString("hex");
  return [prefix, timestamp, suffix].join("-");
}
|
|
@@ -111,6 +111,17 @@ export function computeLabel(toolName, input) {
|
|
|
111
111
|
case 'KillBash':
|
|
112
112
|
case 'KillShell':
|
|
113
113
|
return 'Stopping background process'
|
|
114
|
+
case 'Skill': {
|
|
115
|
+
// The Skill tool's input is `{ skill: "<slug>", args?: "..." }`.
|
|
116
|
+
// We emit `Running skill <slug>` so downstream observers
|
|
117
|
+
// (notably the skill-coverage UAT runner at
|
|
118
|
+
// telegram-plugin/uat/runners/skill-coverage.ts) can tail the
|
|
119
|
+
// sidecar JSONL and recover which skill fired per turn —
|
|
120
|
+
// the progress card path that used to surface this was retired
|
|
121
|
+
// when `progressDriver` was nulled out in #1122 PR3.
|
|
122
|
+
const slug = clip(String(i.skill ?? ''), 64)
|
|
123
|
+
return slug ? `Running skill ${slug}` : null
|
|
124
|
+
}
|
|
114
125
|
}
|
|
115
126
|
|
|
116
127
|
// MCP allowlist.
|
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* PostToolUse hook — detect a wedged persistent-bash session.
|
|
4
|
+
*
|
|
5
|
+
* Claude Code's Bash tool uses a persistent `bash` subprocess for state
|
|
6
|
+
* continuity (so `cd /foo` in one call survives to the next). When that
|
|
7
|
+
* subprocess's IO state desyncs — typically after a long-running or
|
|
8
|
+
* interrupted command leaves stdin in mid-heredoc, or after sentinel
|
|
9
|
+
* parsing breaks — every subsequent Bash call returns exit-1 with empty
|
|
10
|
+
* stdout and empty stderr. Even `true` returns exit 1. The wedge is
|
|
11
|
+
* sticky for the session; `switchroom agent restart <self>` is the only
|
|
12
|
+
* reliable recovery (it spawns a fresh `claude` → fresh persistent bash).
|
|
13
|
+
*
|
|
14
|
+
* This hook watches PostToolUse events for the wedge signature and,
|
|
15
|
+
* after N consecutive matches, writes a sentinel + logs to stderr so
|
|
16
|
+
* the operator (via `docker logs`) or the gateway (via a future card)
|
|
17
|
+
* can prompt for restart. The hook itself can NEVER fix the wedge —
|
|
18
|
+
* PostToolUse fires after the tool already ran. It's a detection +
|
|
19
|
+
* surfacing surface, not a recovery surface.
|
|
20
|
+
*
|
|
21
|
+
* Claude Code PostToolUse protocol:
|
|
22
|
+
* stdin: JSON { tool_name, tool_use_id, tool_input, tool_response, ... }
|
|
23
|
+
* stdout: optional JSON (hookSpecificOutput.additionalContext for next
|
|
24
|
+
* turn). We use this to nudge the model toward KillBash +
|
|
25
|
+
* self-restart guidance once the wedge is detected.
|
|
26
|
+
* exit: 0 always. Hook failures must never block the tool flow.
|
|
27
|
+
*
|
|
28
|
+
* State:
|
|
29
|
+
* $TELEGRAM_STATE_DIR/wedge-counter.txt — integer, consecutive empty Bash
|
|
30
|
+
* results. Reset to 0 on any non-Bash event or any non-empty Bash
|
|
31
|
+
* result. Incremented on each empty Bash result.
|
|
32
|
+
* $TELEGRAM_STATE_DIR/wedge-detected.json — JSON sentinel written when
|
|
33
|
+
* counter reaches THRESHOLD. Contains { ts, session_id, agent,
|
|
34
|
+
* consecutive }. Gateway can poll for this and surface a card; for
|
|
35
|
+
* now its presence is informational only.
|
|
36
|
+
*
|
|
37
|
+
* Threshold: 5 (see the THRESHOLD constant; raised from the original 3).
* Picked to balance false positives (some real commands legitimately
* produce no output and exit non-zero, e.g. `test -f /nonexistent`)
* against latency-to-detect. That many in a row is rare outside a
* genuine wedge.
|
|
41
|
+
*
|
|
42
|
+
* Detection is shape-based not exit-code-based because the tool_response
|
|
43
|
+
* shape varies by Claude Code version. We match on:
|
|
44
|
+
* - tool_name === "Bash"
|
|
45
|
+
* - either a structured response whose stdout and stderr are both empty
* (or absent), or a stringified response containing BOTH an empty
* stdout marker AND an empty stderr marker. String markers covered:
* <bash-stdout></bash-stdout> + <bash-stderr></bash-stderr>,
* "stdout":"" + "stderr":"", and the literal zero-info bodies `{}`,
* `""`, and the empty string.
|
|
49
|
+
*
|
|
50
|
+
* If detection markers change in a future Claude Code release, this hook
|
|
51
|
+
* silently misses the wedge — that's the right failure mode (better than
|
|
52
|
+
* false-firing).
|
|
53
|
+
*/
|
|
54
|
+
|
|
55
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync, rmSync } from 'node:fs'
|
|
56
|
+
import { join, dirname } from 'node:path'
|
|
57
|
+
|
|
58
|
+
// Higher than the original 3 to avoid false-firing on legitimate
|
|
59
|
+
// empty-output command sequences (a sed, then two greps with no matches,
|
|
60
|
+
// is a normal refactor pattern and shouldn't trigger). PR #1188 review
|
|
61
|
+
// found 3 was guaranteed-FP. 5 + the noOutputExpected /
|
|
62
|
+
// returnCodeInterpretation skip below should keep real wedges detectable
|
|
63
|
+
// while staying quiet during normal grep/find/sed chains.
|
|
64
|
+
const THRESHOLD = 5
|
|
65
|
+
|
|
66
|
+
// node:fs operations on the counter / sentinel files are read-modify-write
|
|
67
|
+
// without explicit locking. Safe because Claude Code serializes tool calls
|
|
68
|
+
// per session — there is at most one PostToolUse fire in flight per agent
|
|
69
|
+
// at any time. Documented so a future caller doesn't introduce parallelism
|
|
70
|
+
// and silently lose counts.
|
|
71
|
+
|
|
72
|
+
/**
 * Read everything available on stdin (file descriptor 0) as UTF-8 text.
 * Returns the empty string if the read throws.
 */
function readStdin() {
  let text = ''
  try {
    text = readFileSync(0, 'utf8')
  } catch {
    // Unreadable stdin is treated the same as no input.
  }
  return text
}
|
|
79
|
+
|
|
80
|
+
/**
 * Directory holding this hook's state files, taken from
 * $TELEGRAM_STATE_DIR. Returns null when the variable is unset or empty.
 */
function stateDir() {
  const configured = process.env.TELEGRAM_STATE_DIR
  return configured ? configured : null
}
|
|
83
|
+
|
|
84
|
+
/**
 * Path of the consecutive-empty-result counter file
 * (`wedge-counter.txt` inside the state dir), or null when no state dir
 * is configured.
 */
function counterPath() {
  const base = stateDir()
  if (!base) return null
  return join(base, 'wedge-counter.txt')
}
|
|
88
|
+
|
|
89
|
+
/**
 * Path of the wedge sentinel file (`wedge-detected.json` inside the
 * state dir), or null when no state dir is configured.
 */
function sentinelPath() {
  const base = stateDir()
  if (!base) return null
  return join(base, 'wedge-detected.json')
}
|
|
93
|
+
|
|
94
|
+
/**
 * Load the current consecutive-empty-result count.
 *
 * Returns 0 when there is no counter path, the file does not exist, the
 * read throws, or the contents do not parse to a finite non-negative
 * integer.
 */
function readCounter() {
  const file = counterPath()
  if (!file) return 0
  if (!existsSync(file)) return 0
  try {
    const parsed = Number.parseInt(readFileSync(file, 'utf8').trim(), 10)
    if (!Number.isFinite(parsed) || parsed < 0) return 0
    return parsed
  } catch {
    return 0
  }
}
|
|
105
|
+
|
|
106
|
+
/**
 * Persist the counter value `n` to the counter file, creating the parent
 * directory if needed. Does nothing when no counter path is configured.
 * Write failures are swallowed.
 */
function writeCounter(n) {
  const file = counterPath()
  if (file) {
    try {
      mkdirSync(dirname(file), { recursive: true })
      writeFileSync(file, String(n), 'utf8')
    } catch {
      // fail-silent; counter loss just delays detection by a couple of cycles
    }
  }
}
|
|
116
|
+
|
|
117
|
+
/**
 * Write `payload` to the sentinel file as pretty-printed (2-space) JSON,
 * creating the parent directory if needed. Does nothing when no sentinel
 * path is configured. Write failures are swallowed.
 */
function writeSentinel(payload) {
  const file = sentinelPath()
  if (file) {
    try {
      mkdirSync(dirname(file), { recursive: true })
      writeFileSync(file, JSON.stringify(payload, null, 2), 'utf8')
    } catch {
      // fail-silent
    }
  }
}
|
|
127
|
+
|
|
128
|
+
/**
 * Remove the sentinel file if one is configured. `force: true` makes a
 * missing file a non-error; any other failure is swallowed.
 */
function clearSentinel() {
  const file = sentinelPath()
  if (file) {
    try {
      rmSync(file, { force: true })
    } catch {
      // fail-silent
    }
  }
}
|
|
137
|
+
|
|
138
|
+
/**
 * Return to the healthy state: zero the counter and drop the sentinel.
 *
 * The sentinel is removed as well so that anything polling for
 * `wedge-detected.json` does not see stale state from a wedge that has
 * since cleared (PR #1188 review B2).
 */
function resetCounter() {
  writeCounter(0)
  clearSentinel()
}
|
|
146
|
+
|
|
147
|
+
/**
 * Decide whether a Bash `tool_response` looks like the wedge signature:
 * empty stdout AND empty stderr, with no annotation that explains the
 * emptiness and no user interruption.
 *
 * Benign empty results (grep/find/sed/test with no matches or in-place
 * mutation, per PR #1188 review B1) are excluded via three markers:
 *   - `interrupted: true` — the command was interrupted by the user.
 *   - `noOutputExpected: true` — no output is expected for this command.
 *   - a non-empty `returnCodeInterpretation` string — the exit code
 *     already has a human-readable explanation.
 *
 * Two input shapes are handled:
 *   - object: fields are read directly; a missing or non-string stream
 *     counts as empty.
 *   - anything else: the value is stringified and matched against known
 *     substrings. Bodies longer than 4096 characters never match.
 *
 * @param toolResponse The `tool_response` value from the hook event.
 * @returns true when the response matches the wedge signature.
 */
function isEmptyBashResponse(toolResponse) {
  if (toolResponse == null) return false

  // Structured-object path: consult the annotations, then the streams.
  if (typeof toolResponse === 'object') {
    const resp = toolResponse
    if (resp.interrupted === true) return false
    if (resp.noOutputExpected === true) return false
    const interpretation = resp.returnCodeInterpretation
    if (typeof interpretation === 'string' && interpretation.length > 0) {
      return false
    }
    // A stream is "empty" when it is absent, not a string, or ''.
    const stdoutEmpty = typeof resp.stdout !== 'string' || resp.stdout === ''
    const stderrEmpty = typeof resp.stderr !== 'string' || resp.stderr === ''
    return stdoutEmpty && stderrEmpty
  }

  // String path: no structured fields to read, so match on substring
  // shape and accept a slightly higher false-positive risk.
  let body
  try {
    body = String(toolResponse)
  } catch {
    return false
  }
  if (body.length > 4096) return false

  // Annotations that mark the empty result as accounted-for.
  const accountedFor = [
    /"noOutputExpected"\s*:\s*true/,
    /"interrupted"\s*:\s*true/,
    /"returnCodeInterpretation"\s*:\s*"[^"]+"/,
  ]
  if (accountedFor.some((pattern) => pattern.test(body))) return false

  // Each pair is [empty-stdout marker, empty-stderr marker]; both halves
  // of a pair must be present. First pair: XML-style tags. Second pair:
  // JSON-stringified fields.
  const emptyStreamPairs = [
    [/<bash-stdout>\s*<\/bash-stdout>/i, /<bash-stderr>\s*<\/bash-stderr>/i],
    [/"stdout"\s*:\s*""/, /"stderr"\s*:\s*""/],
  ]
  if (emptyStreamPairs.some(([out, err]) => out.test(body) && err.test(body))) {
    return true
  }

  // Literal zero-info bodies.
  return ['{}', '""', ''].includes(body)
}
|
|
226
|
+
|
|
227
|
+
function emitWedgeContext(consecutive) {
|
|
228
|
+
// PostToolUse can prepend additionalContext to the model's next turn.
|
|
229
|
+
// Use it to surface a single-line nudge once the wedge is suspected
|
|
230
|
+
// so the agent knows to try recovery rather than retrying the same
|
|
231
|
+
// command in a loop.
|
|
232
|
+
const text =
|
|
233
|
+
`[wedge-detect] ${consecutive} consecutive empty-result Bash calls — ` +
|
|
234
|
+
`your persistent shell is likely wedged. Try \`KillBash\` to drop ` +
|
|
235
|
+
`the wedged session, OR ask the user for \`switchroom agent restart ${process.env.SWITCHROOM_AGENT_NAME || '<self>'}\` ` +
|
|
236
|
+
`if KillBash doesn't recover. Don't retry the same command.`
|
|
237
|
+
const payload = {
|
|
238
|
+
hookSpecificOutput: {
|
|
239
|
+
hookEventName: 'PostToolUse',
|
|
240
|
+
additionalContext: text,
|
|
241
|
+
},
|
|
242
|
+
}
|
|
243
|
+
try {
|
|
244
|
+
process.stdout.write(JSON.stringify(payload) + '\n')
|
|
245
|
+
} catch {
|
|
246
|
+
// fail-silent
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
/**
 * Hook entry point: read one PostToolUse event from stdin and update the
 * wedge counter.
 *
 *   - No input, or input that is not valid JSON: do nothing.
 *   - Any non-Bash tool, or a Bash call whose response is not the empty
 *     wedge signature: reset the counter (and sentinel).
 *   - Empty Bash result: increment the counter; once it reaches
 *     THRESHOLD, write the sentinel, log to stderr, and emit the
 *     additionalContext nudge on stdout.
 */
function main() {
  const raw = readStdin()
  if (!raw) return

  let evt
  try {
    evt = JSON.parse(raw)
  } catch {
    return
  }

  // A different tool firing means we are not in a tight loop of Bash
  // retries, and a Bash call with real output means the shell is alive —
  // either way, reset.
  if (evt.tool_name !== 'Bash' || !isEmptyBashResponse(evt.tool_response)) {
    resetCounter()
    return
  }

  // Empty Bash result: count it.
  const streak = readCounter() + 1
  writeCounter(streak)
  if (streak < THRESHOLD) return

  writeSentinel({
    ts: new Date().toISOString(),
    session_id: evt.session_id || null,
    agent: process.env.SWITCHROOM_AGENT_NAME || null,
    consecutive: streak,
    // Capture the last tool_use_id so an operator-side investigator
    // can pin which tool calls triggered the threshold.
    last_tool_use_id: evt.tool_use_id || null,
  })
  process.stderr.write(
    `wedge-detect: ${streak} consecutive empty-result Bash calls; ` +
      `sentinel at ${sentinelPath()}; recommend KillBash or ` +
      `switchroom agent restart\n`,
  )
  emitWedgeContext(streak)
}

// Run the hook. Any exception is swallowed here: PostToolUse must never
// block the tool flow.
try {
  main()
} catch {
  // intentionally ignored
}
|
|
@@ -17,6 +17,45 @@ import { basename } from "node:path";
|
|
|
17
17
|
const COMMAND_TITLE_MAX = 40;
|
|
18
18
|
const PATH_TITLE_MAX = 40;
|
|
19
19
|
|
|
20
|
+
/**
|
|
21
|
+
* Human-friendly descriptions for switchroom-managed MCP tools. The
|
|
22
|
+
* raw `mcp__<server>__<tool>` name is operator-unfriendly — they shouldn't
|
|
23
|
+
* have to decode the namespace to understand what the agent is asking
|
|
24
|
+
* to do. Use this map to turn the code-level identifier into a verb
|
|
25
|
+
* phrase ("Read its own merged config" instead of
|
|
26
|
+
* "mcp__agent-config__config_get") for the approval card.
|
|
27
|
+
*
|
|
28
|
+
* Note: post-#1215 these tools are pre-allowed in scaffolded
|
|
29
|
+
* settings.permissions.allow, so the card should fire rarely.
|
|
30
|
+
* This map is for the fallback path — agents the operator
|
|
31
|
+
* narrowed the allowlist on, or tools added in future PRs that
|
|
32
|
+
* haven't shipped the allowlist bump yet.
|
|
33
|
+
*/
|
|
34
|
+
const MCP_TOOL_DESCRIPTIONS: Record<string, string> = {
|
|
35
|
+
// agent-config — every agent's self-service surface (#1163, #1215)
|
|
36
|
+
"mcp__agent-config__config_get": "Read its own merged config",
|
|
37
|
+
"mcp__agent-config__cron_list": "List its own scheduled tasks",
|
|
38
|
+
"mcp__agent-config__skill_list": "List its own installed skills",
|
|
39
|
+
"mcp__agent-config__audit_tail": "Read its own recent tool-call audit log",
|
|
40
|
+
"mcp__agent-config__peers_list": "List the other agents on this instance",
|
|
41
|
+
"mcp__agent-config__schedule_add": "Add a scheduled task to its own cron",
|
|
42
|
+
"mcp__agent-config__schedule_remove": "Remove one of its own scheduled tasks",
|
|
43
|
+
"mcp__agent-config__skill_install": "Install a bundled skill onto itself",
|
|
44
|
+
"mcp__agent-config__skill_remove": "Remove one of its own installed skills",
|
|
45
|
+
// hostd — admin-flagged agents' fleet-management surface (#1175, #1215)
|
|
46
|
+
"mcp__hostd__agent_restart": "Restart an agent in the fleet",
|
|
47
|
+
"mcp__hostd__agent_start": "Start a stopped agent in the fleet",
|
|
48
|
+
"mcp__hostd__agent_stop": "Stop a running agent in the fleet",
|
|
49
|
+
"mcp__hostd__agent_logs": "Read another agent's container logs",
|
|
50
|
+
"mcp__hostd__agent_exec": "Run a read-only inspection inside another agent",
|
|
51
|
+
"mcp__hostd__update_check": "Check what a fleet-wide update would do",
|
|
52
|
+
"mcp__hostd__update_apply": "Apply a fleet-wide update (pull + recreate)",
|
|
53
|
+
// hindsight — memory
|
|
54
|
+
"mcp__hindsight__recall": "Recall relevant memories",
|
|
55
|
+
"mcp__hindsight__retain": "Retain a memory",
|
|
56
|
+
"mcp__hindsight__reflect": "Reflect across its memory bank",
|
|
57
|
+
};
|
|
58
|
+
|
|
20
59
|
/**
|
|
21
60
|
* Build a title fragment for a permission prompt. Returns the toolName
|
|
22
61
|
* for any tool we don't recognise — the helper is intentionally
|
|
@@ -27,6 +66,23 @@ export function summarizeToolForTitle(
|
|
|
27
66
|
toolName: string,
|
|
28
67
|
inputPreview: string | undefined,
|
|
29
68
|
): string {
|
|
69
|
+
// MCP tools: `mcp__<server>__<verb>`. Prefer a curated human
|
|
70
|
+
// description (so the card reads "Read its own merged config"
|
|
71
|
+
// instead of "mcp__agent-config__config_get"). Fall through to a
|
|
72
|
+
// generic `<server>: <verb-with-spaces>` shape for unknown MCP
|
|
73
|
+
// tools and finally to the raw name when even that fails.
|
|
74
|
+
if (toolName.startsWith("mcp__")) {
|
|
75
|
+
const curated = MCP_TOOL_DESCRIPTIONS[toolName];
|
|
76
|
+
if (curated) return curated;
|
|
77
|
+
const parts = toolName.split("__");
|
|
78
|
+
if (parts.length >= 3) {
|
|
79
|
+
const server = parts[1]!;
|
|
80
|
+
const verb = parts.slice(2).join("__").replace(/_/g, " ");
|
|
81
|
+
return `${server}: ${verb}`;
|
|
82
|
+
}
|
|
83
|
+
return toolName;
|
|
84
|
+
}
|
|
85
|
+
|
|
30
86
|
const input = parseInput(inputPreview);
|
|
31
87
|
if (!input) return toolName;
|
|
32
88
|
|
|
@@ -17,11 +17,13 @@
|
|
|
17
17
|
|
|
18
18
|
import { readFileSync, existsSync } from "fs";
|
|
19
19
|
import { join } from "path";
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
20
|
+
|
|
21
|
+
// RFC H: per-account quota state moved to switchroom-auth-broker
|
|
22
|
+
// (state/auth-broker/quota.json). The gateway's in-process cache
|
|
23
|
+
// below is still useful for sub-second formatting, but the disk-
|
|
24
|
+
// persistence layer that account-quota-store provided is gone —
|
|
25
|
+
// the broker owns the canonical store and exposes it via
|
|
26
|
+
// `list-state`. Disk hydrate / disk persist below are no-ops.
|
|
25
27
|
|
|
26
28
|
/**
|
|
27
29
|
* OAuth beta flag — proves the request is coming from a subscription client.
|
|
@@ -350,20 +352,10 @@ export async function fetchAccountQuota(
|
|
|
350
352
|
timeoutMs: opts.timeoutMs,
|
|
351
353
|
});
|
|
352
354
|
accountQuotaCache.set(label, { fetchedAt: now, result });
|
|
353
|
-
//
|
|
354
|
-
// re-hydrate
|
|
355
|
-
//
|
|
356
|
-
|
|
357
|
-
try {
|
|
358
|
-
writeAccountQuota(
|
|
359
|
-
label,
|
|
360
|
-
snapshotFromQuotaUtilization(result.data, new Date(now)),
|
|
361
|
-
opts.home,
|
|
362
|
-
);
|
|
363
|
-
} catch {
|
|
364
|
-
/* best-effort */
|
|
365
|
-
}
|
|
366
|
-
}
|
|
355
|
+
// Note: pre-RFC-H this also persisted to disk via writeAccountQuota
|
|
356
|
+
// (#708) so a gateway restart could re-hydrate without an API call.
|
|
357
|
+
// Post-RFC-H the broker holds canonical quota state and answers
|
|
358
|
+
// via `list-state`, so the gateway's in-process cache is enough.
|
|
367
359
|
return result;
|
|
368
360
|
}
|
|
369
361
|
|
|
@@ -381,29 +373,15 @@ export async function fetchAccountQuota(
|
|
|
381
373
|
* prefetch will replace it on the next tap.
|
|
382
374
|
*/
|
|
383
375
|
export function hydrateAccountQuotaCacheFromDisk(
|
|
384
|
-
|
|
385
|
-
|
|
376
|
+
_labels: ReadonlyArray<string>,
|
|
377
|
+
_home?: string,
|
|
386
378
|
): void {
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
const result: QuotaResult = {
|
|
394
|
-
ok: true,
|
|
395
|
-
data: {
|
|
396
|
-
fiveHourUtilizationPct: snap.fiveHourPct ?? 0,
|
|
397
|
-
sevenDayUtilizationPct: snap.sevenDayPct ?? 0,
|
|
398
|
-
fiveHourResetAt: snap.fiveHourResetAt ? new Date(snap.fiveHourResetAt) : null,
|
|
399
|
-
sevenDayResetAt: snap.sevenDayResetAt ? new Date(snap.sevenDayResetAt) : null,
|
|
400
|
-
representativeClaim: null,
|
|
401
|
-
overageStatus: null,
|
|
402
|
-
overageDisabledReason: null,
|
|
403
|
-
},
|
|
404
|
-
};
|
|
405
|
-
accountQuotaCache.set(label, { fetchedAt, result });
|
|
406
|
-
}
|
|
379
|
+
// No-op post-RFC-H. The disk-snapshot store this function used to
|
|
380
|
+
// re-hydrate from (per-account quota.json files under
|
|
381
|
+
// ~/.switchroom/accounts/<label>/) is gone — switchroom-auth-broker
|
|
382
|
+
// now owns canonical quota state. Boot-time hydration is the
|
|
383
|
+
// broker's `list-state` call instead. Signature preserved so
|
|
384
|
+
// existing call sites continue to compile while we phase them out.
|
|
407
385
|
}
|
|
408
386
|
|
|
409
387
|
/** Test/utility helper — wipe the per-account quota cache. The
|
|
@@ -24,7 +24,6 @@ const entries = [
|
|
|
24
24
|
{ src: "server.ts", out: "server.js", label: "server (legacy + dual-mode shim)" },
|
|
25
25
|
{ src: "gateway/gateway.ts", out: "gateway/gateway.js", label: "gateway (persistent service)" },
|
|
26
26
|
{ src: "bridge/bridge.ts", out: "bridge/bridge.js", label: "bridge (MCP proxy)" },
|
|
27
|
-
{ src: "foreman/foreman.ts", out: "foreman/foreman.js", label: "foreman (admin bot)" },
|
|
28
27
|
];
|
|
29
28
|
|
|
30
29
|
for (const { src, out, label } of entries) {
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Shared bot runtime helpers — extracted from gateway.ts
|
|
3
|
-
*
|
|
4
|
-
*
|
|
2
|
+
* Shared bot runtime helpers — extracted from gateway.ts as a reusable
|
|
3
|
+
* core that callers can build on without duplicating the boilerplate.
|
|
4
|
+
* Used today by the per-agent gateway; historically also by the
|
|
5
|
+
* standalone foreman bot before its retirement.
|
|
5
6
|
*
|
|
6
7
|
* What lives here:
|
|
7
8
|
* - `createRobustApiCall` — thin re-export of createRetryApiCall pre-wired
|
|
@@ -361,7 +362,7 @@ export async function runPollingLoop(
|
|
|
361
362
|
|
|
362
363
|
/**
|
|
363
364
|
* Returns true if the sender's user ID is in the allowFrom list.
|
|
364
|
-
* Used by
|
|
365
|
+
* Used by the gateway for sender-allowlist auth gating.
|
|
365
366
|
*/
|
|
366
367
|
export function isAllowedSender(ctx: Context, allowFrom: string[]): boolean {
|
|
367
368
|
const from = ctx.from
|