switchroom 0.8.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/README.md +54 -61
  2. package/bin/timezone-hook.sh +9 -7
  3. package/dist/agent-scheduler/index.js +285 -45
  4. package/dist/auth-broker/index.js +13932 -0
  5. package/dist/cli/drive-write-pretool.mjs +5418 -0
  6. package/dist/cli/switchroom.js +8890 -5560
  7. package/dist/host-control/main.js +582 -43
  8. package/dist/vault/approvals/kernel-server.js +276 -47
  9. package/dist/vault/broker/server.js +333 -69
  10. package/examples/minimal.yaml +63 -0
  11. package/examples/personal-google-workspace-mcp/.env.example +34 -0
  12. package/examples/personal-google-workspace-mcp/README.md +194 -0
  13. package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
  14. package/examples/switchroom.yaml +220 -0
  15. package/package.json +6 -4
  16. package/profiles/_base/start.sh.hbs +3 -3
  17. package/profiles/_shared/agent-self-service.md.hbs +126 -0
  18. package/profiles/default/CLAUDE.md +10 -0
  19. package/profiles/default/CLAUDE.md.hbs +16 -0
  20. package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
  21. package/skills/buildkite-agent-runtime/SKILL.md +44 -11
  22. package/skills/buildkite-api/SKILL.md +31 -8
  23. package/skills/buildkite-cli/SKILL.md +27 -9
  24. package/skills/buildkite-migration/SKILL.md +22 -9
  25. package/skills/buildkite-pipelines/SKILL.md +26 -9
  26. package/skills/buildkite-secure-delivery/SKILL.md +23 -9
  27. package/skills/buildkite-test-engine/SKILL.md +25 -8
  28. package/skills/docx/SKILL.md +1 -1
  29. package/skills/file-bug/SKILL.md +34 -6
  30. package/skills/humanizer/SKILL.md +15 -0
  31. package/skills/humanizer-calibrate/SKILL.md +7 -1
  32. package/skills/mcp-builder/SKILL.md +1 -1
  33. package/skills/pdf/SKILL.md +1 -1
  34. package/skills/pptx/SKILL.md +1 -1
  35. package/skills/skill-creator/SKILL.md +21 -1
  36. package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
  37. package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
  38. package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
  39. package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
  40. package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
  41. package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
  42. package/skills/switchroom-cli/SKILL.md +63 -64
  43. package/skills/switchroom-health/SKILL.md +23 -10
  44. package/skills/switchroom-install/SKILL.md +3 -3
  45. package/skills/switchroom-manage/SKILL.md +26 -19
  46. package/skills/switchroom-runtime/SKILL.md +67 -15
  47. package/skills/switchroom-status/SKILL.md +26 -1
  48. package/skills/telegram-test-harness/SKILL.md +3 -0
  49. package/skills/webapp-testing/SKILL.md +31 -1
  50. package/skills/xlsx/SKILL.md +1 -1
  51. package/telegram-plugin/admin-commands/dispatch.test.ts +1 -1
  52. package/telegram-plugin/admin-commands/index.ts +9 -5
  53. package/telegram-plugin/auth-snapshot-format.ts +612 -0
  54. package/telegram-plugin/auto-fallback-fleet.ts +215 -0
  55. package/telegram-plugin/auto-fallback.ts +28 -301
  56. package/telegram-plugin/dist/gateway/gateway.js +17453 -15100
  57. package/telegram-plugin/fleet-fallback-gate.ts +105 -0
  58. package/telegram-plugin/gateway/approval-callback.test.ts +104 -0
  59. package/telegram-plugin/gateway/approval-callback.ts +31 -3
  60. package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
  61. package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
  62. package/telegram-plugin/gateway/auth-command.ts +905 -0
  63. package/telegram-plugin/gateway/auth-line.ts +123 -0
  64. package/telegram-plugin/gateway/auth-status-adapter.ts +101 -0
  65. package/telegram-plugin/gateway/boot-card.ts +23 -37
  66. package/telegram-plugin/gateway/boot-probes.ts +9 -12
  67. package/telegram-plugin/gateway/diff-preview-card.test.ts +192 -0
  68. package/telegram-plugin/gateway/diff-preview-card.ts +170 -0
  69. package/telegram-plugin/gateway/drive-write-approval.test.ts +312 -0
  70. package/telegram-plugin/gateway/drive-write-approval.ts +243 -0
  71. package/telegram-plugin/gateway/folder-picker-handler.test.ts +314 -0
  72. package/telegram-plugin/gateway/folder-picker-handler.ts +348 -0
  73. package/telegram-plugin/gateway/gateway.ts +1156 -938
  74. package/telegram-plugin/gateway/hostd-dispatch.ts +244 -0
  75. package/telegram-plugin/gateway/ipc-protocol.ts +83 -2
  76. package/telegram-plugin/gateway/ipc-server.ts +69 -0
  77. package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +103 -12
  78. package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
  79. package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
  80. package/telegram-plugin/model-unavailable.ts +28 -12
  81. package/telegram-plugin/permission-title.ts +56 -0
  82. package/telegram-plugin/quota-check.ts +19 -41
  83. package/telegram-plugin/scripts/build.mjs +0 -1
  84. package/telegram-plugin/shared/bot-runtime.ts +5 -4
  85. package/telegram-plugin/silence-poke.ts +153 -1
  86. package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
  87. package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
  88. package/telegram-plugin/tests/auth-command-format2.test.ts +156 -0
  89. package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
  90. package/telegram-plugin/tests/auth-snapshot-format.test.ts +429 -0
  91. package/telegram-plugin/tests/auth-status-adapter.test.ts +129 -0
  92. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +211 -0
  93. package/telegram-plugin/tests/auto-fallback.test.ts +60 -358
  94. package/telegram-plugin/tests/boot-probes.test.ts +27 -22
  95. package/telegram-plugin/tests/fleet-fallback-gate.test.ts +197 -0
  96. package/telegram-plugin/tests/model-unavailable.test.ts +30 -5
  97. package/telegram-plugin/tests/permission-title.test.ts +31 -0
  98. package/telegram-plugin/tests/quota-check.test.ts +5 -35
  99. package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +212 -2
  100. package/telegram-plugin/tests/silence-poke.test.ts +237 -0
  101. package/telegram-plugin/tests/turn-flush-safety.test.ts +112 -0
  102. package/telegram-plugin/turn-flush-safety.ts +55 -1
  103. package/telegram-plugin/uat/SETUP.md +35 -1
  104. package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
  105. package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
  106. package/telegram-plugin/uat/runners/report.ts +150 -0
  107. package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
  108. package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
  109. package/telegram-plugin/uat/runners/scorer.ts +106 -0
  110. package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
  111. package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
  112. package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +7 -1
  113. package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +7 -1
  114. package/telegram-plugin/auth-dashboard.ts +0 -1104
  115. package/telegram-plugin/auth-slot-parser.ts +0 -497
  116. package/telegram-plugin/auto-fallback-dispatcher.ts +0 -68
  117. package/telegram-plugin/dist/foreman/foreman.js +0 -31358
  118. package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
  119. package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
  120. package/telegram-plugin/foreman/foreman.ts +0 -1165
  121. package/telegram-plugin/foreman/setup-flow.ts +0 -345
  122. package/telegram-plugin/foreman/setup-state.ts +0 -239
  123. package/telegram-plugin/foreman/state.ts +0 -203
  124. package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
  125. package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
  126. package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
  127. package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
  128. package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
  129. package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
  130. package/telegram-plugin/tests/auto-fallback-dispatcher.e2e.test.ts +0 -183
  131. package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
  132. package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
  133. package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
  134. package/telegram-plugin/tests/foreman-state.test.ts +0 -164
  135. package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
  136. package/telegram-plugin/tests/setup-flow.test.ts +0 -510
  137. package/telegram-plugin/tests/setup-state.test.ts +0 -146
@@ -0,0 +1,244 @@
1
+ /**
2
+ * Hostd dispatch helpers for the gateway's self-restart slash-commands
3
+ * (#1175 RFC C, Phase 2). When the operator has opted into
4
+ * `host_control.enabled: true`, /restart, /new, /reset, and
5
+ * /update apply route through the per-agent hostd UDS instead of the
6
+ * in-container `spawnSwitchroomDetached` shellout.
7
+ *
8
+ * Rationale: in docker-mode (the v0.7+ default) the agent container
9
+ * has no docker binary and no `/var/run/docker.sock` — so the
10
+ * spawn-path verbs fail with exit-127 the moment they touch compose.
11
+ * Hostd runs on the host with the docker socket mounted, so the verbs
12
+ * actually work.
13
+ *
14
+ * Extracted from gateway.ts for unit-testability — gateway.ts itself
15
+ * has too many boot-time side-effects to import directly in a test.
16
+ */
17
+ import { existsSync } from "node:fs";
18
+ import { randomBytes } from "node:crypto";
19
+ import { hostdRequest } from "../../src/host-control/client.js";
20
+ import type {
21
+ HostdRequest,
22
+ HostdResponse,
23
+ } from "../../src/host-control/protocol.js";
24
+ import { loadConfig as loadSwitchroomConfig } from "../../src/config/loader.js";
25
+
26
/** Memoised answer of {@link isHostdEnabled}; `undefined` until first probed. */
let _hostdEnabled: boolean | undefined;

/**
 * Report whether the resolved switchroom config has
 * `host_control.enabled` set to exactly `true`.
 *
 * The answer is computed once and then served from a module-level
 * cache: config is not expected to change without a restart, and
 * loading it costs a file read.
 *
 * Best-effort: when loading the config throws, the answer is `false`
 * (and that `false` is cached too), so callers fall through to the
 * legacy spawn path.
 */
export function isHostdEnabled(): boolean {
  const cached = _hostdEnabled;
  if (cached !== undefined) return cached;

  let enabled: boolean;
  try {
    enabled = loadSwitchroomConfig().host_control?.enabled === true;
  } catch {
    enabled = false;
  }
  _hostdEnabled = enabled;
  return enabled;
}

/** @internal Drop the memoised answer so tests can swap config and re-probe. */
export function _resetHostdEnabledCache(): void {
  _hostdEnabled = undefined;
}
52
+
53
/** Build the path of the per-agent hostd socket for `agentName`. */
export function hostdSocketPath(agentName: string): string {
  const agentDir = `/run/switchroom/hostd/${agentName}`;
  return `${agentDir}/sock`;
}
56
+
57
/**
 * Decide whether a verb for `agentName` will be routed through hostd.
 *
 * Both conditions must hold: host_control is enabled in config, and the
 * per-agent socket path exists on disk. This says nothing about whether
 * the wire call will succeed — that is only known after attempting it.
 *
 * Callers use the answer to decide whether to skip docker-availability
 * preflight guards, since hostd doesn't need in-container docker.
 */
export function hostdWillBeUsed(agentName: string): boolean {
  return isHostdEnabled() && existsSync(hostdSocketPath(agentName));
}
69
+
70
/**
 * Send one request to the per-agent hostd socket.
 *
 * @param agentName Agent whose hostd socket should receive the request.
 * @param req       Request frame to send; its `request_id` and `op` are
 *                  echoed into the stderr log line on failure.
 * @returns
 *   - `"not-configured"` — hostd is disabled in config OR the per-agent
 *     socket isn't bound. Callers should fall back to the legacy
 *     `spawnSwitchroomDetached` path.
 *   - `HostdResponse` — hostd was contacted. Callers branch on
 *     `resp.result`. Wire errors (ECONNREFUSED, timeout, bad frame)
 *     are synthesized into a `result: "error"` response so callers
 *     don't need a separate try/catch around the failure.
 *
 * Deliberately no silent fallback to spawn when hostd is configured-on
 * but returns error/denied: the operator opted in, so masking failures
 * would just confuse them about why the verb didn't actually run.
 */
export async function tryHostdDispatch(
  agentName: string,
  req: HostdRequest,
): Promise<HostdResponse | "not-configured"> {
  if (!isHostdEnabled()) return "not-configured";
  const sockPath = hostdSocketPath(agentName);
  if (!existsSync(sockPath)) return "not-configured";
  try {
    return await hostdRequest(
      { socketPath: sockPath, timeoutMs: 5000 },
      req,
    );
  } catch (err) {
    // A rejection is not guaranteed to be an Error instance; fall back
    // to String() so the log line and the synthesized response never
    // read "undefined".
    const detail = err instanceof Error ? err.message : String(err);
    process.stderr.write(
      `telegram gateway: hostd dispatch failed ` +
        `(request_id=${req.request_id} op=${req.op}): ` +
        `${detail}\n`,
    );
    return {
      v: 1,
      request_id: req.request_id,
      result: "error",
      exit_code: null,
      duration_ms: 0,
      error: `hostd wire error: ${detail}`,
    };
  }
}
114
+
115
/**
 * Mint a request id of the form `<prefix>-<epoch ms>-<8 hex chars>`,
 * where the hex suffix comes from 4 random bytes.
 */
export function hostdRequestId(prefix: string): string {
  const stampMs = Date.now();
  const nonce = randomBytes(4).toString("hex");
  return `${prefix}-${stampMs}-${nonce}`;
}
118
+
119
/**
 * Repeatedly ask hostd (`get_status`) about a previously issued request
 * until it reports a terminal result (`completed` / `error` / `denied`)
 * or the caller's timeout runs out.
 *
 * Why this exists: the long-running mutating verbs (`update_apply`,
 * `apply`) answer `result: "started"` right away and do the work in a
 * detached child on the daemon side. A caller that acked "started" to
 * the operator would otherwise never learn about a failure that happens
 * before the recreate (image-pull error, scaffold regeneration crash,
 * etc.) — the gateway dies if recreate succeeds but stays alive and
 * silent if it fails.
 *
 * Behaviour:
 *   - Waits one interval before the first poll, then polls every
 *     `opts.intervalMs` ms (default 2000 per RFC C §5.3).
 *   - Once `opts.timeoutMs` has elapsed, returns a synthesized
 *     `result: "error"` response describing the timeout. Treat it as
 *     inconclusive — for `update_apply` specifically, a timeout often
 *     means the recreate succeeded and killed the gateway; the *new*
 *     gateway's post-restart greeting card is the true success signal.
 *   - Any terminal result is returned immediately. Wire errors arrive
 *     from {@link tryHostdDispatch} already shaped as `result: "error"`,
 *     so they end the poll too; there is deliberately no retry on
 *     transient wire failures.
 *   - Returns `"not-configured"` straight away when hostd is disabled
 *     or the socket is absent, and also when the socket disappears
 *     between polls.
 */
export async function pollHostdStatus(
  agentName: string,
  targetRequestId: string,
  opts: {
    /** Hard cap. update_apply: 60_000; apply: 30_000. */
    timeoutMs: number;
    /** Default 2000. */
    intervalMs?: number;
    /** Test seam — defaults to `Date.now`. */
    now?: () => number;
    /** Test seam — defaults to `setTimeout`. */
    sleep?: (ms: number) => Promise<void>;
  },
): Promise<HostdResponse | "not-configured"> {
  if (!isHostdEnabled() || !existsSync(hostdSocketPath(agentName))) {
    return "not-configured";
  }

  const clock = opts.now ?? Date.now;
  const pause =
    opts.sleep ??
    ((ms: number) =>
      new Promise<void>((wake) => {
        setTimeout(wake, ms);
      }));
  const pollEveryMs = opts.intervalMs ?? 2000;
  const deadline = clock() + opts.timeoutMs;

  // The caller has only just sent the kick-off request; let the daemon
  // begin work before asking about it.
  await pause(pollEveryMs);

  for (;;) {
    if (!(clock() < deadline)) break;

    const resp = await tryHostdDispatch(agentName, {
      v: 1,
      op: "get_status",
      request_id: hostdRequestId("gw-poll"),
      args: { target_request_id: targetRequestId },
    });

    // Socket vanished mid-poll (daemon stopped). Hand that back as-is so
    // callers can tell it apart from an error on the target request.
    if (resp === "not-configured") return resp;

    // get_status reports the target request's own result, so a terminal
    // value is the final answer. Returning it at once (rather than
    // retrying on error/denied) keeps real failures from being masked
    // as a misleading timeout.
    switch (resp.result) {
      case "completed":
      case "error":
      case "denied":
        return resp;
    }

    // Still "started": the daemon's mutation hasn't finished yet.
    await pause(pollEveryMs);
  }

  return {
    v: 1,
    request_id: hostdRequestId("gw-poll-timeout"),
    result: "error",
    exit_code: null,
    duration_ms: opts.timeoutMs,
    error:
      `hostd poll timeout after ${opts.timeoutMs}ms waiting for ` +
      `target_request_id=${targetRequestId}`,
  };
}
221
+
222
/** Verbs for which the legacy-spawn notice has already been written in this process. */
const _deprecationSeen = new Set<string>();

/**
 * Write a one-line deprecation notice to stderr when `verb` is being
 * dispatched via the legacy spawn path because hostd is not enabled.
 *
 * Quiet by design: nothing is written when hostd is enabled, and each
 * verb is reported at most once per process. The notice goes to stderr
 * only. RFC C §7 Phase 2 → Phase 3.
 */
export function warnLegacySpawnIfHostdDisabled(verb: string): void {
  if (isHostdEnabled() || _deprecationSeen.has(verb)) return;
  _deprecationSeen.add(verb);

  const notice = [
    `telegram gateway: spawnSwitchroomDetached(${verb}) — set `,
    `host_control.enabled: true and run \`switchroom hostd install\` `,
    `to route through audited hostd. Legacy path scheduled for `,
    `removal in v0.10 (RFC C Phase 3).\n`,
  ].join("");
  process.stderr.write(notice);
}

/** @internal Forget which verbs were already warned about so tests can re-assert behaviour. */
export function _resetDeprecationSeen(): void {
  _deprecationSeen.clear();
}
@@ -59,12 +59,47 @@ export interface ScheduleRestartResult {
59
59
  error?: string;
60
60
  }
61
61
 
62
+ /**
63
+ * RFC E §4.2 Cut 2 — sent by the gateway to acknowledge that a
64
+ * Drive-write approval card has been posted (or that posting
65
+ * failed). The Drive-write PreToolUse hook (a separate process)
66
+ * uses the `request_id` to poll the kernel's `approval_lookup` for
67
+ * the verdict; if posting fails, the hook fails closed.
68
+ *
69
+ * Why response-shaped: the hook is synchronous from Claude Code's
70
+ * perspective (PreToolUse blocks the tool call). The hook can't
71
+ * return its `decision: "approve" | "block"` until either the
72
+ * card has been posted (so the user can decide) OR posting failed
73
+ * (so the hook can return block immediately). A response message
74
+ * is the cleanest way to surface that.
75
+ */
76
+ export interface DriveApprovalPostedEvent {
77
+ type: "drive_approval_posted";
78
+ /** Same correlation_id the client sent on the request. */
79
+ correlationId: string;
80
+ ok: boolean;
81
+ /**
82
+ * Kernel request_id the hook will pass to `approval_lookup` once
83
+ * it starts polling. Only present when `ok: true`.
84
+ */
85
+ requestId?: string;
86
+ /**
87
+ * Unix-ms expiry of the kernel request, mirrors the ttl_ms the
88
+ * gateway used. Hook uses this as its polling deadline. Only
89
+ * present when `ok: true`.
90
+ */
91
+ expiresAtMs?: number;
92
+ /** Diagnostic detail on failure. */
93
+ reason?: string;
94
+ }
95
+
62
96
  export type GatewayToClient =
63
97
  | InboundMessage
64
98
  | PermissionEvent
65
99
  | StatusEvent
66
100
  | ToolCallResult
67
- | ScheduleRestartResult;
101
+ | ScheduleRestartResult
102
+ | DriveApprovalPostedEvent;
68
103
 
69
104
  // === Bridge (Client) -> Gateway messages ===
70
105
 
@@ -189,6 +224,51 @@ export interface InjectInboundMessage {
189
224
  inbound: InboundMessage;
190
225
  }
191
226
 
227
+ /**
228
+ * RFC E §4.2 Cut 2 — sent by the Drive-write PreToolUse hook to
229
+ * the gateway to register a diff-preview approval card with the
230
+ * kernel + post it to Telegram. The hook waits on the
231
+ * corresponding `drive_approval_posted` reply (matching
232
+ * `correlationId`), then polls `approval_lookup` for the verdict.
233
+ *
234
+ * The `preview` payload is shaped like
235
+ * `src/drive/diff-preview.ts:DiffPreviewInput`. We don't restate
236
+ * the full shape on the wire — the IPC validator does a structural
237
+ * check (required fields present, types right) and the gateway-side
238
+ * consumer feeds it straight to `buildDiffPreview()` which is
239
+ * already defensive against malformed inputs.
240
+ *
241
+ * Trust model: same as `inject_inbound` — the gateway socket lives
242
+ * inside the agent container, only that-UID processes can connect,
243
+ * so the hook is as trusted as anything else in the container.
244
+ */
245
+ export interface RequestDriveApprovalMessage {
246
+ type: "request_drive_approval";
247
+ /**
248
+ * Hook-generated correlation id (any unique string ≤ 64 chars).
249
+ * Echoed back in `drive_approval_posted` so the hook can match
250
+ * the response if multiple Drive-write taps are in flight.
251
+ */
252
+ correlationId: string;
253
+ /**
254
+ * Target agent the gateway serves. Defense in depth — the gateway
255
+ * verifies this matches its own SWITCHROOM_AGENT_NAME and refuses
256
+ * cross-agent requests.
257
+ */
258
+ agentName: string;
259
+ /**
260
+ * DiffPreviewInput payload — see `src/drive/diff-preview.ts`.
261
+ * Carried as an opaque object on the wire; the gateway
262
+ * deserialises it via `buildDiffPreview()`.
263
+ */
264
+ preview: Record<string, unknown>;
265
+ /**
266
+ * TTL for the kernel approval request, in ms. Hook typically
267
+ * passes 5 min; gateway clamps to a sensible range.
268
+ */
269
+ ttlMs?: number;
270
+ }
271
+
192
272
  export type ClientToGateway =
193
273
  | RegisterMessage
194
274
  | ToolCallMessage
@@ -199,4 +279,5 @@ export type ClientToGateway =
199
279
  | OperatorEventForward
200
280
  | PtyPartialForward
201
281
  | UpdatePlaceholderMessage
202
- | InjectInboundMessage;
282
+ | InjectInboundMessage
283
+ | RequestDriveApprovalMessage;
@@ -8,6 +8,7 @@ import type {
8
8
  PermissionRequestForward,
9
9
  PtyPartialForward,
10
10
  RegisterMessage,
11
+ RequestDriveApprovalMessage,
11
12
  ScheduleRestartMessage,
12
13
  SessionEventForward,
13
14
  ToolCallMessage,
@@ -40,6 +41,18 @@ export interface IpcServerOptions {
40
41
  * inline scheduler simply ignore inject_inbound messages.
41
42
  */
42
43
  onInjectInbound?: (client: IpcClient, msg: InjectInboundMessage) => void;
44
+ /**
45
+ * RFC E §4.2 Cut 2 — Drive-write PreToolUse hook asks the gateway
46
+ * to register a kernel approval request + post a diff-preview
47
+ * card to Telegram. Handler is expected to send a
48
+ * `drive_approval_posted` event back over the same connection
49
+ * (`client.send(...)`). Optional: gateways without the hook
50
+ * configured ignore these messages.
51
+ */
52
+ onRequestDriveApproval?: (
53
+ client: IpcClient,
54
+ msg: RequestDriveApprovalMessage,
55
+ ) => Promise<void>;
43
56
  log?: (msg: string) => void;
44
57
  /**
45
58
  * How long (in ms) to wait without a heartbeat before force-closing the
@@ -192,6 +205,23 @@ export function validateClientMessage(msg: unknown): msg is ClientToGateway {
192
205
  && typeof inb.meta === "object"
193
206
  && inb.meta !== null;
194
207
  }
208
+ case "request_drive_approval": {
209
+ // RFC E §4.2 Cut 2. Validate the wire-shaped fields the
210
+ // gateway will route on; the inner `preview` is treated as
211
+ // an opaque object and gets defensively re-validated by
212
+ // `buildDiffPreview()` downstream.
213
+ if (typeof m.correlationId !== "string"
214
+ || (m.correlationId as string).length === 0
215
+ || (m.correlationId as string).length > 64) return false;
216
+ if (typeof m.agentName !== "string"
217
+ || !AGENT_NAME_RE.test(m.agentName as string)) return false;
218
+ if (typeof m.preview !== "object" || m.preview === null) return false;
219
+ if (m.ttlMs !== undefined
220
+ && (typeof m.ttlMs !== "number"
221
+ || !Number.isFinite(m.ttlMs)
222
+ || (m.ttlMs as number) < 0)) return false;
223
+ return true;
224
+ }
195
225
  default:
196
226
  return false;
197
227
  }
@@ -210,6 +240,7 @@ export function createIpcServer(options: IpcServerOptions): IpcServer {
210
240
  onOperatorEvent,
211
241
  onPtyPartial,
212
242
  onInjectInbound,
243
+ onRequestDriveApproval,
213
244
  log = () => {},
214
245
  heartbeatTimeoutMs = 30_000,
215
246
  } = options;
@@ -298,6 +329,44 @@ export function createIpcServer(options: IpcServerOptions): IpcServer {
298
329
  case "inject_inbound":
299
330
  if (onInjectInbound) onInjectInbound(client, msg as InjectInboundMessage);
300
331
  break;
332
+ case "request_drive_approval":
333
+ if (onRequestDriveApproval) {
334
+ // Handler is async — fire-and-forget here; the handler
335
+ // is responsible for sending its `drive_approval_posted`
336
+ // response (success or failure) back to the client.
337
+ onRequestDriveApproval(client, msg as RequestDriveApprovalMessage).catch(
338
+ (err) => {
339
+ log(
340
+ `request_drive_approval handler threw (client=${client.id}): ${(err as Error).message}`,
341
+ );
342
+ try {
343
+ client.send({
344
+ type: "drive_approval_posted",
345
+ correlationId: (msg as RequestDriveApprovalMessage).correlationId,
346
+ ok: false,
347
+ reason: `gateway handler error: ${(err as Error).message}`,
348
+ });
349
+ } catch {
350
+ /* best effort */
351
+ }
352
+ },
353
+ );
354
+ } else {
355
+ // No handler wired — fail closed and tell the hook so it
356
+ // can fall back to blocking the tool. Better than leaving
357
+ // the hook timing out.
358
+ try {
359
+ client.send({
360
+ type: "drive_approval_posted",
361
+ correlationId: (msg as RequestDriveApprovalMessage).correlationId,
362
+ ok: false,
363
+ reason: "gateway not configured for Drive-write approval",
364
+ });
365
+ } catch {
366
+ /* best effort */
367
+ }
368
+ }
369
+ break;
301
370
  case "update_placeholder":
302
371
  // Legacy recall.py IPC — placeholder UX was removed in #553 PR 5.
303
372
  // Soft-accepted so recall.py keeps working without modifying
@@ -90,6 +90,85 @@ function emitContext(text) {
90
90
  process.stdout.write(JSON.stringify(payload) + '\n')
91
91
  }
92
92
 
93
/**
 * #1303: classify a tool_response as a failure. Only failures can have
 * hit a kernel sandbox boundary. Pre-fix the hook stringified the whole
 * tool_response and pattern-matched against it — that meant a SUCCESSFUL
 * Read/Edit/Bash whose payload merely MENTIONED "EROFS" or "Read-only
 * file system" (e.g. file content, code comments, grep results, the hook
 * source itself) tripped the advisory.
 *
 * Failure signals recognised on an object response:
 *   - `is_error === true`
 *   - `success === false`
 *   - any non-null `error` field
 *   - a numeric, non-zero `exit_code`
 *
 * A bare string response carries no structured failure marker, so it is
 * returned as a candidate as-is; the pattern match downstream decides
 * whether it warrants an advisory.
 *
 * @param {unknown} toolResponse The event's `tool_response` value.
 * @returns {{ kind: 'bare-string' | 'structured-failure', body: string } | null}
 *   `null` when the response is nullish, not a string/object, or shows
 *   no failure signal; otherwise the text to scan for kernel errors.
 */
function classifyFailure(toolResponse) {
  if (toolResponse == null) return null
  if (typeof toolResponse === 'string') {
    return { kind: 'bare-string', body: toolResponse }
  }
  if (typeof toolResponse !== 'object') return null
  const isError =
    toolResponse.is_error === true
    || toolResponse.success === false
    || toolResponse.error != null
    || (typeof toolResponse.exit_code === 'number'
      && toolResponse.exit_code !== 0)
  if (!isError) return null
  // Extract error-bearing fields only — never the full response. Empty
  // strings are skipped: an empty `stderr` used to count as "found an
  // error field", which suppressed the fallback below and produced an
  // empty body even though a failure had been signalled.
  const parts = []
  const collect = (value) => {
    if (typeof value === 'string' && value.length > 0) parts.push(value)
  }
  collect(toolResponse.error)
  collect(toolResponse.stderr)
  // For a failed command stdout may carry the relevant message alongside
  // stderr (some commands write errors to stdout).
  if (toolResponse.exit_code != null && toolResponse.exit_code !== 0) {
    collect(toolResponse.stdout)
  }
  // Fallback: failure was signalled but no error-bearing field carried
  // any text — stringify the structured response so an unusual tool
  // that puts the error under an unexpected key is not missed.
  if (parts.length === 0) {
    try { collect(JSON.stringify(toolResponse)) } catch { /* unprintable */ }
  }
  return { kind: 'structured-failure', body: parts.join('\n') }
}
151
+
152
/**
 * #1303 secondary defence: the advisory is only relevant to tools that
 * can write. Gating on the tool name inside the script means a broad
 * matcher in settings.json (e.g. ".*") cannot bring back false
 * positives from read-only tools such as Read/Grep/Glob/WebFetch.
 * Bash is listed because it is the canonical write surface (mkdir, rm,
 * install, apt, etc.).
 */
const WRITE_CAPABLE_TOOLS = new Set([
  'Edit',
  'MultiEdit',
  'Write',
  'NotebookEdit',
  'Bash',
])

/**
 * True when `toolName` is a string that is either one of the listed
 * write-capable tools or carries the `mcp__` prefix (MCP tools may
 * proxy writes). Anything else — including non-strings — is false.
 */
function isWriteCapableTool(toolName) {
  return typeof toolName === 'string'
    && (WRITE_CAPABLE_TOOLS.has(toolName) || toolName.startsWith('mcp__'))
}
171
+
93
172
  function main() {
94
173
  const raw = readStdin()
95
174
  if (!raw) return
@@ -101,18 +180,18 @@ function main() {
101
180
  return
102
181
  }
103
182
 
104
- // tool_response shape varies by tool — string for Bash, object with
105
- // file/oldString/newString for Edit/Write, etc. Stringify the whole
106
- // thing so we match against every nested error field at once. Cap the
107
- // scan window to keep memory bounded if the model just dumped a 10MB
108
- // log into the tool_response.
109
- let body
110
- try {
111
- body = JSON.stringify(evt.tool_response ?? '')
112
- } catch {
113
- return
114
- }
115
- if (!body) return
183
+ if (!isWriteCapableTool(evt.tool_name)) return
184
+
185
+ // #1303 primary fix: classify success vs failure FIRST. A successful
186
+ // tool can't have hit a kernel sandbox boundary by definition — its
187
+ // payload may mention EROFS / read-only-fs in benign content but
188
+ // that's not a kernel error.
189
+ const failure = classifyFailure(evt.tool_response)
190
+ if (failure == null) return
191
+
192
+ let body = failure.body
193
+ if (typeof body !== 'string') return
194
+ if (body.length === 0) return
116
195
  if (body.length > 64 * 1024) body = body.slice(0, 64 * 1024)
117
196
 
118
197
  for (const [pattern, key] of PATTERNS) {
@@ -123,6 +202,18 @@ function main() {
123
202
  }
124
203
  }
125
204
 
205
+ // Test-only export hooks. Node ESM doesn't expose internal symbols
206
+ // without a named export; tests import `__internals` and assert against
207
+ // `classifyFailure` / `isWriteCapableTool` directly. Production paths
208
+ // use `main()` and never touch this object.
209
+ export const __internals = {
210
+ classifyFailure,
211
+ isWriteCapableTool,
212
+ WRITE_CAPABLE_TOOLS,
213
+ PATTERNS,
214
+ buildHint,
215
+ }
216
+
126
217
  try {
127
218
  main()
128
219
  } catch {
@@ -111,6 +111,17 @@ export function computeLabel(toolName, input) {
111
111
  case 'KillBash':
112
112
  case 'KillShell':
113
113
  return 'Stopping background process'
114
+ case 'Skill': {
115
+ // The Skill tool's input is `{ skill: "<slug>", args?: "..." }`.
116
+ // We emit `Running skill <slug>` so downstream observers
117
+ // (notably the skill-coverage UAT runner at
118
+ // telegram-plugin/uat/runners/skill-coverage.ts) can tail the
119
+ // sidecar JSONL and recover which skill fired per turn —
120
+ // the progress card path that used to surface this was retired
121
+ // when `progressDriver` was nulled out in #1122 PR3.
122
+ const slug = clip(String(i.skill ?? ''), 64)
123
+ return slug ? `Running skill ${slug}` : null
124
+ }
114
125
  }
115
126
 
116
127
  // MCP allowlist.