npm - switchroom - Versions diffs - 0.8.1 → 0.11.0 - Mend

switchroom 0.8.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (137) hide show

package/README.md +54 -61
package/bin/timezone-hook.sh +9 -7
package/dist/agent-scheduler/index.js +285 -45
package/dist/auth-broker/index.js +13932 -0
package/dist/cli/drive-write-pretool.mjs +5418 -0
package/dist/cli/switchroom.js +8890 -5560
package/dist/host-control/main.js +582 -43
package/dist/vault/approvals/kernel-server.js +276 -47
package/dist/vault/broker/server.js +333 -69
package/examples/minimal.yaml +63 -0
package/examples/personal-google-workspace-mcp/.env.example +34 -0
package/examples/personal-google-workspace-mcp/README.md +194 -0
package/examples/personal-google-workspace-mcp/compose.yaml +66 -0
package/examples/switchroom.yaml +220 -0
package/package.json +6 -4
package/profiles/_base/start.sh.hbs +3 -3
package/profiles/_shared/agent-self-service.md.hbs +126 -0
package/profiles/default/CLAUDE.md +10 -0
package/profiles/default/CLAUDE.md.hbs +16 -0
package/skills/buildkite-agent-infrastructure/SKILL.md +30 -11
package/skills/buildkite-agent-runtime/SKILL.md +44 -11
package/skills/buildkite-api/SKILL.md +31 -8
package/skills/buildkite-cli/SKILL.md +27 -9
package/skills/buildkite-migration/SKILL.md +22 -9
package/skills/buildkite-pipelines/SKILL.md +26 -9
package/skills/buildkite-secure-delivery/SKILL.md +23 -9
package/skills/buildkite-test-engine/SKILL.md +25 -8
package/skills/docx/SKILL.md +1 -1
package/skills/file-bug/SKILL.md +34 -6
package/skills/humanizer/SKILL.md +15 -0
package/skills/humanizer-calibrate/SKILL.md +7 -1
package/skills/mcp-builder/SKILL.md +1 -1
package/skills/pdf/SKILL.md +1 -1
package/skills/pptx/SKILL.md +1 -1
package/skills/skill-creator/SKILL.md +21 -1
package/skills/skill-creator/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-313.pyc +0 -0
package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-313.pyc +0 -0
package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-313.pyc +0 -0
package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-313.pyc +0 -0
package/skills/skill-creator/scripts/__pycache__/utils.cpython-313.pyc +0 -0
package/skills/switchroom-cli/SKILL.md +63 -64
package/skills/switchroom-health/SKILL.md +23 -10
package/skills/switchroom-install/SKILL.md +3 -3
package/skills/switchroom-manage/SKILL.md +26 -19
package/skills/switchroom-runtime/SKILL.md +67 -15
package/skills/switchroom-status/SKILL.md +26 -1
package/skills/telegram-test-harness/SKILL.md +3 -0
package/skills/webapp-testing/SKILL.md +31 -1
package/skills/xlsx/SKILL.md +1 -1
package/telegram-plugin/admin-commands/dispatch.test.ts +1 -1
package/telegram-plugin/admin-commands/index.ts +9 -5
package/telegram-plugin/auth-snapshot-format.ts +612 -0
package/telegram-plugin/auto-fallback-fleet.ts +215 -0
package/telegram-plugin/auto-fallback.ts +28 -301
package/telegram-plugin/dist/gateway/gateway.js +17453 -15100
package/telegram-plugin/fleet-fallback-gate.ts +105 -0
package/telegram-plugin/gateway/approval-callback.test.ts +104 -0
package/telegram-plugin/gateway/approval-callback.ts +31 -3
package/telegram-plugin/gateway/auth-add-flow.ts +326 -0
package/telegram-plugin/gateway/auth-broker-client.ts +75 -0
package/telegram-plugin/gateway/auth-command.ts +905 -0
package/telegram-plugin/gateway/auth-line.ts +123 -0
package/telegram-plugin/gateway/auth-status-adapter.ts +101 -0
package/telegram-plugin/gateway/boot-card.ts +23 -37
package/telegram-plugin/gateway/boot-probes.ts +9 -12
package/telegram-plugin/gateway/diff-preview-card.test.ts +192 -0
package/telegram-plugin/gateway/diff-preview-card.ts +170 -0
package/telegram-plugin/gateway/drive-write-approval.test.ts +312 -0
package/telegram-plugin/gateway/drive-write-approval.ts +243 -0
package/telegram-plugin/gateway/folder-picker-handler.test.ts +314 -0
package/telegram-plugin/gateway/folder-picker-handler.ts +348 -0
package/telegram-plugin/gateway/gateway.ts +1156 -938
package/telegram-plugin/gateway/hostd-dispatch.ts +244 -0
package/telegram-plugin/gateway/ipc-protocol.ts +83 -2
package/telegram-plugin/gateway/ipc-server.ts +69 -0
package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +103 -12
package/telegram-plugin/hooks/tool-label-pretool.mjs +11 -0
package/telegram-plugin/hooks/wedge-detect-posttool.mjs +303 -0
package/telegram-plugin/model-unavailable.ts +28 -12
package/telegram-plugin/permission-title.ts +56 -0
package/telegram-plugin/quota-check.ts +19 -41
package/telegram-plugin/scripts/build.mjs +0 -1
package/telegram-plugin/shared/bot-runtime.ts +5 -4
package/telegram-plugin/silence-poke.ts +153 -1
package/telegram-plugin/tests/auth-add-flow.test.ts +559 -0
package/telegram-plugin/tests/auth-code-redact.test.ts +8 -4
package/telegram-plugin/tests/auth-command-format2.test.ts +156 -0
package/telegram-plugin/tests/auth-command-vernacular.test.ts +531 -0
package/telegram-plugin/tests/auth-snapshot-format.test.ts +429 -0
package/telegram-plugin/tests/auth-status-adapter.test.ts +129 -0
package/telegram-plugin/tests/auto-fallback-fleet.test.ts +211 -0
package/telegram-plugin/tests/auto-fallback.test.ts +60 -358
package/telegram-plugin/tests/boot-probes.test.ts +27 -22
package/telegram-plugin/tests/fleet-fallback-gate.test.ts +197 -0
package/telegram-plugin/tests/model-unavailable.test.ts +30 -5
package/telegram-plugin/tests/permission-title.test.ts +31 -0
package/telegram-plugin/tests/quota-check.test.ts +5 -35
package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +212 -2
package/telegram-plugin/tests/silence-poke.test.ts +237 -0
package/telegram-plugin/tests/turn-flush-safety.test.ts +112 -0
package/telegram-plugin/turn-flush-safety.ts +55 -1
package/telegram-plugin/uat/SETUP.md +35 -1
package/telegram-plugin/uat/runners/agent-self-sufficiency.ts +457 -0
package/telegram-plugin/uat/runners/paraphrases.ts +231 -0
package/telegram-plugin/uat/runners/report.ts +150 -0
package/telegram-plugin/uat/runners/run-agent-self-sufficiency.sh +50 -0
package/telegram-plugin/uat/runners/scorer.test.ts +196 -0
package/telegram-plugin/uat/runners/scorer.ts +106 -0
package/telegram-plugin/uat/runners/skill-coverage.test.ts +100 -0
package/telegram-plugin/uat/runners/skill-coverage.ts +620 -0
package/telegram-plugin/uat/scenarios/jtbd-interrupt-marker-dm.test.ts +7 -1
package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +7 -1
package/telegram-plugin/auth-dashboard.ts +0 -1104
package/telegram-plugin/auth-slot-parser.ts +0 -497
package/telegram-plugin/auto-fallback-dispatcher.ts +0 -68
package/telegram-plugin/dist/foreman/foreman.js +0 -31358
package/telegram-plugin/foreman/foreman-create-flow.ts +0 -202
package/telegram-plugin/foreman/foreman-handlers.ts +0 -493
package/telegram-plugin/foreman/foreman.ts +0 -1165
package/telegram-plugin/foreman/setup-flow.ts +0 -345
package/telegram-plugin/foreman/setup-state.ts +0 -239
package/telegram-plugin/foreman/state.ts +0 -203
package/telegram-plugin/tests/auth-account-identity-surface.test.ts +0 -118
package/telegram-plugin/tests/auth-dashboard-edge-cases.test.ts +0 -260
package/telegram-plugin/tests/auth-dashboard-restart-flow.test.ts +0 -140
package/telegram-plugin/tests/auth-dashboard-v3b.test.ts +0 -559
package/telegram-plugin/tests/auth-dashboard.test.ts +0 -1045
package/telegram-plugin/tests/auth-slot-commands.test.ts +0 -640
package/telegram-plugin/tests/auto-fallback-dispatcher.e2e.test.ts +0 -183
package/telegram-plugin/tests/boot-card-account-quota.test.ts +0 -137
package/telegram-plugin/tests/foreman-create-flow.test.ts +0 -359
package/telegram-plugin/tests/foreman-handlers.test.ts +0 -347
package/telegram-plugin/tests/foreman-state.test.ts +0 -164
package/telegram-plugin/tests/foreman-write-ops.test.ts +0 -214
package/telegram-plugin/tests/setup-flow.test.ts +0 -510
package/telegram-plugin/tests/setup-state.test.ts +0 -146

package/telegram-plugin/gateway/hostd-dispatch.ts ADDED Viewed

@@ -0,0 +1,244 @@
+/**
+ * Hostd dispatch helpers for the gateway's self-restart slash-commands
+ * (#1175 RFC C, Phase 2). When the operator has opted into
+ * `host_control.enabled: true`, /restart, /new, /reset, and
+ * /update apply route through the per-agent hostd UDS instead of the
+ * in-container `spawnSwitchroomDetached` shellout.
+ *
+ * Rationale: in docker-mode (the v0.7+ default) the agent container
+ * has no docker binary and no `/var/run/docker.sock` — so the
+ * spawn-path verbs fail with exit-127 the moment they touch compose.
+ * Hostd runs on the host with the docker socket mounted, so the verbs
+ * actually work.
+ *
+ * Extracted from gateway.ts for unit-testability — gateway.ts itself
+ * has too many boot-time side-effects to import directly in a test.
+ */
+import { existsSync } from "node:fs";
+import { randomBytes } from "node:crypto";
+import { hostdRequest } from "../../src/host-control/client.js";
+import type {
+  HostdRequest,
+  HostdResponse,
+} from "../../src/host-control/protocol.js";
+import { loadConfig as loadSwitchroomConfig } from "../../src/config/loader.js";
+let _hostdEnabled: boolean | undefined;
+/**
+ * Reads `host_control.enabled` from the resolved switchroom config.
+ * Cached for the gateway's lifetime — config doesn't change without a
+ * restart, and the file-read isn't free.
+ *
+ * Only a literal `true` counts as enabled; a missing `host_control`
+ * section or any other value yields false.
+ *
+ * Best-effort: if the config can't be loaded (gateway running in a
+ * dir where loadConfig fails), returns false so the dispatch helper
+ * falls through to the legacy spawn path. That false is cached like
+ * any other result, so a failed load is not retried until
+ * `_resetHostdEnabledCache()` clears the cache.
+ *
+ * @returns whether hostd routing is enabled in config.
+ */
+export function isHostdEnabled(): boolean {
+  if (_hostdEnabled !== undefined) return _hostdEnabled;
+  try {
+    const cfg = loadSwitchroomConfig();
+    _hostdEnabled = cfg.host_control?.enabled === true;
+  } catch {
+    _hostdEnabled = false;
+  }
+  return _hostdEnabled;
+}
+/** @internal Reset the cache so tests can swap config and re-probe. */
+export function _resetHostdEnabledCache(): void {
+  _hostdEnabled = undefined;
+}
+export function hostdSocketPath(agentName: string): string {
+  return `/run/switchroom/hostd/${agentName}/sock`;
+}
+/**
+ * True only when (a) host_control is enabled in config AND (b) the
+ * per-agent socket is bound on disk. Distinct from "will the wire call
+ * succeed" — that's only knowable after attempting it.
+ *
+ * Callers use this to decide *whether to skip docker-availability
+ * preflight guards* (since hostd doesn't need in-container docker).
+ */
+export function hostdWillBeUsed(agentName: string): boolean {
+  if (!isHostdEnabled()) return false;
+  return existsSync(hostdSocketPath(agentName));
+}
+/**
+ * Send one request to the per-agent hostd socket.
+ *
+ * Returns:
+ *   - `"not-configured"` — hostd is disabled in config OR the per-agent
+ *     socket isn't bound. Callers should fall back to the legacy
+ *     `spawnSwitchroomDetached` path.
+ *   - `HostdResponse` — hostd was contacted. Callers branch on
+ *     `resp.result`. Wire errors (ECONNREFUSED, timeout, bad frame)
+ *     are synthesized into a `result: "error"` response so callers
+ *     don't need a separate try/catch around the failure.
+ *
+ * Deliberately no silent fallback to spawn when hostd is configured-on
+ * but returns error/denied: the operator opted in, so masking failures
+ * would just confuse them about why the verb didn't actually run.
+ *
+ * Each request is handed to `hostdRequest` with `timeoutMs: 5000`. When
+ * `hostdRequest` throws, one line naming the request id and op is
+ * written to stderr, and the synthesized response echoes
+ * `req.request_id` with `exit_code: null`, `duration_ms: 0` and an
+ * `error` string prefixed with "hostd wire error: ".
+ *
+ * @param agentName agent whose socket path is built by `hostdSocketPath`.
+ * @param req request passed unchanged to `hostdRequest`.
+ */
+export async function tryHostdDispatch(
+  agentName: string,
+  req: HostdRequest,
+): Promise<HostdResponse | "not-configured"> {
+  if (!isHostdEnabled()) return "not-configured";
+  const sockPath = hostdSocketPath(agentName);
+  if (!existsSync(sockPath)) return "not-configured";
+  try {
+    return await hostdRequest(
+      { socketPath: sockPath, timeoutMs: 5000 },
+      req,
+    );
+  } catch (err) {
+    process.stderr.write(
+      `telegram gateway: hostd dispatch failed ` +
+        `(request_id=${req.request_id} op=${req.op}): ` +
+        `${(err as Error).message}\n`,
+    );
+    return {
+      v: 1,
+      request_id: req.request_id,
+      result: "error",
+      exit_code: null,
+      duration_ms: 0,
+      error: `hostd wire error: ${(err as Error).message}`,
+    };
+  }
+}
+export function hostdRequestId(prefix: string): string {
+  return `${prefix}-${Date.now()}-${randomBytes(4).toString("hex")}`;
+}
+/**
+ * Poll hostd's `get_status` verb until the target request reaches a
+ * terminal state (`completed` / `error` / `denied`) or the caller's
+ * timeout elapses.
+ *
+ * Motivation: the long-running mutating verbs (`update_apply`, `apply`)
+ * respond `result: "started"` immediately and run the work in a
+ * detached child on the daemon side. Without polling, callers that
+ * acked "started" to the operator have no way to surface a *fail
+ * before recreate* (image-pull error, scaffold regeneration crash,
+ * etc.) — the gateway dies if recreate succeeds, but stays alive and
+ * silent if it fails. Polling closes that observability hole.
+ *
+ * Behaviour:
+ *   - Polls every {@link opts.intervalMs} ms (default 2000 per RFC C §5.3).
+ *   - The first poll is sent only after one initial `intervalMs` wait,
+ *     and the deadline is checked before each poll — so a `timeoutMs`
+ *     no larger than `intervalMs` normally expires before any poll is
+ *     sent.
+ *   - Bails out after {@link opts.timeoutMs} with a synthesized
+ *     `result: "error"` response describing the timeout. Caller should
+ *     treat that as inconclusive — for `update_apply` specifically,
+ *     a timeout often means the recreate succeeded and killed the
+ *     gateway; the *new* gateway's post-restart greeting card is the
+ *     true success signal. The synthesized response carries a freshly
+ *     generated `gw-poll-timeout-…` request id; `targetRequestId`
+ *     appears only in its `error` text.
+ *   - Any terminal state from the daemon (`completed`/`error`/`denied`)
+ *     bails immediately and returns that response. Wire errors are
+ *     synthesized by {@link tryHostdDispatch} as `result: "error"`,
+ *     which also bails — there's no separate retry on transient wire
+ *     failures because (a) the daemon doesn't actually go down except
+ *     during a recreate that kills us anyway, and (b) waiting until
+ *     timeout to surface a clear error is worse UX than surfacing it
+ *     immediately.
+ *   - Returns immediately if hostd is unconfigured (treats as
+ *     `not-configured`, same as {@link tryHostdDispatch}). The same
+ *     value is returned if the socket disappears between polls.
+ */
+export async function pollHostdStatus(
+  agentName: string,
+  targetRequestId: string,
+  opts: {
+    /** Hard cap. update_apply: 60_000; apply: 30_000. */
+    timeoutMs: number;
+    /** Default 2000. */
+    intervalMs?: number;
+    /** Test seam — defaults to `Date.now`. */
+    now?: () => number;
+    /** Test seam — defaults to a promise-wrapped `setTimeout`. */
+    sleep?: (ms: number) => Promise<void>;
+  },
+): Promise<HostdResponse | "not-configured"> {
+  if (!isHostdEnabled()) return "not-configured";
+  const sockPath = hostdSocketPath(agentName);
+  if (!existsSync(sockPath)) return "not-configured";
+  const now = opts.now ?? Date.now;
+  const sleep =
+    opts.sleep ?? ((ms) => new Promise<void>((r) => setTimeout(r, ms)));
+  const intervalMs = opts.intervalMs ?? 2000;
+  const deadline = now() + opts.timeoutMs;
+  // Initial wait — the caller just sent the kick-off request. Give the
+  // daemon a tick to begin work before the first poll.
+  await sleep(intervalMs);
+  while (now() < deadline) {
+    const pollId = hostdRequestId("gw-poll");
+    const resp = await tryHostdDispatch(agentName, {
+      v: 1,
+      op: "get_status",
+      request_id: pollId,
+      args: { target_request_id: targetRequestId },
+    });
+    if (resp === "not-configured") {
+      // Socket disappeared mid-poll — daemon was stopped. Surface that
+      // distinctly from a target-request error so callers can decide
+      // whether to retry or bail.
+      return resp;
+    }
+    // get_status returns the StatusEntry's result, which IS the target
+    // request's result. Any terminal state (completed/error/denied) is
+    // the target's final answer — bail with it. The previous draft of
+    // this helper retried on `error`/`denied` in case the daemon was
+    // transiently busy; that policy masked real errors as
+    // "still polling" until the 60s cap, then synthesized a misleading
+    // "timeout" response. Bailing immediately surfaces the daemon's
+    // audit-log truth directly to the operator.
+    if (
+      resp.result === "completed" ||
+      resp.result === "error" ||
+      resp.result === "denied"
+    ) {
+      return resp;
+    }
+    // result: "started" — get_status reflects the latest StatusEntry,
+    // which is still `started` until the daemon's mutation finishes.
+    // Keep polling.
+    await sleep(intervalMs);
+  }
+  return {
+    v: 1,
+    request_id: hostdRequestId("gw-poll-timeout"),
+    result: "error",
+    exit_code: null,
+    duration_ms: opts.timeoutMs,
+    error:
+      `hostd poll timeout after ${opts.timeoutMs}ms waiting for ` +
+      `target_request_id=${targetRequestId}`,
+  };
+}
+/**
+ * Emit a one-line operator-visible deprecation warning when a verb that
+ * hostd supports is being dispatched via the legacy spawn path. Quiet
+ * by design — operators see it once per verb per process in journald,
+ * never in chat. RFC C §7 Phase 2 → Phase 3.
+ *
+ * The warning is written to stderr and is skipped entirely when
+ * `isHostdEnabled()` is true. The `_deprecationSeen` set declared just
+ * below records the verbs that have already been warned about.
+ *
+ * NOTE(review): the message names v0.10 as the removal release —
+ * confirm that is still the intended target.
+ */
+const _deprecationSeen = new Set<string>();
+export function warnLegacySpawnIfHostdDisabled(verb: string): void {
+  if (isHostdEnabled()) return;
+  if (_deprecationSeen.has(verb)) return;
+  _deprecationSeen.add(verb);
+  process.stderr.write(
+    `telegram gateway: spawnSwitchroomDetached(${verb}) — set ` +
+      `host_control.enabled: true and run \`switchroom hostd install\` ` +
+      `to route through audited hostd. Legacy path scheduled for ` +
+      `removal in v0.10 (RFC C Phase 3).\n`,
+  );
+}
+/** @internal Clear the set of already-warned verbs so tests can re-assert the once-per-verb warning. Only this set is cleared; the hostd-enabled cache is reset separately by `_resetHostdEnabledCache()`. */
+export function _resetDeprecationSeen(): void {
+  _deprecationSeen.clear();
+}

package/telegram-plugin/gateway/ipc-protocol.ts CHANGED Viewed

@@ -59,12 +59,47 @@ export interface ScheduleRestartResult {
   error?: string;
 }
+/**
+ * RFC E §4.2 Cut 2 — sent by the gateway to acknowledge that a
+ * Drive-write approval card has been posted (or that posting
+ * failed). The Drive-write PreToolUse hook (a separate process)
+ * uses the `request_id` to poll the kernel's `approval_lookup` for
+ * the verdict; if posting fails, the hook fails closed.
+ *
+ * Why response-shaped: the hook is synchronous from Claude Code's
+ * perspective (PreToolUse blocks the tool call). The hook can't
+ * return its `decision: "approve" | "block"` until either the
+ * card has been posted (so the user can decide) OR posting failed
+ * (so the hook can return block immediately). A response message
+ * is the cleanest way to surface that.
+ */
+export interface DriveApprovalPostedEvent {
+  type: "drive_approval_posted";
+  /** Same correlation_id the client sent on the request. */
+  correlationId: string;
+  ok: boolean;
+  /**
+   * Kernel request_id the hook will pass to `approval_lookup` once
+   * it starts polling. Only present when `ok: true`.
+   */
+  requestId?: string;
+  /**
+   * Unix-ms expiry of the kernel request, mirrors the ttl_ms the
+   * gateway used. Hook uses this as its polling deadline. Only
+   * present when `ok: true`.
+   */
+  expiresAtMs?: number;
+  /** Diagnostic detail on failure. */
+  reason?: string;
+}
 export type GatewayToClient =
   | InboundMessage
   | PermissionEvent
   | StatusEvent
   | ToolCallResult
-  | ScheduleRestartResult;
+  | ScheduleRestartResult
+  | DriveApprovalPostedEvent;
 // === Bridge (Client) -> Gateway messages ===
@@ -189,6 +224,51 @@ export interface InjectInboundMessage {
   inbound: InboundMessage;
 }
+/**
+ * RFC E §4.2 Cut 2 — sent by the Drive-write PreToolUse hook to
+ * the gateway to register a diff-preview approval card with the
+ * kernel + post it to Telegram. The hook waits on the
+ * corresponding `drive_approval_posted` reply (matching
+ * `correlationId`), then polls `approval_lookup` for the verdict.
+ *
+ * The `preview` payload is shaped like
+ * `src/drive/diff-preview.ts:DiffPreviewInput`. We don't restate
+ * the full shape on the wire — the IPC validator does a structural
+ * check (required fields present, types right) and the gateway-side
+ * consumer feeds it straight to `buildDiffPreview()` which is
+ * already defensive against malformed inputs.
+ *
+ * Trust model: same as `inject_inbound` — the gateway socket lives
+ * inside the agent container, only that-UID processes can connect,
+ * so the hook is as trusted as anything else in the container.
+ */
+export interface RequestDriveApprovalMessage {
+  type: "request_drive_approval";
+  /**
+   * Hook-generated correlation id (any unique string ≤ 64 chars).
+   * Echoed back in `drive_approval_posted` so the hook can match
+   * the response if multiple Drive-write taps are in flight.
+   */
+  correlationId: string;
+  /**
+   * Target agent the gateway serves. Defense in depth — the gateway
+   * verifies this matches its own SWITCHROOM_AGENT_NAME and refuses
+   * cross-agent requests.
+   */
+  agentName: string;
+  /**
+   * DiffPreviewInput payload — see `src/drive/diff-preview.ts`.
+   * Carried as an opaque object on the wire; the gateway
+   * deserialises it via `buildDiffPreview()`.
+   */
+  preview: Record<string, unknown>;
+  /**
+   * TTL for the kernel approval request, in ms. Hook typically
+   * passes 5 min; gateway clamps to a sensible range.
+   */
+  ttlMs?: number;
+}
 export type ClientToGateway =
   | RegisterMessage
   | ToolCallMessage
@@ -199,4 +279,5 @@ export type ClientToGateway =
   | OperatorEventForward
   | PtyPartialForward
   | UpdatePlaceholderMessage
-  | InjectInboundMessage;
+  | InjectInboundMessage
+  | RequestDriveApprovalMessage;

package/telegram-plugin/gateway/ipc-server.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import type {
   PermissionRequestForward,
   PtyPartialForward,
   RegisterMessage,
+  RequestDriveApprovalMessage,
   ScheduleRestartMessage,
   SessionEventForward,
   ToolCallMessage,
@@ -40,6 +41,18 @@ export interface IpcServerOptions {
    * inline scheduler simply ignore inject_inbound messages.
    */
   onInjectInbound?: (client: IpcClient, msg: InjectInboundMessage) => void;
+  /**
+   * RFC E §4.2 Cut 2 — Drive-write PreToolUse hook asks the gateway
+   * to register a kernel approval request + post a diff-preview
+   * card to Telegram. Handler is expected to send a
+   * `drive_approval_posted` event back over the same connection
+   * (`client.send(...)`). Optional: gateways without the hook
+   * configured ignore these messages.
+   */
+  onRequestDriveApproval?: (
+    client: IpcClient,
+    msg: RequestDriveApprovalMessage,
+  ) => Promise<void>;
   log?: (msg: string) => void;
   /**
    * How long (in ms) to wait without a heartbeat before force-closing the
@@ -192,6 +205,23 @@ export function validateClientMessage(msg: unknown): msg is ClientToGateway {
         && typeof inb.meta === "object"
         && inb.meta !== null;
     }
+    case "request_drive_approval": {
+      // RFC E §4.2 Cut 2. Validate the wire-shaped fields the
+      // gateway will route on; the inner `preview` is treated as
+      // an opaque object and gets defensively re-validated by
+      // `buildDiffPreview()` downstream.
+      if (typeof m.correlationId !== "string"
+        || (m.correlationId as string).length === 0
+        || (m.correlationId as string).length > 64) return false;
+      if (typeof m.agentName !== "string"
+        || !AGENT_NAME_RE.test(m.agentName as string)) return false;
+      if (typeof m.preview !== "object" || m.preview === null) return false;
+      if (m.ttlMs !== undefined
+        && (typeof m.ttlMs !== "number"
+          || !Number.isFinite(m.ttlMs)
+          || (m.ttlMs as number) < 0)) return false;
+      return true;
+    }
     default:
       return false;
   }
@@ -210,6 +240,7 @@ export function createIpcServer(options: IpcServerOptions): IpcServer {
     onOperatorEvent,
     onPtyPartial,
     onInjectInbound,
+    onRequestDriveApproval,
     log = () => {},
     heartbeatTimeoutMs = 30_000,
   } = options;
@@ -298,6 +329,44 @@ export function createIpcServer(options: IpcServerOptions): IpcServer {
       case "inject_inbound":
         if (onInjectInbound) onInjectInbound(client, msg as InjectInboundMessage);
         break;
+      case "request_drive_approval":
+        if (onRequestDriveApproval) {
+          // Handler is async — fire-and-forget here; the handler
+          // is responsible for sending its `drive_approval_posted`
+          // response (success or failure) back to the client.
+          onRequestDriveApproval(client, msg as RequestDriveApprovalMessage).catch(
+            (err) => {
+              log(
+                `request_drive_approval handler threw (client=${client.id}): ${(err as Error).message}`,
+              );
+              try {
+                client.send({
+                  type: "drive_approval_posted",
+                  correlationId: (msg as RequestDriveApprovalMessage).correlationId,
+                  ok: false,
+                  reason: `gateway handler error: ${(err as Error).message}`,
+                });
+              } catch {
+                /* best effort */
+              }
+            },
+          );
+        } else {
+          // No handler wired — fail closed and tell the hook so it
+          // can fall back to blocking the tool. Better than leaving
+          // the hook timing out.
+          try {
+            client.send({
+              type: "drive_approval_posted",
+              correlationId: (msg as RequestDriveApprovalMessage).correlationId,
+              ok: false,
+              reason: "gateway not configured for Drive-write approval",
+            });
+          } catch {
+            /* best effort */
+          }
+        }
+        break;
       case "update_placeholder":
         // Legacy recall.py IPC — placeholder UX was removed in #553 PR 5.
         // Soft-accepted so recall.py keeps working without modifying

package/telegram-plugin/hooks/sandbox-hint-posttool.mjs CHANGED Viewed

@@ -90,6 +90,85 @@ function emitContext(text) {
   process.stdout.write(JSON.stringify(payload) + '\n')
 }
+/**
+ * #1303: classify a tool_response as a failure. Only failures can have
+ * hit a kernel sandbox boundary. Pre-fix the hook stringified the whole
+ * tool_response and pattern-matched against it — that meant a SUCCESSFUL
+ * Read/Edit/Bash whose payload merely MENTIONED "EROFS" or "Read-only
+ * file system" (e.g. file content, code comments, grep results, the hook
+ * source itself) tripped the advisory. Verified live during #1291/#1292
+ * PR work: every `Read` on a file talking about the sandbox model
+ * produced a false positive; every `Edit` adding a comment that
+ * mentioned read-only-fs did too.
+ *
+ * Failure signals recognised on an object-shaped tool_response (any
+ * one of them is enough):
+ *   - `is_error: true` (Edit / Write / NotebookEdit / MCP)
+ *   - `success: false`
+ *   - a non-null `error` field (legacy-style, kept for forward-compat)
+ *   - a numeric, non-zero `exit_code` (Bash)
+ *
+ * A bare string tool_response carries no structured failure marker, so
+ * it is always returned as a candidate; the pattern match downstream
+ * still gates the advisory text.
+ *
+ * If no failure signal is found we have no kernel error to advise on,
+ * and the hook stays silent.
+ *
+ * @param toolResponse the event's `tool_response` (string, object, or
+ *   null/undefined).
+ * @returns `null` when there is nothing to scan, otherwise
+ *   `{ kind, body }` where `kind` is `'bare-string'` or
+ *   `'structured-failure'` and `body` is the text to pattern-match.
+ */
+function classifyFailure(toolResponse) {
+  if (toolResponse == null) return null
+  if (typeof toolResponse === 'string') {
+    // Bare string body — no structured failure marker. Treat as a
+    // candidate; the pattern match decides.
+    return { kind: 'bare-string', body: toolResponse }
+  }
+  if (typeof toolResponse !== 'object') return null
+  const isError =
+    toolResponse.is_error === true
+    || toolResponse.success === false
+    || toolResponse.error != null
+    || (typeof toolResponse.exit_code === 'number'
+        && toolResponse.exit_code !== 0)
+  if (!isError) return null
+  // Extract error-bearing fields only — never the full response. For a
+  // failed Bash, stdout may carry the relevant kernel message alongside
+  // stderr (some commands write errors to stdout), so include stdout
+  // when there's a non-zero exit code.
+  const parts = []
+  if (typeof toolResponse.error === 'string') parts.push(toolResponse.error)
+  if (typeof toolResponse.stderr === 'string') parts.push(toolResponse.stderr)
+  if (toolResponse.exit_code != null && toolResponse.exit_code !== 0
+      && typeof toolResponse.stdout === 'string') {
+    parts.push(toolResponse.stdout)
+  }
+  // Fallback: failure was signalled but no error-bearing field
+  // surfaced — stringify the structured response so we don't miss an
+  // unusual tool that puts the kernel error in an unexpected key.
+  // Bounded by the 64 KiB cap downstream.
+  if (parts.length === 0) {
+    try { parts.push(JSON.stringify(toolResponse)) } catch { /* unprintable */ }
+  }
+  return { kind: 'structured-failure', body: parts.join('\n') }
+}
+/**
+ * #1303 secondary defence: only write-capable tools can hit a kernel
+ * sandbox boundary. Read/Grep/Glob/WebFetch/etc. cannot EROFS — even if
+ * settings.json wires this hook with matcher ".*", we gate at the
+ * script level so a future scaffold change can't re-introduce the
+ * false-positive class. Bash is included because it's the canonical
+ * write surface (mkdir, rm, install, apt, etc.). MCP tools that may
+ * proxy writes are included by an `mcp__` prefix check.
+ *
+ * The prefix check accepts every tool whose name starts with `mcp__`,
+ * whether or not that particular tool writes. A tool name that is not
+ * a string is treated as not write-capable.
+ */
+const WRITE_CAPABLE_TOOLS = new Set([
+  'Edit', 'MultiEdit', 'Write', 'NotebookEdit', 'Bash',
+])
+function isWriteCapableTool(toolName) {
+  if (typeof toolName !== 'string') return false
+  if (WRITE_CAPABLE_TOOLS.has(toolName)) return true
+  if (toolName.startsWith('mcp__')) return true
+  return false
+}
 function main() {
   const raw = readStdin()
   if (!raw) return
@@ -101,18 +180,18 @@ function main() {
     return
   }
-  // tool_response shape varies by tool — string for Bash, object with
-  // file/oldString/newString for Edit/Write, etc. Stringify the whole
-  // thing so we match against every nested error field at once. Cap the
-  // scan window to keep memory bounded if the model just dumped a 10MB
-  // log into the tool_response.
-  let body
-  try {
-    body = JSON.stringify(evt.tool_response ?? '')
-  } catch {
-    return
-  }
-  if (!body) return
+  if (!isWriteCapableTool(evt.tool_name)) return
+  // #1303 primary fix: classify success vs failure FIRST. A successful
+  // tool can't have hit a kernel sandbox boundary by definition — its
+  // payload may mention EROFS / read-only-fs in benign content but
+  // that's not a kernel error.
+  const failure = classifyFailure(evt.tool_response)
+  if (failure == null) return
+  let body = failure.body
+  if (typeof body !== 'string') return
+  if (body.length === 0) return
   if (body.length > 64 * 1024) body = body.slice(0, 64 * 1024)
   for (const [pattern, key] of PATTERNS) {
@@ -123,6 +202,18 @@ function main() {
   }
 }
+// Test-only export hooks. Node ESM doesn't expose internal symbols
+// without a named export; tests import `__internals` and assert against
+// `classifyFailure` / `isWriteCapableTool` directly. Production paths
+// use `main()` and never touch this object.
+export const __internals = {
+  classifyFailure,
+  isWriteCapableTool,
+  WRITE_CAPABLE_TOOLS,
+  PATTERNS,
+  buildHint,
+}
 try {
   main()
 } catch {

package/telegram-plugin/hooks/tool-label-pretool.mjs CHANGED Viewed

@@ -111,6 +111,17 @@ export function computeLabel(toolName, input) {
     case 'KillBash':
     case 'KillShell':
       return 'Stopping background process'
+    case 'Skill': {
+      // The Skill tool's input is `{ skill: "<slug>", args?: "..." }`.
+      // We emit `Running skill <slug>` so downstream observers
+      // (notably the skill-coverage UAT runner at
+      // telegram-plugin/uat/runners/skill-coverage.ts) can tail the
+      // sidecar JSONL and recover which skill fired per turn —
+      // the progress card path that used to surface this was retired
+      // when `progressDriver` was nulled out in #1122 PR3.
+      const slug = clip(String(i.skill ?? ''), 64)
+      return slug ? `Running skill ${slug}` : null
+    }
   }
   // MCP allowlist.