npm - gclm-code - Versions diffs - 1.0.0 → 1.0.1 - Mend

gclm-code 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

package/vendor/modules/node_modules/@ant/computer-use-mcp/src/index.ts ADDED Viewed

@@ -0,0 +1,69 @@
+export type {
+  ComputerExecutor,
+  DisplayGeometry,
+  FrontmostApp,
+  InstalledApp,
+  ResolvePrepareCaptureResult,
+  RunningApp,
+  ScreenshotResult,
+} from "./executor.js";
+export type {
+  AppGrant,
+  CuAppPermTier,
+  ComputerUseHostAdapter,
+  ComputerUseOverrides,
+  ComputerUseSessionContext,
+  CoordinateMode,
+  CuGrantFlags,
+  CuPermissionRequest,
+  CuPermissionResponse,
+  CuSubGates,
+  CuTeachPermissionRequest,
+  Logger,
+  ResolvedAppRequest,
+  ScreenshotDims,
+  TeachStepRequest,
+  TeachStepResult,
+} from "./types.js";
+export { DEFAULT_GRANT_FLAGS } from "./types.js";
+export {
+  SENTINEL_BUNDLE_IDS,
+  getSentinelCategory,
+} from "./sentinelApps.js";
+export type { SentinelCategory } from "./sentinelApps.js";
+export {
+  categoryToTier,
+  getDefaultTierForApp,
+  getDeniedCategory,
+  getDeniedCategoryByDisplayName,
+  getDeniedCategoryForApp,
+  isPolicyDenied,
+} from "./deniedApps.js";
+export type { DeniedCategory } from "./deniedApps.js";
+export { isSystemKeyCombo, normalizeKeySequence } from "./keyBlocklist.js";
+export { ALL_SUB_GATES_OFF, ALL_SUB_GATES_ON } from "./subGates.js";
+export { API_RESIZE_PARAMS, targetImageSize } from "./imageResize.js";
+export type { ResizeParams } from "./imageResize.js";
+export { defersLockAcquire, handleToolCall } from "./toolCalls.js";
+export type {
+  CuCallTelemetry,
+  CuCallToolResult,
+  CuErrorKind,
+} from "./toolCalls.js";
+export { bindSessionContext, createComputerUseMcpServer } from "./mcpServer.js";
+export { buildComputerUseTools } from "./tools.js";
+export {
+  comparePixelAtLocation,
+  validateClickTarget,
+} from "./pixelCompare.js";
+export type { CropRawPatchFn, PixelCompareResult } from "./pixelCompare.js";

package/vendor/modules/node_modules/@ant/computer-use-mcp/src/keyBlocklist.ts ADDED Viewed

@@ -0,0 +1,153 @@
+/**
+ * Key combos that cross app boundaries or terminate processes. Gated behind
+ * the `systemKeyCombos` grant flag. When that flag is off, the `key` tool
+ * rejects these and returns a tool error telling the model to request the
+ * flag; all other combos work normally.
+ *
+ * Matching is canonicalized: every modifier alias the Rust executor accepts
+ * collapses to one canonical name. Without this, `command+q` / `meta+q` /
+ * `cmd+alt+escape` bypass the gate — see keyBlocklist.test.ts for the three
+ * bypass forms and the Rust parity check that catches future alias drift.
+ */
+/**
+ * Every modifier alias enigo_wrap.rs accepts (two copies: :351-359, :564-572),
+ * mapped to one canonical per Key:: variant. Left/right variants collapse —
+ * the blocklist doesn't distinguish which Ctrl.
+ *
+ * Canonical names are Rust's own variant names lowercased. Blocklist entries
+ * below use ONLY these. "meta" reads odd for Cmd+Q but it's honest: Rust
+ * sends Key::Meta, which is Cmd on darwin and Win on win32.
+ *
+ * Keys are the lowercase alias spellings; values are the four canonical
+ * names (meta, ctrl, shift, alt).
+ *
+ * NOTE(review): this is a plain object literal, so a bracket lookup with an
+ * inherited member name ("constructor", "__proto__") yields a non-undefined
+ * value. Code that indexes this table with arbitrary input should test
+ * own-property membership rather than comparing the result to undefined.
+ */
+const CANONICAL_MODIFIER: Readonly<Record<string, string>> = {
+  // Key::Meta — "meta"|"super"|"command"|"cmd"|"windows"|"win"
+  meta: "meta",
+  super: "meta",
+  command: "meta",
+  cmd: "meta",
+  windows: "meta",
+  win: "meta",
+  // Key::Control + LControl + RControl
+  ctrl: "ctrl",
+  control: "ctrl",
+  lctrl: "ctrl",
+  lcontrol: "ctrl",
+  rctrl: "ctrl",
+  rcontrol: "ctrl",
+  // Key::Shift + LShift + RShift
+  shift: "shift",
+  lshift: "shift",
+  rshift: "shift",
+  // Key::Alt and Key::Option — distinct Rust variants but same keycode on
+  // darwin (kVK_Option). Collapse: cmd+alt+escape and cmd+option+escape
+  // both Force Quit.
+  alt: "alt",
+  option: "alt",
+};
+/**
+ * Sort order for canonicals. ctrl < alt < shift < meta.
+ *
+ * partitionKeys orders modifiers by their indexOf() position in this array,
+ * so every CANONICAL_MODIFIER value needs an entry here; a value missing from
+ * the list would get index -1 and sort ahead of all the others.
+ */
+const MODIFIER_ORDER = ["ctrl", "alt", "shift", "meta"];
+/**
+ * Canonical-form entries only. Every modifier must be a CANONICAL_MODIFIER
+ * *value* (not key), modifiers must be in MODIFIER_ORDER, non-modifier last.
+ * The self-consistency test enforces this.
+ *
+ * isSystemKeyCombo probes these sets with `<all sorted modifiers>+<one key>`
+ * and an exact Set.has(), so an entry matches only when the sequence carries
+ * exactly that modifier set.
+ *
+ * NOTE(review): a sequence with extra modifiers is not covered by a shorter
+ * entry (e.g. "shift+meta+tab" does not hit "meta+tab") — confirm that this
+ * is the intended policy.
+ */
+const BLOCKED_DARWIN = new Set([
+  "meta+q", // Cmd+Q — quit frontmost app
+  "shift+meta+q", // Cmd+Shift+Q — log out
+  "alt+meta+escape", // Cmd+Option+Esc — Force Quit dialog
+  "meta+tab", // Cmd+Tab — app switcher
+  "meta+space", // Cmd+Space — Spotlight
+  "ctrl+meta+q", // Ctrl+Cmd+Q — lock screen
+]);
+const BLOCKED_WIN32 = new Set([ // used by isSystemKeyCombo whenever platform is not "darwin"
+  "ctrl+alt+delete", // Secure Attention Sequence
+  "alt+f4", // close window
+  "alt+tab", // window switcher
+  "meta+l", // Win+L — lock
+  "meta+d", // Win+D — show desktop
+]);
+/**
+ * Partition into sorted-canonical modifiers and non-modifier keys.
+ * Shared by normalizeKeySequence (join for display) and isSystemKeyCombo
+ * (check mods+each-key to catch the cmd+q+a suffix bypass).
+ *
+ * The sequence is lowercased and split on "+"; each token is trimmed and
+ * empty tokens are dropped. A token is a modifier only if it is an OWN key of
+ * CANONICAL_MODIFIER. A plain `CANONICAL_MODIFIER[token] !== undefined` test
+ * is not enough: the table is an ordinary object, so "constructor" and
+ * "__proto__" resolve to inherited Object.prototype members. Those would be
+ * pushed into `mods` as non-strings, corrupting the joined prefix, so a
+ * sequence such as "cmd+q+constructor" would no longer match "meta+q".
+ *
+ * @param seq Raw key sequence, e.g. "Cmd + Shift + Q".
+ * @returns `mods`: deduped canonical modifiers in MODIFIER_ORDER;
+ *          `keys`: the remaining tokens in their original order (not deduped).
+ */
+function partitionKeys(seq: string): { mods: string[]; keys: string[] } {
+  const parts = seq
+    .toLowerCase()
+    .split("+")
+    .map((p) => p.trim())
+    .filter(Boolean);
+  const mods: string[] = [];
+  const keys: string[] = [];
+  for (const p of parts) {
+    // Own-property lookup only — never let prototype members pose as aliases.
+    const canonical = Object.prototype.hasOwnProperty.call(
+      CANONICAL_MODIFIER,
+      p,
+    )
+      ? CANONICAL_MODIFIER[p]
+      : undefined;
+    if (canonical !== undefined) {
+      mods.push(canonical);
+    } else {
+      keys.push(p);
+    }
+  }
+  // Dedupe: "cmd+command+q" → "meta+q", not "meta+meta+q".
+  const uniqueMods = [...new Set(mods)];
+  uniqueMods.sort(
+    (a, b) => MODIFIER_ORDER.indexOf(a) - MODIFIER_ORDER.indexOf(b),
+  );
+  return { mods: uniqueMods, keys };
+}
+/**
+ * Canonical text form of a key sequence, e.g. "Cmd + Shift + Q" becomes
+ * "shift+meta+q". All the work happens in partitionKeys (lowercase, trim,
+ * alias → canonical, dedupe and order the modifiers); this function only
+ * joins the modifiers, then the non-modifier keys, with "+".
+ *
+ * @param seq Raw key sequence as supplied by the caller.
+ * @returns The "+"-joined canonical sequence; "" when no tokens remain.
+ */
+export function normalizeKeySequence(seq: string): string {
+  const parts = partitionKeys(seq);
+  return parts.mods.concat(parts.keys).join("+");
+}
+/**
+ * Reports whether a key sequence would trigger a blocked OS shortcut on the
+ * given platform.
+ *
+ * The sequence is split by partitionKeys into canonical modifiers and plain
+ * keys. Each plain key is paired with the complete modifier set and looked
+ * up on its own, rather than matching the whole joined string: for `cmd+q+a`
+ * the executor presses Cmd, then Q (Cmd+Q fires there), then A, so "meta+q"
+ * and "meta+a" are probed separately and the first one hits.
+ *
+ * A modifiers-only sequence ("cmd+shift", as used for click modifiers) is
+ * looked up as the bare modifier chord. No blocklist entry is modifier-only,
+ * so that lookup returns false.
+ *
+ * @param seq Raw key sequence, any alias spelling or casing.
+ * @param platform Chooses the blocklist: "darwin" → BLOCKED_DARWIN,
+ *                 otherwise BLOCKED_WIN32.
+ * @returns true if any modifiers+key pairing is on the chosen blocklist.
+ */
+export function isSystemKeyCombo(
+  seq: string,
+  platform: "darwin" | "win32",
+): boolean {
+  const blocked = platform === "darwin" ? BLOCKED_DARWIN : BLOCKED_WIN32;
+  const parts = partitionKeys(seq);
+  const chord = parts.mods.join("+");
+  if (parts.keys.length === 0) {
+    // Nothing to pair the modifiers with: probe the chord itself.
+    return blocked.has(chord);
+  }
+  const lead = parts.mods.length > 0 ? chord + "+" : "";
+  // One hit on any modifiers+key pairing blocks the whole sequence.
+  return parts.keys.some((k) => blocked.has(lead + k));
+}
+export const _test = { // module-private tables bundled under one export; presumably for keyBlocklist.test.ts — TODO confirm
+  CANONICAL_MODIFIER,
+  BLOCKED_DARWIN,
+  BLOCKED_WIN32,
+  MODIFIER_ORDER,
+};

package/vendor/modules/node_modules/@ant/computer-use-mcp/src/mcpServer.ts ADDED Viewed

@@ -0,0 +1,313 @@
+/**
+ * MCP server factory + session-context binder.
+ *
+ * Two entry points:
+ *
+ *   `bindSessionContext` — the wrapper closure. Takes a `ComputerUseSessionContext`
+ *   (getters + callbacks backed by host session state), returns a dispatcher.
+ *   Reusable by both the MCP CallTool handler here AND Cowork's
+ *   `InternalServerDefinition.handleToolCall` (which doesn't go through MCP).
+ *   This replaces the duplicated wrapper closures in apps/desktop/…/serverDef.ts
+ *   and the Gclm Code CLI's CU host wrapper — both did the same thing: build `ComputerUseOverrides`
+ *   fresh from getters, call `handleToolCall`, stash screenshot, merge permissions.
+ *
+ *   `createComputerUseMcpServer` — the Server object. When `context` is provided,
+ *   the CallTool handler is real (uses `bindSessionContext`). When not, it's the
+ *   legacy stub that returns a not-wired error. The tool-schema ListTools handler
+ *   is the same either way.
+ */
+import { Server } from "@modelcontextprotocol/sdk/server/index.js";
+import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
+import {
+  CallToolRequestSchema,
+  ListToolsRequestSchema,
+} from "@modelcontextprotocol/sdk/types.js";
+import type { ScreenshotResult } from "./executor.js";
+import type { CuCallToolResult } from "./toolCalls.js";
+import {
+  defersLockAcquire,
+  handleToolCall,
+  resetMouseButtonHeld,
+} from "./toolCalls.js";
+import { buildComputerUseTools } from "./tools.js";
+import type {
+  AppGrant,
+  ComputerUseHostAdapter,
+  ComputerUseOverrides,
+  ComputerUseSessionContext,
+  CoordinateMode,
+  CuGrantFlags,
+  CuPermissionResponse,
+} from "./types.js";
+import { DEFAULT_GRANT_FLAGS } from "./types.js";
+const DEFAULT_LOCK_HELD_MESSAGE = // fallback text when ctx.formatLockHeldMessage is absent or returns null/undefined
+  "Another Gclm Code session is currently using the computer. Wait for that " +
+  "session to finish, or find a non-computer-use approach.";
+/**
+ * Merge a permission response into the session's current grants.
+ *
+ * Apps: `existing` is kept as-is and in order. Each entry of
+ * `response.granted` is appended only if its bundleId has not been seen yet,
+ * either in `existing` or earlier in the same `granted` list. The response
+ * therefore cannot introduce a duplicate bundleId, even when it repeats an
+ * app; the first occurrence wins.
+ *
+ * Flags: spread truthy-only flags over defaults+existing. Truthy-only: a
+ * subsequent `request_access` that doesn't request clipboard can't revoke an
+ * earlier clipboard grant — revocation lives in a Settings page, not here.
+ *
+ * Same merge both hosts implemented independently today.
+ *
+ * @param existing Grants already held by the session; not mutated.
+ * @param existingFlags Flags already held by the session.
+ * @param response The answer to a permission request.
+ * @returns A new `apps` array and a new `flags` object.
+ */
+function mergePermissionResponse(
+  existing: readonly AppGrant[],
+  existingFlags: CuGrantFlags,
+  response: CuPermissionResponse,
+): { apps: AppGrant[]; flags: CuGrantFlags } {
+  const seen = new Set(existing.map((a) => a.bundleId));
+  const apps: AppGrant[] = [...existing];
+  for (const grant of response.granted) {
+    if (seen.has(grant.bundleId)) {
+      continue;
+    }
+    // Record the id as we go so a repeat inside `granted` is skipped too.
+    seen.add(grant.bundleId);
+    apps.push(grant);
+  }
+  const truthyFlags = Object.fromEntries(
+    Object.entries(response.flags).filter(([, v]) => v === true),
+  );
+  const flags: CuGrantFlags = {
+    ...DEFAULT_GRANT_FLAGS,
+    ...existingFlags,
+    ...truthyFlags,
+  };
+  return { apps, flags };
+}
+/**
+ * Bind session state to a reusable dispatcher. The returned function is the
+ * wrapper closure: async lock gate → build overrides fresh → `handleToolCall`
+ * → stash screenshot. The result is returned whole, `screenshot` and
+ * `telemetry` included; stripping those piggybacked fields is left to the
+ * caller (the MCP CallTool handler in `createComputerUseMcpServer` does it).
+ *
+ * The last-screenshot blob is held in a closure cell here (not on `ctx`), so
+ * hosts don't need to guarantee `ctx` object identity across calls — they just
+ * need to hold onto the returned dispatcher. Cowork caches per
+ * `InternalServerContext` in a WeakMap; the CLI host constructs once at server creation.
+ *
+ * Whether permission / teach-permission wrappers exist is decided once, at
+ * bind time, from the presence of `ctx.onPermissionRequest` and
+ * `ctx.onTeachPermissionRequest`; the callbacks themselves are read from
+ * `ctx` again on each request. Both wrappers report the merged grants through
+ * `ctx.onAllowedAppsChanged` (when set) and return the host's response
+ * unchanged.
+ *
+ * @param adapter Host adapter. `logger` and `serverName` are used here for
+ *   debug logging; the adapter itself is forwarded to `handleToolCall`.
+ * @param coordinateMode Copied unchanged into every call's overrides.
+ * @param ctx Session getters and callbacks; the getters are invoked anew on
+ *   every dispatch.
+ * @returns `(name, args)` dispatcher. Resolves to `handleToolCall`'s result,
+ *   or — when `ctx.checkCuLock` reports a holder that is not this session —
+ *   to an `isError` result with telemetry `error_kind: "cu_lock_held"`
+ *   without calling `handleToolCall`. The per-call dialog AbortController is
+ *   aborted in `finally`, whether `handleToolCall` resolves or throws.
+ */
+export function bindSessionContext(
+  adapter: ComputerUseHostAdapter,
+  coordinateMode: CoordinateMode,
+  ctx: ComputerUseSessionContext,
+): (name: string, args: unknown) => Promise<CuCallToolResult> {
+  const { logger, serverName } = adapter;
+  // Screenshot blob persists here across calls — NOT on `ctx`. Hosts hold
+  // onto the returned dispatcher; that's the identity that matters.
+  let lastScreenshot: ScreenshotResult | undefined;
+  const wrapPermission = ctx.onPermissionRequest
+    ? async (
+        req: Parameters<NonNullable<typeof ctx.onPermissionRequest>>[0],
+        signal: AbortSignal,
+      ): Promise<CuPermissionResponse> => {
+        const response = await ctx.onPermissionRequest!(req, signal);
+        const { apps, flags } = mergePermissionResponse(
+          ctx.getAllowedApps(),
+          ctx.getGrantFlags(),
+          response,
+        );
+        logger.debug(
+          `[${serverName}] permission result: granted=${response.granted.length} denied=${response.denied.length}`,
+        );
+        ctx.onAllowedAppsChanged?.(apps, flags);
+        return response;
+      }
+    : undefined;
+  const wrapTeachPermission = ctx.onTeachPermissionRequest
+    ? async (
+        req: Parameters<NonNullable<typeof ctx.onTeachPermissionRequest>>[0],
+        signal: AbortSignal,
+      ): Promise<CuPermissionResponse> => {
+        const response = await ctx.onTeachPermissionRequest!(req, signal);
+        logger.debug(
+          `[${serverName}] teach permission result: granted=${response.granted.length} denied=${response.denied.length}`,
+        );
+        // Teach doesn't request grant flags — preserve existing.
+        const { apps } = mergePermissionResponse(
+          ctx.getAllowedApps(),
+          ctx.getGrantFlags(),
+          response,
+        );
+        ctx.onAllowedAppsChanged?.(apps, {
+          ...DEFAULT_GRANT_FLAGS,
+          ...ctx.getGrantFlags(),
+        });
+        return response;
+      }
+    : undefined;
+  return async (name, args) => {
+    // ─── Async lock gate ─────────────────────────────────────────────────
+    // Replaces the sync Gate-3 in `handleToolCall` — we pass
+    // `checkCuLock: undefined` below so it no-ops. Hosts with
+    // cross-process locks (O_EXCL file) await the real primitive here
+    // instead of pre-computing + feeding a fake sync result.
+    if (ctx.checkCuLock) {
+      const lock = await ctx.checkCuLock();
+      if (lock.holder !== undefined && !lock.isSelf) {
+        const text =
+          ctx.formatLockHeldMessage?.(lock.holder) ?? DEFAULT_LOCK_HELD_MESSAGE;
+        return {
+          content: [{ type: "text", text }],
+          isError: true,
+          telemetry: { error_kind: "cu_lock_held" },
+        };
+      }
+      if (lock.holder === undefined && !defersLockAcquire(name)) {
+        await ctx.acquireCuLock?.();
+        // Re-check: the awaits above yield the microtask queue, so another
+        // session's check+acquire can interleave with ours. Hosts where
+        // acquire is a no-op when already held (Cowork's CuLockManager) give
+        // no signal that we lost — verify we're now the holder before
+        // proceeding. The CLI's O_EXCL file lock would surface this as a throw from
+        // acquire instead; this re-check is a belt-and-suspenders for that
+        // path too.
+        const recheck = await ctx.checkCuLock();
+        if (recheck.holder !== undefined && !recheck.isSelf) {
+          const text =
+            ctx.formatLockHeldMessage?.(recheck.holder) ??
+            DEFAULT_LOCK_HELD_MESSAGE;
+          return {
+            content: [{ type: "text", text }],
+            isError: true,
+            telemetry: { error_kind: "cu_lock_held" },
+          };
+        }
+        // Fresh holder → any prior session's mouseButtonHeld is stale.
+        // Mirrors what Gate-3 does on the acquire branch. After the
+        // re-check so we only clear module state when we actually won.
+        resetMouseButtonHeld();
+      }
+    }
+    // ─── Build overrides fresh ───────────────────────────────────────────
+    // Blob-first; dims-fallback with base64:"" when the closure cell is
+    // unset (cross-respawn). scaleCoord reads dims; pixelCompare sees "" →
+    // isEmpty → skip.
+    const dimsFallback = lastScreenshot
+      ? undefined
+      : ctx.getLastScreenshotDims?.();
+    // Per-call AbortController for dialog dismissal. Aborted in `finally` —
+    // if handleToolCall finishes (MCP timeout, throw) before the user
+    // answers, the host's dialog handler sees the abort and tears down.
+    const dialogAbort = new AbortController();
+    const overrides: ComputerUseOverrides = {
+      allowedApps: [...ctx.getAllowedApps()],
+      grantFlags: ctx.getGrantFlags(),
+      userDeniedBundleIds: ctx.getUserDeniedBundleIds(),
+      coordinateMode,
+      selectedDisplayId: ctx.getSelectedDisplayId(),
+      displayPinnedByModel: ctx.getDisplayPinnedByModel?.(),
+      displayResolvedForApps: ctx.getDisplayResolvedForApps?.(),
+      lastScreenshot:
+        lastScreenshot ??
+        (dimsFallback ? { ...dimsFallback, base64: "" } : undefined),
+      onPermissionRequest: wrapPermission
+        ? (req) => wrapPermission(req, dialogAbort.signal)
+        : undefined,
+      onTeachPermissionRequest: wrapTeachPermission
+        ? (req) => wrapTeachPermission(req, dialogAbort.signal)
+        : undefined,
+      onAppsHidden: ctx.onAppsHidden,
+      getClipboardStash: ctx.getClipboardStash,
+      onClipboardStashChanged: ctx.onClipboardStashChanged,
+      onResolvedDisplayUpdated: ctx.onResolvedDisplayUpdated,
+      onDisplayPinned: ctx.onDisplayPinned,
+      onDisplayResolvedForApps: ctx.onDisplayResolvedForApps,
+      onTeachModeActivated: ctx.onTeachModeActivated,
+      onTeachStep: ctx.onTeachStep,
+      onTeachWorking: ctx.onTeachWorking,
+      getTeachModeActive: ctx.getTeachModeActive,
+      // Undefined → handleToolCall's sync Gate-3 no-ops. The async gate
+      // above already ran.
+      checkCuLock: undefined,
+      acquireCuLock: undefined,
+      isAborted: ctx.isAborted,
+    };
+    logger.debug(
+      `[${serverName}] tool=${name} allowedApps=${overrides.allowedApps.length} coordMode=${coordinateMode}`,
+    );
+    // ─── Dispatch ────────────────────────────────────────────────────────
+    try {
+      const result = await handleToolCall(adapter, name, args, overrides);
+      if (result.screenshot) {
+        lastScreenshot = result.screenshot; // full blob stays in the closure cell for the next call
+        const { base64: _blob, ...dims } = result.screenshot; // only the non-blob fields go to the log and the host callback
+        logger.debug(`[${serverName}] screenshot dims: ${JSON.stringify(dims)}`);
+        ctx.onScreenshotCaptured?.(dims);
+      }
+      return result;
+    } finally {
+      dialogAbort.abort();
+    }
+  };
+}
+/**
+ * Build the MCP `Server` for computer use.
+ *
+ * ListTools is answered the same way with or without `context`: an empty
+ * list while `adapter.isDisabled()` returns true, otherwise the tool list
+ * built once here from `adapter.executor.capabilities` and `coordinateMode`.
+ *
+ * CallTool depends on `context`:
+ *  - absent: a stub that logs a warning and returns an `isError` text result
+ *    saying the instance is not wired to a session;
+ *  - present: the call goes through a dispatcher from `bindSessionContext`,
+ *    and the `screenshot` / `telemetry` fields are dropped from its result
+ *    before it is returned over MCP.
+ *
+ * @param adapter Host adapter supplying `serverName`, `logger`, `executor`
+ *   and `isDisabled()`.
+ * @param coordinateMode Passed to tool construction and to the dispatcher.
+ * @param context Optional session context; selects the real or stub handler.
+ * @returns The configured server (no transport is connected here).
+ */
+export function createComputerUseMcpServer(
+  adapter: ComputerUseHostAdapter,
+  coordinateMode: CoordinateMode,
+  context?: ComputerUseSessionContext,
+): Server {
+  const { serverName, logger } = adapter;
+  const server = new Server(
+    { name: serverName, version: "0.1.3" },
+    { capabilities: { tools: {}, logging: {} } },
+  );
+  const tools = buildComputerUseTools(
+    adapter.executor.capabilities,
+    coordinateMode,
+  );
+  server.setRequestHandler(ListToolsRequestSchema, async () => {
+    if (adapter.isDisabled()) {
+      return { tools: [] };
+    }
+    return { tools };
+  });
+  if (!context) {
+    // Legacy path: nothing bound. Only reachable when the server is called
+    // over MCP transport without a binder (a wiring regression), so fail
+    // loudly rather than silently.
+    server.setRequestHandler(
+      CallToolRequestSchema,
+      async (request): Promise<CallToolResult> => {
+        logger.warn(
+          `[${serverName}] tool call "${request.params.name}" reached the stub handler — no session context bound. Per-session state unavailable.`,
+        );
+        return {
+          content: [
+            {
+              type: "text",
+              text: "This computer-use server instance is not wired to a session. Per-session app permissions are not available on this code path.",
+            },
+          ],
+          isError: true,
+        };
+      },
+    );
+    return server;
+  }
+  const dispatch = bindSessionContext(adapter, coordinateMode, context);
+  server.setRequestHandler(
+    CallToolRequestSchema,
+    async (request): Promise<CallToolResult> => {
+      const full = await dispatch(
+        request.params.name,
+        request.params.arguments ?? {},
+      );
+      // Drop the host-only extras; the rest is the MCP CallToolResult.
+      const { screenshot: _s, telemetry: _t, ...result } = full;
+      return result;
+    },
+  );
+  return server;
+}