ucu-mcp 0.3.7 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.3.8] - 2026-06-08
9
+
10
+ ### Fixed
11
+
12
+ - `focus_app` no longer trips the user-activity pause. It used to be classified as `"other"` (neither observe nor input) so a recent mouse movement could block `focus_app` for 2 s; it is now in `OBSERVE_ACTIONS`, matching the production `withSafety` default. Symptom: OpenCode could not switch the active target app (e.g. CC Switch) without retrying until the cursor had been still for 2 s.
13
+ - `doctor` native-helper path resolution now checks `process.argv[1]` (npm / npx / global install), walks `import.meta.url` up to 4 levels, and falls back to `npm root -g`. Previously, when the MCP client launched `ucu-mcp` from a cwd other than the project root (the common case for `npx ucu-mcp`), the helper binaries would report as missing even though they were in the tarball. The new report includes `path` and a `tried[]` list so the model can see what was checked.
14
+ - `doctor` recommendations now list each missing macOS permission on its own line, name the host terminal app (so the user knows which entry to grant in System Settings), and add an Electron AX hint for the common case where `list_windows` returns `[]` even with Accessibility granted.
15
+
16
+ ### Tests
17
+
18
+ - `safety-guard`: `focus_app` is in `OBSERVE_ACTIONS`; `classifyAction("focus_app") === "observe"`; `withSafety`'s default `skipUserActivityPause` lets the call through even mid user-activity.
19
+ - `errors`: `WindowNotFoundError` preserves an inline `hint` field set by the platform layer, surfaced in the MCP error response.
20
+ - `macos-platform`: OCR JXA `"Failed to load screenshot image"` is re-thrown as `CaptureError` with a hint pointing at the missing Screen Recording permission (the typical cause is `screencapture` writing a 0-byte file when TCC denies Screen Recording, not the helper binary being absent).
21
+ - `tools-layer`: `doctor` report carries `terminalApp` and the richer `nativeHelpers` shape, in which each of `cgevent` and `ocr` is `{ ok, path, tried[] }`.
22
+
8
23
  ## [0.3.7] - 2026-06-07
9
24
 
10
25
  ### Fixed
@@ -94,13 +94,21 @@ function errorDetails(error) {
94
94
  const err = error instanceof Error ? error : new Error(String(error));
95
95
  const code = error instanceof UcuError ? error.code : "UNKNOWN_ERROR";
96
96
  const retryable = error instanceof UcuError ? error.retryable : false;
97
- return {
97
+ // Some platform errors carry an inline `hint` field (added by macos.ts focusApp
98
+ // for the Electron AX case, etc.). Surface it under `hint` so the model can
99
+ // see remediation without parsing the message string.
100
+ const inlineHint = err.hint;
101
+ const details = {
98
102
  name: err.name,
99
103
  code,
100
104
  retryable,
101
105
  message: err.message,
102
106
  recovery: recoveryHint(code),
103
107
  };
108
+ if (typeof inlineHint === "string" && inlineHint.length > 0) {
109
+ details.hint = inlineHint;
110
+ }
111
+ return details;
104
112
  }
105
113
  let _actionCounter = 0;
106
114
  function nextActionId() {
@@ -281,7 +289,28 @@ export function registerTools(server) {
281
289
  includeMinimized: z.boolean().optional().describe("Include minimized windows"),
282
290
  }, async (params) => {
283
291
  const windows = await withSafety({ action: "list_windows", params: {}, requiresAccessibility: true, execute: () => getPlatform().listWindows(params.includeMinimized) });
284
- return { content: [{ type: "text", text: JSON.stringify(windows, null, 2) }] };
292
+ // Attach a diagnostic hint when the result is empty so the model can
293
+ // tell the difference between "no windows are open" and "AX enumeration
294
+ // failed for the target app" (common with Electron apps like CC Switch,
295
+ // VS Code, Discord). The windows list itself is the source of truth; the
296
+ // hint is advisory only.
297
+ let diagnostics;
298
+ if (windows.length === 0) {
299
+ let accessibility = "unknown";
300
+ try {
301
+ const { checkPermission } = await import("../safety/permissions.js");
302
+ const { granted } = await checkPermission("accessibility");
303
+ accessibility = granted ? "granted" : "denied";
304
+ }
305
+ catch { /* keep unknown */ }
306
+ const axNote = accessibility === "denied"
307
+ ? "Accessibility is currently denied to this terminal — grant it via System Settings > Privacy & Security > Accessibility, then retry."
308
+ : accessibility === "granted"
309
+ ? "Accessibility is granted. If you expected a specific app to appear here, it is likely an Electron app whose AX tree is not exposed to System Events. Pixel-level workaround: call screenshot, then ocr to locate the target UI text and get its bounding box, then click(x, y) at those screen coordinates. Alternatively, modify the app's config file or database directly."
310
+ : "Accessibility status is unknown. Run `doctor` first to verify.";
311
+ diagnostics = { hint: `list_windows returned 0 windows. ${axNote}`, accessibility };
312
+ }
313
+ return { content: [{ type: "text", text: JSON.stringify(diagnostics ? { windows, diagnostics } : windows, null, 2) }] };
285
314
  });
286
315
  registry.register("list_windows");
287
316
  registerTool("list_apps", "List all running applications", {}, async () => {
@@ -384,55 +413,104 @@ export function registerTools(server) {
384
413
  });
385
414
  registry.register("drag");
386
415
  registerTool("doctor", "Check system permissions, native helpers, and client readiness", {}, async () => {
387
- const { checkPermissions } = await import("../safety/permissions.js");
416
+ const { checkPermissions, getPermissionInstructions, getTerminalAppName } = await import("../safety/permissions.js");
388
417
  const { MacOSPlatform: MacPlat } = await import("../platform/macos.js");
389
- const { existsSync } = await import("node:fs");
390
- const { join, dirname } = await import("node:path");
418
+ const { existsSync, statSync } = await import("node:fs");
419
+ const { join, dirname, resolve } = await import("node:path");
391
420
  const { fileURLToPath } = await import("node:url");
392
421
  const { execFileSync } = await import("node:child_process");
393
422
  const permissions = await checkPermissions();
394
423
  const screenLocked = process.platform === "darwin" ? new MacPlat().isScreenLocked?.() ?? false : false;
395
- let nativeHelpers;
396
- if (process.platform === "darwin") {
424
+ const termApp = process.platform === "darwin" ? getTerminalAppName() : undefined;
425
+ // Resolve native helper binaries across every install layout we have seen:
426
+ // - dev: process.cwd() === project root
427
+ // - npm install --prefix X: argv[1] is in X/node_modules/ucu-mcp/...
428
+ // - global install via npm: argv[1] is in $(npm root -g)/ucu-mcp/...
429
+ // - npx: argv[1] is in ~/.npm/_npx/.../node_modules/ucu-mcp/...
430
+ // - bin/ucu-mcp.js is the entry; dist/src/*/tools.js is the module path
431
+ function resolveHelperPath(relParts) {
432
+ const tried = [];
433
+ const tryPaths = [];
397
434
  const moduleDir = dirname(fileURLToPath(import.meta.url));
398
- const checkPaths = (subdirs) => {
399
- const paths = [
400
- join(process.cwd(), ...subdirs),
401
- join(moduleDir, "..", ...subdirs),
402
- join(moduleDir, "..", "..", ...subdirs),
403
- ];
404
- return paths.some(p => { try {
405
- return existsSync(p);
435
+ const argv1 = process.argv[1] ? resolve(process.argv[1]) : "";
436
+ const argv1Dir = argv1 ? dirname(argv1) : "";
437
+ // (1) process.cwd() — dev invocation
438
+ tryPaths.push(join(process.cwd(), ...relParts));
439
+ // (2) argv[1] dir — npm / npx / global
440
+ if (argv1Dir) {
441
+ tryPaths.push(join(argv1Dir, ...relParts));
442
+ tryPaths.push(join(argv1Dir, "..", ...relParts));
443
+ tryPaths.push(join(argv1Dir, "..", "..", ...relParts));
444
+ }
445
+ // (3) module dir — dist/bin or dist/src/mcp; walk up to 4 levels
446
+ tryPaths.push(join(moduleDir, "..", ...relParts));
447
+ tryPaths.push(join(moduleDir, "..", "..", ...relParts));
448
+ tryPaths.push(join(moduleDir, "..", "..", "..", ...relParts));
449
+ tryPaths.push(join(moduleDir, "..", "..", "..", "..", ...relParts));
450
+ // (4) npm root -g for global install (best effort)
451
+ if (process.platform === "darwin") {
452
+ try {
453
+ const npmRoot = execFileSync("npm", ["root", "-g"], { encoding: "utf-8", timeout: 2000 }).trim();
454
+ if (npmRoot) {
455
+ tryPaths.push(join(npmRoot, "ucu-mcp", ...relParts));
456
+ }
406
457
  }
407
- catch {
408
- return false;
409
- } });
410
- };
458
+ catch { /* npm not on PATH is fine */ }
459
+ }
460
+ for (const p of tryPaths) {
461
+ tried.push(p);
462
+ try {
463
+ if (existsSync(p) && statSync(p).isFile())
464
+ return { path: p, tried };
465
+ }
466
+ catch { /* skip */ }
467
+ }
468
+ return { path: null, tried };
469
+ }
470
+ let nativeHelpers;
471
+ if (process.platform === "darwin") {
472
+ const cgevent = resolveHelperPath(["native", "cgevent", "cgevent-helper"]);
473
+ const ocr = resolveHelperPath(["native", "ocr", "ocr-helper"]);
474
+ const windowlist = resolveHelperPath(["native", "windowlist", "windowlist-helper"]);
411
475
  nativeHelpers = {
412
- cgevent: checkPaths(["native", "cgevent", "cgevent-helper"]),
413
- ocr: checkPaths(["native", "ocr", "ocr-helper"]),
476
+ cgevent: { ok: cgevent.path !== null, path: cgevent.path, tried: cgevent.tried.slice(0, 3) },
477
+ ocr: { ok: ocr.path !== null, path: ocr.path, tried: ocr.tried.slice(0, 3) },
478
+ windowlist: { ok: windowlist.path !== null, path: windowlist.path, tried: windowlist.tried.slice(0, 3) },
414
479
  };
415
480
  }
416
481
  let readiness = "ready";
417
482
  const issues = [];
418
483
  if (!permissions.granted) {
419
484
  readiness = "blocked";
420
- issues.push("Missing macOS permissions: " + permissions.missing.join(", "));
485
+ for (const m of (permissions.missing ?? [])) {
486
+ issues.push(`Missing macOS permission: ${m}`);
487
+ }
421
488
  }
422
489
  if (screenLocked) {
423
490
  readiness = "blocked";
424
491
  issues.push("Screen is locked");
425
492
  }
426
493
  if (process.platform === "darwin" && nativeHelpers) {
427
- if (!nativeHelpers.cgevent) {
494
+ if (!nativeHelpers.cgevent.ok) {
428
495
  readiness = readiness === "ready" ? "degraded" : readiness;
429
- issues.push("Native CGEvent helper not found (input synthesis may crash on macOS Sequoia+)");
496
+ issues.push("Native CGEvent helper not found (input synthesis may crash on macOS Sequoia+). Run `npm run build` to compile it, or reinstall ucu-mcp so the helper ships from the tarball.");
430
497
  }
431
- if (!nativeHelpers.ocr) {
498
+ if (!nativeHelpers.ocr.ok) {
432
499
  readiness = readiness === "ready" ? "degraded" : readiness;
433
- issues.push("Native OCR helper not found (OCR may fail on macOS Sequoia+)");
500
+ issues.push("Native OCR helper not found (OCR may fail on macOS Sequoia+). Run `npm run build` to compile it, or reinstall ucu-mcp so the helper ships from the tarball.");
501
+ }
502
+ if (!nativeHelpers.windowlist.ok) {
503
+ readiness = readiness === "ready" ? "degraded" : readiness;
504
+ issues.push("Native windowlist helper not found (window enumeration will fall back to slow JXA). Run `npm run build` to compile it.");
434
505
  }
435
506
  }
507
+ // Heuristic AX hint: if Accessibility is granted but list_windows consistently
508
+ // returns empty for the only app the model cared about, the model has likely
509
+ // hit the Electron AX limitation (Electron windows do not expose AX to System
510
+ // Events unless Accessibility is also granted to the Electron process itself,
511
+ // and the app has accessibility features enabled). This block is read-only —
512
+ // we never hit JXA here because the doctor must stay fast and side-effect free.
513
+ const electronHint = "If the target app is Electron (e.g. CC Switch, VS Code, Discord), list_windows may return [] even with Accessibility granted to your terminal. Grant Accessibility to the Electron app itself in System Settings > Privacy & Security > Accessibility, and restart the app. Pixel-level workaround: use screenshot + ocr to locate UI elements by text, then click(x, y) at the detected bounding box coordinates. Alternatively, modify the app\'s config file or database directly.";
436
514
  const clients = {};
437
515
  for (const bin of ["claude", "codex", "opencode", "npx"]) {
438
516
  try {
@@ -445,16 +523,27 @@ export function registerTools(server) {
445
523
  }
446
524
  const recommendations = [];
447
525
  if (readiness === "blocked") {
448
- recommendations.push("Grant missing permissions in System Settings > Privacy & Security, then restart the MCP client.");
526
+ for (const m of (permissions.missing ?? [])) {
527
+ const app = termApp ?? "your terminal app";
528
+ recommendations.push(`${m}: ${getPermissionInstructions(m)} (Grant to ${app}.)`);
529
+ }
530
+ if (screenLocked)
531
+ recommendations.push("Unlock the screen, then retry.");
449
532
  }
450
- else if (readiness === "degraded") {
451
- if (nativeHelpers && (!nativeHelpers.cgevent || !nativeHelpers.ocr)) {
452
- recommendations.push("Run 'npm run build' to compile native Swift helpers.");
533
+ if (readiness !== "ready") {
534
+ if (process.platform === "darwin" && nativeHelpers && (!nativeHelpers.cgevent.ok || !nativeHelpers.ocr.ok)) {
535
+ recommendations.push("Run `npm run build` in the ucu-mcp project to compile native Swift helpers (cgevent-helper, ocr-helper, windowlist-helper).");
536
+ }
537
+ if (process.platform === "darwin" && nativeHelpers && !nativeHelpers.windowlist.ok) {
538
+ recommendations.push("windowlist helper missing — list_windows will fall back to JXA (~3-6s, unreliable for Electron). Run `npm run build`.");
453
539
  }
454
540
  }
455
- else {
541
+ if (readiness === "ready") {
456
542
  recommendations.push("All checks passed. MCP client can proceed with automation.");
457
543
  }
544
+ else if (process.platform === "darwin") {
545
+ recommendations.push(electronHint);
546
+ }
458
547
  const report = {
459
548
  readiness,
460
549
  issues: issues.length > 0 ? issues : undefined,
@@ -463,6 +552,7 @@ export function registerTools(server) {
463
552
  node: process.version,
464
553
  permissions,
465
554
  screenLocked,
555
+ terminalApp: termApp,
466
556
  nativeHelpers,
467
557
  clients,
468
558
  safety: {
@@ -1,5 +1,15 @@
1
1
  import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions, FindElementResponse, AppInfo, AppTarget, BrowserContext, ScreenshotOptions } from "./base.js";
2
+ export interface MacOSPlatformOptions {
3
+ /**
4
+ * Override native helper resolution.
5
+ * - Map of folder name to absolute binary path to inject a specific helper.
6
+ * - Set a value to null to skip that helper (force JXA fallback).
7
+ * Used by tests to control native helper behavior without filesystem tricks.
8
+ */
9
+ nativeHelperPaths?: Record<string, string | null>;
10
+ }
2
11
  export declare class MacOSPlatform implements Platform {
12
+ private readonly _nativeHelperPaths;
3
13
  private readonly elementCache;
4
14
  private readonly elementCacheTtlMs;
5
15
  private readonly elementCacheMaxSize;
@@ -7,6 +17,7 @@ export declare class MacOSPlatform implements Platform {
7
17
  private windowCache;
8
18
  private activeTarget;
9
19
  private savedFocus;
20
+ constructor(options?: MacOSPlatformOptions);
10
21
  /** Remove expired entries from the element cache. */
11
22
  private evictExpiredCacheEntries;
12
23
  /** Evict oldest entries when cache exceeds the maximum size (LRU-style). */
@@ -27,6 +38,9 @@ export declare class MacOSPlatform implements Platform {
27
38
  focusApp(app: string): Promise<AppTarget>;
28
39
  getActiveBrowserContext(app?: string): Promise<BrowserContext | undefined>;
29
40
  listWindows(_includeMinimized?: boolean): Promise<WindowInfo[]>;
41
+ private listWindowsNative;
42
+ private resolveNativeHelper;
43
+ private listWindowsJxa;
30
44
  getWindowState(windowId?: string, depth?: number, includeBounds?: boolean): Promise<WindowState>;
31
45
  click(x: number, y: number, button?: "left" | "right" | "middle", doubleClick?: boolean): Promise<void>;
32
46
  move(x: number, y: number): Promise<void>;
@@ -4,6 +4,10 @@ import { promisify } from "node:util";
4
4
  import { captureFullScreen, captureRegion } from "../utils/screenshot.js";
5
5
  import { click as inputClick, doubleClick as inputDoubleClick, move as inputMove, drag as inputDrag, scroll as inputScroll, typeText, pressShortcut } from "../utils/input.js";
6
6
  import { CaptureError, ElementNotFoundError, InputSynthesisError, PermissionError, PlatformError, TargetStaleError, UcuError, WindowNotFoundError } from "../util/errors.js";
7
+ import { existsSync } from "node:fs";
8
+ import { join, dirname } from "node:path";
9
+ import { fileURLToPath } from "node:url";
10
+ const __macosDirname = dirname(fileURLToPath(import.meta.url));
7
11
  const execFileAsync = promisify(execFile);
8
12
  function errorMessage(error) {
9
13
  return error instanceof Error ? error.message : String(error);
@@ -59,6 +63,7 @@ function selectWindowForApp(windows, requestedApp) {
59
63
  windows.find((window) => appNameMatches(window.processName, requestedApp));
60
64
  }
61
65
  export class MacOSPlatform {
66
+ _nativeHelperPaths;
62
67
  elementCache = new Map();
63
68
  elementCacheTtlMs = 30_000;
64
69
  elementCacheMaxSize = 100;
@@ -66,6 +71,9 @@ export class MacOSPlatform {
66
71
  windowCache;
67
72
  activeTarget;
68
73
  savedFocus;
74
+ constructor(options) {
75
+ this._nativeHelperPaths = options?.nativeHelperPaths;
76
+ }
69
77
  // ── Element Cache Management ────────────────────────────────────────────
70
78
  /** Remove expired entries from the element cache. */
71
79
  evictExpiredCacheEntries() {
@@ -248,7 +256,23 @@ export class MacOSPlatform {
248
256
  await new Promise((resolve) => setTimeout(resolve, 150));
249
257
  } while (Date.now() < deadline);
250
258
  if (!target) {
251
- throw new WindowNotFoundError(app);
259
+ // Wrap with a more diagnostic message: many real-world failures are
260
+ // Electron apps that do not expose their AX tree to System Events
261
+ // (CC Switch, VS Code, Discord, Slack). WindowNotFoundError carries the
262
+ // app name so the tool handler can surface a remediation hint. The
263
+ // bare WindowNotFoundError("CC Switch") was indistinguishable from
264
+ // "the app is not running", which led models to retry forever.
265
+ const err = new WindowNotFoundError(app);
266
+ err.hint =
267
+ "list_windows returned no match for this app. If the app is running, " +
268
+ "the most likely cause is that it is an Electron app whose AX tree is " +
269
+ "not exposed to System Events (System Settings > Privacy & Security > " +
270
+ "Accessibility must be granted to the Electron process itself, not just " +
271
+ "to the host terminal). Pixel-level workaround: call screenshot to " +
272
+ "capture the screen, then ocr to locate UI text and get its bounding " +
273
+ "box coordinates, then click(x, y) at those screen coordinates. " +
274
+ "Alternatively, modify the app's config file or database directly.";
275
+ throw err;
252
276
  }
253
277
  this.activeTarget = {
254
278
  targetId: randomUUID(),
@@ -320,11 +344,81 @@ export class MacOSPlatform {
320
344
  }));
321
345
  }
322
346
  try {
323
- // Use System Events instead of CGWindowListCopyWindowInfo.
324
- // The CoreGraphics API returns CFArrayRef/CFDictionaryRef which JXA
325
- // cannot iterate reliably CFArrayGetCount works but objectAtIndex
326
- // does not. System Events JXA is slower (~3-6s) but correct.
327
- const jxaScript = `
347
+ // Try native Swift helper first (CGWindowListCopyWindowInfo, ~1ms).
348
+ // Falls back to JXA System Events if the helper is not available.
349
+ // The native helper reliably enumerates ALL windows including Electron
350
+ // apps, whereas JXA relies on System Events AX which is inconsistent
351
+ // for Chromium-rendered windows.
352
+ let windows;
353
+ const nativeResult = this.listWindowsNative();
354
+ if (nativeResult !== null) {
355
+ windows = nativeResult;
356
+ }
357
+ else {
358
+ windows = await this.listWindowsJxa();
359
+ }
360
+ this.windowCache = {
361
+ cachedAt: Date.now(),
362
+ windows: windows.map((window) => ({
363
+ ...window,
364
+ bounds: { ...window.bounds },
365
+ })),
366
+ };
367
+ return windows;
368
+ }
369
+ catch {
370
+ // Fallback: return empty list if both methods fail
371
+ return [];
372
+ }
373
+ }
374
+ listWindowsNative() {
375
+ try {
376
+ const helperPath = this.resolveNativeHelper("windowlist", "windowlist-helper");
377
+ if (!helperPath)
378
+ return null;
379
+ const out = execFileSync(helperPath, [], {
380
+ encoding: "utf-8",
381
+ timeout: 5000,
382
+ });
383
+ const parsed = JSON.parse(out.trim());
384
+ if (parsed.error)
385
+ return null;
386
+ return parsed.windows.map(w => ({
387
+ id: w.id,
388
+ title: w.title,
389
+ processName: w.processName,
390
+ pid: w.pid,
391
+ bounds: w.bounds,
392
+ isMinimized: !w.isOnScreen,
393
+ isOnScreen: w.isOnScreen,
394
+ }));
395
+ }
396
+ catch {
397
+ return null;
398
+ }
399
+ }
400
+ resolveNativeHelper(folder, binary) {
401
+ // Test injection: if the caller provided explicit paths, use those
402
+ // instead of hitting the filesystem.
403
+ if (this._nativeHelperPaths && folder in this._nativeHelperPaths) {
404
+ const override = this._nativeHelperPaths[folder];
405
+ // null means "skip native, force JXA fallback"
406
+ return override === null ? null : override;
407
+ }
408
+ // dev: src/platform/macos.ts → native/<folder>/<binary>
409
+ // prod: dist/src/platform/macos.js → native/<folder>/<binary>
410
+ const candidates = [
411
+ join(__macosDirname, "..", "..", "native", folder, binary),
412
+ join(__macosDirname, "..", "..", "..", "native", folder, binary),
413
+ ];
414
+ for (const p of candidates) {
415
+ if (existsSync(p))
416
+ return p;
417
+ }
418
+ return null;
419
+ }
420
+ async listWindowsJxa() {
421
+ const jxaScript = `
328
422
  var se = Application('System Events');
329
423
  var result = [];
330
424
  var procs = se.processes();
@@ -358,24 +452,11 @@ export class MacOSPlatform {
358
452
  }
359
453
  JSON.stringify(result);
360
454
  `;
361
- const jxaOut = execFileSync("osascript", [
362
- "-l", "JavaScript",
363
- "-e", jxaScript
364
- ], { encoding: "utf-8", timeout: 15000 });
365
- const windows = JSON.parse(jxaOut.trim());
366
- this.windowCache = {
367
- cachedAt: Date.now(),
368
- windows: windows.map((window) => ({
369
- ...window,
370
- bounds: { ...window.bounds },
371
- })),
372
- };
373
- return windows;
374
- }
375
- catch {
376
- // Fallback: return empty list if JXA fails
377
- return [];
378
- }
455
+ const jxaOut = execFileSync("osascript", [
456
+ "-l", "JavaScript",
457
+ "-e", jxaScript
458
+ ], { encoding: "utf-8", timeout: 15000 });
459
+ return JSON.parse(jxaOut.trim());
379
460
  }
380
461
  async getWindowState(windowId, depth, includeBounds = true) {
381
462
  if (!windowId || windowId === this.activeTarget?.windowId) {
@@ -779,8 +860,18 @@ export class MacOSPlatform {
779
860
  `;
780
861
  const out = execFileSync("osascript", ["-l", "JavaScript", "-e", jxaScript], { encoding: "utf-8", timeout: 30000 }).trim();
781
862
  const parsed = JSON.parse(out);
782
- if (parsed.error)
783
- throw new CaptureError(`ocr failed: ${parsed.error}`);
863
+ if (parsed.error) {
864
+ // Distinguish permission-class failures from real Vision errors.
865
+ // screencapture writes a 0-byte file when Screen Recording is not granted,
866
+ // and the JXA NSImage init then fails with "Failed to load screenshot image".
867
+ // Surface that as a permission hint in the CaptureError message so the model can suggest the right fix.
868
+ const hint = parsed.error === "Failed to load screenshot image"
869
+ ? " (the screenshot file is empty or unreadable — Screen Recording permission is most likely missing; run `doctor` and grant Screen Recording to the host terminal, then retry)"
870
+ : parsed.error === "Failed to get CGImage from screenshot"
871
+ ? " (the screenshot could not be decoded — likely an empty capture; check Screen Recording permission)"
872
+ : "";
873
+ throw new CaptureError(`ocr failed: ${parsed.error}${hint}`);
874
+ }
784
875
  const imgWidth = buf.readUInt32BE(16);
785
876
  const scaleFactorX = screenSize.width / (region ? region.width : (imgWidth / scaleFactor));
786
877
  const elements = parsed.elements.map((el) => ({
@@ -100,6 +100,10 @@ export const OBSERVE_ACTIONS = new Set([
100
100
  "wait_for_element",
101
101
  "doctor",
102
102
  "clipboard_read",
103
+ // focus_app only sets the active target context via AppleScript activate
104
+ // and an AX window lookup — it does not synthesize mouse or keyboard input,
105
+ // so the user-activity pause must not block it. (OpenCode 0.3.7 follow-up)
106
+ "focus_app",
103
107
  ]);
104
108
  /** Actions that synthesize user input — need full user-activity protection. */
105
109
  export const INPUT_ACTIONS = new Set([
@@ -8,6 +8,10 @@ export interface PermissionDetail {
8
8
  granted: boolean;
9
9
  instructions: string;
10
10
  }
11
+ /**
12
+ * Get the name of the terminal app that the user needs to authorize.
13
+ */
14
+ export declare function getTerminalAppName(): string;
11
15
  export declare function checkPermissions(): Promise<PermissionCheckResult>;
12
16
  export declare function checkPermission(type: "accessibility" | "screenRecording"): Promise<{
13
17
  granted: boolean;
@@ -4,7 +4,7 @@ const execFileAsync = promisify(execFile);
4
4
  /**
5
5
  * Get the name of the terminal app that the user needs to authorize.
6
6
  */
7
- function getTerminalAppName() {
7
+ export function getTerminalAppName() {
8
8
  // Walk up the process tree to find the terminal emulator
9
9
  const ppid = process.ppid;
10
10
  // Common terminal app names
@@ -0,0 +1,74 @@
1
+ import CoreGraphics
2
+ import Foundation
3
+
4
+ // Window enumeration using CGWindowListCopyWindowInfo (WindowServer-level).
5
+ // Replaces JXA System Events enumeration which is slow (3-6s) and unreliable
6
+ // for Electron apps. CGWindowListCopyWindowInfo sees ALL windows regardless
7
+ // of whether the app exposes an AX tree (this helper lists on-screen, layer-0 windows only).
8
+
9
+ struct WinInfo: Encodable {
10
+ let id: String
11
+ let title: String
12
+ let processName: String
13
+ let pid: Int32
14
+ let bounds: Bounds
15
+ let isOnScreen: Bool
16
+ let windowNumber: Int
17
+ }
18
+
19
+ struct Bounds: Encodable {
20
+ let x: Double
21
+ let y: Double
22
+ let width: Double
23
+ let height: Double
24
+ }
25
+
26
+ struct Output: Encodable {
27
+ let windows: [WinInfo]
28
+ let error: String?
29
+ }
30
+
31
+ let options: CGWindowListOption = .optionOnScreenOnly
32
+ guard let windowList = CGWindowListCopyWindowInfo(options, kCGNullWindowID) as? [[String: Any]] else {
33
+ let out = Output(windows: [], error: "CGWindowListCopyWindowInfo returned nil")
34
+ FileHandle.standardOutput.write(try! JSONEncoder().encode(out))
35
+ exit(0)
36
+ }
37
+
38
+ var results: [WinInfo] = []
39
+
40
+ for info in windowList {
41
+ guard let pid = info[kCGWindowOwnerPID as String] as? Int32,
42
+ let windowNumber = info[kCGWindowNumber as String] as? Int,
43
+ let layer = info[kCGWindowLayer as String] as? Int
44
+ else { continue }
45
+
46
+ // Skip non-normal layers (overlay, screen saver, etc.)
47
+ if layer != 0 { continue }
48
+
49
+ let boundsDict = info[kCGWindowBounds as String] as? [String: Any]
50
+ let w = boundsDict?["Width"] as? Double ?? 0
51
+ let h = boundsDict?["Height"] as? Double ?? 0
52
+ if w == 0 || h == 0 { continue }
53
+
54
+ let processName = (info[kCGWindowOwnerName as String] as? String) ?? ""
55
+ if processName.isEmpty { continue }
56
+
57
+ let title = (info[kCGWindowName as String] as? String) ?? ""
58
+ let isOnScreen = (info[kCGWindowIsOnscreen as String] as? Bool) ?? true
59
+ let x = (boundsDict?["X"] as? Double) ?? 0
60
+ let y = (boundsDict?["Y"] as? Double) ?? 0
61
+
62
+ results.append(WinInfo(
63
+ id: "\(processName)/win\(windowNumber)",
64
+ title: title,
65
+ processName: processName,
66
+ pid: pid,
67
+ bounds: Bounds(x: x, y: y, width: w, height: h),
68
+ isOnScreen: isOnScreen,
69
+ windowNumber: windowNumber
70
+ ))
71
+ }
72
+
73
+ let output = Output(windows: results, error: nil)
74
+ FileHandle.standardOutput.write(try! JSONEncoder().encode(output))
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ucu-mcp",
3
- "version": "0.3.7",
3
+ "version": "0.3.9",
4
4
  "description": "MCP server for Universal Computer Use — desktop automation for AI agents via Model Context Protocol",
5
5
  "type": "module",
6
6
  "bin": {
@@ -27,7 +27,7 @@
27
27
  "test:macos-gui": "UCU_MACOS_GUI_SMOKE=1 vitest run tests/integration/macos-gui-smoke.test.ts",
28
28
  "test:client-cli": "UCU_CLIENT_CLI_SMOKE=1 vitest run tests/integration/client-cli-smoke.test.ts",
29
29
  "prepublishOnly": "npx vitest run tests/unit/ && npm run build",
30
- "build:native": "cd native/cgevent && swiftc -O -o cgevent-helper main.swift -framework CoreGraphics -framework Foundation && cd ../ocr && swiftc -O -o ocr-helper main.swift -framework Vision -framework AppKit"
30
+ "build:native": "cd native/cgevent && swiftc -O -o cgevent-helper main.swift -framework CoreGraphics -framework Foundation && cd ../ocr && swiftc -O -o ocr-helper main.swift -framework Vision -framework AppKit && cd ../windowlist && swiftc -O -o windowlist-helper main.swift -framework CoreGraphics -framework Foundation"
31
31
  },
32
32
  "keywords": [
33
33
  "mcp",