ucu-mcp 0.3.8 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -306,7 +306,7 @@ export function registerTools(server) {
306
306
  const axNote = accessibility === "denied"
307
307
  ? "Accessibility is currently denied to this terminal — grant it via System Settings > Privacy & Security > Accessibility, then retry."
308
308
  : accessibility === "granted"
309
- ? "Accessibility is granted. If you expected a specific app to appear here, it is likely an Electron app whose AX tree is not exposed to System Events; try modifying its config file or database directly rather than driving the UI."
309
+ ? "Accessibility is granted. If you expected a specific app to appear here, it is likely an Electron app whose AX tree is not exposed to System Events. Pixel-level workaround: call screenshot, then ocr to locate the target UI text and get its bounding box, then click(x, y) at those screen coordinates. Alternatively, modify the app's config file or database directly."
310
310
  : "Accessibility status is unknown. Run `doctor` first to verify.";
311
311
  diagnostics = { hint: `list_windows returned 0 windows. ${axNote}`, accessibility };
312
312
  }
@@ -471,9 +471,11 @@ export function registerTools(server) {
471
471
  if (process.platform === "darwin") {
472
472
  const cgevent = resolveHelperPath(["native", "cgevent", "cgevent-helper"]);
473
473
  const ocr = resolveHelperPath(["native", "ocr", "ocr-helper"]);
474
+ const windowlist = resolveHelperPath(["native", "windowlist", "windowlist-helper"]);
474
475
  nativeHelpers = {
475
476
  cgevent: { ok: cgevent.path !== null, path: cgevent.path, tried: cgevent.tried.slice(0, 3) },
476
477
  ocr: { ok: ocr.path !== null, path: ocr.path, tried: ocr.tried.slice(0, 3) },
478
+ windowlist: { ok: windowlist.path !== null, path: windowlist.path, tried: windowlist.tried.slice(0, 3) },
477
479
  };
478
480
  }
479
481
  let readiness = "ready";
@@ -497,6 +499,10 @@ export function registerTools(server) {
497
499
  readiness = readiness === "ready" ? "degraded" : readiness;
498
500
  issues.push("Native OCR helper not found (OCR may fail on macOS Sequoia+). Run `npm run build` to compile it, or reinstall ucu-mcp so the helper ships from the tarball.");
499
501
  }
502
+ if (!nativeHelpers.windowlist.ok) {
503
+ readiness = readiness === "ready" ? "degraded" : readiness;
504
+ issues.push("Native windowlist helper not found (window enumeration will fall back to slow JXA). Run `npm run build` to compile it.");
505
+ }
500
506
  }
501
507
  // Heuristic AX hint: if Accessibility is granted but list_windows consistently
502
508
  // returns empty for the only app the model cared about, the model has likely
@@ -504,7 +510,7 @@ export function registerTools(server) {
504
510
  // Events unless Accessibility is also granted to the Electron process itself,
505
511
  // and the app has accessibility features enabled). This block is read-only —
506
512
  // we never hit JXA here because the doctor must stay fast and side-effect free.
507
- const electronHint = "If the target app is Electron (e.g. CC Switch, VS Code, Discord), list_windows may return [] even with Accessibility granted to your terminal. Grant Accessibility to the Electron app itself in System Settings > Privacy & Security > Accessibility, and restart the app. As a workaround, modify the app\'s config file or database directly rather than driving the UI.";
513
+ const electronHint = "If the target app is Electron (e.g. CC Switch, VS Code, Discord), list_windows may return [] even with Accessibility granted to your terminal. Grant Accessibility to the Electron app itself in System Settings > Privacy & Security > Accessibility, and restart the app. Pixel-level workaround: use screenshot + ocr to locate UI elements by text, then click(x, y) at the detected bounding box coordinates. Alternatively, modify the app\'s config file or database directly.";
508
514
  const clients = {};
509
515
  for (const bin of ["claude", "codex", "opencode", "npx"]) {
510
516
  try {
@@ -526,7 +532,10 @@ export function registerTools(server) {
526
532
  }
527
533
  if (readiness !== "ready") {
528
534
  if (process.platform === "darwin" && nativeHelpers && (!nativeHelpers.cgevent.ok || !nativeHelpers.ocr.ok)) {
529
- recommendations.push("Run `npm run build` in the ucu-mcp project to compile native Swift helpers (cgevent-helper, ocr-helper).");
535
+ recommendations.push("Run `npm run build` in the ucu-mcp project to compile native Swift helpers (cgevent-helper, ocr-helper, windowlist-helper).");
536
+ }
537
+ if (process.platform === "darwin" && nativeHelpers && !nativeHelpers.windowlist.ok) {
538
+ recommendations.push("windowlist helper missing — list_windows will fall back to JXA (~3-6s, unreliable for Electron). Run `npm run build`.");
530
539
  }
531
540
  }
532
541
  if (readiness === "ready") {
@@ -1,5 +1,15 @@
1
1
  import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions, FindElementResponse, AppInfo, AppTarget, BrowserContext, ScreenshotOptions } from "./base.js";
2
/** Options accepted by the MacOSPlatform constructor. */
export interface MacOSPlatformOptions {
    /**
     * Overrides for native helper resolution, keyed by helper folder name.
     *
     * - A string value is the absolute path of the binary to use for that helper.
     * - A `null` value skips that helper, forcing the JXA fallback.
     *
     * Intended for tests, so native helper behavior can be controlled
     * without filesystem tricks.
     */
    nativeHelperPaths?: Record<string, string | null>;
}
2
11
  export declare class MacOSPlatform implements Platform {
12
+ private readonly _nativeHelperPaths;
3
13
  private readonly elementCache;
4
14
  private readonly elementCacheTtlMs;
5
15
  private readonly elementCacheMaxSize;
@@ -7,6 +17,7 @@ export declare class MacOSPlatform implements Platform {
7
17
  private windowCache;
8
18
  private activeTarget;
9
19
  private savedFocus;
20
+ constructor(options?: MacOSPlatformOptions);
10
21
  /** Remove expired entries from the element cache. */
11
22
  private evictExpiredCacheEntries;
12
23
  /** Evict oldest entries when cache exceeds the maximum size (LRU-style). */
@@ -27,6 +38,9 @@ export declare class MacOSPlatform implements Platform {
27
38
  focusApp(app: string): Promise<AppTarget>;
28
39
  getActiveBrowserContext(app?: string): Promise<BrowserContext | undefined>;
29
40
  listWindows(_includeMinimized?: boolean): Promise<WindowInfo[]>;
41
+ private listWindowsNative;
42
+ private resolveNativeHelper;
43
+ private listWindowsJxa;
30
44
  getWindowState(windowId?: string, depth?: number, includeBounds?: boolean): Promise<WindowState>;
31
45
  click(x: number, y: number, button?: "left" | "right" | "middle", doubleClick?: boolean): Promise<void>;
32
46
  move(x: number, y: number): Promise<void>;
@@ -4,6 +4,10 @@ import { promisify } from "node:util";
4
4
  import { captureFullScreen, captureRegion } from "../utils/screenshot.js";
5
5
  import { click as inputClick, doubleClick as inputDoubleClick, move as inputMove, drag as inputDrag, scroll as inputScroll, typeText, pressShortcut } from "../utils/input.js";
6
6
  import { CaptureError, ElementNotFoundError, InputSynthesisError, PermissionError, PlatformError, TargetStaleError, UcuError, WindowNotFoundError } from "../util/errors.js";
7
+ import { existsSync } from "node:fs";
8
+ import { join, dirname } from "node:path";
9
+ import { fileURLToPath } from "node:url";
10
+ const __macosDirname = dirname(fileURLToPath(import.meta.url));
7
11
  const execFileAsync = promisify(execFile);
8
12
  function errorMessage(error) {
9
13
  return error instanceof Error ? error.message : String(error);
@@ -59,6 +63,7 @@ function selectWindowForApp(windows, requestedApp) {
59
63
  windows.find((window) => appNameMatches(window.processName, requestedApp));
60
64
  }
61
65
  export class MacOSPlatform {
66
+ _nativeHelperPaths;
62
67
  elementCache = new Map();
63
68
  elementCacheTtlMs = 30_000;
64
69
  elementCacheMaxSize = 100;
@@ -66,6 +71,9 @@ export class MacOSPlatform {
66
71
  windowCache;
67
72
  activeTarget;
68
73
  savedFocus;
74
+ constructor(options) {
75
+ this._nativeHelperPaths = options?.nativeHelperPaths;
76
+ }
69
77
  // ── Element Cache Management ────────────────────────────────────────────
70
78
  /** Remove expired entries from the element cache. */
71
79
  evictExpiredCacheEntries() {
@@ -260,8 +268,10 @@ export class MacOSPlatform {
260
268
  "the most likely cause is that it is an Electron app whose AX tree is " +
261
269
  "not exposed to System Events (System Settings > Privacy & Security > " +
262
270
  "Accessibility must be granted to the Electron process itself, not just " +
263
- "to the host terminal). As a workaround, modify the app's config file " +
264
- "or database directly.";
271
+ "to the host terminal). Pixel-level workaround: call screenshot to " +
272
+ "capture the screen, then ocr to locate UI text and get its bounding " +
273
+ "box coordinates, then click(x, y) at those screen coordinates. " +
274
+ "Alternatively, modify the app's config file or database directly.";
265
275
  throw err;
266
276
  }
267
277
  this.activeTarget = {
@@ -334,11 +344,81 @@ export class MacOSPlatform {
334
344
  }));
335
345
  }
336
346
  try {
337
- // Use System Events instead of CGWindowListCopyWindowInfo.
338
- // The CoreGraphics API returns CFArrayRef/CFDictionaryRef which JXA
339
- // cannot iterate reliably CFArrayGetCount works but objectAtIndex
340
- // does not. System Events JXA is slower (~3-6s) but correct.
341
- const jxaScript = `
347
+ // Try native Swift helper first (CGWindowListCopyWindowInfo, ~1ms).
348
+ // Falls back to JXA System Events if the helper is not available.
349
+ // The native helper reliably enumerates ALL windows including Electron
350
+ // apps, whereas JXA relies on System Events AX which is inconsistent
351
+ // for Chromium-rendered windows.
352
+ let windows;
353
+ const nativeResult = this.listWindowsNative();
354
+ if (nativeResult !== null) {
355
+ windows = nativeResult;
356
+ }
357
+ else {
358
+ windows = await this.listWindowsJxa();
359
+ }
360
+ this.windowCache = {
361
+ cachedAt: Date.now(),
362
+ windows: windows.map((window) => ({
363
+ ...window,
364
+ bounds: { ...window.bounds },
365
+ })),
366
+ };
367
+ return windows;
368
+ }
369
+ catch {
370
+ // Fallback: return empty list if both methods fail
371
+ return [];
372
+ }
373
+ }
374
+ listWindowsNative() {
375
+ try {
376
+ const helperPath = this.resolveNativeHelper("windowlist", "windowlist-helper");
377
+ if (!helperPath)
378
+ return null;
379
+ const out = execFileSync(helperPath, [], {
380
+ encoding: "utf-8",
381
+ timeout: 5000,
382
+ });
383
+ const parsed = JSON.parse(out.trim());
384
+ if (parsed.error)
385
+ return null;
386
+ return parsed.windows.map(w => ({
387
+ id: w.id,
388
+ title: w.title,
389
+ processName: w.processName,
390
+ pid: w.pid,
391
+ bounds: w.bounds,
392
+ isMinimized: !w.isOnScreen,
393
+ isOnScreen: w.isOnScreen,
394
+ }));
395
+ }
396
+ catch {
397
+ return null;
398
+ }
399
+ }
400
+ resolveNativeHelper(folder, binary) {
401
+ // Test injection: if the caller provided explicit paths, use those
402
+ // instead of hitting the filesystem.
403
+ if (this._nativeHelperPaths && folder in this._nativeHelperPaths) {
404
+ const override = this._nativeHelperPaths[folder];
405
+ // null means "skip native, force JXA fallback"
406
+ return override === null ? null : override;
407
+ }
408
+ // dev: src/platform/macos.ts → native/<folder>/<binary>
409
+ // prod: dist/src/platform/macos.js → native/<folder>/<binary>
410
+ const candidates = [
411
+ join(__macosDirname, "..", "..", "native", folder, binary),
412
+ join(__macosDirname, "..", "..", "..", "native", folder, binary),
413
+ ];
414
+ for (const p of candidates) {
415
+ if (existsSync(p))
416
+ return p;
417
+ }
418
+ return null;
419
+ }
420
+ async listWindowsJxa() {
421
+ const jxaScript = `
342
422
  var se = Application('System Events');
343
423
  var result = [];
344
424
  var procs = se.processes();
@@ -372,24 +452,11 @@ export class MacOSPlatform {
372
452
  }
373
453
  JSON.stringify(result);
374
454
  `;
375
- const jxaOut = execFileSync("osascript", [
376
- "-l", "JavaScript",
377
- "-e", jxaScript
378
- ], { encoding: "utf-8", timeout: 15000 });
379
- const windows = JSON.parse(jxaOut.trim());
380
- this.windowCache = {
381
- cachedAt: Date.now(),
382
- windows: windows.map((window) => ({
383
- ...window,
384
- bounds: { ...window.bounds },
385
- })),
386
- };
387
- return windows;
388
- }
389
- catch {
390
- // Fallback: return empty list if JXA fails
391
- return [];
392
- }
455
+ const jxaOut = execFileSync("osascript", [
456
+ "-l", "JavaScript",
457
+ "-e", jxaScript
458
+ ], { encoding: "utf-8", timeout: 15000 });
459
+ return JSON.parse(jxaOut.trim());
393
460
  }
394
461
  async getWindowState(windowId, depth, includeBounds = true) {
395
462
  if (!windowId || windowId === this.activeTarget?.windowId) {
@@ -0,0 +1,74 @@
1
+ import CoreGraphics
2
+ import Foundation
3
+
4
+ // Window enumeration using CGWindowListCopyWindowInfo (WindowServer-level).
5
+ // Replaces JXA System Events enumeration which is slow (3-6s) and unreliable
6
+ // for Electron apps. CGWindowListCopyWindowInfo sees ALL windows regardless
7
+ // of whether the app exposes an AX tree.
8
+
9
/// One window entry in the JSON payload printed by this helper.
struct WinInfo: Encodable {
    /// Identifier built as "<processName>/win<windowNumber>".
    let id: String
    /// Window title; empty when the window dictionary carries no name.
    let title: String
    let processName: String
    let pid: Int32
    let bounds: Bounds
    let isOnScreen: Bool
    let windowNumber: Int
}

/// Window rectangle, copied from the X/Y/Width/Height keys of the bounds dictionary.
struct Bounds: Encodable {
    let x: Double
    let y: Double
    let width: Double
    let height: Double
}

/// Top-level payload: the window list plus an optional error message.
struct Output: Encodable {
    let windows: [WinInfo]
    let error: String?
}

/// Writes `output` to stdout as JSON.
///
/// Encoding can throw (for example on a non-finite `Double` under
/// `JSONEncoder`'s default float strategy). Instead of crashing with no
/// output, a hand-written payload with the same shape is emitted so the
/// reader still receives parseable JSON with an `error` field.
func emit(_ output: Output) {
    do {
        FileHandle.standardOutput.write(try JSONEncoder().encode(output))
    } catch {
        let fallback = #"{"windows":[],"error":"failed to encode window list"}"#
        FileHandle.standardOutput.write(Data(fallback.utf8))
    }
}

/// Converts one window dictionary into a `WinInfo`.
///
/// - Parameter info: A single entry of the array returned by
///   `CGWindowListCopyWindowInfo`.
/// - Returns: `nil` when the entry lacks a pid, window number or layer, is
///   not on layer 0, has a zero width or height, or has no owner name.
func makeWinInfo(from info: [String: Any]) -> WinInfo? {
    guard let pid = info[kCGWindowOwnerPID as String] as? Int32,
          let windowNumber = info[kCGWindowNumber as String] as? Int,
          let layer = info[kCGWindowLayer as String] as? Int,
          // Skip non-normal layers (overlay, screen saver, etc.)
          layer == 0
    else { return nil }

    let boundsDict = info[kCGWindowBounds as String] as? [String: Any]
    let width = (boundsDict?["Width"] as? Double) ?? 0
    let height = (boundsDict?["Height"] as? Double) ?? 0
    // Zero-sized entries (including ones with no usable bounds) are dropped.
    guard width != 0, height != 0 else { return nil }

    guard let processName = info[kCGWindowOwnerName as String] as? String,
          !processName.isEmpty
    else { return nil }

    let title = (info[kCGWindowName as String] as? String) ?? ""
    // NOTE(review): the list is requested with `.optionOnScreenOnly`, so this
    // flag is presumably always true here; confirm whether off-screen or
    // minimized windows are meant to be reported.
    let isOnScreen = (info[kCGWindowIsOnscreen as String] as? Bool) ?? true
    let x = (boundsDict?["X"] as? Double) ?? 0
    let y = (boundsDict?["Y"] as? Double) ?? 0

    return WinInfo(
        id: "\(processName)/win\(windowNumber)",
        title: title,
        processName: processName,
        pid: pid,
        bounds: Bounds(x: x, y: y, width: width, height: height),
        isOnScreen: isOnScreen,
        windowNumber: windowNumber
    )
}

let options: CGWindowListOption = .optionOnScreenOnly
guard let windowList = CGWindowListCopyWindowInfo(options, kCGNullWindowID) as? [[String: Any]] else {
    // Failure is reported through the `error` field; the exit status stays 0
    // so the reader parses stdout rather than treating the run as a crash.
    emit(Output(windows: [], error: "CGWindowListCopyWindowInfo returned nil"))
    exit(0)
}

emit(Output(windows: windowList.compactMap(makeWinInfo(from:)), error: nil))
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ucu-mcp",
3
- "version": "0.3.8",
3
+ "version": "0.3.9",
4
4
  "description": "MCP server for Universal Computer Use — desktop automation for AI agents via Model Context Protocol",
5
5
  "type": "module",
6
6
  "bin": {
@@ -27,7 +27,7 @@
27
27
  "test:macos-gui": "UCU_MACOS_GUI_SMOKE=1 vitest run tests/integration/macos-gui-smoke.test.ts",
28
28
  "test:client-cli": "UCU_CLIENT_CLI_SMOKE=1 vitest run tests/integration/client-cli-smoke.test.ts",
29
29
  "prepublishOnly": "npx vitest run tests/unit/ && npm run build",
30
- "build:native": "cd native/cgevent && swiftc -O -o cgevent-helper main.swift -framework CoreGraphics -framework Foundation && cd ../ocr && swiftc -O -o ocr-helper main.swift -framework Vision -framework AppKit"
30
+ "build:native": "cd native/cgevent && swiftc -O -o cgevent-helper main.swift -framework CoreGraphics -framework Foundation && cd ../ocr && swiftc -O -o ocr-helper main.swift -framework Vision -framework AppKit && cd ../windowlist && swiftc -O -o windowlist-helper main.swift -framework CoreGraphics -framework Foundation"
31
31
  },
32
32
  "keywords": [
33
33
  "mcp",