ucu-mcp 0.3.8 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/src/mcp/tools.js
CHANGED
|
@@ -306,7 +306,7 @@ export function registerTools(server) {
|
|
|
306
306
|
const axNote = accessibility === "denied"
|
|
307
307
|
? "Accessibility is currently denied to this terminal — grant it via System Settings > Privacy & Security > Accessibility, then retry."
|
|
308
308
|
: accessibility === "granted"
|
|
309
|
-
? "Accessibility is granted. If you expected a specific app to appear here, it is likely an Electron app whose AX tree is not exposed to System Events
|
|
309
|
+
? "Accessibility is granted. If you expected a specific app to appear here, it is likely an Electron app whose AX tree is not exposed to System Events. Pixel-level workaround: call screenshot, then ocr to locate the target UI text and get its bounding box, then click(x, y) at those screen coordinates. Alternatively, modify the app's config file or database directly."
|
|
310
310
|
: "Accessibility status is unknown. Run `doctor` first to verify.";
|
|
311
311
|
diagnostics = { hint: `list_windows returned 0 windows. ${axNote}`, accessibility };
|
|
312
312
|
}
|
|
@@ -471,9 +471,11 @@ export function registerTools(server) {
|
|
|
471
471
|
if (process.platform === "darwin") {
|
|
472
472
|
const cgevent = resolveHelperPath(["native", "cgevent", "cgevent-helper"]);
|
|
473
473
|
const ocr = resolveHelperPath(["native", "ocr", "ocr-helper"]);
|
|
474
|
+
const windowlist = resolveHelperPath(["native", "windowlist", "windowlist-helper"]);
|
|
474
475
|
nativeHelpers = {
|
|
475
476
|
cgevent: { ok: cgevent.path !== null, path: cgevent.path, tried: cgevent.tried.slice(0, 3) },
|
|
476
477
|
ocr: { ok: ocr.path !== null, path: ocr.path, tried: ocr.tried.slice(0, 3) },
|
|
478
|
+
windowlist: { ok: windowlist.path !== null, path: windowlist.path, tried: windowlist.tried.slice(0, 3) },
|
|
477
479
|
};
|
|
478
480
|
}
|
|
479
481
|
let readiness = "ready";
|
|
@@ -497,6 +499,10 @@ export function registerTools(server) {
|
|
|
497
499
|
readiness = readiness === "ready" ? "degraded" : readiness;
|
|
498
500
|
issues.push("Native OCR helper not found (OCR may fail on macOS Sequoia+). Run `npm run build` to compile it, or reinstall ucu-mcp so the helper ships from the tarball.");
|
|
499
501
|
}
|
|
502
|
+
if (!nativeHelpers.windowlist.ok) {
|
|
503
|
+
readiness = readiness === "ready" ? "degraded" : readiness;
|
|
504
|
+
issues.push("Native windowlist helper not found (window enumeration will fall back to slow JXA). Run `npm run build` to compile it.");
|
|
505
|
+
}
|
|
500
506
|
}
|
|
501
507
|
// Heuristic AX hint: if Accessibility is granted but list_windows consistently
|
|
502
508
|
// returns empty for the only app the model cared about, the model has likely
|
|
@@ -504,7 +510,7 @@ export function registerTools(server) {
|
|
|
504
510
|
// Events unless Accessibility is also granted to the Electron process itself,
|
|
505
511
|
// and the app has accessibility features enabled). This block is read-only —
|
|
506
512
|
// we never hit JXA here because the doctor must stay fast and side-effect free.
|
|
507
|
-
const electronHint = "If the target app is Electron (e.g. CC Switch, VS Code, Discord), list_windows may return [] even with Accessibility granted to your terminal. Grant Accessibility to the Electron app itself in System Settings > Privacy & Security > Accessibility, and restart the app.";
|
|
513
|
+
const electronHint = "If the target app is Electron (e.g. CC Switch, VS Code, Discord), list_windows may return [] even with Accessibility granted to your terminal. Grant Accessibility to the Electron app itself in System Settings > Privacy & Security > Accessibility, and restart the app. Pixel-level workaround: use screenshot + ocr to locate UI elements by text, then click(x, y) at the detected bounding box coordinates. Alternatively, modify the app\'s config file or database directly.";
|
|
508
514
|
const clients = {};
|
|
509
515
|
for (const bin of ["claude", "codex", "opencode", "npx"]) {
|
|
510
516
|
try {
|
|
@@ -526,7 +532,10 @@ export function registerTools(server) {
|
|
|
526
532
|
}
|
|
527
533
|
if (readiness !== "ready") {
|
|
528
534
|
if (process.platform === "darwin" && nativeHelpers && (!nativeHelpers.cgevent.ok || !nativeHelpers.ocr.ok)) {
|
|
529
|
-
recommendations.push("Run `npm run build` in the ucu-mcp project to compile native Swift helpers (cgevent-helper, ocr-helper).");
|
|
535
|
+
recommendations.push("Run `npm run build` in the ucu-mcp project to compile native Swift helpers (cgevent-helper, ocr-helper, windowlist-helper).");
|
|
536
|
+
}
|
|
537
|
+
if (process.platform === "darwin" && nativeHelpers && !nativeHelpers.windowlist.ok) {
|
|
538
|
+
recommendations.push("windowlist helper missing — list_windows will fall back to JXA (~3-6s, unreliable for Electron). Run `npm run build`.");
|
|
530
539
|
}
|
|
531
540
|
}
|
|
532
541
|
if (readiness === "ready") {
|
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions, FindElementResponse, AppInfo, AppTarget, BrowserContext, ScreenshotOptions } from "./base.js";
|
|
2
|
+
export interface MacOSPlatformOptions {
|
|
3
|
+
/**
|
|
4
|
+
* Override native helper resolution.
|
|
5
|
+
* - Map of folder name to absolute binary path to inject a specific helper.
|
|
6
|
+
* - Set a value to null to skip that helper (force JXA fallback).
|
|
7
|
+
* Used by tests to control native helper behavior without filesystem tricks.
|
|
8
|
+
*/
|
|
9
|
+
nativeHelperPaths?: Record<string, string | null>;
|
|
10
|
+
}
|
|
2
11
|
export declare class MacOSPlatform implements Platform {
|
|
12
|
+
private readonly _nativeHelperPaths;
|
|
3
13
|
private readonly elementCache;
|
|
4
14
|
private readonly elementCacheTtlMs;
|
|
5
15
|
private readonly elementCacheMaxSize;
|
|
@@ -7,6 +17,7 @@ export declare class MacOSPlatform implements Platform {
|
|
|
7
17
|
private windowCache;
|
|
8
18
|
private activeTarget;
|
|
9
19
|
private savedFocus;
|
|
20
|
+
constructor(options?: MacOSPlatformOptions);
|
|
10
21
|
/** Remove expired entries from the element cache. */
|
|
11
22
|
private evictExpiredCacheEntries;
|
|
12
23
|
/** Evict oldest entries when cache exceeds the maximum size (LRU-style). */
|
|
@@ -27,6 +38,9 @@ export declare class MacOSPlatform implements Platform {
|
|
|
27
38
|
focusApp(app: string): Promise<AppTarget>;
|
|
28
39
|
getActiveBrowserContext(app?: string): Promise<BrowserContext | undefined>;
|
|
29
40
|
listWindows(_includeMinimized?: boolean): Promise<WindowInfo[]>;
|
|
41
|
+
private listWindowsNative;
|
|
42
|
+
private resolveNativeHelper;
|
|
43
|
+
private listWindowsJxa;
|
|
30
44
|
getWindowState(windowId?: string, depth?: number, includeBounds?: boolean): Promise<WindowState>;
|
|
31
45
|
click(x: number, y: number, button?: "left" | "right" | "middle", doubleClick?: boolean): Promise<void>;
|
|
32
46
|
move(x: number, y: number): Promise<void>;
|
|
@@ -4,6 +4,10 @@ import { promisify } from "node:util";
|
|
|
4
4
|
import { captureFullScreen, captureRegion } from "../utils/screenshot.js";
|
|
5
5
|
import { click as inputClick, doubleClick as inputDoubleClick, move as inputMove, drag as inputDrag, scroll as inputScroll, typeText, pressShortcut } from "../utils/input.js";
|
|
6
6
|
import { CaptureError, ElementNotFoundError, InputSynthesisError, PermissionError, PlatformError, TargetStaleError, UcuError, WindowNotFoundError } from "../util/errors.js";
|
|
7
|
+
import { existsSync } from "node:fs";
|
|
8
|
+
import { join, dirname } from "node:path";
|
|
9
|
+
import { fileURLToPath } from "node:url";
|
|
10
|
+
const __macosDirname = dirname(fileURLToPath(import.meta.url));
|
|
7
11
|
const execFileAsync = promisify(execFile);
|
|
8
12
|
function errorMessage(error) {
|
|
9
13
|
return error instanceof Error ? error.message : String(error);
|
|
@@ -59,6 +63,7 @@ function selectWindowForApp(windows, requestedApp) {
|
|
|
59
63
|
windows.find((window) => appNameMatches(window.processName, requestedApp));
|
|
60
64
|
}
|
|
61
65
|
export class MacOSPlatform {
|
|
66
|
+
_nativeHelperPaths;
|
|
62
67
|
elementCache = new Map();
|
|
63
68
|
elementCacheTtlMs = 30_000;
|
|
64
69
|
elementCacheMaxSize = 100;
|
|
@@ -66,6 +71,9 @@ export class MacOSPlatform {
|
|
|
66
71
|
windowCache;
|
|
67
72
|
activeTarget;
|
|
68
73
|
savedFocus;
|
|
74
|
+
constructor(options) {
|
|
75
|
+
this._nativeHelperPaths = options?.nativeHelperPaths;
|
|
76
|
+
}
|
|
69
77
|
// ── Element Cache Management ────────────────────────────────────────────
|
|
70
78
|
/** Remove expired entries from the element cache. */
|
|
71
79
|
evictExpiredCacheEntries() {
|
|
@@ -260,8 +268,10 @@ export class MacOSPlatform {
|
|
|
260
268
|
"the most likely cause is that it is an Electron app whose AX tree is " +
|
|
261
269
|
"not exposed to System Events (System Settings > Privacy & Security > " +
|
|
262
270
|
"Accessibility must be granted to the Electron process itself, not just " +
|
|
263
|
-
"to the host terminal).
|
|
264
|
-
"
|
|
271
|
+
"to the host terminal). Pixel-level workaround: call screenshot to " +
|
|
272
|
+
"capture the screen, then ocr to locate UI text and get its bounding " +
|
|
273
|
+
"box coordinates, then click(x, y) at those screen coordinates. " +
|
|
274
|
+
"Alternatively, modify the app's config file or database directly.";
|
|
265
275
|
throw err;
|
|
266
276
|
}
|
|
267
277
|
this.activeTarget = {
|
|
@@ -334,11 +344,81 @@ export class MacOSPlatform {
|
|
|
334
344
|
}));
|
|
335
345
|
}
|
|
336
346
|
try {
|
|
337
|
-
//
|
|
338
|
-
//
|
|
339
|
-
//
|
|
340
|
-
//
|
|
341
|
-
|
|
347
|
+
// Try native Swift helper first (CGWindowListCopyWindowInfo, ~1ms).
|
|
348
|
+
// Falls back to JXA System Events if the helper is not available.
|
|
349
|
+
// The native helper reliably enumerates ALL windows including Electron
|
|
350
|
+
// apps, whereas JXA relies on System Events AX which is inconsistent
|
|
351
|
+
// for Chromium-rendered windows.
|
|
352
|
+
let windows;
|
|
353
|
+
const nativeResult = this.listWindowsNative();
|
|
354
|
+
if (nativeResult !== null) {
|
|
355
|
+
windows = nativeResult;
|
|
356
|
+
}
|
|
357
|
+
else {
|
|
358
|
+
windows = await this.listWindowsJxa();
|
|
359
|
+
}
|
|
360
|
+
this.windowCache = {
|
|
361
|
+
cachedAt: Date.now(),
|
|
362
|
+
windows: windows.map((window) => ({
|
|
363
|
+
...window,
|
|
364
|
+
bounds: { ...window.bounds },
|
|
365
|
+
})),
|
|
366
|
+
};
|
|
367
|
+
return windows;
|
|
368
|
+
}
|
|
369
|
+
catch {
|
|
370
|
+
// Fallback: return empty list if both methods fail
|
|
371
|
+
return [];
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
listWindowsNative() {
|
|
375
|
+
try {
|
|
376
|
+
const helperPath = this.resolveNativeHelper("windowlist", "windowlist-helper");
|
|
377
|
+
if (!helperPath)
|
|
378
|
+
return null;
|
|
379
|
+
const out = execFileSync(helperPath, [], {
|
|
380
|
+
encoding: "utf-8",
|
|
381
|
+
timeout: 5000,
|
|
382
|
+
});
|
|
383
|
+
const parsed = JSON.parse(out.trim());
|
|
384
|
+
if (parsed.error)
|
|
385
|
+
return null;
|
|
386
|
+
return parsed.windows.map(w => ({
|
|
387
|
+
id: w.id,
|
|
388
|
+
title: w.title,
|
|
389
|
+
processName: w.processName,
|
|
390
|
+
pid: w.pid,
|
|
391
|
+
bounds: w.bounds,
|
|
392
|
+
isMinimized: !w.isOnScreen,
|
|
393
|
+
isOnScreen: w.isOnScreen,
|
|
394
|
+
}));
|
|
395
|
+
}
|
|
396
|
+
catch {
|
|
397
|
+
return null;
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
resolveNativeHelper(folder, binary) {
|
|
401
|
+
// Test injection: if the caller provided explicit paths, use those
|
|
402
|
+
// instead of hitting the filesystem.
|
|
403
|
+
if (this._nativeHelperPaths && folder in this._nativeHelperPaths) {
|
|
404
|
+
const override = this._nativeHelperPaths[folder];
|
|
405
|
+
// null means "skip native, force JXA fallback"
|
|
406
|
+
return override === null ? null : override;
|
|
407
|
+
}
|
|
408
|
+
// dev: src/platform/macos.ts → native/<folder>/<binary>
|
|
409
|
+
// prod: dist/src/platform/macos.js → native/<folder>/<binary>
|
|
410
|
+
const candidates = [
|
|
411
|
+
join(__macosDirname, "..", "..", "native", folder, binary),
|
|
412
|
+
join(__macosDirname, "..", "..", "..", "native", folder, binary),
|
|
413
|
+
];
|
|
414
|
+
for (const p of candidates) {
|
|
415
|
+
if (existsSync(p))
|
|
416
|
+
return p;
|
|
417
|
+
}
|
|
418
|
+
return null;
|
|
419
|
+
}
|
|
420
|
+
async listWindowsJxa() {
|
|
421
|
+
const jxaScript = `
|
|
342
422
|
var se = Application('System Events');
|
|
343
423
|
var result = [];
|
|
344
424
|
var procs = se.processes();
|
|
@@ -372,24 +452,11 @@ export class MacOSPlatform {
|
|
|
372
452
|
}
|
|
373
453
|
JSON.stringify(result);
|
|
374
454
|
`;
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
this.windowCache = {
|
|
381
|
-
cachedAt: Date.now(),
|
|
382
|
-
windows: windows.map((window) => ({
|
|
383
|
-
...window,
|
|
384
|
-
bounds: { ...window.bounds },
|
|
385
|
-
})),
|
|
386
|
-
};
|
|
387
|
-
return windows;
|
|
388
|
-
}
|
|
389
|
-
catch {
|
|
390
|
-
// Fallback: return empty list if JXA fails
|
|
391
|
-
return [];
|
|
392
|
-
}
|
|
455
|
+
const jxaOut = execFileSync("osascript", [
|
|
456
|
+
"-l", "JavaScript",
|
|
457
|
+
"-e", jxaScript
|
|
458
|
+
], { encoding: "utf-8", timeout: 15000 });
|
|
459
|
+
return JSON.parse(jxaOut.trim());
|
|
393
460
|
}
|
|
394
461
|
async getWindowState(windowId, depth, includeBounds = true) {
|
|
395
462
|
if (!windowId || windowId === this.activeTarget?.windowId) {
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import CoreGraphics
|
|
2
|
+
import Foundation
|
|
3
|
+
|
|
4
|
+
// Window enumeration using CGWindowListCopyWindowInfo (WindowServer-level).
|
|
5
|
+
// Replaces JXA System Events enumeration which is slow (3-6s) and unreliable
|
|
6
|
+
// for Electron apps. CGWindowListCopyWindowInfo sees ALL windows regardless
|
|
7
|
+
// of whether the app exposes an AX tree.
|
|
8
|
+
|
|
9
|
+
struct WinInfo: Encodable {
|
|
10
|
+
let id: String
|
|
11
|
+
let title: String
|
|
12
|
+
let processName: String
|
|
13
|
+
let pid: Int32
|
|
14
|
+
let bounds: Bounds
|
|
15
|
+
let isOnScreen: Bool
|
|
16
|
+
let windowNumber: Int
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
struct Bounds: Encodable {
|
|
20
|
+
let x: Double
|
|
21
|
+
let y: Double
|
|
22
|
+
let width: Double
|
|
23
|
+
let height: Double
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
struct Output: Encodable {
|
|
27
|
+
let windows: [WinInfo]
|
|
28
|
+
let error: String?
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
let options: CGWindowListOption = .optionOnScreenOnly
|
|
32
|
+
guard let windowList = CGWindowListCopyWindowInfo(options, kCGNullWindowID) as? [[String: Any]] else {
|
|
33
|
+
let out = Output(windows: [], error: "CGWindowListCopyWindowInfo returned nil")
|
|
34
|
+
FileHandle.standardOutput.write(try! JSONEncoder().encode(out))
|
|
35
|
+
exit(0)
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
var results: [WinInfo] = []
|
|
39
|
+
|
|
40
|
+
for info in windowList {
|
|
41
|
+
guard let pid = info[kCGWindowOwnerPID as String] as? Int32,
|
|
42
|
+
let windowNumber = info[kCGWindowNumber as String] as? Int,
|
|
43
|
+
let layer = info[kCGWindowLayer as String] as? Int
|
|
44
|
+
else { continue }
|
|
45
|
+
|
|
46
|
+
// Skip non-normal layers (overlay, screen saver, etc.)
|
|
47
|
+
if layer != 0 { continue }
|
|
48
|
+
|
|
49
|
+
let boundsDict = info[kCGWindowBounds as String] as? [String: Any]
|
|
50
|
+
let w = boundsDict?["Width"] as? Double ?? 0
|
|
51
|
+
let h = boundsDict?["Height"] as? Double ?? 0
|
|
52
|
+
if w == 0 || h == 0 { continue }
|
|
53
|
+
|
|
54
|
+
let processName = (info[kCGWindowOwnerName as String] as? String) ?? ""
|
|
55
|
+
if processName.isEmpty { continue }
|
|
56
|
+
|
|
57
|
+
let title = (info[kCGWindowName as String] as? String) ?? ""
|
|
58
|
+
let isOnScreen = (info[kCGWindowIsOnscreen as String] as? Bool) ?? true
|
|
59
|
+
let x = (boundsDict?["X"] as? Double) ?? 0
|
|
60
|
+
let y = (boundsDict?["Y"] as? Double) ?? 0
|
|
61
|
+
|
|
62
|
+
results.append(WinInfo(
|
|
63
|
+
id: "\(processName)/win\(windowNumber)",
|
|
64
|
+
title: title,
|
|
65
|
+
processName: processName,
|
|
66
|
+
pid: pid,
|
|
67
|
+
bounds: Bounds(x: x, y: y, width: w, height: h),
|
|
68
|
+
isOnScreen: isOnScreen,
|
|
69
|
+
windowNumber: windowNumber
|
|
70
|
+
))
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
let output = Output(windows: results, error: nil)
|
|
74
|
+
FileHandle.standardOutput.write(try! JSONEncoder().encode(output))
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ucu-mcp",
|
|
3
|
-
"version": "0.3.8",
|
|
3
|
+
"version": "0.3.9",
|
|
4
4
|
"description": "MCP server for Universal Computer Use — desktop automation for AI agents via Model Context Protocol",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
"test:macos-gui": "UCU_MACOS_GUI_SMOKE=1 vitest run tests/integration/macos-gui-smoke.test.ts",
|
|
28
28
|
"test:client-cli": "UCU_CLIENT_CLI_SMOKE=1 vitest run tests/integration/client-cli-smoke.test.ts",
|
|
29
29
|
"prepublishOnly": "npx vitest run tests/unit/ && npm run build",
|
|
30
|
-
"build:native": "cd native/cgevent && swiftc -O -o cgevent-helper main.swift -framework CoreGraphics -framework Foundation && cd ../ocr && swiftc -O -o ocr-helper main.swift -framework Vision -framework AppKit"
|
|
30
|
+
"build:native": "cd native/cgevent && swiftc -O -o cgevent-helper main.swift -framework CoreGraphics -framework Foundation && cd ../ocr && swiftc -O -o ocr-helper main.swift -framework Vision -framework AppKit && cd ../windowlist && swiftc -O -o windowlist-helper main.swift -framework CoreGraphics -framework Foundation"
|
|
31
31
|
},
|
|
32
32
|
"keywords": [
|
|
33
33
|
"mcp",
|