ucu-mcp 0.3.7 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/dist/src/mcp/tools.js +121 -31
- package/dist/src/platform/macos.d.ts +14 -0
- package/dist/src/platform/macos.js +117 -26
- package/dist/src/safety/guard.js +4 -0
- package/dist/src/safety/permissions.d.ts +4 -0
- package/dist/src/safety/permissions.js +1 -1
- package/native/windowlist/main.swift +74 -0
- package/native/windowlist/windowlist-helper +0 -0
- package/package.json +2 -2
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.3.8] - 2026-06-08
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
|
|
12
|
+
- `focus_app` no longer trips the user-activity pause. It used to be classified as `"other"` (neither observe nor input) so a recent mouse movement could block `focus_app` for 2 s; it is now in `OBSERVE_ACTIONS`, matching the production `withSafety` default. Symptom: OpenCode could not switch the active target app (e.g. CC Switch) without retrying until the cursor had been still for 2 s.
|
|
13
|
+
- `doctor` native-helper path resolution now checks `process.argv[1]` (npm / npx / global install), walks `import.meta.url` up to 4 levels, and falls back to `npm root -g`. Previously, when the MCP client launched `ucu-mcp` from a cwd other than the project root (the common case for `npx ucu-mcp`), the helper binaries would report as missing even though they were in the tarball. The new report includes `path` and a `tried[]` list so the model can see what was checked.
|
|
14
|
+
- `doctor` recommendations now list each missing macOS permission on its own line, name the host terminal app (so the user knows which entry to grant in System Settings), and add an Electron AX hint for the common case where `list_windows` returns `[]` even with Accessibility granted.
|
|
15
|
+
|
|
16
|
+
### Tests
|
|
17
|
+
|
|
18
|
+
- `safety-guard`: `focus_app` is in `OBSERVE_ACTIONS`; `classifyAction("focus_app") === "observe"`; `withSafety`'s default `skipUserActivityPause` lets the call through even mid user-activity.
|
|
19
|
+
- `errors`: `WindowNotFoundError` preserves an inline `hint` field set by the platform layer, surfaced in the MCP error response.
|
|
20
|
+
- `macos-platform`: OCR JXA `"Failed to load screenshot image"` is re-thrown as `CaptureError` with a hint pointing at the missing Screen Recording permission (the typical cause is `screencapture` writing a 0-byte file when TCC denies Screen Recording, not the helper binary being absent).
|
|
21
|
+
- `tools-layer`: `doctor` report carries `terminalApp` and the richer `nativeHelpers = { cgevent, ocr }` shape, where each helper entry is `{ ok, path, tried[] }`.
|
|
22
|
+
|
|
8
23
|
## [0.3.7] - 2026-06-07
|
|
9
24
|
|
|
10
25
|
### Fixed
|
package/dist/src/mcp/tools.js
CHANGED
|
@@ -94,13 +94,21 @@ function errorDetails(error) {
|
|
|
94
94
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
95
95
|
const code = error instanceof UcuError ? error.code : "UNKNOWN_ERROR";
|
|
96
96
|
const retryable = error instanceof UcuError ? error.retryable : false;
|
|
97
|
-
|
|
97
|
+
// Some platform errors carry an inline `hint` field (added by macos.ts focusApp
|
|
98
|
+
// for the Electron AX case, etc.). Surface it under `hint` so the model can
|
|
99
|
+
// see remediation without parsing the message string.
|
|
100
|
+
const inlineHint = err.hint;
|
|
101
|
+
const details = {
|
|
98
102
|
name: err.name,
|
|
99
103
|
code,
|
|
100
104
|
retryable,
|
|
101
105
|
message: err.message,
|
|
102
106
|
recovery: recoveryHint(code),
|
|
103
107
|
};
|
|
108
|
+
if (typeof inlineHint === "string" && inlineHint.length > 0) {
|
|
109
|
+
details.hint = inlineHint;
|
|
110
|
+
}
|
|
111
|
+
return details;
|
|
104
112
|
}
|
|
105
113
|
let _actionCounter = 0;
|
|
106
114
|
function nextActionId() {
|
|
@@ -281,7 +289,28 @@ export function registerTools(server) {
|
|
|
281
289
|
includeMinimized: z.boolean().optional().describe("Include minimized windows"),
|
|
282
290
|
}, async (params) => {
|
|
283
291
|
const windows = await withSafety({ action: "list_windows", params: {}, requiresAccessibility: true, execute: () => getPlatform().listWindows(params.includeMinimized) });
|
|
284
|
-
|
|
292
|
+
// Attach a diagnostic hint when the result is empty so the model can
|
|
293
|
+
// tell the difference between "no windows are open" and "AX enumeration
|
|
294
|
+
// failed for the target app" (common with Electron apps like CC Switch,
|
|
295
|
+
// VS Code, Discord). The windows list itself is the source of truth; the
|
|
296
|
+
// hint is advisory only.
|
|
297
|
+
let diagnostics;
|
|
298
|
+
if (windows.length === 0) {
|
|
299
|
+
let accessibility = "unknown";
|
|
300
|
+
try {
|
|
301
|
+
const { checkPermission } = await import("../safety/permissions.js");
|
|
302
|
+
const { granted } = await checkPermission("accessibility");
|
|
303
|
+
accessibility = granted ? "granted" : "denied";
|
|
304
|
+
}
|
|
305
|
+
catch { /* keep unknown */ }
|
|
306
|
+
const axNote = accessibility === "denied"
|
|
307
|
+
? "Accessibility is currently denied to this terminal — grant it via System Settings > Privacy & Security > Accessibility, then retry."
|
|
308
|
+
: accessibility === "granted"
|
|
309
|
+
? "Accessibility is granted. If you expected a specific app to appear here, it is likely an Electron app whose AX tree is not exposed to System Events. Pixel-level workaround: call screenshot, then ocr to locate the target UI text and get its bounding box, then click(x, y) at those screen coordinates. Alternatively, modify the app's config file or database directly."
|
|
310
|
+
: "Accessibility status is unknown. Run `doctor` first to verify.";
|
|
311
|
+
diagnostics = { hint: `list_windows returned 0 windows. ${axNote}`, accessibility };
|
|
312
|
+
}
|
|
313
|
+
return { content: [{ type: "text", text: JSON.stringify(diagnostics ? { windows, diagnostics } : windows, null, 2) }] };
|
|
285
314
|
});
|
|
286
315
|
registry.register("list_windows");
|
|
287
316
|
registerTool("list_apps", "List all running applications", {}, async () => {
|
|
@@ -384,55 +413,104 @@ export function registerTools(server) {
|
|
|
384
413
|
});
|
|
385
414
|
registry.register("drag");
|
|
386
415
|
registerTool("doctor", "Check system permissions, native helpers, and client readiness", {}, async () => {
|
|
387
|
-
const { checkPermissions } = await import("../safety/permissions.js");
|
|
416
|
+
const { checkPermissions, getPermissionInstructions, getTerminalAppName } = await import("../safety/permissions.js");
|
|
388
417
|
const { MacOSPlatform: MacPlat } = await import("../platform/macos.js");
|
|
389
|
-
const { existsSync } = await import("node:fs");
|
|
390
|
-
const { join, dirname } = await import("node:path");
|
|
418
|
+
const { existsSync, statSync } = await import("node:fs");
|
|
419
|
+
const { join, dirname, resolve } = await import("node:path");
|
|
391
420
|
const { fileURLToPath } = await import("node:url");
|
|
392
421
|
const { execFileSync } = await import("node:child_process");
|
|
393
422
|
const permissions = await checkPermissions();
|
|
394
423
|
const screenLocked = process.platform === "darwin" ? new MacPlat().isScreenLocked?.() ?? false : false;
|
|
395
|
-
|
|
396
|
-
|
|
424
|
+
const termApp = process.platform === "darwin" ? getTerminalAppName() : undefined;
|
|
425
|
+
// Resolve native helper binaries across every install layout we have seen:
|
|
426
|
+
// - dev: process.cwd() === project root
|
|
427
|
+
// - npm install --prefix X: argv[1] is in X/node_modules/ucu-mcp/...
|
|
428
|
+
// - global install via npm: argv[1] is in $(npm root -g)/ucu-mcp/...
|
|
429
|
+
// - npx: argv[1] is in ~/.npm/_npx/.../node_modules/ucu-mcp/...
|
|
430
|
+
// - bin/ucu-mcp.js is the entry; dist/src/*/tools.js is the module path
|
|
431
|
+
function resolveHelperPath(relParts) {
|
|
432
|
+
const tried = [];
|
|
433
|
+
const tryPaths = [];
|
|
397
434
|
const moduleDir = dirname(fileURLToPath(import.meta.url));
|
|
398
|
-
const
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
435
|
+
const argv1 = process.argv[1] ? resolve(process.argv[1]) : "";
|
|
436
|
+
const argv1Dir = argv1 ? dirname(argv1) : "";
|
|
437
|
+
// (1) process.cwd() — dev invocation
|
|
438
|
+
tryPaths.push(join(process.cwd(), ...relParts));
|
|
439
|
+
// (2) argv[1] dir — npm / npx / global
|
|
440
|
+
if (argv1Dir) {
|
|
441
|
+
tryPaths.push(join(argv1Dir, ...relParts));
|
|
442
|
+
tryPaths.push(join(argv1Dir, "..", ...relParts));
|
|
443
|
+
tryPaths.push(join(argv1Dir, "..", "..", ...relParts));
|
|
444
|
+
}
|
|
445
|
+
// (3) module dir — dist/bin or dist/src/mcp; walk up to 4 levels
|
|
446
|
+
tryPaths.push(join(moduleDir, "..", ...relParts));
|
|
447
|
+
tryPaths.push(join(moduleDir, "..", "..", ...relParts));
|
|
448
|
+
tryPaths.push(join(moduleDir, "..", "..", "..", ...relParts));
|
|
449
|
+
tryPaths.push(join(moduleDir, "..", "..", "..", "..", ...relParts));
|
|
450
|
+
// (4) npm root -g for global install (best effort)
|
|
451
|
+
if (process.platform === "darwin") {
|
|
452
|
+
try {
|
|
453
|
+
const npmRoot = execFileSync("npm", ["root", "-g"], { encoding: "utf-8", timeout: 2000 }).trim();
|
|
454
|
+
if (npmRoot) {
|
|
455
|
+
tryPaths.push(join(npmRoot, "ucu-mcp", ...relParts));
|
|
456
|
+
}
|
|
406
457
|
}
|
|
407
|
-
catch {
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
458
|
+
catch { /* npm not on PATH is fine */ }
|
|
459
|
+
}
|
|
460
|
+
for (const p of tryPaths) {
|
|
461
|
+
tried.push(p);
|
|
462
|
+
try {
|
|
463
|
+
if (existsSync(p) && statSync(p).isFile())
|
|
464
|
+
return { path: p, tried };
|
|
465
|
+
}
|
|
466
|
+
catch { /* skip */ }
|
|
467
|
+
}
|
|
468
|
+
return { path: null, tried };
|
|
469
|
+
}
|
|
470
|
+
let nativeHelpers;
|
|
471
|
+
if (process.platform === "darwin") {
|
|
472
|
+
const cgevent = resolveHelperPath(["native", "cgevent", "cgevent-helper"]);
|
|
473
|
+
const ocr = resolveHelperPath(["native", "ocr", "ocr-helper"]);
|
|
474
|
+
const windowlist = resolveHelperPath(["native", "windowlist", "windowlist-helper"]);
|
|
411
475
|
nativeHelpers = {
|
|
412
|
-
cgevent:
|
|
413
|
-
ocr:
|
|
476
|
+
cgevent: { ok: cgevent.path !== null, path: cgevent.path, tried: cgevent.tried.slice(0, 3) },
|
|
477
|
+
ocr: { ok: ocr.path !== null, path: ocr.path, tried: ocr.tried.slice(0, 3) },
|
|
478
|
+
windowlist: { ok: windowlist.path !== null, path: windowlist.path, tried: windowlist.tried.slice(0, 3) },
|
|
414
479
|
};
|
|
415
480
|
}
|
|
416
481
|
let readiness = "ready";
|
|
417
482
|
const issues = [];
|
|
418
483
|
if (!permissions.granted) {
|
|
419
484
|
readiness = "blocked";
|
|
420
|
-
|
|
485
|
+
for (const m of (permissions.missing ?? [])) {
|
|
486
|
+
issues.push(`Missing macOS permission: ${m}`);
|
|
487
|
+
}
|
|
421
488
|
}
|
|
422
489
|
if (screenLocked) {
|
|
423
490
|
readiness = "blocked";
|
|
424
491
|
issues.push("Screen is locked");
|
|
425
492
|
}
|
|
426
493
|
if (process.platform === "darwin" && nativeHelpers) {
|
|
427
|
-
if (!nativeHelpers.cgevent) {
|
|
494
|
+
if (!nativeHelpers.cgevent.ok) {
|
|
428
495
|
readiness = readiness === "ready" ? "degraded" : readiness;
|
|
429
|
-
issues.push("Native CGEvent helper not found (input synthesis may crash on macOS Sequoia+)");
|
|
496
|
+
issues.push("Native CGEvent helper not found (input synthesis may crash on macOS Sequoia+). Run `npm run build` to compile it, or reinstall ucu-mcp so the helper ships from the tarball.");
|
|
430
497
|
}
|
|
431
|
-
if (!nativeHelpers.ocr) {
|
|
498
|
+
if (!nativeHelpers.ocr.ok) {
|
|
432
499
|
readiness = readiness === "ready" ? "degraded" : readiness;
|
|
433
|
-
issues.push("Native OCR helper not found (OCR may fail on macOS Sequoia+)");
|
|
500
|
+
issues.push("Native OCR helper not found (OCR may fail on macOS Sequoia+). Run `npm run build` to compile it, or reinstall ucu-mcp so the helper ships from the tarball.");
|
|
501
|
+
}
|
|
502
|
+
if (!nativeHelpers.windowlist.ok) {
|
|
503
|
+
readiness = readiness === "ready" ? "degraded" : readiness;
|
|
504
|
+
issues.push("Native windowlist helper not found (window enumeration will fall back to slow JXA). Run `npm run build` to compile it.");
|
|
434
505
|
}
|
|
435
506
|
}
|
|
507
|
+
// Heuristic AX hint: if Accessibility is granted but list_windows consistently
|
|
508
|
+
// returns empty for the only app the model cared about, the model has likely
|
|
509
|
+
// hit the Electron AX limitation (Electron windows do not expose AX to System
|
|
510
|
+
// Events unless Accessibility is also granted to the Electron process itself,
|
|
511
|
+
// and the app has accessibility features enabled). This block is read-only —
|
|
512
|
+
// we never hit JXA here because the doctor must stay fast and side-effect free.
|
|
513
|
+
const electronHint = "If the target app is Electron (e.g. CC Switch, VS Code, Discord), list_windows may return [] even with Accessibility granted to your terminal. Grant Accessibility to the Electron app itself in System Settings > Privacy & Security > Accessibility, and restart the app. Pixel-level workaround: use screenshot + ocr to locate UI elements by text, then click(x, y) at the detected bounding box coordinates. Alternatively, modify the app\'s config file or database directly.";
|
|
436
514
|
const clients = {};
|
|
437
515
|
for (const bin of ["claude", "codex", "opencode", "npx"]) {
|
|
438
516
|
try {
|
|
@@ -445,16 +523,27 @@ export function registerTools(server) {
|
|
|
445
523
|
}
|
|
446
524
|
const recommendations = [];
|
|
447
525
|
if (readiness === "blocked") {
|
|
448
|
-
|
|
526
|
+
for (const m of (permissions.missing ?? [])) {
|
|
527
|
+
const app = termApp ?? "your terminal app";
|
|
528
|
+
recommendations.push(`${m}: ${getPermissionInstructions(m)} (Grant to ${app}.)`);
|
|
529
|
+
}
|
|
530
|
+
if (screenLocked)
|
|
531
|
+
recommendations.push("Unlock the screen, then retry.");
|
|
449
532
|
}
|
|
450
|
-
|
|
451
|
-
if (nativeHelpers && (!nativeHelpers.cgevent || !nativeHelpers.ocr)) {
|
|
452
|
-
recommendations.push("Run
|
|
533
|
+
if (readiness !== "ready") {
|
|
534
|
+
if (process.platform === "darwin" && nativeHelpers && (!nativeHelpers.cgevent.ok || !nativeHelpers.ocr.ok)) {
|
|
535
|
+
recommendations.push("Run `npm run build` in the ucu-mcp project to compile native Swift helpers (cgevent-helper, ocr-helper, windowlist-helper).");
|
|
536
|
+
}
|
|
537
|
+
if (process.platform === "darwin" && nativeHelpers && !nativeHelpers.windowlist.ok) {
|
|
538
|
+
recommendations.push("windowlist helper missing — list_windows will fall back to JXA (~3-6s, unreliable for Electron). Run `npm run build`.");
|
|
453
539
|
}
|
|
454
540
|
}
|
|
455
|
-
|
|
541
|
+
if (readiness === "ready") {
|
|
456
542
|
recommendations.push("All checks passed. MCP client can proceed with automation.");
|
|
457
543
|
}
|
|
544
|
+
else if (process.platform === "darwin") {
|
|
545
|
+
recommendations.push(electronHint);
|
|
546
|
+
}
|
|
458
547
|
const report = {
|
|
459
548
|
readiness,
|
|
460
549
|
issues: issues.length > 0 ? issues : undefined,
|
|
@@ -463,6 +552,7 @@ export function registerTools(server) {
|
|
|
463
552
|
node: process.version,
|
|
464
553
|
permissions,
|
|
465
554
|
screenLocked,
|
|
555
|
+
terminalApp: termApp,
|
|
466
556
|
nativeHelpers,
|
|
467
557
|
clients,
|
|
468
558
|
safety: {
|
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions, FindElementResponse, AppInfo, AppTarget, BrowserContext, ScreenshotOptions } from "./base.js";
|
|
2
|
+
export interface MacOSPlatformOptions {
|
|
3
|
+
/**
|
|
4
|
+
* Override native helper resolution.
|
|
5
|
+
* - Map of folder name to absolute binary path to inject a specific helper.
|
|
6
|
+
* - Set a value to null to skip that helper (force JXA fallback).
|
|
7
|
+
* Used by tests to control native helper behavior without filesystem tricks.
|
|
8
|
+
*/
|
|
9
|
+
nativeHelperPaths?: Record<string, string | null>;
|
|
10
|
+
}
|
|
2
11
|
export declare class MacOSPlatform implements Platform {
|
|
12
|
+
private readonly _nativeHelperPaths;
|
|
3
13
|
private readonly elementCache;
|
|
4
14
|
private readonly elementCacheTtlMs;
|
|
5
15
|
private readonly elementCacheMaxSize;
|
|
@@ -7,6 +17,7 @@ export declare class MacOSPlatform implements Platform {
|
|
|
7
17
|
private windowCache;
|
|
8
18
|
private activeTarget;
|
|
9
19
|
private savedFocus;
|
|
20
|
+
constructor(options?: MacOSPlatformOptions);
|
|
10
21
|
/** Remove expired entries from the element cache. */
|
|
11
22
|
private evictExpiredCacheEntries;
|
|
12
23
|
/** Evict oldest entries when cache exceeds the maximum size (LRU-style). */
|
|
@@ -27,6 +38,9 @@ export declare class MacOSPlatform implements Platform {
|
|
|
27
38
|
focusApp(app: string): Promise<AppTarget>;
|
|
28
39
|
getActiveBrowserContext(app?: string): Promise<BrowserContext | undefined>;
|
|
29
40
|
listWindows(_includeMinimized?: boolean): Promise<WindowInfo[]>;
|
|
41
|
+
private listWindowsNative;
|
|
42
|
+
private resolveNativeHelper;
|
|
43
|
+
private listWindowsJxa;
|
|
30
44
|
getWindowState(windowId?: string, depth?: number, includeBounds?: boolean): Promise<WindowState>;
|
|
31
45
|
click(x: number, y: number, button?: "left" | "right" | "middle", doubleClick?: boolean): Promise<void>;
|
|
32
46
|
move(x: number, y: number): Promise<void>;
|
|
@@ -4,6 +4,10 @@ import { promisify } from "node:util";
|
|
|
4
4
|
import { captureFullScreen, captureRegion } from "../utils/screenshot.js";
|
|
5
5
|
import { click as inputClick, doubleClick as inputDoubleClick, move as inputMove, drag as inputDrag, scroll as inputScroll, typeText, pressShortcut } from "../utils/input.js";
|
|
6
6
|
import { CaptureError, ElementNotFoundError, InputSynthesisError, PermissionError, PlatformError, TargetStaleError, UcuError, WindowNotFoundError } from "../util/errors.js";
|
|
7
|
+
import { existsSync } from "node:fs";
|
|
8
|
+
import { join, dirname } from "node:path";
|
|
9
|
+
import { fileURLToPath } from "node:url";
|
|
10
|
+
const __macosDirname = dirname(fileURLToPath(import.meta.url));
|
|
7
11
|
const execFileAsync = promisify(execFile);
|
|
8
12
|
function errorMessage(error) {
|
|
9
13
|
return error instanceof Error ? error.message : String(error);
|
|
@@ -59,6 +63,7 @@ function selectWindowForApp(windows, requestedApp) {
|
|
|
59
63
|
windows.find((window) => appNameMatches(window.processName, requestedApp));
|
|
60
64
|
}
|
|
61
65
|
export class MacOSPlatform {
|
|
66
|
+
_nativeHelperPaths;
|
|
62
67
|
elementCache = new Map();
|
|
63
68
|
elementCacheTtlMs = 30_000;
|
|
64
69
|
elementCacheMaxSize = 100;
|
|
@@ -66,6 +71,9 @@ export class MacOSPlatform {
|
|
|
66
71
|
windowCache;
|
|
67
72
|
activeTarget;
|
|
68
73
|
savedFocus;
|
|
74
|
+
constructor(options) {
|
|
75
|
+
this._nativeHelperPaths = options?.nativeHelperPaths;
|
|
76
|
+
}
|
|
69
77
|
// ── Element Cache Management ────────────────────────────────────────────
|
|
70
78
|
/** Remove expired entries from the element cache. */
|
|
71
79
|
evictExpiredCacheEntries() {
|
|
@@ -248,7 +256,23 @@ export class MacOSPlatform {
|
|
|
248
256
|
await new Promise((resolve) => setTimeout(resolve, 150));
|
|
249
257
|
} while (Date.now() < deadline);
|
|
250
258
|
if (!target) {
|
|
251
|
-
|
|
259
|
+
// Wrap with a more diagnostic message: many real-world failures are
|
|
260
|
+
// Electron apps that do not expose their AX tree to System Events
|
|
261
|
+
// (CC Switch, VS Code, Discord, Slack). WindowNotFoundError carries the
|
|
262
|
+
// app name so the tool handler can surface a remediation hint. The
|
|
263
|
+
// bare WindowNotFoundError("CC Switch") was indistinguishable from
|
|
264
|
+
// "the app is not running", which led models to retry forever.
|
|
265
|
+
const err = new WindowNotFoundError(app);
|
|
266
|
+
err.hint =
|
|
267
|
+
"list_windows returned no match for this app. If the app is running, " +
|
|
268
|
+
"the most likely cause is that it is an Electron app whose AX tree is " +
|
|
269
|
+
"not exposed to System Events (System Settings > Privacy & Security > " +
|
|
270
|
+
"Accessibility must be granted to the Electron process itself, not just " +
|
|
271
|
+
"to the host terminal). Pixel-level workaround: call screenshot to " +
|
|
272
|
+
"capture the screen, then ocr to locate UI text and get its bounding " +
|
|
273
|
+
"box coordinates, then click(x, y) at those screen coordinates. " +
|
|
274
|
+
"Alternatively, modify the app's config file or database directly.";
|
|
275
|
+
throw err;
|
|
252
276
|
}
|
|
253
277
|
this.activeTarget = {
|
|
254
278
|
targetId: randomUUID(),
|
|
@@ -320,11 +344,81 @@ export class MacOSPlatform {
|
|
|
320
344
|
}));
|
|
321
345
|
}
|
|
322
346
|
try {
|
|
323
|
-
//
|
|
324
|
-
//
|
|
325
|
-
//
|
|
326
|
-
//
|
|
327
|
-
|
|
347
|
+
// Try native Swift helper first (CGWindowListCopyWindowInfo, ~1ms).
|
|
348
|
+
// Falls back to JXA System Events if the helper is not available.
|
|
349
|
+
// The native helper reliably enumerates ALL windows including Electron
|
|
350
|
+
// apps, whereas JXA relies on System Events AX which is inconsistent
|
|
351
|
+
// for Chromium-rendered windows.
|
|
352
|
+
let windows;
|
|
353
|
+
const nativeResult = this.listWindowsNative();
|
|
354
|
+
if (nativeResult !== null) {
|
|
355
|
+
windows = nativeResult;
|
|
356
|
+
}
|
|
357
|
+
else {
|
|
358
|
+
windows = await this.listWindowsJxa();
|
|
359
|
+
}
|
|
360
|
+
this.windowCache = {
|
|
361
|
+
cachedAt: Date.now(),
|
|
362
|
+
windows: windows.map((window) => ({
|
|
363
|
+
...window,
|
|
364
|
+
bounds: { ...window.bounds },
|
|
365
|
+
})),
|
|
366
|
+
};
|
|
367
|
+
return windows;
|
|
368
|
+
}
|
|
369
|
+
catch {
|
|
370
|
+
// Fallback: return empty list if both methods fail
|
|
371
|
+
return [];
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
listWindowsNative() {
|
|
375
|
+
try {
|
|
376
|
+
const helperPath = this.resolveNativeHelper("windowlist", "windowlist-helper");
|
|
377
|
+
if (!helperPath)
|
|
378
|
+
return null;
|
|
379
|
+
const out = execFileSync(helperPath, [], {
|
|
380
|
+
encoding: "utf-8",
|
|
381
|
+
timeout: 5000,
|
|
382
|
+
});
|
|
383
|
+
const parsed = JSON.parse(out.trim());
|
|
384
|
+
if (parsed.error)
|
|
385
|
+
return null;
|
|
386
|
+
return parsed.windows.map(w => ({
|
|
387
|
+
id: w.id,
|
|
388
|
+
title: w.title,
|
|
389
|
+
processName: w.processName,
|
|
390
|
+
pid: w.pid,
|
|
391
|
+
bounds: w.bounds,
|
|
392
|
+
isMinimized: !w.isOnScreen,
|
|
393
|
+
isOnScreen: w.isOnScreen,
|
|
394
|
+
}));
|
|
395
|
+
}
|
|
396
|
+
catch {
|
|
397
|
+
return null;
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
resolveNativeHelper(folder, binary) {
|
|
401
|
+
// Test injection: if the caller provided explicit paths, use those
|
|
402
|
+
// instead of hitting the filesystem.
|
|
403
|
+
if (this._nativeHelperPaths && folder in this._nativeHelperPaths) {
|
|
404
|
+
const override = this._nativeHelperPaths[folder];
|
|
405
|
+
// null means "skip native, force JXA fallback"
|
|
406
|
+
return override === null ? null : override;
|
|
407
|
+
}
|
|
408
|
+
// dev: src/platform/macos.ts → native/<folder>/<binary>
|
|
409
|
+
// prod: dist/src/platform/macos.js → native/<folder>/<binary>
|
|
410
|
+
const candidates = [
|
|
411
|
+
join(__macosDirname, "..", "..", "native", folder, binary),
|
|
412
|
+
join(__macosDirname, "..", "..", "..", "native", folder, binary),
|
|
413
|
+
];
|
|
414
|
+
for (const p of candidates) {
|
|
415
|
+
if (existsSync(p))
|
|
416
|
+
return p;
|
|
417
|
+
}
|
|
418
|
+
return null;
|
|
419
|
+
}
|
|
420
|
+
async listWindowsJxa() {
|
|
421
|
+
const jxaScript = `
|
|
328
422
|
var se = Application('System Events');
|
|
329
423
|
var result = [];
|
|
330
424
|
var procs = se.processes();
|
|
@@ -358,24 +452,11 @@ export class MacOSPlatform {
|
|
|
358
452
|
}
|
|
359
453
|
JSON.stringify(result);
|
|
360
454
|
`;
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
this.windowCache = {
|
|
367
|
-
cachedAt: Date.now(),
|
|
368
|
-
windows: windows.map((window) => ({
|
|
369
|
-
...window,
|
|
370
|
-
bounds: { ...window.bounds },
|
|
371
|
-
})),
|
|
372
|
-
};
|
|
373
|
-
return windows;
|
|
374
|
-
}
|
|
375
|
-
catch {
|
|
376
|
-
// Fallback: return empty list if JXA fails
|
|
377
|
-
return [];
|
|
378
|
-
}
|
|
455
|
+
const jxaOut = execFileSync("osascript", [
|
|
456
|
+
"-l", "JavaScript",
|
|
457
|
+
"-e", jxaScript
|
|
458
|
+
], { encoding: "utf-8", timeout: 15000 });
|
|
459
|
+
return JSON.parse(jxaOut.trim());
|
|
379
460
|
}
|
|
380
461
|
async getWindowState(windowId, depth, includeBounds = true) {
|
|
381
462
|
if (!windowId || windowId === this.activeTarget?.windowId) {
|
|
@@ -779,8 +860,18 @@ export class MacOSPlatform {
|
|
|
779
860
|
`;
|
|
780
861
|
const out = execFileSync("osascript", ["-l", "JavaScript", "-e", jxaScript], { encoding: "utf-8", timeout: 30000 }).trim();
|
|
781
862
|
const parsed = JSON.parse(out);
|
|
782
|
-
if (parsed.error)
|
|
783
|
-
|
|
863
|
+
if (parsed.error) {
|
|
864
|
+
// Distinguish permission-class failures from real Vision errors.
|
|
865
|
+
// screencapture writes a 0-byte file when Screen Recording is not granted,
|
|
866
|
+
// and the JXA NSImage init then fails with "Failed to load screenshot image".
|
|
867
|
+
// Surface that as a CaptureError whose message carries a permission hint so the model can suggest the right fix.
|
|
868
|
+
const hint = parsed.error === "Failed to load screenshot image"
|
|
869
|
+
? " (the screenshot file is empty or unreadable — Screen Recording permission is most likely missing; run `doctor` and grant Screen Recording to the host terminal, then retry)"
|
|
870
|
+
: parsed.error === "Failed to get CGImage from screenshot"
|
|
871
|
+
? " (the screenshot could not be decoded — likely an empty capture; check Screen Recording permission)"
|
|
872
|
+
: "";
|
|
873
|
+
throw new CaptureError(`ocr failed: ${parsed.error}${hint}`);
|
|
874
|
+
}
|
|
784
875
|
const imgWidth = buf.readUInt32BE(16);
|
|
785
876
|
const scaleFactorX = screenSize.width / (region ? region.width : (imgWidth / scaleFactor));
|
|
786
877
|
const elements = parsed.elements.map((el) => ({
|
package/dist/src/safety/guard.js
CHANGED
|
@@ -100,6 +100,10 @@ export const OBSERVE_ACTIONS = new Set([
|
|
|
100
100
|
"wait_for_element",
|
|
101
101
|
"doctor",
|
|
102
102
|
"clipboard_read",
|
|
103
|
+
// focus_app only sets the active target context via AppleScript activate
|
|
104
|
+
// and an AX window lookup — it does not synthesize mouse or keyboard input,
|
|
105
|
+
// so the user-activity pause must not block it. (OpenCode 0.3.7 follow-up)
|
|
106
|
+
"focus_app",
|
|
103
107
|
]);
|
|
104
108
|
/** Actions that synthesize user input — need full user-activity protection. */
|
|
105
109
|
export const INPUT_ACTIONS = new Set([
|
|
@@ -8,6 +8,10 @@ export interface PermissionDetail {
|
|
|
8
8
|
granted: boolean;
|
|
9
9
|
instructions: string;
|
|
10
10
|
}
|
|
11
|
+
/**
|
|
12
|
+
* Get the name of the terminal app that the user needs to authorize.
|
|
13
|
+
*/
|
|
14
|
+
export declare function getTerminalAppName(): string;
|
|
11
15
|
export declare function checkPermissions(): Promise<PermissionCheckResult>;
|
|
12
16
|
export declare function checkPermission(type: "accessibility" | "screenRecording"): Promise<{
|
|
13
17
|
granted: boolean;
|
|
@@ -4,7 +4,7 @@ const execFileAsync = promisify(execFile);
|
|
|
4
4
|
/**
|
|
5
5
|
* Get the name of the terminal app that the user needs to authorize.
|
|
6
6
|
*/
|
|
7
|
-
function getTerminalAppName() {
|
|
7
|
+
export function getTerminalAppName() {
|
|
8
8
|
// Walk up the process tree to find the terminal emulator
|
|
9
9
|
const ppid = process.ppid;
|
|
10
10
|
// Common terminal app names
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import CoreGraphics
|
|
2
|
+
import Foundation
|
|
3
|
+
|
|
4
|
+
// Window enumeration using CGWindowListCopyWindowInfo (WindowServer-level).
|
|
5
|
+
// Replaces JXA System Events enumeration which is slow (3-6s) and unreliable
|
|
6
|
+
// for Electron apps. CGWindowListCopyWindowInfo sees ALL windows regardless
|
|
7
|
+
// of whether the app exposes an AX tree.
|
|
8
|
+
|
|
9
|
+
struct WinInfo: Encodable {
|
|
10
|
+
let id: String
|
|
11
|
+
let title: String
|
|
12
|
+
let processName: String
|
|
13
|
+
let pid: Int32
|
|
14
|
+
let bounds: Bounds
|
|
15
|
+
let isOnScreen: Bool
|
|
16
|
+
let windowNumber: Int
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
struct Bounds: Encodable {
|
|
20
|
+
let x: Double
|
|
21
|
+
let y: Double
|
|
22
|
+
let width: Double
|
|
23
|
+
let height: Double
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
struct Output: Encodable {
|
|
27
|
+
let windows: [WinInfo]
|
|
28
|
+
let error: String?
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
let options: CGWindowListOption = .optionOnScreenOnly
|
|
32
|
+
guard let windowList = CGWindowListCopyWindowInfo(options, kCGNullWindowID) as? [[String: Any]] else {
|
|
33
|
+
let out = Output(windows: [], error: "CGWindowListCopyWindowInfo returned nil")
|
|
34
|
+
FileHandle.standardOutput.write(try! JSONEncoder().encode(out))
|
|
35
|
+
exit(0)
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
var results: [WinInfo] = []
|
|
39
|
+
|
|
40
|
+
for info in windowList {
|
|
41
|
+
guard let pid = info[kCGWindowOwnerPID as String] as? Int32,
|
|
42
|
+
let windowNumber = info[kCGWindowNumber as String] as? Int,
|
|
43
|
+
let layer = info[kCGWindowLayer as String] as? Int
|
|
44
|
+
else { continue }
|
|
45
|
+
|
|
46
|
+
// Skip non-normal layers (overlay, screen saver, etc.)
|
|
47
|
+
if layer != 0 { continue }
|
|
48
|
+
|
|
49
|
+
let boundsDict = info[kCGWindowBounds as String] as? [String: Any]
|
|
50
|
+
let w = boundsDict?["Width"] as? Double ?? 0
|
|
51
|
+
let h = boundsDict?["Height"] as? Double ?? 0
|
|
52
|
+
if w == 0 || h == 0 { continue }
|
|
53
|
+
|
|
54
|
+
let processName = (info[kCGWindowOwnerName as String] as? String) ?? ""
|
|
55
|
+
if processName.isEmpty { continue }
|
|
56
|
+
|
|
57
|
+
let title = (info[kCGWindowName as String] as? String) ?? ""
|
|
58
|
+
let isOnScreen = (info[kCGWindowIsOnscreen as String] as? Bool) ?? true
|
|
59
|
+
let x = (boundsDict?["X"] as? Double) ?? 0
|
|
60
|
+
let y = (boundsDict?["Y"] as? Double) ?? 0
|
|
61
|
+
|
|
62
|
+
results.append(WinInfo(
|
|
63
|
+
id: "\(processName)/win\(windowNumber)",
|
|
64
|
+
title: title,
|
|
65
|
+
processName: processName,
|
|
66
|
+
pid: pid,
|
|
67
|
+
bounds: Bounds(x: x, y: y, width: w, height: h),
|
|
68
|
+
isOnScreen: isOnScreen,
|
|
69
|
+
windowNumber: windowNumber
|
|
70
|
+
))
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
let output = Output(windows: results, error: nil)
|
|
74
|
+
FileHandle.standardOutput.write(try! JSONEncoder().encode(output))
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ucu-mcp",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.9",
|
|
4
4
|
"description": "MCP server for Universal Computer Use — desktop automation for AI agents via Model Context Protocol",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
"test:macos-gui": "UCU_MACOS_GUI_SMOKE=1 vitest run tests/integration/macos-gui-smoke.test.ts",
|
|
28
28
|
"test:client-cli": "UCU_CLIENT_CLI_SMOKE=1 vitest run tests/integration/client-cli-smoke.test.ts",
|
|
29
29
|
"prepublishOnly": "npx vitest run tests/unit/ && npm run build",
|
|
30
|
-
"build:native": "cd native/cgevent && swiftc -O -o cgevent-helper main.swift -framework CoreGraphics -framework Foundation && cd ../ocr && swiftc -O -o ocr-helper main.swift -framework Vision -framework AppKit"
|
|
30
|
+
"build:native": "cd native/cgevent && swiftc -O -o cgevent-helper main.swift -framework CoreGraphics -framework Foundation && cd ../ocr && swiftc -O -o ocr-helper main.swift -framework Vision -framework AppKit && cd ../windowlist && swiftc -O -o windowlist-helper main.swift -framework CoreGraphics -framework Foundation"
|
|
31
31
|
},
|
|
32
32
|
"keywords": [
|
|
33
33
|
"mcp",
|