ucu-mcp 0.3.8 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/dist/src/mcp/tools.js +21 -5
- package/dist/src/platform/macos.d.ts +15 -0
- package/dist/src/platform/macos.js +135 -45
- package/dist/src/safety/guard.js +1 -1
- package/native/windowlist/main.swift +74 -0
- package/native/windowlist/windowlist-helper +0 -0
- package/package.json +2 -2
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,24 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.4.0] - 2026-06-11
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
|
|
12
|
+
- **JXA return values fixed (P0)**: Three JXA scripts (`click_element`, `type_in_element`, `set_value`) called `JSON.stringify({success:…})` as a bare statement — the result was computed but discarded, so the osascript output was empty and `JSON.parse(out)` threw a `SyntaxError` on the empty string. Now each script assigns its outcome to `_result` and calls `JSON.stringify(_result)` once, as the final statement of the script.
|
|
13
|
+
- **Rate-limit timestamp ordering (P0)**: `lastActionTime` was updated before the user-activity pause check. If the pause blocked the action, the rate-limit window was consumed anyway, causing subsequent retries to also be rate-limited. Now `lastActionTime` is set only after both checks pass.
|
|
14
|
+
- **Window cache concurrency guard (P0)**: `listWindows` could be called concurrently (e.g. `validateActiveTarget` + `list_windows` tool). Two overlapping calls could write `windowCache` at the same time, producing torn reads. Added a `windowCacheInFlight` flag — while a refresh is in progress, concurrent callers get the cached (possibly stale) window list, or an empty list if nothing is cached yet, instead of racing.
|
|
15
|
+
- **`validateActiveTarget` checks pid (P1)**: Previously only checked windowId, missing the case where an app restarts and the OS reuses the same window ID. Now also checks pid match.
|
|
16
|
+
- **`focusApp` failure clears stale target (P1)**: When `focusApp` threw `WindowNotFoundError`, the old `activeTarget` was retained. Subsequent AX tools would try to use the dead target. Now `activeTarget` is cleared on failure.
|
|
17
|
+
- **`get_screen_size` goes through `withSafety` (P1)**: Previously it was the only tool that bypassed the safety/permission/retry pipeline. It is now wrapped in `withSafety` for consistent error handling and rate limiting.
|
|
18
|
+
|
|
19
|
+
### Tests
|
|
20
|
+
|
|
21
|
+
- 225 unit tests pass (13 test files).
|
|
22
|
+
- MCP stdio smoke: `doctor`, `list_windows`, `list_apps`, `get_screen_size` all return valid responses.
|
|
23
|
+
- All 3 JXA scripts now produce valid JSON output (verified via stdio pipe test).
|
|
24
|
+
|
|
25
|
+
|
|
8
26
|
## [0.3.8] - 2026-06-08
|
|
9
27
|
|
|
10
28
|
### Fixed
|
package/dist/src/mcp/tools.js
CHANGED
|
@@ -208,7 +208,13 @@ async function withSafety(sa) {
|
|
|
208
208
|
}
|
|
209
209
|
if (sa.dryRun)
|
|
210
210
|
return `[DRY-RUN] ${await sa.dryRun()}`;
|
|
211
|
-
|
|
211
|
+
// Focus management is disabled by default: CGEvent input injection works
|
|
212
|
+
// at the HID level without requiring the target app to be frontmost, and
|
|
213
|
+
// AX operations target processes by name/PID via System Events. The user
|
|
214
|
+
// should remain in their current app while the agent works in the background.
|
|
215
|
+
// Re-enable saveFocus/restoreFocus only if a specific AX operation truly
|
|
216
|
+
// requires the target app to be frontmost (rare).
|
|
217
|
+
const shouldManageFocus = false;
|
|
212
218
|
if (shouldManageFocus)
|
|
213
219
|
await platform.saveFocus?.();
|
|
214
220
|
const start = Date.now();
|
|
@@ -306,7 +312,7 @@ export function registerTools(server) {
|
|
|
306
312
|
const axNote = accessibility === "denied"
|
|
307
313
|
? "Accessibility is currently denied to this terminal — grant it via System Settings > Privacy & Security > Accessibility, then retry."
|
|
308
314
|
: accessibility === "granted"
|
|
309
|
-
? "Accessibility is granted. If you expected a specific app to appear here, it is likely an Electron app whose AX tree is not exposed to System Events
|
|
315
|
+
? "Accessibility is granted. If you expected a specific app to appear here, it is likely an Electron app whose AX tree is not exposed to System Events. Pixel-level workaround: call screenshot, then ocr to locate the target UI text and get its bounding box, then click(x, y) at those screen coordinates. Alternatively, modify the app's config file or database directly."
|
|
310
316
|
: "Accessibility status is unknown. Run `doctor` first to verify.";
|
|
311
317
|
diagnostics = { hint: `list_windows returned 0 windows. ${axNote}`, accessibility };
|
|
312
318
|
}
|
|
@@ -471,9 +477,11 @@ export function registerTools(server) {
|
|
|
471
477
|
if (process.platform === "darwin") {
|
|
472
478
|
const cgevent = resolveHelperPath(["native", "cgevent", "cgevent-helper"]);
|
|
473
479
|
const ocr = resolveHelperPath(["native", "ocr", "ocr-helper"]);
|
|
480
|
+
const windowlist = resolveHelperPath(["native", "windowlist", "windowlist-helper"]);
|
|
474
481
|
nativeHelpers = {
|
|
475
482
|
cgevent: { ok: cgevent.path !== null, path: cgevent.path, tried: cgevent.tried.slice(0, 3) },
|
|
476
483
|
ocr: { ok: ocr.path !== null, path: ocr.path, tried: ocr.tried.slice(0, 3) },
|
|
484
|
+
windowlist: { ok: windowlist.path !== null, path: windowlist.path, tried: windowlist.tried.slice(0, 3) },
|
|
477
485
|
};
|
|
478
486
|
}
|
|
479
487
|
let readiness = "ready";
|
|
@@ -497,6 +505,10 @@ export function registerTools(server) {
|
|
|
497
505
|
readiness = readiness === "ready" ? "degraded" : readiness;
|
|
498
506
|
issues.push("Native OCR helper not found (OCR may fail on macOS Sequoia+). Run `npm run build` to compile it, or reinstall ucu-mcp so the helper ships from the tarball.");
|
|
499
507
|
}
|
|
508
|
+
if (!nativeHelpers.windowlist.ok) {
|
|
509
|
+
readiness = readiness === "ready" ? "degraded" : readiness;
|
|
510
|
+
issues.push("Native windowlist helper not found (window enumeration will fall back to slow JXA). Run `npm run build` to compile it.");
|
|
511
|
+
}
|
|
500
512
|
}
|
|
501
513
|
// Heuristic AX hint: if Accessibility is granted but list_windows consistently
|
|
502
514
|
// returns empty for the only app the model cared about, the model has likely
|
|
@@ -504,7 +516,7 @@ export function registerTools(server) {
|
|
|
504
516
|
// Events unless Accessibility is also granted to the Electron process itself,
|
|
505
517
|
// and the app has accessibility features enabled). This block is read-only —
|
|
506
518
|
// we never hit JXA here because the doctor must stay fast and side-effect free.
|
|
507
|
-
const electronHint = "If the target app is Electron (e.g. CC Switch, VS Code, Discord), list_windows may return [] even with Accessibility granted to your terminal. Grant Accessibility to the Electron app itself in System Settings > Privacy & Security > Accessibility, and restart the app.
|
|
519
|
+
const electronHint = "If the target app is Electron (e.g. CC Switch, VS Code, Discord), list_windows may return [] even with Accessibility granted to your terminal. Grant Accessibility to the Electron app itself in System Settings > Privacy & Security > Accessibility, and restart the app. Pixel-level workaround: use screenshot + ocr to locate UI elements by text, then click(x, y) at the detected bounding box coordinates. Alternatively, modify the app\'s config file or database directly.";
|
|
508
520
|
const clients = {};
|
|
509
521
|
for (const bin of ["claude", "codex", "opencode", "npx"]) {
|
|
510
522
|
try {
|
|
@@ -526,7 +538,10 @@ export function registerTools(server) {
|
|
|
526
538
|
}
|
|
527
539
|
if (readiness !== "ready") {
|
|
528
540
|
if (process.platform === "darwin" && nativeHelpers && (!nativeHelpers.cgevent.ok || !nativeHelpers.ocr.ok)) {
|
|
529
|
-
recommendations.push("Run `npm run build` in the ucu-mcp project to compile native Swift helpers (cgevent-helper, ocr-helper).");
|
|
541
|
+
recommendations.push("Run `npm run build` in the ucu-mcp project to compile native Swift helpers (cgevent-helper, ocr-helper, windowlist-helper).");
|
|
542
|
+
}
|
|
543
|
+
if (process.platform === "darwin" && nativeHelpers && !nativeHelpers.windowlist.ok) {
|
|
544
|
+
recommendations.push("windowlist helper missing — list_windows will fall back to JXA (~3-6s, unreliable for Electron). Run `npm run build`.");
|
|
530
545
|
}
|
|
531
546
|
}
|
|
532
547
|
if (readiness === "ready") {
|
|
@@ -625,7 +640,8 @@ export function registerTools(server) {
|
|
|
625
640
|
registerTool("get_screen_size", "Get screen dimensions and scale factor", {
|
|
626
641
|
display: z.number().optional().describe("Display index"),
|
|
627
642
|
}, async (params) => {
|
|
628
|
-
|
|
643
|
+
const result = await withSafety({ action: "get_screen_size", params: {}, execute: () => Promise.resolve(getPlatform().getScreenSize(params.display)) });
|
|
644
|
+
return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
|
|
629
645
|
});
|
|
630
646
|
registry.register("get_screen_size");
|
|
631
647
|
registerTool("ocr", "Perform OCR on screen region", {
|
|
@@ -1,12 +1,24 @@
|
|
|
1
1
|
import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions, FindElementResponse, AppInfo, AppTarget, BrowserContext, ScreenshotOptions } from "./base.js";
|
|
2
|
+
export interface MacOSPlatformOptions {
|
|
3
|
+
/**
|
|
4
|
+
* Override native helper resolution.
|
|
5
|
+
* - Map of folder name to absolute binary path to inject a specific helper.
|
|
6
|
+
* - Set a value to null to skip that helper (force JXA fallback).
|
|
7
|
+
* Used by tests to control native helper behavior without filesystem tricks.
|
|
8
|
+
*/
|
|
9
|
+
nativeHelperPaths?: Record<string, string | null>;
|
|
10
|
+
}
|
|
2
11
|
export declare class MacOSPlatform implements Platform {
|
|
12
|
+
private readonly _nativeHelperPaths;
|
|
3
13
|
private readonly elementCache;
|
|
4
14
|
private readonly elementCacheTtlMs;
|
|
5
15
|
private readonly elementCacheMaxSize;
|
|
6
16
|
private readonly windowCacheTtlMs;
|
|
7
17
|
private windowCache;
|
|
18
|
+
private windowCacheInFlight;
|
|
8
19
|
private activeTarget;
|
|
9
20
|
private savedFocus;
|
|
21
|
+
constructor(options?: MacOSPlatformOptions);
|
|
10
22
|
/** Remove expired entries from the element cache. */
|
|
11
23
|
private evictExpiredCacheEntries;
|
|
12
24
|
/** Evict oldest entries when cache exceeds the maximum size (LRU-style). */
|
|
@@ -27,6 +39,9 @@ export declare class MacOSPlatform implements Platform {
|
|
|
27
39
|
focusApp(app: string): Promise<AppTarget>;
|
|
28
40
|
getActiveBrowserContext(app?: string): Promise<BrowserContext | undefined>;
|
|
29
41
|
listWindows(_includeMinimized?: boolean): Promise<WindowInfo[]>;
|
|
42
|
+
private listWindowsNative;
|
|
43
|
+
private resolveNativeHelper;
|
|
44
|
+
private listWindowsJxa;
|
|
30
45
|
getWindowState(windowId?: string, depth?: number, includeBounds?: boolean): Promise<WindowState>;
|
|
31
46
|
click(x: number, y: number, button?: "left" | "right" | "middle", doubleClick?: boolean): Promise<void>;
|
|
32
47
|
move(x: number, y: number): Promise<void>;
|
|
@@ -4,6 +4,10 @@ import { promisify } from "node:util";
|
|
|
4
4
|
import { captureFullScreen, captureRegion } from "../utils/screenshot.js";
|
|
5
5
|
import { click as inputClick, doubleClick as inputDoubleClick, move as inputMove, drag as inputDrag, scroll as inputScroll, typeText, pressShortcut } from "../utils/input.js";
|
|
6
6
|
import { CaptureError, ElementNotFoundError, InputSynthesisError, PermissionError, PlatformError, TargetStaleError, UcuError, WindowNotFoundError } from "../util/errors.js";
|
|
7
|
+
import { existsSync } from "node:fs";
|
|
8
|
+
import { join, dirname } from "node:path";
|
|
9
|
+
import { fileURLToPath } from "node:url";
|
|
10
|
+
const __macosDirname = dirname(fileURLToPath(import.meta.url));
|
|
7
11
|
const execFileAsync = promisify(execFile);
|
|
8
12
|
function errorMessage(error) {
|
|
9
13
|
return error instanceof Error ? error.message : String(error);
|
|
@@ -59,13 +63,18 @@ function selectWindowForApp(windows, requestedApp) {
|
|
|
59
63
|
windows.find((window) => appNameMatches(window.processName, requestedApp));
|
|
60
64
|
}
|
|
61
65
|
export class MacOSPlatform {
|
|
66
|
+
_nativeHelperPaths;
|
|
62
67
|
elementCache = new Map();
|
|
63
68
|
elementCacheTtlMs = 30_000;
|
|
64
69
|
elementCacheMaxSize = 100;
|
|
65
70
|
windowCacheTtlMs = 300;
|
|
66
71
|
windowCache;
|
|
72
|
+
windowCacheInFlight = false;
|
|
67
73
|
activeTarget;
|
|
68
74
|
savedFocus;
|
|
75
|
+
constructor(options) {
|
|
76
|
+
this._nativeHelperPaths = options?.nativeHelperPaths;
|
|
77
|
+
}
|
|
69
78
|
// ── Element Cache Management ────────────────────────────────────────────
|
|
70
79
|
/** Remove expired entries from the element cache. */
|
|
71
80
|
evictExpiredCacheEntries() {
|
|
@@ -106,8 +115,12 @@ export class MacOSPlatform {
|
|
|
106
115
|
return;
|
|
107
116
|
this.windowCache = undefined; // Bypass cache — stale detection must use fresh data
|
|
108
117
|
const windows = await this.listWindows(true);
|
|
109
|
-
const
|
|
110
|
-
if (!
|
|
118
|
+
const match = windows.find(w => w.id === this.activeTarget.windowId);
|
|
119
|
+
if (!match) {
|
|
120
|
+
throw new TargetStaleError(this.activeTarget.windowId);
|
|
121
|
+
}
|
|
122
|
+
// Also invalidate if pid changed (app restarted)
|
|
123
|
+
if (match.pid !== this.activeTarget.pid) {
|
|
111
124
|
throw new TargetStaleError(this.activeTarget.windowId);
|
|
112
125
|
}
|
|
113
126
|
}
|
|
@@ -231,13 +244,14 @@ export class MacOSPlatform {
|
|
|
231
244
|
async focusApp(app) {
|
|
232
245
|
const escapedApp = app.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
|
|
233
246
|
this.windowCache = undefined;
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
247
|
+
// NOTE: We intentionally do NOT call AppleScript "activate" here.
|
|
248
|
+
// focus_app sets the internal target context so subsequent operations
|
|
249
|
+
// know which app/window to target. It does NOT bring the app to the
|
|
250
|
+
// foreground — the user should remain in their current app (terminal,
|
|
251
|
+
// Codex, etc.) while the agent works in the background.
|
|
252
|
+
// CGEvent input injection works at the HID level and doesn't require
|
|
253
|
+
// the target app to be frontmost. AX operations target processes by
|
|
254
|
+
// name/PID via System Events, also without needing frontmost status.
|
|
241
255
|
let target;
|
|
242
256
|
const deadline = Date.now() + 3000;
|
|
243
257
|
do {
|
|
@@ -254,14 +268,17 @@ export class MacOSPlatform {
|
|
|
254
268
|
// app name so the tool handler can surface a remediation hint. The
|
|
255
269
|
// bare WindowNotFoundError("CC Switch") was indistinguishable from
|
|
256
270
|
// "the app is not running", which led models to retry forever.
|
|
271
|
+
this.activeTarget = undefined; // Clear stale target on focus failure
|
|
257
272
|
const err = new WindowNotFoundError(app);
|
|
258
273
|
err.hint =
|
|
259
274
|
"list_windows returned no match for this app. If the app is running, " +
|
|
260
275
|
"the most likely cause is that it is an Electron app whose AX tree is " +
|
|
261
276
|
"not exposed to System Events (System Settings > Privacy & Security > " +
|
|
262
277
|
"Accessibility must be granted to the Electron process itself, not just " +
|
|
263
|
-
"to the host terminal).
|
|
264
|
-
"
|
|
278
|
+
"to the host terminal). Pixel-level workaround: call screenshot to " +
|
|
279
|
+
"capture the screen, then ocr to locate UI text and get its bounding " +
|
|
280
|
+
"box coordinates, then click(x, y) at those screen coordinates. " +
|
|
281
|
+
"Alternatively, modify the app's config file or database directly.";
|
|
265
282
|
throw err;
|
|
266
283
|
}
|
|
267
284
|
this.activeTarget = {
|
|
@@ -333,12 +350,91 @@ export class MacOSPlatform {
|
|
|
333
350
|
bounds: { ...window.bounds },
|
|
334
351
|
}));
|
|
335
352
|
}
|
|
353
|
+
// P0 #3: Prevent concurrent cache refreshes
|
|
354
|
+
if (this.windowCacheInFlight) {
|
|
355
|
+
// Another call is already refreshing; return stale or empty
|
|
356
|
+
return this.windowCache?.windows.map(w => ({ ...w, bounds: { ...w.bounds } })) ?? [];
|
|
357
|
+
}
|
|
358
|
+
this.windowCacheInFlight = true;
|
|
336
359
|
try {
|
|
337
|
-
//
|
|
338
|
-
//
|
|
339
|
-
//
|
|
340
|
-
//
|
|
341
|
-
|
|
360
|
+
// Try native Swift helper first (CGWindowListCopyWindowInfo, ~1ms).
|
|
361
|
+
// Falls back to JXA System Events if the helper is not available.
|
|
362
|
+
// The native helper reliably enumerates ALL windows including Electron
|
|
363
|
+
// apps, whereas JXA relies on System Events AX which is inconsistent
|
|
364
|
+
// for Chromium-rendered windows.
|
|
365
|
+
let windows;
|
|
366
|
+
const nativeResult = this.listWindowsNative();
|
|
367
|
+
if (nativeResult !== null) {
|
|
368
|
+
windows = nativeResult;
|
|
369
|
+
}
|
|
370
|
+
else {
|
|
371
|
+
windows = await this.listWindowsJxa();
|
|
372
|
+
}
|
|
373
|
+
this.windowCache = {
|
|
374
|
+
cachedAt: Date.now(),
|
|
375
|
+
windows: windows.map((window) => ({
|
|
376
|
+
...window,
|
|
377
|
+
bounds: { ...window.bounds },
|
|
378
|
+
})),
|
|
379
|
+
};
|
|
380
|
+
return windows;
|
|
381
|
+
}
|
|
382
|
+
catch {
|
|
383
|
+
// Fallback: return empty list if both methods fail
|
|
384
|
+
return [];
|
|
385
|
+
}
|
|
386
|
+
finally {
|
|
387
|
+
this.windowCacheInFlight = false;
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
listWindowsNative() {
|
|
391
|
+
try {
|
|
392
|
+
const helperPath = this.resolveNativeHelper("windowlist", "windowlist-helper");
|
|
393
|
+
if (!helperPath)
|
|
394
|
+
return null;
|
|
395
|
+
const out = execFileSync(helperPath, [], {
|
|
396
|
+
encoding: "utf-8",
|
|
397
|
+
timeout: 5000,
|
|
398
|
+
});
|
|
399
|
+
const parsed = JSON.parse(out.trim());
|
|
400
|
+
if (parsed.error)
|
|
401
|
+
return null;
|
|
402
|
+
return parsed.windows.map(w => ({
|
|
403
|
+
id: w.id,
|
|
404
|
+
title: w.title,
|
|
405
|
+
processName: w.processName,
|
|
406
|
+
pid: w.pid,
|
|
407
|
+
bounds: w.bounds,
|
|
408
|
+
isMinimized: !w.isOnScreen,
|
|
409
|
+
isOnScreen: w.isOnScreen,
|
|
410
|
+
}));
|
|
411
|
+
}
|
|
412
|
+
catch {
|
|
413
|
+
return null;
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
resolveNativeHelper(folder, binary) {
|
|
417
|
+
// Test injection: if the caller provided explicit paths, use those
|
|
418
|
+
// instead of hitting the filesystem.
|
|
419
|
+
if (this._nativeHelperPaths && folder in this._nativeHelperPaths) {
|
|
420
|
+
const override = this._nativeHelperPaths[folder];
|
|
421
|
+
// null means "skip native, force JXA fallback"
|
|
422
|
+
return override === null ? null : override;
|
|
423
|
+
}
|
|
424
|
+
// dev: src/platform/macos.ts → native/<folder>/<binary>
|
|
425
|
+
// prod: dist/src/platform/macos.js → native/<folder>/<binary>
|
|
426
|
+
const candidates = [
|
|
427
|
+
join(__macosDirname, "..", "..", "native", folder, binary),
|
|
428
|
+
join(__macosDirname, "..", "..", "..", "native", folder, binary),
|
|
429
|
+
];
|
|
430
|
+
for (const p of candidates) {
|
|
431
|
+
if (existsSync(p))
|
|
432
|
+
return p;
|
|
433
|
+
}
|
|
434
|
+
return null;
|
|
435
|
+
}
|
|
436
|
+
async listWindowsJxa() {
|
|
437
|
+
const jxaScript = `
|
|
342
438
|
var se = Application('System Events');
|
|
343
439
|
var result = [];
|
|
344
440
|
var procs = se.processes();
|
|
@@ -372,24 +468,11 @@ export class MacOSPlatform {
|
|
|
372
468
|
}
|
|
373
469
|
JSON.stringify(result);
|
|
374
470
|
`;
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
this.windowCache = {
|
|
381
|
-
cachedAt: Date.now(),
|
|
382
|
-
windows: windows.map((window) => ({
|
|
383
|
-
...window,
|
|
384
|
-
bounds: { ...window.bounds },
|
|
385
|
-
})),
|
|
386
|
-
};
|
|
387
|
-
return windows;
|
|
388
|
-
}
|
|
389
|
-
catch {
|
|
390
|
-
// Fallback: return empty list if JXA fails
|
|
391
|
-
return [];
|
|
392
|
-
}
|
|
471
|
+
const jxaOut = execFileSync("osascript", [
|
|
472
|
+
"-l", "JavaScript",
|
|
473
|
+
"-e", jxaScript
|
|
474
|
+
], { encoding: "utf-8", timeout: 15000 });
|
|
475
|
+
return JSON.parse(jxaOut.trim());
|
|
393
476
|
}
|
|
394
477
|
async getWindowState(windowId, depth, includeBounds = true) {
|
|
395
478
|
if (!windowId || windowId === this.activeTarget?.windowId) {
|
|
@@ -1110,6 +1193,7 @@ export class MacOSPlatform {
|
|
|
1110
1193
|
const cachedJson = JSON.stringify(this.elementCache.get(elementId) ?? null);
|
|
1111
1194
|
const jxaScript = `
|
|
1112
1195
|
var se = Application('System Events');
|
|
1196
|
+
var _result = null;
|
|
1113
1197
|
function childElements(elem) {
|
|
1114
1198
|
try { return elem.uiElements(); } catch(e1) {
|
|
1115
1199
|
try { return elem.elements(); } catch(e2) { return []; }
|
|
@@ -1287,11 +1371,11 @@ export class MacOSPlatform {
|
|
|
1287
1371
|
}
|
|
1288
1372
|
|
|
1289
1373
|
if (!elem) {
|
|
1290
|
-
|
|
1374
|
+
_result = {success: false, error: "Element not found: " + elemPath};
|
|
1291
1375
|
} else {
|
|
1292
1376
|
try {
|
|
1293
1377
|
elem.actions.AXPress.perform();
|
|
1294
|
-
|
|
1378
|
+
_result = {success: true};
|
|
1295
1379
|
} catch(e) {
|
|
1296
1380
|
try {
|
|
1297
1381
|
var pos = elem.position();
|
|
@@ -1305,12 +1389,13 @@ export class MacOSPlatform {
|
|
|
1305
1389
|
$.CGEventPost($.kCGHIDEventTap, down);
|
|
1306
1390
|
var up = $.CGEventCreateMouseEvent(src, $.kCGEventLeftMouseUp, pt, $.kCGMouseButtonLeft);
|
|
1307
1391
|
$.CGEventPost($.kCGHIDEventTap, up);
|
|
1308
|
-
|
|
1392
|
+
_result = {success: true};
|
|
1309
1393
|
} catch(e2) {
|
|
1310
|
-
|
|
1394
|
+
_result = {success: false, error: "Could not click element: " + String(e2.message || e2)};
|
|
1311
1395
|
}
|
|
1312
1396
|
}
|
|
1313
1397
|
}
|
|
1398
|
+
JSON.stringify(_result);
|
|
1314
1399
|
`;
|
|
1315
1400
|
try {
|
|
1316
1401
|
const out = execFileSync("osascript", [
|
|
@@ -1341,6 +1426,7 @@ export class MacOSPlatform {
|
|
|
1341
1426
|
const cachedJson = JSON.stringify(this.elementCache.get(elementId) ?? null);
|
|
1342
1427
|
const jxaScript = `
|
|
1343
1428
|
var se = Application('System Events');
|
|
1429
|
+
var _result = null;
|
|
1344
1430
|
function childElements(elem) {
|
|
1345
1431
|
try { return elem.uiElements(); } catch(e1) {
|
|
1346
1432
|
try { return elem.elements(); } catch(e2) { return []; }
|
|
@@ -1520,7 +1606,7 @@ export class MacOSPlatform {
|
|
|
1520
1606
|
}
|
|
1521
1607
|
|
|
1522
1608
|
if (!elem) {
|
|
1523
|
-
|
|
1609
|
+
_result = {success: false, error: "Element not found: " + elemPath};
|
|
1524
1610
|
} else {
|
|
1525
1611
|
try {
|
|
1526
1612
|
elem.focused = true;
|
|
@@ -1547,13 +1633,15 @@ export class MacOSPlatform {
|
|
|
1547
1633
|
if (!didSet) {
|
|
1548
1634
|
try {
|
|
1549
1635
|
se.keystroke(textToType);
|
|
1636
|
+
_result = {success: true};
|
|
1550
1637
|
} catch(e) {
|
|
1551
|
-
|
|
1638
|
+
_result = {success: false, error: "Could not type into element: " + String(e.message || e)};
|
|
1552
1639
|
}
|
|
1640
|
+
} else {
|
|
1641
|
+
_result = {success: true};
|
|
1553
1642
|
}
|
|
1554
|
-
|
|
1555
|
-
JSON.stringify({success: true});
|
|
1556
1643
|
}
|
|
1644
|
+
JSON.stringify(_result);
|
|
1557
1645
|
`;
|
|
1558
1646
|
try {
|
|
1559
1647
|
const out = execFileSync("osascript", [
|
|
@@ -1602,6 +1690,7 @@ export class MacOSPlatform {
|
|
|
1602
1690
|
const cachedJson = JSON.stringify(this.elementCache.get(elementId) ?? null);
|
|
1603
1691
|
const jxaScript = `
|
|
1604
1692
|
var se = Application('System Events');
|
|
1693
|
+
var _result = null;
|
|
1605
1694
|
function childElements(elem) {
|
|
1606
1695
|
try { return elem.uiElements(); } catch(e1) {
|
|
1607
1696
|
try { return elem.elements(); } catch(e2) { return []; }
|
|
@@ -1775,15 +1864,16 @@ export class MacOSPlatform {
|
|
|
1775
1864
|
}
|
|
1776
1865
|
|
|
1777
1866
|
if (!elem) {
|
|
1778
|
-
|
|
1867
|
+
_result = {success: false, error: "Element not found: " + elemPath};
|
|
1779
1868
|
} else {
|
|
1780
1869
|
try {
|
|
1781
1870
|
elem.value = valueToSet;
|
|
1782
|
-
|
|
1871
|
+
_result = {success: true};
|
|
1783
1872
|
} catch(e) {
|
|
1784
|
-
|
|
1873
|
+
_result = {success: false, error: "Could not set AX value: " + String(e.message || e)};
|
|
1785
1874
|
}
|
|
1786
1875
|
}
|
|
1876
|
+
JSON.stringify(_result);
|
|
1787
1877
|
`;
|
|
1788
1878
|
try {
|
|
1789
1879
|
const out = execFileSync("osascript", [
|
package/dist/src/safety/guard.js
CHANGED
|
@@ -233,7 +233,6 @@ export class SafetyGuard {
|
|
|
233
233
|
reason: `Rate-limited: ${elapsed}ms since last action (min ${this.rateLimitMs}ms)`,
|
|
234
234
|
};
|
|
235
235
|
}
|
|
236
|
-
this.lastActionTime = now;
|
|
237
236
|
// 6. User activity pause (skipped for observe-class actions) -----------------
|
|
238
237
|
if (!options.skipUserActivityPause && this.isUserActivityPauseActive()) {
|
|
239
238
|
return {
|
|
@@ -241,6 +240,7 @@ export class SafetyGuard {
|
|
|
241
240
|
reason: `User activity detected — pausing automation for ${this.userActivityPauseMs}ms`,
|
|
242
241
|
};
|
|
243
242
|
}
|
|
243
|
+
this.lastActionTime = now;
|
|
244
244
|
return { allowed: true };
|
|
245
245
|
}
|
|
246
246
|
// -----------------------------------------------------------------------
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import CoreGraphics
|
|
2
|
+
import Foundation
|
|
3
|
+
|
|
4
|
+
// Window enumeration using CGWindowListCopyWindowInfo (WindowServer-level).
|
|
5
|
+
// Replaces JXA System Events enumeration which is slow (3-6s) and unreliable
|
|
6
|
+
// for Electron apps. CGWindowListCopyWindowInfo sees ALL windows regardless
|
|
7
|
+
// of whether the app exposes an AX tree.
|
|
8
|
+
|
|
9
|
+
struct WinInfo: Encodable {
|
|
10
|
+
let id: String
|
|
11
|
+
let title: String
|
|
12
|
+
let processName: String
|
|
13
|
+
let pid: Int32
|
|
14
|
+
let bounds: Bounds
|
|
15
|
+
let isOnScreen: Bool
|
|
16
|
+
let windowNumber: Int
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
struct Bounds: Encodable {
|
|
20
|
+
let x: Double
|
|
21
|
+
let y: Double
|
|
22
|
+
let width: Double
|
|
23
|
+
let height: Double
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
struct Output: Encodable {
|
|
27
|
+
let windows: [WinInfo]
|
|
28
|
+
let error: String?
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
let options: CGWindowListOption = .optionOnScreenOnly
|
|
32
|
+
guard let windowList = CGWindowListCopyWindowInfo(options, kCGNullWindowID) as? [[String: Any]] else {
|
|
33
|
+
let out = Output(windows: [], error: "CGWindowListCopyWindowInfo returned nil")
|
|
34
|
+
FileHandle.standardOutput.write(try! JSONEncoder().encode(out))
|
|
35
|
+
exit(0)
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
var results: [WinInfo] = []
|
|
39
|
+
|
|
40
|
+
for info in windowList {
|
|
41
|
+
guard let pid = info[kCGWindowOwnerPID as String] as? Int32,
|
|
42
|
+
let windowNumber = info[kCGWindowNumber as String] as? Int,
|
|
43
|
+
let layer = info[kCGWindowLayer as String] as? Int
|
|
44
|
+
else { continue }
|
|
45
|
+
|
|
46
|
+
// Skip non-normal layers (overlay, screen saver, etc.)
|
|
47
|
+
if layer != 0 { continue }
|
|
48
|
+
|
|
49
|
+
let boundsDict = info[kCGWindowBounds as String] as? [String: Any]
|
|
50
|
+
let w = boundsDict?["Width"] as? Double ?? 0
|
|
51
|
+
let h = boundsDict?["Height"] as? Double ?? 0
|
|
52
|
+
if w == 0 || h == 0 { continue }
|
|
53
|
+
|
|
54
|
+
let processName = (info[kCGWindowOwnerName as String] as? String) ?? ""
|
|
55
|
+
if processName.isEmpty { continue }
|
|
56
|
+
|
|
57
|
+
let title = (info[kCGWindowName as String] as? String) ?? ""
|
|
58
|
+
let isOnScreen = (info[kCGWindowIsOnscreen as String] as? Bool) ?? true
|
|
59
|
+
let x = (boundsDict?["X"] as? Double) ?? 0
|
|
60
|
+
let y = (boundsDict?["Y"] as? Double) ?? 0
|
|
61
|
+
|
|
62
|
+
results.append(WinInfo(
|
|
63
|
+
id: "\(processName)/win\(windowNumber)",
|
|
64
|
+
title: title,
|
|
65
|
+
processName: processName,
|
|
66
|
+
pid: pid,
|
|
67
|
+
bounds: Bounds(x: x, y: y, width: w, height: h),
|
|
68
|
+
isOnScreen: isOnScreen,
|
|
69
|
+
windowNumber: windowNumber
|
|
70
|
+
))
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
let output = Output(windows: results, error: nil)
|
|
74
|
+
FileHandle.standardOutput.write(try! JSONEncoder().encode(output))
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ucu-mcp",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "MCP server for Universal Computer Use — desktop automation for AI agents via Model Context Protocol",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
"test:macos-gui": "UCU_MACOS_GUI_SMOKE=1 vitest run tests/integration/macos-gui-smoke.test.ts",
|
|
28
28
|
"test:client-cli": "UCU_CLIENT_CLI_SMOKE=1 vitest run tests/integration/client-cli-smoke.test.ts",
|
|
29
29
|
"prepublishOnly": "npx vitest run tests/unit/ && npm run build",
|
|
30
|
-
"build:native": "cd native/cgevent && swiftc -O -o cgevent-helper main.swift -framework CoreGraphics -framework Foundation && cd ../ocr && swiftc -O -o ocr-helper main.swift -framework Vision -framework AppKit"
|
|
30
|
+
"build:native": "cd native/cgevent && swiftc -O -o cgevent-helper main.swift -framework CoreGraphics -framework Foundation && cd ../ocr && swiftc -O -o ocr-helper main.swift -framework Vision -framework AppKit && cd ../windowlist && swiftc -O -o windowlist-helper main.swift -framework CoreGraphics -framework Foundation"
|
|
31
31
|
},
|
|
32
32
|
"keywords": [
|
|
33
33
|
"mcp",
|