ucu-mcp 0.3.8 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -5,6 +5,24 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.4.0] - 2026-06-11
9
+
10
+ ### Fixed
11
+
12
+ - **JXA return values fixed (P0)**: Three JXA scripts (`click_element`, `type_in_element`, `set_value`) called `JSON.stringify({success:…})` as a bare statement — the result was computed but discarded, so the osascript output was empty and `JSON.parse(out)` would fail on the empty input. Now each script assigns to `_result` and calls `JSON.stringify(_result)` once at the end.
13
+ - **Rate-limit timestamp ordering (P0)**: `lastActionTime` was updated before the user-activity pause check. If the pause blocked the action, the rate-limit window was consumed anyway, causing subsequent retries to also be rate-limited. Now `lastActionTime` is set only after both checks pass.
14
+ - **Window cache concurrency guard (P0)**: `listWindows` could be called concurrently (e.g. `validateActiveTarget` + `list_windows` tool). Two overlapping calls could write `windowCache` at the same time, producing torn reads. Added a `windowCacheInFlight` flag — while a refresh is in flight, concurrent callers return the stale cached list (or an empty list if nothing is cached yet) instead of racing.
15
+ - **`validateActiveTarget` checks pid (P1)**: Previously only checked windowId, missing the case where an app restarts and the OS reuses the same window ID. Now also checks pid match.
16
+ - **`focusApp` failure clears stale target (P1)**: When `focusApp` threw `WindowNotFoundError`, the old `activeTarget` was retained. Subsequent AX tools would try to use the dead target. Now `activeTarget` is cleared on failure.
17
+ - **`get_screen_size` goes through `withSafety` (P1)**: It was previously the only tool that bypassed the safety/permission/retry pipeline. It is now wrapped in `withSafety` for consistent error handling and rate limiting.
18
+
19
+ ### Tests
20
+
21
+ - 225 unit tests pass (13 test files).
22
+ - MCP stdio smoke: `doctor`, `list_windows`, `list_apps`, `get_screen_size` all return valid responses.
23
+ - All 3 JXA scripts now produce valid JSON output (verified via stdio pipe test).
24
+
25
+
8
26
  ## [0.3.8] - 2026-06-08
9
27
 
10
28
  ### Fixed
@@ -208,7 +208,13 @@ async function withSafety(sa) {
208
208
  }
209
209
  if (sa.dryRun)
210
210
  return `[DRY-RUN] ${await sa.dryRun()}`;
211
- const shouldManageFocus = sa.requiresAccessibility && !["screenshot", "list_windows", "list_apps", "get_window_state", "get_cursor_position", "get_screen_size", "ocr", "doctor", "wait", "wait_for_element", "find_element", "focus_app"].includes(sa.action);
211
+ // Focus management is unconditionally disabled: CGEvent input injection works
212
+ // at the HID level without requiring the target app to be frontmost, and
213
+ // AX operations target processes by name/PID via System Events. The user
214
+ // should remain in their current app while the agent works in the background.
215
+ // Re-enable saveFocus/restoreFocus only if a specific AX operation truly
216
+ // requires the target app to be frontmost (rare).
217
+ const shouldManageFocus = false;
212
218
  if (shouldManageFocus)
213
219
  await platform.saveFocus?.();
214
220
  const start = Date.now();
@@ -306,7 +312,7 @@ export function registerTools(server) {
306
312
  const axNote = accessibility === "denied"
307
313
  ? "Accessibility is currently denied to this terminal — grant it via System Settings > Privacy & Security > Accessibility, then retry."
308
314
  : accessibility === "granted"
309
- ? "Accessibility is granted. If you expected a specific app to appear here, it is likely an Electron app whose AX tree is not exposed to System Events; try modifying its config file or database directly rather than driving the UI."
315
+ ? "Accessibility is granted. If you expected a specific app to appear here, it is likely an Electron app whose AX tree is not exposed to System Events. Pixel-level workaround: call screenshot, then ocr to locate the target UI text and get its bounding box, then click(x, y) at those screen coordinates. Alternatively, modify the app's config file or database directly."
310
316
  : "Accessibility status is unknown. Run `doctor` first to verify.";
311
317
  diagnostics = { hint: `list_windows returned 0 windows. ${axNote}`, accessibility };
312
318
  }
@@ -471,9 +477,11 @@ export function registerTools(server) {
471
477
  if (process.platform === "darwin") {
472
478
  const cgevent = resolveHelperPath(["native", "cgevent", "cgevent-helper"]);
473
479
  const ocr = resolveHelperPath(["native", "ocr", "ocr-helper"]);
480
+ const windowlist = resolveHelperPath(["native", "windowlist", "windowlist-helper"]);
474
481
  nativeHelpers = {
475
482
  cgevent: { ok: cgevent.path !== null, path: cgevent.path, tried: cgevent.tried.slice(0, 3) },
476
483
  ocr: { ok: ocr.path !== null, path: ocr.path, tried: ocr.tried.slice(0, 3) },
484
+ windowlist: { ok: windowlist.path !== null, path: windowlist.path, tried: windowlist.tried.slice(0, 3) },
477
485
  };
478
486
  }
479
487
  let readiness = "ready";
@@ -497,6 +505,10 @@ export function registerTools(server) {
497
505
  readiness = readiness === "ready" ? "degraded" : readiness;
498
506
  issues.push("Native OCR helper not found (OCR may fail on macOS Sequoia+). Run `npm run build` to compile it, or reinstall ucu-mcp so the helper ships from the tarball.");
499
507
  }
508
+ if (!nativeHelpers.windowlist.ok) {
509
+ readiness = readiness === "ready" ? "degraded" : readiness;
510
+ issues.push("Native windowlist helper not found (window enumeration will fall back to slow JXA). Run `npm run build` to compile it.");
511
+ }
500
512
  }
501
513
  // Heuristic AX hint: if Accessibility is granted but list_windows consistently
502
514
  // returns empty for the only app the model cared about, the model has likely
@@ -504,7 +516,7 @@ export function registerTools(server) {
504
516
  // Events unless Accessibility is also granted to the Electron process itself,
505
517
  // and the app has accessibility features enabled). This block is read-only —
506
518
  // we never hit JXA here because the doctor must stay fast and side-effect free.
507
- const electronHint = "If the target app is Electron (e.g. CC Switch, VS Code, Discord), list_windows may return [] even with Accessibility granted to your terminal. Grant Accessibility to the Electron app itself in System Settings > Privacy & Security > Accessibility, and restart the app. As a workaround, modify the app\'s config file or database directly rather than driving the UI.";
519
+ const electronHint = "If the target app is Electron (e.g. CC Switch, VS Code, Discord), list_windows may return [] even with Accessibility granted to your terminal. Grant Accessibility to the Electron app itself in System Settings > Privacy & Security > Accessibility, and restart the app. Pixel-level workaround: use screenshot + ocr to locate UI elements by text, then click(x, y) at the detected bounding box coordinates. Alternatively, modify the app\'s config file or database directly.";
508
520
  const clients = {};
509
521
  for (const bin of ["claude", "codex", "opencode", "npx"]) {
510
522
  try {
@@ -526,7 +538,10 @@ export function registerTools(server) {
526
538
  }
527
539
  if (readiness !== "ready") {
528
540
  if (process.platform === "darwin" && nativeHelpers && (!nativeHelpers.cgevent.ok || !nativeHelpers.ocr.ok)) {
529
- recommendations.push("Run `npm run build` in the ucu-mcp project to compile native Swift helpers (cgevent-helper, ocr-helper).");
541
+ recommendations.push("Run `npm run build` in the ucu-mcp project to compile native Swift helpers (cgevent-helper, ocr-helper, windowlist-helper).");
542
+ }
543
+ if (process.platform === "darwin" && nativeHelpers && !nativeHelpers.windowlist.ok) {
544
+ recommendations.push("windowlist helper missing — list_windows will fall back to JXA (~3-6s, unreliable for Electron). Run `npm run build`.");
530
545
  }
531
546
  }
532
547
  if (readiness === "ready") {
@@ -625,7 +640,8 @@ export function registerTools(server) {
625
640
  registerTool("get_screen_size", "Get screen dimensions and scale factor", {
626
641
  display: z.number().optional().describe("Display index"),
627
642
  }, async (params) => {
628
- return { content: [{ type: "text", text: JSON.stringify(getPlatform().getScreenSize(params.display), null, 2) }] };
643
+ const result = await withSafety({ action: "get_screen_size", params: {}, execute: () => Promise.resolve(getPlatform().getScreenSize(params.display)) });
644
+ return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
629
645
  });
630
646
  registry.register("get_screen_size");
631
647
  registerTool("ocr", "Perform OCR on screen region", {
@@ -1,12 +1,24 @@
1
1
  import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions, FindElementResponse, AppInfo, AppTarget, BrowserContext, ScreenshotOptions } from "./base.js";
2
+ export interface MacOSPlatformOptions {
3
+ /**
4
+ * Override native helper resolution.
5
+ * - Map of folder name to absolute binary path to inject a specific helper.
6
+ * - Set a value to null to skip that helper (force JXA fallback).
7
+ * Used by tests to control native helper behavior without filesystem tricks.
8
+ */
9
+ nativeHelperPaths?: Record<string, string | null>;
10
+ }
2
11
  export declare class MacOSPlatform implements Platform {
12
+ private readonly _nativeHelperPaths;
3
13
  private readonly elementCache;
4
14
  private readonly elementCacheTtlMs;
5
15
  private readonly elementCacheMaxSize;
6
16
  private readonly windowCacheTtlMs;
7
17
  private windowCache;
18
+ private windowCacheInFlight;
8
19
  private activeTarget;
9
20
  private savedFocus;
21
+ constructor(options?: MacOSPlatformOptions);
10
22
  /** Remove expired entries from the element cache. */
11
23
  private evictExpiredCacheEntries;
12
24
  /** Evict oldest entries when cache exceeds the maximum size (LRU-style). */
@@ -27,6 +39,9 @@ export declare class MacOSPlatform implements Platform {
27
39
  focusApp(app: string): Promise<AppTarget>;
28
40
  getActiveBrowserContext(app?: string): Promise<BrowserContext | undefined>;
29
41
  listWindows(_includeMinimized?: boolean): Promise<WindowInfo[]>;
42
+ private listWindowsNative;
43
+ private resolveNativeHelper;
44
+ private listWindowsJxa;
30
45
  getWindowState(windowId?: string, depth?: number, includeBounds?: boolean): Promise<WindowState>;
31
46
  click(x: number, y: number, button?: "left" | "right" | "middle", doubleClick?: boolean): Promise<void>;
32
47
  move(x: number, y: number): Promise<void>;
@@ -4,6 +4,10 @@ import { promisify } from "node:util";
4
4
  import { captureFullScreen, captureRegion } from "../utils/screenshot.js";
5
5
  import { click as inputClick, doubleClick as inputDoubleClick, move as inputMove, drag as inputDrag, scroll as inputScroll, typeText, pressShortcut } from "../utils/input.js";
6
6
  import { CaptureError, ElementNotFoundError, InputSynthesisError, PermissionError, PlatformError, TargetStaleError, UcuError, WindowNotFoundError } from "../util/errors.js";
7
+ import { existsSync } from "node:fs";
8
+ import { join, dirname } from "node:path";
9
+ import { fileURLToPath } from "node:url";
10
+ const __macosDirname = dirname(fileURLToPath(import.meta.url));
7
11
  const execFileAsync = promisify(execFile);
8
12
  function errorMessage(error) {
9
13
  return error instanceof Error ? error.message : String(error);
@@ -59,13 +63,18 @@ function selectWindowForApp(windows, requestedApp) {
59
63
  windows.find((window) => appNameMatches(window.processName, requestedApp));
60
64
  }
61
65
  export class MacOSPlatform {
66
+ _nativeHelperPaths;
62
67
  elementCache = new Map();
63
68
  elementCacheTtlMs = 30_000;
64
69
  elementCacheMaxSize = 100;
65
70
  windowCacheTtlMs = 300;
66
71
  windowCache;
72
+ windowCacheInFlight = false;
67
73
  activeTarget;
68
74
  savedFocus;
75
+ constructor(options) {
76
+ this._nativeHelperPaths = options?.nativeHelperPaths;
77
+ }
69
78
  // ── Element Cache Management ────────────────────────────────────────────
70
79
  /** Remove expired entries from the element cache. */
71
80
  evictExpiredCacheEntries() {
@@ -106,8 +115,12 @@ export class MacOSPlatform {
106
115
  return;
107
116
  this.windowCache = undefined; // Bypass cache — stale detection must use fresh data
108
117
  const windows = await this.listWindows(true);
109
- const stillExists = windows.some(w => w.id === this.activeTarget.windowId);
110
- if (!stillExists) {
118
+ const match = windows.find(w => w.id === this.activeTarget.windowId);
119
+ if (!match) {
120
+ throw new TargetStaleError(this.activeTarget.windowId);
121
+ }
122
+ // Also invalidate if pid changed (app restarted)
123
+ if (match.pid !== this.activeTarget.pid) {
111
124
  throw new TargetStaleError(this.activeTarget.windowId);
112
125
  }
113
126
  }
@@ -231,13 +244,14 @@ export class MacOSPlatform {
231
244
  async focusApp(app) {
232
245
  const escapedApp = app.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
233
246
  this.windowCache = undefined;
234
- try {
235
- execFileSync("osascript", ["-e", `tell application "${escapedApp}" to activate`], { timeout: 5000 });
236
- }
237
- catch {
238
- // Some app names are process labels rather than AppleScript application names.
239
- // Continue with the AX window lookup below so existing callers still work.
240
- }
247
+ // NOTE: We intentionally do NOT call AppleScript "activate" here.
248
+ // focus_app sets the internal target context so subsequent operations
249
+ // know which app/window to target. It does NOT bring the app to the
250
+ // foreground — the user should remain in their current app (terminal,
251
+ // Codex, etc.) while the agent works in the background.
252
+ // CGEvent input injection works at the HID level and doesn't require
253
+ // the target app to be frontmost. AX operations target processes by
254
+ // name/PID via System Events, also without needing frontmost status.
241
255
  let target;
242
256
  const deadline = Date.now() + 3000;
243
257
  do {
@@ -254,14 +268,17 @@ export class MacOSPlatform {
254
268
  // app name so the tool handler can surface a remediation hint. The
255
269
  // bare WindowNotFoundError("CC Switch") was indistinguishable from
256
270
  // "the app is not running", which led models to retry forever.
271
+ this.activeTarget = undefined; // Clear stale target on focus failure
257
272
  const err = new WindowNotFoundError(app);
258
273
  err.hint =
259
274
  "list_windows returned no match for this app. If the app is running, " +
260
275
  "the most likely cause is that it is an Electron app whose AX tree is " +
261
276
  "not exposed to System Events (System Settings > Privacy & Security > " +
262
277
  "Accessibility must be granted to the Electron process itself, not just " +
263
- "to the host terminal). As a workaround, modify the app's config file " +
264
- "or database directly.";
278
+ "to the host terminal). Pixel-level workaround: call screenshot to " +
279
+ "capture the screen, then ocr to locate UI text and get its bounding " +
280
+ "box coordinates, then click(x, y) at those screen coordinates. " +
281
+ "Alternatively, modify the app's config file or database directly.";
265
282
  throw err;
266
283
  }
267
284
  this.activeTarget = {
@@ -333,12 +350,91 @@ export class MacOSPlatform {
333
350
  bounds: { ...window.bounds },
334
351
  }));
335
352
  }
353
+ // P0 #3: Prevent concurrent cache refreshes
354
+ if (this.windowCacheInFlight) {
355
+ // Another call is already refreshing; return stale or empty
356
+ return this.windowCache?.windows.map(w => ({ ...w, bounds: { ...w.bounds } })) ?? [];
357
+ }
358
+ this.windowCacheInFlight = true;
336
359
  try {
337
- // Use System Events instead of CGWindowListCopyWindowInfo.
338
- // The CoreGraphics API returns CFArrayRef/CFDictionaryRef which JXA
339
- // cannot iterate reliably CFArrayGetCount works but objectAtIndex
340
- // does not. System Events JXA is slower (~3-6s) but correct.
341
- const jxaScript = `
360
+ // Try native Swift helper first (CGWindowListCopyWindowInfo, ~1ms).
361
+ // Falls back to JXA System Events if the helper is not available.
362
+ // The native helper reliably enumerates ALL windows including Electron
363
+ // apps, whereas JXA relies on System Events AX which is inconsistent
364
+ // for Chromium-rendered windows.
365
+ let windows;
366
+ const nativeResult = this.listWindowsNative();
367
+ if (nativeResult !== null) {
368
+ windows = nativeResult;
369
+ }
370
+ else {
371
+ windows = await this.listWindowsJxa();
372
+ }
373
+ this.windowCache = {
374
+ cachedAt: Date.now(),
375
+ windows: windows.map((window) => ({
376
+ ...window,
377
+ bounds: { ...window.bounds },
378
+ })),
379
+ };
380
+ return windows;
381
+ }
382
+ catch {
383
+ // Fallback: return empty list if both methods fail
384
+ return [];
385
+ }
386
+ finally {
387
+ this.windowCacheInFlight = false;
388
+ }
389
+ }
390
+ listWindowsNative() {
391
+ try {
392
+ const helperPath = this.resolveNativeHelper("windowlist", "windowlist-helper");
393
+ if (!helperPath)
394
+ return null;
395
+ const out = execFileSync(helperPath, [], {
396
+ encoding: "utf-8",
397
+ timeout: 5000,
398
+ });
399
+ const parsed = JSON.parse(out.trim());
400
+ if (parsed.error)
401
+ return null;
402
+ return parsed.windows.map(w => ({
403
+ id: w.id,
404
+ title: w.title,
405
+ processName: w.processName,
406
+ pid: w.pid,
407
+ bounds: w.bounds,
408
+ isMinimized: !w.isOnScreen,
409
+ isOnScreen: w.isOnScreen,
410
+ }));
411
+ }
412
+ catch {
413
+ return null;
414
+ }
415
+ }
416
+ resolveNativeHelper(folder, binary) {
417
+ // Test injection: if the caller provided explicit paths, use those
418
+ // instead of hitting the filesystem.
419
+ if (this._nativeHelperPaths && folder in this._nativeHelperPaths) {
420
+ const override = this._nativeHelperPaths[folder];
421
+ // null means "skip native, force JXA fallback"
422
+ return override === null ? null : override;
423
+ }
424
+ // dev: src/platform/macos.ts → native/<folder>/<binary>
425
+ // prod: dist/src/platform/macos.js → native/<folder>/<binary>
426
+ const candidates = [
427
+ join(__macosDirname, "..", "..", "native", folder, binary),
428
+ join(__macosDirname, "..", "..", "..", "native", folder, binary),
429
+ ];
430
+ for (const p of candidates) {
431
+ if (existsSync(p))
432
+ return p;
433
+ }
434
+ return null;
435
+ }
436
+ async listWindowsJxa() {
437
+ const jxaScript = `
342
438
  var se = Application('System Events');
343
439
  var result = [];
344
440
  var procs = se.processes();
@@ -372,24 +468,11 @@ export class MacOSPlatform {
372
468
  }
373
469
  JSON.stringify(result);
374
470
  `;
375
- const jxaOut = execFileSync("osascript", [
376
- "-l", "JavaScript",
377
- "-e", jxaScript
378
- ], { encoding: "utf-8", timeout: 15000 });
379
- const windows = JSON.parse(jxaOut.trim());
380
- this.windowCache = {
381
- cachedAt: Date.now(),
382
- windows: windows.map((window) => ({
383
- ...window,
384
- bounds: { ...window.bounds },
385
- })),
386
- };
387
- return windows;
388
- }
389
- catch {
390
- // Fallback: return empty list if JXA fails
391
- return [];
392
- }
471
+ const jxaOut = execFileSync("osascript", [
472
+ "-l", "JavaScript",
473
+ "-e", jxaScript
474
+ ], { encoding: "utf-8", timeout: 15000 });
475
+ return JSON.parse(jxaOut.trim());
393
476
  }
394
477
  async getWindowState(windowId, depth, includeBounds = true) {
395
478
  if (!windowId || windowId === this.activeTarget?.windowId) {
@@ -1110,6 +1193,7 @@ export class MacOSPlatform {
1110
1193
  const cachedJson = JSON.stringify(this.elementCache.get(elementId) ?? null);
1111
1194
  const jxaScript = `
1112
1195
  var se = Application('System Events');
1196
+ var _result = null;
1113
1197
  function childElements(elem) {
1114
1198
  try { return elem.uiElements(); } catch(e1) {
1115
1199
  try { return elem.elements(); } catch(e2) { return []; }
@@ -1287,11 +1371,11 @@ export class MacOSPlatform {
1287
1371
  }
1288
1372
 
1289
1373
  if (!elem) {
1290
- JSON.stringify({success: false, error: "Element not found: " + elemPath});
1374
+ _result = {success: false, error: "Element not found: " + elemPath};
1291
1375
  } else {
1292
1376
  try {
1293
1377
  elem.actions.AXPress.perform();
1294
- JSON.stringify({success: true});
1378
+ _result = {success: true};
1295
1379
  } catch(e) {
1296
1380
  try {
1297
1381
  var pos = elem.position();
@@ -1305,12 +1389,13 @@ export class MacOSPlatform {
1305
1389
  $.CGEventPost($.kCGHIDEventTap, down);
1306
1390
  var up = $.CGEventCreateMouseEvent(src, $.kCGEventLeftMouseUp, pt, $.kCGMouseButtonLeft);
1307
1391
  $.CGEventPost($.kCGHIDEventTap, up);
1308
- JSON.stringify({success: true});
1392
+ _result = {success: true};
1309
1393
  } catch(e2) {
1310
- JSON.stringify({success: false, error: "Could not click element: " + String(e2.message || e2)});
1394
+ _result = {success: false, error: "Could not click element: " + String(e2.message || e2)};
1311
1395
  }
1312
1396
  }
1313
1397
  }
1398
+ JSON.stringify(_result);
1314
1399
  `;
1315
1400
  try {
1316
1401
  const out = execFileSync("osascript", [
@@ -1341,6 +1426,7 @@ export class MacOSPlatform {
1341
1426
  const cachedJson = JSON.stringify(this.elementCache.get(elementId) ?? null);
1342
1427
  const jxaScript = `
1343
1428
  var se = Application('System Events');
1429
+ var _result = null;
1344
1430
  function childElements(elem) {
1345
1431
  try { return elem.uiElements(); } catch(e1) {
1346
1432
  try { return elem.elements(); } catch(e2) { return []; }
@@ -1520,7 +1606,7 @@ export class MacOSPlatform {
1520
1606
  }
1521
1607
 
1522
1608
  if (!elem) {
1523
- JSON.stringify({success: false, error: "Element not found: " + elemPath});
1609
+ _result = {success: false, error: "Element not found: " + elemPath};
1524
1610
  } else {
1525
1611
  try {
1526
1612
  elem.focused = true;
@@ -1547,13 +1633,15 @@ export class MacOSPlatform {
1547
1633
  if (!didSet) {
1548
1634
  try {
1549
1635
  se.keystroke(textToType);
1636
+ _result = {success: true};
1550
1637
  } catch(e) {
1551
- JSON.stringify({success: false, error: "Could not type into element: " + String(e.message || e)});
1638
+ _result = {success: false, error: "Could not type into element: " + String(e.message || e)};
1552
1639
  }
1640
+ } else {
1641
+ _result = {success: true};
1553
1642
  }
1554
-
1555
- JSON.stringify({success: true});
1556
1643
  }
1644
+ JSON.stringify(_result);
1557
1645
  `;
1558
1646
  try {
1559
1647
  const out = execFileSync("osascript", [
@@ -1602,6 +1690,7 @@ export class MacOSPlatform {
1602
1690
  const cachedJson = JSON.stringify(this.elementCache.get(elementId) ?? null);
1603
1691
  const jxaScript = `
1604
1692
  var se = Application('System Events');
1693
+ var _result = null;
1605
1694
  function childElements(elem) {
1606
1695
  try { return elem.uiElements(); } catch(e1) {
1607
1696
  try { return elem.elements(); } catch(e2) { return []; }
@@ -1775,15 +1864,16 @@ export class MacOSPlatform {
1775
1864
  }
1776
1865
 
1777
1866
  if (!elem) {
1778
- JSON.stringify({success: false, error: "Element not found: " + elemPath});
1867
+ _result = {success: false, error: "Element not found: " + elemPath};
1779
1868
  } else {
1780
1869
  try {
1781
1870
  elem.value = valueToSet;
1782
- JSON.stringify({success: true});
1871
+ _result = {success: true};
1783
1872
  } catch(e) {
1784
- JSON.stringify({success: false, error: "Could not set AX value: " + String(e.message || e)});
1873
+ _result = {success: false, error: "Could not set AX value: " + String(e.message || e)};
1785
1874
  }
1786
1875
  }
1876
+ JSON.stringify(_result);
1787
1877
  `;
1788
1878
  try {
1789
1879
  const out = execFileSync("osascript", [
@@ -233,7 +233,6 @@ export class SafetyGuard {
233
233
  reason: `Rate-limited: ${elapsed}ms since last action (min ${this.rateLimitMs}ms)`,
234
234
  };
235
235
  }
236
- this.lastActionTime = now;
237
236
  // 6. User activity pause (skipped for observe-class actions) -----------------
238
237
  if (!options.skipUserActivityPause && this.isUserActivityPauseActive()) {
239
238
  return {
@@ -241,6 +240,7 @@ export class SafetyGuard {
241
240
  reason: `User activity detected — pausing automation for ${this.userActivityPauseMs}ms`,
242
241
  };
243
242
  }
243
+ this.lastActionTime = now;
244
244
  return { allowed: true };
245
245
  }
246
246
  // -----------------------------------------------------------------------
@@ -0,0 +1,74 @@
1
+ import CoreGraphics
2
+ import Foundation
3
+
4
+ // Window enumeration using CGWindowListCopyWindowInfo (WindowServer-level).
5
+ // Replaces JXA System Events enumeration which is slow (3-6s) and unreliable
6
+ // for Electron apps. CGWindowListCopyWindowInfo sees ALL windows regardless
7
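+ // of whether the app exposes an AX tree. This helper then keeps only on-screen, layer-0 windows with a non-zero size and a non-empty owner name.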
8
+
9
+ struct WinInfo: Encodable {
10
+ let id: String
11
+ let title: String
12
+ let processName: String
13
+ let pid: Int32
14
+ let bounds: Bounds
15
+ let isOnScreen: Bool
16
+ let windowNumber: Int
17
+ }
18
+
19
+ struct Bounds: Encodable {
20
+ let x: Double
21
+ let y: Double
22
+ let width: Double
23
+ let height: Double
24
+ }
25
+
26
+ struct Output: Encodable {
27
+ let windows: [WinInfo]
28
+ let error: String?
29
+ }
30
+
31
+ let options: CGWindowListOption = .optionOnScreenOnly
32
+ guard let windowList = CGWindowListCopyWindowInfo(options, kCGNullWindowID) as? [[String: Any]] else {
33
+ let out = Output(windows: [], error: "CGWindowListCopyWindowInfo returned nil")
34
+ FileHandle.standardOutput.write(try! JSONEncoder().encode(out))
35
+ exit(0)
36
+ }
37
+
38
+ var results: [WinInfo] = []
39
+
40
+ for info in windowList {
41
+ guard let pid = info[kCGWindowOwnerPID as String] as? Int32,
42
+ let windowNumber = info[kCGWindowNumber as String] as? Int,
43
+ let layer = info[kCGWindowLayer as String] as? Int
44
+ else { continue }
45
+
46
+ // Skip non-normal layers (overlay, screen saver, etc.)
47
+ if layer != 0 { continue }
48
+
49
+ let boundsDict = info[kCGWindowBounds as String] as? [String: Any]
50
+ let w = boundsDict?["Width"] as? Double ?? 0
51
+ let h = boundsDict?["Height"] as? Double ?? 0
52
+ if w == 0 || h == 0 { continue }
53
+
54
+ let processName = (info[kCGWindowOwnerName as String] as? String) ?? ""
55
+ if processName.isEmpty { continue }
56
+
57
+ let title = (info[kCGWindowName as String] as? String) ?? ""
58
+ let isOnScreen = (info[kCGWindowIsOnscreen as String] as? Bool) ?? true
59
+ let x = (boundsDict?["X"] as? Double) ?? 0
60
+ let y = (boundsDict?["Y"] as? Double) ?? 0
61
+
62
+ results.append(WinInfo(
63
+ id: "\(processName)/win\(windowNumber)",
64
+ title: title,
65
+ processName: processName,
66
+ pid: pid,
67
+ bounds: Bounds(x: x, y: y, width: w, height: h),
68
+ isOnScreen: isOnScreen,
69
+ windowNumber: windowNumber
70
+ ))
71
+ }
72
+
73
+ let output = Output(windows: results, error: nil)
74
+ FileHandle.standardOutput.write(try! JSONEncoder().encode(output))
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ucu-mcp",
3
- "version": "0.3.8",
3
+ "version": "0.4.0",
4
4
  "description": "MCP server for Universal Computer Use — desktop automation for AI agents via Model Context Protocol",
5
5
  "type": "module",
6
6
  "bin": {
@@ -27,7 +27,7 @@
27
27
  "test:macos-gui": "UCU_MACOS_GUI_SMOKE=1 vitest run tests/integration/macos-gui-smoke.test.ts",
28
28
  "test:client-cli": "UCU_CLIENT_CLI_SMOKE=1 vitest run tests/integration/client-cli-smoke.test.ts",
29
29
  "prepublishOnly": "npx vitest run tests/unit/ && npm run build",
30
- "build:native": "cd native/cgevent && swiftc -O -o cgevent-helper main.swift -framework CoreGraphics -framework Foundation && cd ../ocr && swiftc -O -o ocr-helper main.swift -framework Vision -framework AppKit"
30
+ "build:native": "cd native/cgevent && swiftc -O -o cgevent-helper main.swift -framework CoreGraphics -framework Foundation && cd ../ocr && swiftc -O -o ocr-helper main.swift -framework Vision -framework AppKit && cd ../windowlist && swiftc -O -o windowlist-helper main.swift -framework CoreGraphics -framework Foundation"
31
31
  },
32
32
  "keywords": [
33
33
  "mcp",