ucu-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1478 @@
1
+ import { execFile, execFileSync } from "node:child_process";
2
+ import { randomUUID } from "node:crypto";
3
+ import { promisify } from "node:util";
4
+ import { captureFullScreen, captureRegion, captureWindow } from "../utils/screenshot.js";
5
+ import { click as inputClick, doubleClick as inputDoubleClick, move as inputMove, drag as inputDrag, scroll as inputScroll, typeText, pressShortcut } from "../utils/input.js";
6
+ const execFileAsync = promisify(execFile);
7
+ export class MacOSPlatform {
8
+ elementCache = new Map();
9
+ elementCacheTtlMs = 30_000;
10
+ elementCacheMaxSize = 100;
11
+ activeTarget;
12
+ savedFocus;
13
+ // ── Element Cache Management ────────────────────────────────────────────
14
+ /** Remove expired entries from the element cache. */
15
+ evictExpiredCacheEntries() {
16
+ const now = Date.now();
17
+ for (const [key, descriptor] of this.elementCache) {
18
+ if (now - descriptor.cachedAt > this.elementCacheTtlMs) {
19
+ this.elementCache.delete(key);
20
+ }
21
+ }
22
+ }
23
+ /** Evict oldest entries when cache exceeds the maximum size (LRU-style). */
24
+ evictOverflowCacheEntries() {
25
+ while (this.elementCache.size > this.elementCacheMaxSize) {
26
+ let oldestKey = null;
27
+ let oldestTime = Infinity;
28
+ for (const [key, descriptor] of this.elementCache) {
29
+ if (descriptor.cachedAt < oldestTime) {
30
+ oldestTime = descriptor.cachedAt;
31
+ oldestKey = key;
32
+ }
33
+ }
34
+ if (oldestKey !== null) {
35
+ this.elementCache.delete(oldestKey);
36
+ }
37
+ else {
38
+ break;
39
+ }
40
+ }
41
+ }
42
+ /** Check whether a cached element descriptor has expired. */
43
+ isCacheEntryExpired(descriptor) {
44
+ return Date.now() - descriptor.cachedAt > this.elementCacheTtlMs;
45
+ }
46
+ // ── Focus Management ────────────────────────────────────────────────────
47
+ /** Save the current frontmost app/window so we can restore after an action. */
48
+ async saveFocus() {
49
+ try {
50
+ const apps = await this.listApps();
51
+ const front = apps.find((a) => a.isFrontmost);
52
+ if (front) {
53
+ const windows = await this.listWindows();
54
+ const win = windows.find((w) => w.processName === front.name && w.isOnScreen);
55
+ this.savedFocus = {
56
+ appName: front.name,
57
+ windowTitle: win?.title ?? "",
58
+ };
59
+ }
60
+ }
61
+ catch {
62
+ this.savedFocus = undefined;
63
+ }
64
+ }
65
+ /** Restore the previously saved frontmost app/window. */
66
+ async restoreFocus() {
67
+ if (!this.savedFocus)
68
+ return;
69
+ try {
70
+ const { appName } = this.savedFocus;
71
+ execFileSync("osascript", [
72
+ "-e", `tell application "${appName}" to activate`,
73
+ ], { timeout: 5000 });
74
+ }
75
+ catch {
76
+ // Best effort — don't fail the action if restore fails
77
+ }
78
+ this.savedFocus = undefined;
79
+ }
80
+ // ── Screenshot ──────────────────────────────────────────────────────────
81
+ async screenshot(_display, region, options) {
82
+ const base64 = region
83
+ ? await captureRegion(region.x, region.y, region.width, region.height, options)
84
+ : await captureFullScreen(options);
85
+ return Buffer.from(base64, "base64");
86
+ }
87
+ async screenshotWindow(windowId, options) {
88
+ const base64 = await captureWindow(windowId, options);
89
+ return Buffer.from(base64, "base64");
90
+ }
91
+ // ── Screen Info ─────────────────────────────────────────────────────────
92
+ getScreenSize(display) {
93
+ try {
94
+ const idx = display ?? 0;
95
+ const out = execFileSync("osascript", [
96
+ "-l", "JavaScript",
97
+ "-e",
98
+ `ObjC.import('AppKit');
99
+ var screens = $.NSScreen.screens;
100
+ var idx = ${idx};
101
+ if (idx < 0 || idx >= screens.count) idx = 0;
102
+ var screen = $(screens).objectAtIndex(idx);
103
+ var frame = screen.frame;
104
+ var scaleFactor = screen.backingScaleFactor;
105
+ JSON.stringify({width:Math.round(frame.size.width),height:Math.round(frame.size.height),scaleFactor:scaleFactor})`,
106
+ ], { encoding: "utf-8", timeout: 5000 }).trim();
107
+ return JSON.parse(out);
108
+ }
109
+ catch {
110
+ return { width: 1920, height: 1080, scaleFactor: 2 };
111
+ }
112
+ }
113
+ isScreenLocked() {
114
+ try {
115
+ const out = execFileSync("/usr/sbin/ioreg", ["-n", "Root", "-d1"], {
116
+ encoding: "utf-8",
117
+ timeout: 5000,
118
+ });
119
+ return /"IOConsoleLocked"\s*=\s*Yes/.test(out);
120
+ }
121
+ catch {
122
+ return false;
123
+ }
124
+ }
125
+ // ── Window Management ───────────────────────────────────────────────────
126
+ async listApps() {
127
+ const jxaScript = `
128
+ var se = Application('System Events');
129
+ var result = [];
130
+ var procs = se.processes();
131
+ for (var i = 0; i < procs.length; i++) {
132
+ try {
133
+ var p = procs[i];
134
+ var background = false;
135
+ try { background = p.backgroundOnly(); } catch(e) {}
136
+ if (background) continue;
137
+ var wins = [];
138
+ try { wins = p.windows(); } catch(e) {}
139
+ result.push({
140
+ name: p.name() || '',
141
+ pid: p.unixId ? p.unixId() : 0,
142
+ isFrontmost: p.frontmost ? !!p.frontmost() : false,
143
+ windowCount: wins.length || 0
144
+ });
145
+ } catch(e) {}
146
+ }
147
+ JSON.stringify(result);
148
+ `;
149
+ const out = execFileSync("osascript", [
150
+ "-l", "JavaScript",
151
+ "-e", jxaScript,
152
+ ], { encoding: "utf-8", timeout: 10000 }).trim();
153
+ return JSON.parse(out);
154
+ }
155
+ async focusApp(app) {
156
+ const appLower = app.toLowerCase();
157
+ const windows = await this.listWindows(true);
158
+ const target = windows.find((w) => w.processName.toLowerCase().includes(appLower));
159
+ if (!target) {
160
+ throw new Error(`No on-screen window found for app "${app}". Use list_apps to inspect localized macOS app names.`);
161
+ }
162
+ this.activeTarget = {
163
+ appName: target.processName,
164
+ pid: target.pid,
165
+ windowId: target.id,
166
+ title: target.title,
167
+ };
168
+ return this.activeTarget;
169
+ }
170
+ async getActiveBrowserContext(app) {
171
+ const appName = app || this.activeTarget?.appName;
172
+ if (!appName)
173
+ return undefined;
174
+ const normalized = appName.toLowerCase();
175
+ const knownBrowser = [
176
+ "safari",
177
+ "google chrome",
178
+ "chrome",
179
+ "arc",
180
+ "microsoft edge",
181
+ "edge",
182
+ "brave browser",
183
+ "brave",
184
+ ].some((name) => normalized.includes(name));
185
+ if (!knownBrowser)
186
+ return undefined;
187
+ const escapedApp = appName.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
188
+ const jxaScript = `
189
+ function run() {
190
+ var appName = "${escapedApp}";
191
+ try {
192
+ var app = Application(appName);
193
+ var url = "";
194
+ var title = "";
195
+ if (appName.toLowerCase().indexOf("safari") !== -1) {
196
+ try { url = app.documents[0].url(); } catch(e) {}
197
+ try { title = app.documents[0].name(); } catch(e) {}
198
+ } else {
199
+ try { url = app.windows[0].activeTab.url(); } catch(e) {}
200
+ try { title = app.windows[0].activeTab.title(); } catch(e) {}
201
+ }
202
+ return JSON.stringify({appName: appName, url: url || undefined, title: title || undefined});
203
+ } catch(e) {
204
+ return JSON.stringify({appName: appName});
205
+ }
206
+ }
207
+ run();
208
+ `;
209
+ try {
210
+ const out = execFileSync("osascript", [
211
+ "-l", "JavaScript",
212
+ "-e", jxaScript,
213
+ ], { encoding: "utf-8", timeout: 5000 }).trim();
214
+ const parsed = JSON.parse(out);
215
+ return parsed.url || parsed.title ? parsed : undefined;
216
+ }
217
+ catch {
218
+ return undefined;
219
+ }
220
+ }
221
+ async listWindows(_includeMinimized) {
222
+ try {
223
+ const jxaScript = `
224
+ ObjC.import('CoreGraphics');
225
+ ObjC.import('Foundation');
226
+ var winList = $.CGWindowListCopyWindowInfo(1, 0);
227
+ var count = winList.count;
228
+ var result = [];
229
+ for (var i = 0; i < count; i++) {
230
+ var w = $(winList).objectAtIndex(i);
231
+ var bounds = w.objectForKey('kCGWindowBounds');
232
+ var numberVal = w.objectForKey('kCGWindowNumber');
233
+ var nameVal = w.objectForKey('kCGWindowName');
234
+ var ownerVal = w.objectForKey('kCGWindowOwnerName');
235
+ var pidVal = w.objectForKey('kCGWindowOwnerPID');
236
+ var onScreenVal = w.objectForKey('kCGWindowIsOnscreen');
237
+ var layerVal = w.objectForKey('kCGWindowLayer');
238
+
239
+ // Skip windows at layer > 0 (menus, overlays, etc.)
240
+ if (layerVal && layerVal.intValue > 0) continue;
241
+
242
+ var bx = 0, by = 0, bw = 0, bh = 0;
243
+ try { bx = $(bounds).objectForKey('X').intValue; } catch(e) {}
244
+ try { by = $(bounds).objectForKey('Y').intValue; } catch(e) {}
245
+ try { bw = $(bounds).objectForKey('Width').intValue; } catch(e) {}
246
+ try { bh = $(bounds).objectForKey('Height').intValue; } catch(e) {}
247
+
248
+ // Skip zero-size windows
249
+ if (bw === 0 && bh === 0) continue;
250
+
251
+ result.push({
252
+ id: String(numberVal ? numberVal.intValue : 0),
253
+ title: nameVal ? String(nameVal) : '',
254
+ processName: ownerVal ? String(ownerVal) : '',
255
+ pid: pidVal ? pidVal.intValue : 0,
256
+ bounds: { x: bx, y: by, width: bw, height: bh },
257
+ isMinimized: false,
258
+ isOnScreen: onScreenVal ? onScreenVal.boolValue : true
259
+ });
260
+ }
261
+ JSON.stringify(result);
262
+ `;
263
+ const jxaOut = execFileSync("osascript", [
264
+ "-l", "JavaScript",
265
+ "-e", jxaScript
266
+ ], { encoding: "utf-8", timeout: 15000 });
267
+ return JSON.parse(jxaOut.trim());
268
+ }
269
+ catch {
270
+ // Fallback: return empty list if JXA fails
271
+ return [];
272
+ }
273
+ }
274
+ async getWindowState(windowId, depth, includeBounds = true) {
275
+ const resolvedWindowId = windowId || this.activeTarget?.windowId;
276
+ if (!resolvedWindowId) {
277
+ throw new Error("getWindowState requires windowId or a prior focus_app target");
278
+ }
279
+ const maxDepth = Math.min(depth || 3, 10);
280
+ const maxElements = 50;
281
+ const escapedWindowId = resolvedWindowId.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/`/g, '\\`').replace(/\$/g, '\\$');
282
+ const targetWindow = (await this.listWindows(true)).find((w) => w.id === resolvedWindowId);
283
+ const targetJson = JSON.stringify(targetWindow ?? null);
284
+ try {
285
+ const jxaScript = `
286
+ ObjC.import('AppKit');
287
+ var se = Application('System Events');
288
+ var result = {window: null, focusedElement: null, tree: null, error: null};
289
+ var target = ${targetJson};
290
+ var includeBounds = ${includeBounds ? "true" : "false"};
291
+
292
+ function closeEnough(a, b, tolerance) {
293
+ return Math.abs(Number(a || 0) - Number(b || 0)) <= tolerance;
294
+ }
295
+
296
+ function windowMatches(win, proc) {
297
+ if (!target) {
298
+ try { return String(win.id()) === String("${escapedWindowId}"); } catch(e) { return false; }
299
+ }
300
+ try {
301
+ if (target.pid && proc.unixId && proc.unixId() !== target.pid) return false;
302
+ } catch(e) {}
303
+
304
+ var name = "";
305
+ try { name = win.name() || ""; } catch(e) {}
306
+ if (target.title && name && name === target.title) return true;
307
+
308
+ try {
309
+ var pos = win.position();
310
+ var size = win.size();
311
+ var b = target.bounds || {};
312
+ return closeEnough(pos[0], b.x, 12) &&
313
+ closeEnough(pos[1], b.y, 12) &&
314
+ closeEnough(size[0], b.width, 24) &&
315
+ closeEnough(size[1], b.height, 24);
316
+ } catch(e) {}
317
+
318
+ try { return String(win.id()) === String("${escapedWindowId}"); } catch(e) {}
319
+ return false;
320
+ }
321
+
322
+ try {
323
+ var foundWin = null;
324
+ var foundProc = null;
325
+ var procs = se.processes();
326
+ for (var p = 0; p < procs.length; p++) {
327
+ var proc = procs[p];
328
+ try {
329
+ var wins = proc.windows();
330
+ for (var w = 0; w < wins.length; w++) {
331
+ if (windowMatches(wins[w], proc)) {
332
+ foundWin = wins[w];
333
+ foundProc = proc;
334
+ break;
335
+ }
336
+ }
337
+ } catch(e) {}
338
+ if (foundWin) break;
339
+ }
340
+ if (!foundWin) { result.error = 'Window not found'; JSON.stringify(result); return; }
341
+
342
+ var winPos = foundWin.position();
343
+ var winSize = foundWin.size();
344
+ result.window = {
345
+ id: String("${escapedWindowId}"),
346
+ title: foundWin.name() || '',
347
+ processName: foundProc.name() || '',
348
+ pid: foundProc.unixId ? foundProc.unixId() : 0,
349
+ bounds: {x: winPos[0] || 0, y: winPos[1] || 0, width: winSize[0] || 0, height: winSize[1] || 0},
350
+ isMinimized: false,
351
+ isOnScreen: true
352
+ };
353
+
354
+ var elemCount = [0];
355
+ function summarizeFocusedElement(info) {
356
+ var summary = {
357
+ role: info.role || '',
358
+ name: info.name || '',
359
+ value: info.value || '',
360
+ states: info.states ? info.states.slice(0) : []
361
+ };
362
+ if (includeBounds && info.bounds) summary.bounds = info.bounds;
363
+ return summary;
364
+ }
365
+
366
+ function getElementBounds(axElem) {
367
+ try {
368
+ var pos = axElem.position();
369
+ var sz = axElem.size();
370
+ return {x: pos[0]||0, y: pos[1]||0, width: sz[0]||0, height: sz[1]||0};
371
+ } catch(e) {
372
+ return null;
373
+ }
374
+ }
375
+
376
+ function elementBelongsToWindow(axElem) {
377
+ var b = getElementBounds(axElem);
378
+ if (!b) return false;
379
+ var wx = winPos[0] || 0;
380
+ var wy = winPos[1] || 0;
381
+ var ww = winSize[0] || 0;
382
+ var wh = winSize[1] || 0;
383
+ var cx = b.x + b.width / 2;
384
+ var cy = b.y + b.height / 2;
385
+ return cx >= wx && cx <= wx + ww && cy >= wy && cy <= wy + wh;
386
+ }
387
+
388
+ function readElementInfo(axElem) {
389
+ var info = {role: '', name: '', value: '', states: [], children: []};
390
+ try { info.role = axElem.role() || ''; } catch(e) {}
391
+ try { info.name = axElem.description ? axElem.description() : (axElem.name ? axElem.name() : ''); } catch(e) {}
392
+ try {
393
+ var val = axElem.value();
394
+ info.value = (val !== undefined && val !== null) ? String(val) : '';
395
+ } catch(e) {}
396
+ if (includeBounds) {
397
+ info.bounds = getElementBounds(axElem) || {x: 0, y: 0, width: 0, height: 0};
398
+ }
399
+ return info;
400
+ }
401
+
402
+ try {
403
+ var processFocused = foundProc.focusedUIElement ? foundProc.focusedUIElement() : null;
404
+ if (processFocused && elementBelongsToWindow(processFocused)) {
405
+ var focusedInfo = readElementInfo(processFocused);
406
+ focusedInfo.states.push('focused');
407
+ result.focusedElement = summarizeFocusedElement(focusedInfo);
408
+ }
409
+ } catch(e) {}
410
+
411
+ function extractElement(axElem, currentDepth) {
412
+ if (elemCount[0] >= ${maxElements}) return null;
413
+ elemCount[0]++;
414
+ var info = readElementInfo(axElem);
415
+ try {
416
+ try {
417
+ if (axElem.focused && axElem.focused()) info.states.push('focused');
418
+ } catch(e0) {}
419
+ } catch(e) {}
420
+ if (!result.focusedElement && info.states.indexOf('focused') !== -1) {
421
+ result.focusedElement = summarizeFocusedElement(info);
422
+ }
423
+
424
+ if (currentDepth < ${maxDepth}) {
425
+ try {
426
+ var kids = axElem.elements();
427
+ for (var k = 0; k < kids.length && elemCount[0] < ${maxElements}; k++) {
428
+ var child = extractElement(kids[k], currentDepth + 1);
429
+ if (child) info.children.push(child);
430
+ }
431
+ } catch(e) {}
432
+ }
433
+ return info;
434
+ }
435
+
436
+ result.tree = extractElement(foundWin, 0);
437
+ } catch(e) {
438
+ result.error = String(e.message || e);
439
+ }
440
+ JSON.stringify(result);
441
+ `;
442
+ const out = execFileSync("osascript", [
443
+ "-l", "JavaScript",
444
+ "-e", jxaScript,
445
+ ], { encoding: "utf-8", timeout: 15000 }).trim();
446
+ const parsed = JSON.parse(out);
447
+ if (parsed.error && !parsed.window) {
448
+ throw new Error(parsed.error);
449
+ }
450
+ const windowInfo = parsed.window || {
451
+ id: resolvedWindowId,
452
+ title: "",
453
+ processName: "",
454
+ pid: 0,
455
+ bounds: { x: 0, y: 0, width: 0, height: 0 },
456
+ isMinimized: false,
457
+ isOnScreen: true,
458
+ };
459
+ return {
460
+ window: windowInfo,
461
+ focusedElement: parsed.focusedElement || undefined,
462
+ tree: parsed.tree || undefined,
463
+ };
464
+ }
465
+ catch (error) {
466
+ if (String(error.message || error).includes("not allowed") ||
467
+ String(error.message || error).includes("permission") ||
468
+ String(error.message || error).includes("assistive")) {
469
+ throw new Error(`Accessibility permission required: grant System Events access in System Preferences > Privacy & Accessibility`);
470
+ }
471
+ throw new Error(`Window ${resolvedWindowId} not found or Accessibility permission missing`);
472
+ }
473
+ }
474
+ // ── Mouse ───────────────────────────────────────────────────────────────
475
+ async click(x, y, button, doubleClick) {
476
+ if (doubleClick) {
477
+ await inputDoubleClick(x, y, button);
478
+ }
479
+ else {
480
+ await inputClick(x, y, button);
481
+ }
482
+ }
483
+ async move(x, y) {
484
+ await inputMove(x, y);
485
+ }
486
+ async drag(startX, startY, endX, endY, button, duration) {
487
+ await inputDrag(startX, startY, endX, endY, button, duration);
488
+ }
489
+ async scroll(x, y, deltaX, deltaY) {
490
+ await inputScroll(x, y, deltaX, deltaY);
491
+ }
492
+ // ── Cursor ──────────────────────────────────────────────────────────────
493
+ getCursorPosition() {
494
+ try {
495
+ const out = execFileSync("osascript", [
496
+ "-l", "JavaScript",
497
+ "-e",
498
+ `ObjC.import('CoreGraphics');
499
+ var event = $.CGEventCreate(null);
500
+ var loc = $.CGEventGetLocation(event);
501
+ $.CFRelease(event);
502
+ JSON.stringify({x:Math.round(loc.x),y:Math.round(loc.y)})`,
503
+ ], { encoding: "utf-8", timeout: 5000 }).trim();
504
+ return JSON.parse(out);
505
+ }
506
+ catch (error) {
507
+ throw new Error(`get_cursor_position failed: ${error.message || error}`);
508
+ }
509
+ }
510
+ // ── OCR ──────────────────────────────────────────────────────────────────
511
+ async ocr(display, region) {
512
+ // Take a screenshot first (reuse existing logic)
513
+ const buf = await this.screenshot(display, region);
514
+ // Write screenshot to a temp file so Vision framework can read it
515
+ const { writeFile, unlink } = await import("node:fs/promises");
516
+ const { join } = await import("node:path");
517
+ const { tmpdir } = await import("node:os");
518
+ const tmpPath = join(tmpdir(), `ucu-ocr-${randomUUID()}.png`);
519
+ await writeFile(tmpPath, buf);
520
+ try {
521
+ const screenSize = this.getScreenSize(display);
522
+ const scaleFactor = screenSize.scaleFactor ?? 2;
523
+ // Build JXA script that uses Vision framework for OCR
524
+ // JXA does not allow return statements at global scope, so we wrap in a function
525
+ const jxaScript = `
526
+ function run() {
527
+ ObjC.import('Vision');
528
+ ObjC.import('AppKit');
529
+ ObjC.import('Foundation');
530
+
531
+ var app = Application.currentApplication();
532
+ app.includeStandardAdditions = true;
533
+
534
+ var path = "${tmpPath.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/`/g, '\\`').replace(/\$/g, '\\$')}";
535
+ var url = $.NSURL.fileURLWithPath(path);
536
+ var image = $.NSImage.alloc.initWithContentsOfURL(url);
537
+
538
+ if (!image || image.isValid() === false) {
539
+ return JSON.stringify({error: "Failed to load screenshot image", elements: [], fullText: ""});
540
+ }
541
+
542
+ var cgImage = image.CGImageForProposedRectContextHints(null, null, null);
543
+ if (!cgImage) {
544
+ return JSON.stringify({error: "Failed to get CGImage from screenshot", elements: [], fullText: ""});
545
+ }
546
+
547
+ var request = $.VNRecognizeTextRequest.alloc.init;
548
+ request.recognitionLevel = $.VNRequestTextRecognitionLevelAccurate;
549
+ request.usesLanguageCorrection = true;
550
+
551
+ var handler = $.VNImageRequestHandler.alloc.initWithCGImageOptions(cgImage, null);
552
+ var performError = $();
553
+
554
+ var success = handler.performRequestsError([request], performError);
555
+ if (!success) {
556
+ return JSON.stringify({error: "OCR request failed", elements: [], fullText: ""});
557
+ }
558
+
559
+ var results = request.results;
560
+ var elements = [];
561
+ var fullTextParts = [];
562
+
563
+ var imgWidth = cgImage.width;
564
+ var imgHeight = cgImage.height;
565
+
566
+ for (var i = 0; i < results.count; i++) {
567
+ var obs = $(results).objectAtIndex(i);
568
+ var candidates = obs.topCandidates(1);
569
+ if (candidates && candidates.count > 0) {
570
+ var candidate = $(candidates).objectAtIndex(0);
571
+ var text = candidate.string.toString();
572
+ var confidence = candidate.confidence;
573
+ var bbox = obs.boundingBox;
574
+
575
+ // Vision boundingBox is normalized (0-1) with origin at bottom-left
576
+ // Convert to screen coordinates (origin at top-left)
577
+ var bx = bbox.origin.x * imgWidth;
578
+ var by = (1 - bbox.origin.y - bbox.size.height) * imgHeight;
579
+ var bw = bbox.size.width * imgWidth;
580
+ var bh = bbox.size.height * imgHeight;
581
+
582
+ elements.push({
583
+ text: text,
584
+ x: Math.round(bx),
585
+ y: Math.round(by),
586
+ width: Math.round(bw),
587
+ height: Math.round(bh),
588
+ confidence: confidence
589
+ });
590
+ fullTextParts.push(text);
591
+ }
592
+ }
593
+
594
+ return JSON.stringify({elements: elements, fullText: fullTextParts.join("\\n"), error: null});
595
+ }
596
+ run();
597
+ `;
598
+ const out = execFileSync("osascript", [
599
+ "-l", "JavaScript",
600
+ "-e", jxaScript,
601
+ ], { encoding: "utf-8", timeout: 30000 }).trim();
602
+ const parsed = JSON.parse(out);
603
+ if (parsed.error) {
604
+ throw new Error(parsed.error);
605
+ }
606
+ // Scale coordinates from image space to screen space
607
+ // The screenshot may be taken at a different resolution than screen coordinates
608
+ const imgWidth = buf.readUInt32BE(16); // PNG width at offset 16
609
+ const scaleFactorX = screenSize.width / (region ? region.width : (imgWidth / scaleFactor));
610
+ const elements = parsed.elements.map((el) => ({
611
+ text: el.text,
612
+ x: Math.round(el.x / scaleFactor) + (region ? region.x : 0),
613
+ y: Math.round(el.y / scaleFactor) + (region ? region.y : 0),
614
+ width: Math.round(el.width / scaleFactor),
615
+ height: Math.round(el.height / scaleFactor),
616
+ confidence: el.confidence,
617
+ }));
618
+ return {
619
+ elements,
620
+ fullText: parsed.fullText,
621
+ };
622
+ }
623
+ finally {
624
+ await unlink(tmpPath).catch(() => { });
625
+ }
626
+ }
627
+ // ── Keyboard ────────────────────────────────────────────────────────────
628
+ async type(text, delay) {
629
+ await typeText(text, delay);
630
+ }
631
+ async key(keys) {
632
+ await pressShortcut(keys);
633
+ }
634
+ // ── Accessibility (AX) Element Actions ───────────────────────────────────
635
+ async findElement(options) {
636
+ this.evictExpiredCacheEntries();
637
+ const { text, role, app, depth, includeBounds = true } = options;
638
+ const effectiveApp = app || this.activeTarget?.appName;
639
+ const maxDepth = Math.min(depth || 5, 10);
640
+ const maxResults = Math.min(Math.max(options.maxResults ?? 50, 1), 200);
641
+ const escapedApp = (effectiveApp || "").replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/`/g, '\\`').replace(/\$/g, '\\$');
642
+ const escapedText = text ? text.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/`/g, '\\`').replace(/\$/g, '\\$') : "";
643
+ const escapedRole = role ? role.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/`/g, '\\`').replace(/\$/g, '\\$') : "";
644
+ const jxaScript = `
645
+ var se = Application('System Events');
646
+ var results = [];
647
+ var resultCount = [0];
648
+ var maxResults = ${maxResults};
649
+ var includeBounds = ${includeBounds ? "true" : "false"};
650
+
651
+ var textFilter = ${text ? `"${escapedText}"` : "null"};
652
+ var roleFilter = ${role ? `"${escapedRole}"` : "null"};
653
+
654
+ function matches(elem) {
655
+ var elemName = '';
656
+ var elemRole = '';
657
+ var elemDesc = '';
658
+ var elemValue = '';
659
+ try { elemName = elem.name() || ''; } catch(e) {}
660
+ try { elemRole = elem.role() || ''; } catch(e) {}
661
+ try { elemDesc = elem.description() || ''; } catch(e) {}
662
+ try { var v = elem.value(); elemValue = (v !== undefined && v !== null) ? String(v) : ''; } catch(e) {}
663
+
664
+ if (textFilter !== null) {
665
+ var t = textFilter.toLowerCase();
666
+ if (elemName.toLowerCase().indexOf(t) === -1 &&
667
+ elemValue.toLowerCase().indexOf(t) === -1 &&
668
+ elemDesc.toLowerCase().indexOf(t) === -1) {
669
+ return false;
670
+ }
671
+ }
672
+ if (roleFilter !== null) {
673
+ if (elemRole !== roleFilter) return false;
674
+ }
675
+ return true;
676
+ }
677
+
678
+ function getBounds(elem) {
679
+ try {
680
+ var pos = elem.position();
681
+ var sz = elem.size();
682
+ return {x: pos[0] || 0, y: pos[1] || 0, width: sz[0] || 0, height: sz[1] || 0};
683
+ } catch(e) {
684
+ return {x: 0, y: 0, width: 0, height: 0};
685
+ }
686
+ }
687
+
688
+ function traverse(elem, path, currentDepth) {
689
+ if (resultCount[0] >= maxResults) return;
690
+ if (currentDepth > ${maxDepth}) return;
691
+
692
+ if (matches(elem)) {
693
+ var item = {
694
+ id: path,
695
+ role: '',
696
+ name: '',
697
+ value: undefined,
698
+ description: undefined,
699
+ subrole: undefined,
700
+ identifier: undefined
701
+ };
702
+ var elemName = '';
703
+ var elemRole = '';
704
+ var elemDesc = '';
705
+ var elemValue = '';
706
+ var elemSubrole = '';
707
+ var elemIdentifier = '';
708
+ try { elemName = elem.name() || ''; } catch(e) {}
709
+ try { elemRole = elem.role() || ''; } catch(e) {}
710
+ try { elemDesc = elem.description() || ''; } catch(e) {}
711
+ try { var v = elem.value(); elemValue = (v !== undefined && v !== null) ? String(v) : ''; } catch(e) {}
712
+ try { elemSubrole = elem.subrole() || ''; } catch(e) {}
713
+ try { elemIdentifier = elem.identifier() || ''; } catch(e) {}
714
+
715
+ item.role = elemRole;
716
+ item.name = elemName;
717
+ if (elemValue) item.value = elemValue;
718
+ if (elemDesc) item.description = elemDesc;
719
+ if (elemSubrole) item.subrole = elemSubrole;
720
+ if (elemIdentifier) item.identifier = elemIdentifier;
721
+ if (includeBounds) item.bounds = getBounds(elem);
722
+ results.push(item);
723
+ resultCount[0]++;
724
+ }
725
+
726
+ if (currentDepth < ${maxDepth}) {
727
+ try {
728
+ var kids = elem.elements();
729
+ for (var k = 0; k < kids.length && resultCount[0] < maxResults; k++) {
730
+ traverse(kids[k], path + '/' + k, currentDepth + 1);
731
+ }
732
+ } catch(e) {}
733
+ }
734
+ }
735
+
736
+ try {
737
+ if ("${escapedApp}") {
738
+ var proc = se.processes["${escapedApp}"]();
739
+ var wins = proc.windows();
740
+ for (var w = 0; w < wins.length && resultCount[0] < maxResults; w++) {
741
+ traverse(wins[w], "win" + w, 0);
742
+ }
743
+ } else {
744
+ var procs = se.processes();
745
+ for (var p = 0; p < procs.length && resultCount[0] < maxResults; p++) {
746
+ try {
747
+ var procName = procs[p].name();
748
+ var wins = procs[p].windows();
749
+ for (var w = 0; w < wins.length && resultCount[0] < maxResults; w++) {
750
+ traverse(wins[w], procName + "/win" + w, 0);
751
+ }
752
+ } catch(e) {}
753
+ }
754
+ }
755
+ } catch(e) {}
756
+
757
+ JSON.stringify(results);
758
+ `;
759
+ try {
760
+ const out = execFileSync("osascript", [
761
+ "-l", "JavaScript",
762
+ "-e", jxaScript,
763
+ ], { encoding: "utf-8", timeout: 30000 }).trim();
764
+ const results = JSON.parse(out);
765
+ for (const result of results) {
766
+ const appName = effectiveApp || result.id.split("/")[0] || "";
767
+ this.elementCache.set(result.id, {
768
+ elementId: result.id,
769
+ appName,
770
+ role: result.role,
771
+ name: result.name,
772
+ value: result.value,
773
+ description: result.description,
774
+ subrole: result.subrole,
775
+ identifier: result.identifier,
776
+ bounds: result.bounds,
777
+ cachedAt: Date.now(),
778
+ });
779
+ }
780
+ this.evictOverflowCacheEntries();
781
+ return results;
782
+ }
783
+ catch (error) {
784
+ if (String(error.message || error).includes("not allowed") ||
785
+ String(error.message || error).includes("permission") ||
786
+ String(error.message || error).includes("assistive")) {
787
+ throw new Error("Accessibility permission required: grant System Events access in System Preferences > Privacy & Accessibility");
788
+ }
789
+ throw new Error(`find_element failed: ${error.message || error}`);
790
+ }
791
+ }
792
+ async clickElement(elementId, app) {
793
+ this.evictExpiredCacheEntries();
794
+ const escapedElementId = elementId.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/`/g, '\\`').replace(/\$/g, '\\$');
795
+ const effectiveApp = app || this.activeTarget?.appName;
796
+ const escapedApp = (effectiveApp || "").replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/`/g, '\\`').replace(/\$/g, '\\$');
797
+ const cached = this.elementCache.get(elementId);
798
+ if (cached && this.isCacheEntryExpired(cached)) {
799
+ this.elementCache.delete(elementId);
800
+ }
801
+ const cachedJson = JSON.stringify(this.elementCache.get(elementId) ?? null);
802
+ const jxaScript = `
803
+ var se = Application('System Events');
804
+ var elemPath = "${escapedElementId}";
805
+ var appName = "${escapedApp}";
806
+ var cached = ${cachedJson};
807
+
808
+ function resolveElementByFullPath(path) {
809
+ var parts = path.split('/');
810
+ if (parts.length < 2) return null;
811
+
812
+ var procName = parts[0];
813
+ var winPart = parts[1];
814
+ var winIdx = 0;
815
+ var match = winPart.match(/^win(\\\\d+)$/);
816
+ if (match) {
817
+ winIdx = parseInt(match[1]);
818
+ }
819
+
820
+ try {
821
+ var proc = se.processes[procName]();
822
+ var wins = proc.windows();
823
+ if (winIdx >= wins.length) return null;
824
+ var current = wins[winIdx];
825
+
826
+ for (var i = 2; i < parts.length; i++) {
827
+ var idx = parseInt(parts[i]);
828
+ if (isNaN(idx)) return null;
829
+ try {
830
+ var kids = current.elements();
831
+ if (idx >= kids.length) return null;
832
+ current = kids[idx];
833
+ } catch(e) { return null; }
834
+ }
835
+ return current;
836
+ } catch(e) { return null; }
837
+ }
838
+
839
+ function elemString(elem, getter) {
840
+ try {
841
+ var value = getter(elem);
842
+ return value === undefined || value === null ? '' : String(value);
843
+ } catch(e) {
844
+ return '';
845
+ }
846
+ }
847
+
848
+ function getBounds(elem) {
849
+ try {
850
+ var pos = elem.position();
851
+ var sz = elem.size();
852
+ return {x: pos[0] || 0, y: pos[1] || 0, width: sz[0] || 0, height: sz[1] || 0};
853
+ } catch(e) {
854
+ return {x: 0, y: 0, width: 0, height: 0};
855
+ }
856
+ }
857
+
858
+ function descriptorMatches(elem) {
859
+ if (!cached) return true;
860
+ var role = elemString(elem, function(e) { return e.role(); });
861
+ var name = elemString(elem, function(e) { return e.name(); });
862
+ var desc = elemString(elem, function(e) { return e.description(); });
863
+ var value = elemString(elem, function(e) { return e.value(); });
864
+ if (cached.role && role && role !== cached.role) return false;
865
+ if (cached.name && name && name !== cached.name) return false;
866
+ if (cached.value && value && value !== cached.value) return false;
867
+ if (cached.description && desc && desc !== cached.description) return false;
868
+ return true;
869
+ }
870
+
871
+ function scoreEquivalent(elem) {
872
+ if (!cached) return -1;
873
+ var score = 0;
874
+ var role = elemString(elem, function(e) { return e.role(); });
875
+ var name = elemString(elem, function(e) { return e.name(); });
876
+ var desc = elemString(elem, function(e) { return e.description(); });
877
+ var value = elemString(elem, function(e) { return e.value(); });
878
+ var subrole = elemString(elem, function(e) { return e.subrole(); });
879
+ var identifier = elemString(elem, function(e) { return e.identifier(); });
880
+ if (cached.role && role === cached.role) score += 4;
881
+ if (cached.name && name === cached.name) score += 4;
882
+ if (cached.value && value === cached.value) score += 3;
883
+ if (cached.description && desc === cached.description) score += 2;
884
+ if (cached.subrole && subrole === cached.subrole) score += 2;
885
+ if (cached.identifier && identifier === cached.identifier) score += 3;
886
+ var b = getBounds(elem);
887
+ if (cached.bounds) {
888
+ var cx = b.x + b.width / 2;
889
+ var cy = b.y + b.height / 2;
890
+ var ocx = cached.bounds.x + cached.bounds.width / 2;
891
+ var ocy = cached.bounds.y + cached.bounds.height / 2;
892
+ var distance = Math.sqrt(Math.pow(cx - ocx, 2) + Math.pow(cy - ocy, 2));
893
+ if (distance < 8) score += 4;
894
+ else if (distance < 40) score += 2;
895
+ else if (distance < 120) score += 1;
896
+ }
897
+ return score;
898
+ }
899
+
900
+ function refetchEquivalent() {
901
+ if (!cached) return null;
902
+ var targetApp = appName || cached.appName || '';
903
+ var best = null;
904
+ var bestScore = 0;
905
+ var visited = [0];
906
+ function visit(elem, depth) {
907
+ if (visited[0] > 350 || depth > 10) return;
908
+ visited[0]++;
909
+ var score = scoreEquivalent(elem);
910
+ if (score > bestScore) {
911
+ best = elem;
912
+ bestScore = score;
913
+ }
914
+ try {
915
+ var kids = elem.elements();
916
+ for (var i = 0; i < kids.length; i++) visit(kids[i], depth + 1);
917
+ } catch(e) {}
918
+ }
919
+ try {
920
+ if (targetApp) {
921
+ var proc = se.processes[targetApp]();
922
+ var wins = proc.windows();
923
+ for (var w = 0; w < wins.length; w++) visit(wins[w], 0);
924
+ } else {
925
+ var procs = se.processes();
926
+ for (var p = 0; p < procs.length; p++) {
927
+ try {
928
+ var wins2 = procs[p].windows();
929
+ for (var w2 = 0; w2 < wins2.length; w2++) visit(wins2[w2], 0);
930
+ } catch(e2) {}
931
+ }
932
+ }
933
+ } catch(e) {}
934
+ return bestScore >= 6 ? best : null;
935
+ }
936
+
937
+ var elem = null;
938
+
939
+ if (appName) {
940
+ try {
941
+ var proc = se.processes[appName]();
942
+ var wins = proc.windows();
943
+ var parts = elemPath.split('/');
944
+ var winIdx = 0;
945
+ var match = parts[0].match(/^win(\\\\d+)$/);
946
+ if (match) winIdx = parseInt(match[1]);
947
+ if (winIdx < wins.length) {
948
+ var current = wins[winIdx];
949
+ for (var i = 1; i < parts.length; i++) {
950
+ var idx = parseInt(parts[i]);
951
+ if (isNaN(idx)) break;
952
+ try {
953
+ var kids = current.elements();
954
+ if (idx >= kids.length) break;
955
+ current = kids[idx];
956
+ } catch(e) { break; }
957
+ }
958
+ elem = current;
959
+ }
960
+ } catch(e) {}
961
+ }
962
+
963
+ if (!elem) {
964
+ elem = resolveElementByFullPath(elemPath);
965
+ }
966
+
967
+ if (elem && !descriptorMatches(elem)) {
968
+ elem = refetchEquivalent() || elem;
969
+ }
970
+
971
+ if (!elem) {
972
+ elem = refetchEquivalent();
973
+ }
974
+
975
+ if (!elem) {
976
+ JSON.stringify({success: false, error: "Element not found: " + elemPath});
977
+ } else {
978
+ try {
979
+ elem.actions.AXPress.perform();
980
+ JSON.stringify({success: true});
981
+ } catch(e) {
982
+ try {
983
+ var pos = elem.position();
984
+ var sz = elem.size();
985
+ var cx = pos[0] + sz[0] / 2;
986
+ var cy = pos[1] + sz[1] / 2;
987
+ ObjC.import('CoreGraphics');
988
+ var src = $.CGEventSourceCreate($.kCGEventSourceStateHIDSystemState);
989
+ var pt = $.CGPointMake(cx, cy);
990
+ var down = $.CGEventCreateMouseEvent(src, $.kCGEventLeftMouseDown, pt, $.kCGMouseButtonLeft);
991
+ $.CGEventPost($.kCGHIDEventTap, down);
992
+ var up = $.CGEventCreateMouseEvent(src, $.kCGEventLeftMouseUp, pt, $.kCGMouseButtonLeft);
993
+ $.CGEventPost($.kCGHIDEventTap, up);
994
+ JSON.stringify({success: true});
995
+ } catch(e2) {
996
+ JSON.stringify({success: false, error: "Could not click element: " + String(e2.message || e2)});
997
+ }
998
+ }
999
+ }
1000
+ `;
1001
+ try {
1002
+ const out = execFileSync("osascript", [
1003
+ "-l", "JavaScript",
1004
+ "-e", jxaScript,
1005
+ ], { encoding: "utf-8", timeout: 15000 }).trim();
1006
+ const result = JSON.parse(out);
1007
+ if (!result.success) {
1008
+ throw new Error(result.error || `click_element failed for element ${elementId}`);
1009
+ }
1010
+ }
1011
+ catch (error) {
1012
+ if (error.message && error.message.includes("click_element failed"))
1013
+ throw error;
1014
+ if (String(error.message || error).includes("not allowed") ||
1015
+ String(error.message || error).includes("permission")) {
1016
+ throw new Error("Accessibility permission required: grant System Events access in System Preferences > Privacy & Accessibility");
1017
+ }
1018
+ throw new Error(`click_element failed: ${error.message || error}`);
1019
+ }
1020
+ }
1021
+ async typeInElement(elementId, text, app, clearFirst) {
1022
+ this.evictExpiredCacheEntries();
1023
+ const escapedText = text.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/`/g, '\\`').replace(/\$/g, '\\$');
1024
+ const effectiveApp = app || this.activeTarget?.appName;
1025
+ const escapedApp = (effectiveApp || "").replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/`/g, '\\`').replace(/\$/g, '\\$');
1026
+ const escapedElementId = elementId.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/`/g, '\\`').replace(/\$/g, '\\$');
1027
+ const cached = this.elementCache.get(elementId);
1028
+ if (cached && this.isCacheEntryExpired(cached)) {
1029
+ this.elementCache.delete(elementId);
1030
+ }
1031
+ const cachedJson = JSON.stringify(this.elementCache.get(elementId) ?? null);
1032
+ const jxaScript = `
1033
+ var se = Application('System Events');
1034
+ var elemPath = "${escapedElementId}";
1035
+ var appName = "${escapedApp}";
1036
+ var textToType = "${escapedText}";
1037
+ var shouldClear = ${clearFirst ? "true" : "false"};
1038
+ var cached = ${cachedJson};
1039
+
1040
+ function resolveElementByFullPath(path) {
1041
+ var parts = path.split('/');
1042
+ if (parts.length < 2) return null;
1043
+
1044
+ var procName = parts[0];
1045
+ var winPart = parts[1];
1046
+ var winIdx = 0;
1047
+ var match = winPart.match(/^win(\\\\d+)$/);
1048
+ if (match) {
1049
+ winIdx = parseInt(match[1]);
1050
+ }
1051
+
1052
+ try {
1053
+ var proc = se.processes[procName]();
1054
+ var wins = proc.windows();
1055
+ if (winIdx >= wins.length) return null;
1056
+ var current = wins[winIdx];
1057
+
1058
+ for (var i = 2; i < parts.length; i++) {
1059
+ var idx = parseInt(parts[i]);
1060
+ if (isNaN(idx)) return null;
1061
+ try {
1062
+ var kids = current.elements();
1063
+ if (idx >= kids.length) return null;
1064
+ current = kids[idx];
1065
+ } catch(e) { return null; }
1066
+ }
1067
+ return current;
1068
+ } catch(e) { return null; }
1069
+ }
1070
+
1071
+ function elemString(elem, getter) {
1072
+ try {
1073
+ var value = getter(elem);
1074
+ return value === undefined || value === null ? '' : String(value);
1075
+ } catch(e) {
1076
+ return '';
1077
+ }
1078
+ }
1079
+
1080
+ function getBounds(elem) {
1081
+ try {
1082
+ var pos = elem.position();
1083
+ var sz = elem.size();
1084
+ return {x: pos[0] || 0, y: pos[1] || 0, width: sz[0] || 0, height: sz[1] || 0};
1085
+ } catch(e) {
1086
+ return {x: 0, y: 0, width: 0, height: 0};
1087
+ }
1088
+ }
1089
+
1090
+ function descriptorMatches(elem) {
1091
+ if (!cached) return true;
1092
+ var role = elemString(elem, function(e) { return e.role(); });
1093
+ var name = elemString(elem, function(e) { return e.name(); });
1094
+ var desc = elemString(elem, function(e) { return e.description(); });
1095
+ var value = elemString(elem, function(e) { return e.value(); });
1096
+ if (cached.role && role && role !== cached.role) return false;
1097
+ if (cached.name && name && name !== cached.name) return false;
1098
+ if (cached.value && value && value !== cached.value) return false;
1099
+ if (cached.description && desc && desc !== cached.description) return false;
1100
+ return true;
1101
+ }
1102
+
1103
+ function scoreEquivalent(elem) {
1104
+ if (!cached) return -1;
1105
+ var score = 0;
1106
+ var role = elemString(elem, function(e) { return e.role(); });
1107
+ var name = elemString(elem, function(e) { return e.name(); });
1108
+ var desc = elemString(elem, function(e) { return e.description(); });
1109
+ var value = elemString(elem, function(e) { return e.value(); });
1110
+ var subrole = elemString(elem, function(e) { return e.subrole(); });
1111
+ var identifier = elemString(elem, function(e) { return e.identifier(); });
1112
+ if (cached.role && role === cached.role) score += 4;
1113
+ if (cached.name && name === cached.name) score += 4;
1114
+ if (cached.value && value === cached.value) score += 3;
1115
+ if (cached.description && desc === cached.description) score += 2;
1116
+ if (cached.subrole && subrole === cached.subrole) score += 2;
1117
+ if (cached.identifier && identifier === cached.identifier) score += 3;
1118
+ var b = getBounds(elem);
1119
+ if (cached.bounds) {
1120
+ var cx = b.x + b.width / 2;
1121
+ var cy = b.y + b.height / 2;
1122
+ var ocx = cached.bounds.x + cached.bounds.width / 2;
1123
+ var ocy = cached.bounds.y + cached.bounds.height / 2;
1124
+ var distance = Math.sqrt(Math.pow(cx - ocx, 2) + Math.pow(cy - ocy, 2));
1125
+ if (distance < 8) score += 4;
1126
+ else if (distance < 40) score += 2;
1127
+ else if (distance < 120) score += 1;
1128
+ }
1129
+ return score;
1130
+ }
1131
+
1132
+ function refetchEquivalent() {
1133
+ if (!cached) return null;
1134
+ var targetApp = appName || cached.appName || '';
1135
+ var best = null;
1136
+ var bestScore = 0;
1137
+ var visited = [0];
1138
+ function visit(elem, depth) {
1139
+ if (visited[0] > 350 || depth > 10) return;
1140
+ visited[0]++;
1141
+ var score = scoreEquivalent(elem);
1142
+ if (score > bestScore) {
1143
+ best = elem;
1144
+ bestScore = score;
1145
+ }
1146
+ try {
1147
+ var kids = elem.elements();
1148
+ for (var i = 0; i < kids.length; i++) visit(kids[i], depth + 1);
1149
+ } catch(e) {}
1150
+ }
1151
+ try {
1152
+ if (targetApp) {
1153
+ var proc = se.processes[targetApp]();
1154
+ var wins = proc.windows();
1155
+ for (var w = 0; w < wins.length; w++) visit(wins[w], 0);
1156
+ } else {
1157
+ var procs = se.processes();
1158
+ for (var p = 0; p < procs.length; p++) {
1159
+ try {
1160
+ var wins2 = procs[p].windows();
1161
+ for (var w2 = 0; w2 < wins2.length; w2++) visit(wins2[w2], 0);
1162
+ } catch(e2) {}
1163
+ }
1164
+ }
1165
+ } catch(e) {}
1166
+ return bestScore >= 6 ? best : null;
1167
+ }
1168
+
1169
+ var elem = null;
1170
+
1171
+ if (appName) {
1172
+ try {
1173
+ var proc = se.processes[appName]();
1174
+ var wins = proc.windows();
1175
+ var parts = elemPath.split('/');
1176
+ var winIdx = 0;
1177
+ var match = parts[0].match(/^win(\\\\d+)$/);
1178
+ if (match) winIdx = parseInt(match[1]);
1179
+ if (winIdx < wins.length) {
1180
+ var current = wins[winIdx];
1181
+ for (var i = 1; i < parts.length; i++) {
1182
+ var idx = parseInt(parts[i]);
1183
+ if (isNaN(idx)) break;
1184
+ try {
1185
+ var kids = current.elements();
1186
+ if (idx >= kids.length) break;
1187
+ current = kids[idx];
1188
+ } catch(e) { break; }
1189
+ }
1190
+ elem = current;
1191
+ }
1192
+ } catch(e) {}
1193
+ }
1194
+
1195
+ if (!elem) {
1196
+ elem = resolveElementByFullPath(elemPath);
1197
+ }
1198
+
1199
+ if (elem && !descriptorMatches(elem)) {
1200
+ elem = refetchEquivalent() || elem;
1201
+ }
1202
+
1203
+ if (!elem) {
1204
+ elem = refetchEquivalent();
1205
+ }
1206
+
1207
+ if (!elem) {
1208
+ JSON.stringify({success: false, error: "Element not found: " + elemPath});
1209
+ } else {
1210
+ try {
1211
+ elem.focused = true;
1212
+ } catch(e) {}
1213
+
1214
+ if (shouldClear) {
1215
+ try {
1216
+ elem.value = "";
1217
+ } catch(e) {
1218
+ try {
1219
+ se.keystroke("a", {command: true});
1220
+ se.keyDown("delete");
1221
+ se.keyUp("delete");
1222
+ } catch(e2) {}
1223
+ }
1224
+ }
1225
+
1226
+ var didSet = false;
1227
+ try {
1228
+ elem.value = textToType;
1229
+ didSet = true;
1230
+ } catch(e) {}
1231
+
1232
+ if (!didSet) {
1233
+ try {
1234
+ se.keystroke(textToType);
1235
+ } catch(e) {
1236
+ JSON.stringify({success: false, error: "Could not type into element: " + String(e.message || e)});
1237
+ }
1238
+ }
1239
+
1240
+ JSON.stringify({success: true});
1241
+ }
1242
+ `;
1243
+ try {
1244
+ const out = execFileSync("osascript", [
1245
+ "-l", "JavaScript",
1246
+ "-e", jxaScript,
1247
+ ], { encoding: "utf-8", timeout: 15000 }).trim();
1248
+ const result = JSON.parse(out);
1249
+ if (!result.success) {
1250
+ throw new Error(result.error || `type_in_element failed for element ${elementId}`);
1251
+ }
1252
+ }
1253
+ catch (error) {
1254
+ if (error.message && error.message.includes("type_in_element failed"))
1255
+ throw error;
1256
+ if (String(error.message || error).includes("not allowed") ||
1257
+ String(error.message || error).includes("permission")) {
1258
+ throw new Error("Accessibility permission required: grant System Events access in System Preferences > Privacy & Accessibility");
1259
+ }
1260
+ throw new Error(`type_in_element failed: ${error.message || error}`);
1261
+ }
1262
+ }
1263
+ async setElementValue(elementId, value, app) {
1264
+ this.evictExpiredCacheEntries();
1265
+ const effectiveApp = app || this.activeTarget?.appName;
1266
+ const valueLiteral = JSON.stringify(value);
1267
+ const appLiteral = JSON.stringify(effectiveApp || "");
1268
+ const elementIdLiteral = JSON.stringify(elementId);
1269
+ const cached = this.elementCache.get(elementId);
1270
+ if (cached && this.isCacheEntryExpired(cached)) {
1271
+ this.elementCache.delete(elementId);
1272
+ }
1273
+ const cachedJson = JSON.stringify(this.elementCache.get(elementId) ?? null);
1274
+ const jxaScript = `
1275
+ var se = Application('System Events');
1276
+ var elemPath = ${elementIdLiteral};
1277
+ var appName = ${appLiteral};
1278
+ var valueToSet = ${valueLiteral};
1279
+ var cached = ${cachedJson};
1280
+
1281
+ function resolveElementByFullPath(path) {
1282
+ var parts = path.split('/');
1283
+ if (parts.length < 2) return null;
1284
+
1285
+ var procName = parts[0];
1286
+ var winPart = parts[1];
1287
+ var winIdx = 0;
1288
+ var match = winPart.match(/^win(\\\\d+)$/);
1289
+ if (match) winIdx = parseInt(match[1]);
1290
+
1291
+ try {
1292
+ var proc = se.processes[procName]();
1293
+ var wins = proc.windows();
1294
+ if (winIdx >= wins.length) return null;
1295
+ var current = wins[winIdx];
1296
+
1297
+ for (var i = 2; i < parts.length; i++) {
1298
+ var idx = parseInt(parts[i]);
1299
+ if (isNaN(idx)) return null;
1300
+ try {
1301
+ var kids = current.elements();
1302
+ if (idx >= kids.length) return null;
1303
+ current = kids[idx];
1304
+ } catch(e) { return null; }
1305
+ }
1306
+ return current;
1307
+ } catch(e) { return null; }
1308
+ }
1309
+
1310
+ function resolveElementInApp(path, targetApp) {
1311
+ if (!targetApp) return null;
1312
+ var parts = path.split('/');
1313
+ var start = parts[0] === targetApp ? 1 : 0;
1314
+ var winPart = parts[start] || 'win0';
1315
+ var winIdx = 0;
1316
+ var match = winPart.match(/^win(\\\\d+)$/);
1317
+ if (match) winIdx = parseInt(match[1]);
1318
+
1319
+ try {
1320
+ var proc = se.processes[targetApp]();
1321
+ var wins = proc.windows();
1322
+ if (winIdx >= wins.length) return null;
1323
+ var current = wins[winIdx];
1324
+ for (var i = start + 1; i < parts.length; i++) {
1325
+ var idx = parseInt(parts[i]);
1326
+ if (isNaN(idx)) return null;
1327
+ try {
1328
+ var kids = current.elements();
1329
+ if (idx >= kids.length) return null;
1330
+ current = kids[idx];
1331
+ } catch(e) { return null; }
1332
+ }
1333
+ return current;
1334
+ } catch(e) { return null; }
1335
+ }
1336
+
1337
+ function elemString(elem, getter) {
1338
+ try {
1339
+ var value = getter(elem);
1340
+ return value === undefined || value === null ? '' : String(value);
1341
+ } catch(e) {
1342
+ return '';
1343
+ }
1344
+ }
1345
+
1346
+ function getBounds(elem) {
1347
+ try {
1348
+ var pos = elem.position();
1349
+ var sz = elem.size();
1350
+ return {x: pos[0] || 0, y: pos[1] || 0, width: sz[0] || 0, height: sz[1] || 0};
1351
+ } catch(e) {
1352
+ return {x: 0, y: 0, width: 0, height: 0};
1353
+ }
1354
+ }
1355
+
1356
+ function descriptorMatches(elem) {
1357
+ if (!cached) return true;
1358
+ var role = elemString(elem, function(e) { return e.role(); });
1359
+ var name = elemString(elem, function(e) { return e.name(); });
1360
+ var desc = elemString(elem, function(e) { return e.description(); });
1361
+ var value = elemString(elem, function(e) { return e.value(); });
1362
+ if (cached.role && role && role !== cached.role) return false;
1363
+ if (cached.name && name && name !== cached.name) return false;
1364
+ if (cached.value && value && value !== cached.value) return false;
1365
+ if (cached.description && desc && desc !== cached.description) return false;
1366
+ return true;
1367
+ }
1368
+
1369
+ function scoreEquivalent(elem) {
1370
+ if (!cached) return -1;
1371
+ var score = 0;
1372
+ var role = elemString(elem, function(e) { return e.role(); });
1373
+ var name = elemString(elem, function(e) { return e.name(); });
1374
+ var desc = elemString(elem, function(e) { return e.description(); });
1375
+ var value = elemString(elem, function(e) { return e.value(); });
1376
+ var subrole = elemString(elem, function(e) { return e.subrole(); });
1377
+ var identifier = elemString(elem, function(e) { return e.identifier(); });
1378
+ if (cached.role && role === cached.role) score += 4;
1379
+ if (cached.name && name === cached.name) score += 4;
1380
+ if (cached.value && value === cached.value) score += 3;
1381
+ if (cached.description && desc === cached.description) score += 2;
1382
+ if (cached.subrole && subrole === cached.subrole) score += 2;
1383
+ if (cached.identifier && identifier === cached.identifier) score += 3;
1384
+ var b = getBounds(elem);
1385
+ if (cached.bounds) {
1386
+ var cx = b.x + b.width / 2;
1387
+ var cy = b.y + b.height / 2;
1388
+ var ocx = cached.bounds.x + cached.bounds.width / 2;
1389
+ var ocy = cached.bounds.y + cached.bounds.height / 2;
1390
+ var distance = Math.sqrt(Math.pow(cx - ocx, 2) + Math.pow(cy - ocy, 2));
1391
+ if (distance < 8) score += 4;
1392
+ else if (distance < 40) score += 2;
1393
+ else if (distance < 120) score += 1;
1394
+ }
1395
+ return score;
1396
+ }
1397
+
1398
+ function refetchEquivalent() {
1399
+ if (!cached) return null;
1400
+ var targetApp = appName || cached.appName || '';
1401
+ var best = null;
1402
+ var bestScore = 0;
1403
+ var visited = [0];
1404
+ function visit(elem, depth) {
1405
+ if (visited[0] > 350 || depth > 10) return;
1406
+ visited[0]++;
1407
+ var score = scoreEquivalent(elem);
1408
+ if (score > bestScore) {
1409
+ best = elem;
1410
+ bestScore = score;
1411
+ }
1412
+ try {
1413
+ var kids = elem.elements();
1414
+ for (var i = 0; i < kids.length; i++) visit(kids[i], depth + 1);
1415
+ } catch(e) {}
1416
+ }
1417
+ try {
1418
+ if (targetApp) {
1419
+ var proc = se.processes[targetApp]();
1420
+ var wins = proc.windows();
1421
+ for (var w = 0; w < wins.length; w++) visit(wins[w], 0);
1422
+ } else {
1423
+ var procs = se.processes();
1424
+ for (var p = 0; p < procs.length; p++) {
1425
+ try {
1426
+ var wins2 = procs[p].windows();
1427
+ for (var w2 = 0; w2 < wins2.length; w2++) visit(wins2[w2], 0);
1428
+ } catch(e2) {}
1429
+ }
1430
+ }
1431
+ } catch(e) {}
1432
+ return bestScore >= 6 ? best : null;
1433
+ }
1434
+
1435
+ var elem = resolveElementInApp(elemPath, appName) || resolveElementByFullPath(elemPath);
1436
+ if (elem && !descriptorMatches(elem)) {
1437
+ elem = refetchEquivalent() || elem;
1438
+ }
1439
+ if (!elem) {
1440
+ elem = refetchEquivalent();
1441
+ }
1442
+
1443
+ if (!elem) {
1444
+ JSON.stringify({success: false, error: "Element not found: " + elemPath});
1445
+ } else {
1446
+ try {
1447
+ elem.value = valueToSet;
1448
+ JSON.stringify({success: true});
1449
+ } catch(e) {
1450
+ JSON.stringify({success: false, error: "Could not set AX value: " + String(e.message || e)});
1451
+ }
1452
+ }
1453
+ `;
1454
+ try {
1455
+ const out = execFileSync("osascript", [
1456
+ "-l", "JavaScript",
1457
+ "-e", jxaScript,
1458
+ ], { encoding: "utf-8", timeout: 15000 }).trim();
1459
+ const result = JSON.parse(out);
1460
+ if (!result.success) {
1461
+ throw new Error(result.error || `set_value failed for element ${elementId}`);
1462
+ }
1463
+ const currentCached = this.elementCache.get(elementId);
1464
+ if (currentCached) {
1465
+ this.elementCache.set(elementId, { ...currentCached, value, cachedAt: Date.now() });
1466
+ }
1467
+ }
1468
+ catch (error) {
1469
+ if (error.message && error.message.includes("set_value failed"))
1470
+ throw error;
1471
+ if (String(error.message || error).includes("not allowed") ||
1472
+ String(error.message || error).includes("permission")) {
1473
+ throw new Error("Accessibility permission required: grant System Events access in System Preferences > Privacy & Accessibility");
1474
+ }
1475
+ throw new Error(`set_value failed: ${error.message || error}`);
1476
+ }
1477
+ }
1478
+ }