ucu-mcp 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +36 -1
- package/README.md +68 -12
- package/dist/src/mcp/server.js +18 -2
- package/dist/src/mcp/tools.d.ts +1 -0
- package/dist/src/mcp/tools.js +173 -65
- package/dist/src/platform/macos.d.ts +4 -0
- package/dist/src/platform/macos.js +355 -215
- package/dist/src/util/errors.d.ts +6 -0
- package/dist/src/util/errors.js +8 -0
- package/dist/src/utils/input.js +88 -18
- package/native/cgevent/cgevent-helper +0 -0
- package/native/cgevent/main.swift +126 -0
- package/native/ocr/main.swift +89 -0
- package/native/ocr/ocr-helper +0 -0
- package/package.json +6 -3
|
@@ -1,13 +1,51 @@
|
|
|
1
1
|
import { execFile, execFileSync } from "node:child_process";
|
|
2
2
|
import { randomUUID } from "node:crypto";
|
|
3
3
|
import { promisify } from "node:util";
|
|
4
|
-
import { captureFullScreen, captureRegion
|
|
4
|
+
import { captureFullScreen, captureRegion } from "../utils/screenshot.js";
|
|
5
5
|
import { click as inputClick, doubleClick as inputDoubleClick, move as inputMove, drag as inputDrag, scroll as inputScroll, typeText, pressShortcut } from "../utils/input.js";
|
|
6
|
+
import { CaptureError, ElementNotFoundError, InputSynthesisError, PermissionError, PlatformError, UcuError, WindowNotFoundError } from "../util/errors.js";
|
|
6
7
|
const execFileAsync = promisify(execFile);
|
|
8
|
+
/**
 * Extracts a human-readable message from an arbitrary thrown value.
 *
 * @param {unknown} error - The caught value; it need not be an Error.
 * @returns {string} `error.message` when `error` is an Error instance,
 *   otherwise the value coerced with `String()`.
 */
function errorMessage(error) {
    if (error instanceof Error) {
        return error.message;
    }
    // Non-Error throwables (strings, numbers, null, ...) are stringified as-is.
    return String(error);
}
|
|
11
|
+
/**
 * Heuristically decides whether a failure looks like a missing
 * accessibility/automation permission.
 *
 * The check is purely textual: the error's message is matched
 * case-insensitively against a few known phrases.
 *
 * @param {unknown} error - The caught value to inspect.
 * @returns {boolean} True when the message contains "not allowed",
 *   "permission", "assistive" or "accessibility".
 */
function isAccessibilityPermissionError(error) {
    const permissionHint = /not allowed|permission|assistive|accessibility/i;
    const message = errorMessage(error);
    return permissionHint.test(message);
}
|
|
14
|
+
/**
 * Normalises a failure from a screen-capture operation and throws it.
 * This function never returns.
 *
 * @param {unknown} error - The caught value.
 * @param {string} operation - Label used as the prefix of the message.
 * @throws {UcuError} The original error, when it is already a UcuError.
 * @throws {CaptureError} Otherwise, with message "<operation> failed: <message>".
 */
function rethrowCaptureError(error, operation) {
    const alreadyTyped = error instanceof UcuError;
    if (!alreadyTyped) {
        const detail = errorMessage(error);
        throw new CaptureError(`${operation} failed: ${detail}`);
    }
    // Typed errors pass through untouched so callers keep the specific class.
    throw error;
}
|
|
19
|
+
/**
 * Normalises a failure from an accessibility query and throws it.
 * This function never returns.
 *
 * @param {unknown} error - The caught value.
 * @param {string} operation - Label used as the prefix of the fallback message.
 * @throws {UcuError} The original error, when it is already a UcuError.
 * @throws {PermissionError} When the message looks like a permission problem.
 * @throws {PlatformError} Otherwise, with message "<operation> failed: <message>".
 */
function rethrowAccessibilityError(error, operation) {
    if (error instanceof UcuError) {
        throw error;
    }
    const wrapped = isAccessibilityPermissionError(error)
        ? new PermissionError("accessibility", "darwin")
        : new PlatformError(`${operation} failed: ${errorMessage(error)}`);
    throw wrapped;
}
|
|
27
|
+
/**
 * Normalises a failure from an element-level action (click, type, set value)
 * and throws it. This function never returns.
 *
 * Classification order: an existing UcuError is passed through, then a
 * permission problem, then "element not found", then a generic platform error.
 *
 * @param {unknown} error - The caught value.
 * @param {string} operation - Label used as the prefix of the fallback message.
 * @param {string} elementId - Identifier reported in ElementNotFoundError.
 * @throws {UcuError} The original error, when it is already a UcuError.
 * @throws {PermissionError} When the message looks like a permission problem.
 * @throws {ElementNotFoundError} When the message contains "element not found"
 *   (case-insensitive).
 * @throws {PlatformError} Otherwise, with message "<operation> failed: <message>".
 */
function rethrowElementActionError(error, operation, elementId) {
    if (error instanceof UcuError) {
        throw error;
    }
    let wrapped;
    if (isAccessibilityPermissionError(error)) {
        wrapped = new PermissionError("accessibility", "darwin");
    }
    else {
        const detail = errorMessage(error);
        wrapped = /element not found/i.test(detail)
            ? new ElementNotFoundError(elementId)
            : new PlatformError(`${operation} failed: ${detail}`);
    }
    throw wrapped;
}
|
|
38
|
+
/**
 * Normalises a failure from synthetic mouse/keyboard input and throws it.
 * This function never returns.
 *
 * @param {unknown} error - The caught value.
 * @param {string} operation - Label used as the prefix of the message.
 * @throws {UcuError} The original error, when it is already a UcuError.
 * @throws {InputSynthesisError} Otherwise, with message
 *   "<operation> failed: <message>".
 */
function rethrowInputError(error, operation) {
    const alreadyTyped = error instanceof UcuError;
    if (!alreadyTyped) {
        const detail = errorMessage(error);
        throw new InputSynthesisError(`${operation} failed: ${detail}`);
    }
    throw error;
}
|
|
7
43
|
export class MacOSPlatform {
|
|
8
44
|
elementCache = new Map();
|
|
9
45
|
elementCacheTtlMs = 30_000;
|
|
10
46
|
elementCacheMaxSize = 100;
|
|
47
|
+
windowCacheTtlMs = 300;
|
|
48
|
+
windowCache;
|
|
11
49
|
activeTarget;
|
|
12
50
|
savedFocus;
|
|
13
51
|
// ── Element Cache Management ────────────────────────────────────────────
|
|
@@ -79,14 +117,22 @@ export class MacOSPlatform {
|
|
|
79
117
|
}
|
|
80
118
|
// ── Screenshot ──────────────────────────────────────────────────────────
|
|
81
119
|
async screenshot(_display, region, options) {
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
120
|
+
try {
|
|
121
|
+
const base64 = region
|
|
122
|
+
? await captureRegion(region.x, region.y, region.width, region.height, options)
|
|
123
|
+
: await captureFullScreen(options);
|
|
124
|
+
return Buffer.from(base64, "base64");
|
|
125
|
+
}
|
|
126
|
+
catch (error) {
|
|
127
|
+
rethrowCaptureError(error, region ? "capture region" : "capture full screen");
|
|
128
|
+
}
|
|
86
129
|
}
|
|
87
130
|
async screenshotWindow(windowId, options) {
|
|
88
|
-
const
|
|
89
|
-
|
|
131
|
+
const win = (await this.listWindows(true)).find((w) => w.id === windowId);
|
|
132
|
+
if (!win) {
|
|
133
|
+
throw new WindowNotFoundError(windowId);
|
|
134
|
+
}
|
|
135
|
+
return this.screenshot(undefined, win.bounds, options);
|
|
90
136
|
}
|
|
91
137
|
// ── Screen Info ─────────────────────────────────────────────────────────
|
|
92
138
|
getScreenSize(display) {
|
|
@@ -154,10 +200,26 @@ export class MacOSPlatform {
|
|
|
154
200
|
}
|
|
155
201
|
async focusApp(app) {
|
|
156
202
|
const appLower = app.toLowerCase();
|
|
157
|
-
const
|
|
158
|
-
|
|
203
|
+
const escapedApp = app.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
|
|
204
|
+
this.windowCache = undefined;
|
|
205
|
+
try {
|
|
206
|
+
execFileSync("osascript", ["-e", `tell application "${escapedApp}" to activate`], { timeout: 5000 });
|
|
207
|
+
}
|
|
208
|
+
catch {
|
|
209
|
+
// Some app names are process labels rather than AppleScript application names.
|
|
210
|
+
// Continue with the AX window lookup below so existing callers still work.
|
|
211
|
+
}
|
|
212
|
+
let target;
|
|
213
|
+
const deadline = Date.now() + 3000;
|
|
214
|
+
do {
|
|
215
|
+
const windows = await this.listWindows(true);
|
|
216
|
+
target = windows.find((w) => w.processName.toLowerCase().includes(appLower));
|
|
217
|
+
if (target)
|
|
218
|
+
break;
|
|
219
|
+
await new Promise((resolve) => setTimeout(resolve, 150));
|
|
220
|
+
} while (Date.now() < deadline);
|
|
159
221
|
if (!target) {
|
|
160
|
-
throw new
|
|
222
|
+
throw new WindowNotFoundError(app);
|
|
161
223
|
}
|
|
162
224
|
this.activeTarget = {
|
|
163
225
|
appName: target.processName,
|
|
@@ -219,44 +281,49 @@ export class MacOSPlatform {
|
|
|
219
281
|
}
|
|
220
282
|
}
|
|
221
283
|
async listWindows(_includeMinimized) {
|
|
284
|
+
const now = Date.now();
|
|
285
|
+
if (this.windowCache && now - this.windowCache.cachedAt <= this.windowCacheTtlMs) {
|
|
286
|
+
return this.windowCache.windows.map((window) => ({
|
|
287
|
+
...window,
|
|
288
|
+
bounds: { ...window.bounds },
|
|
289
|
+
}));
|
|
290
|
+
}
|
|
222
291
|
try {
|
|
292
|
+
// Use System Events instead of CGWindowListCopyWindowInfo.
|
|
293
|
+
// The CoreGraphics API returns CFArrayRef/CFDictionaryRef which JXA
|
|
294
|
+
// cannot iterate reliably — CFArrayGetCount works but objectAtIndex
|
|
295
|
+
// does not. System Events JXA is slower (~3-6s) but correct.
|
|
223
296
|
const jxaScript = `
|
|
224
|
-
|
|
225
|
-
ObjC.import('Foundation');
|
|
226
|
-
var winList = $.CGWindowListCopyWindowInfo(1, 0);
|
|
227
|
-
var count = winList.count;
|
|
297
|
+
var se = Application('System Events');
|
|
228
298
|
var result = [];
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
var
|
|
232
|
-
var
|
|
233
|
-
var
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
isMinimized: false,
|
|
258
|
-
isOnScreen: onScreenVal ? onScreenVal.boolValue : true
|
|
259
|
-
});
|
|
299
|
+
var procs = se.processes();
|
|
300
|
+
for (var i = 0; i < procs.length; i++) {
|
|
301
|
+
var p = procs[i];
|
|
302
|
+
var pName = '';
|
|
303
|
+
var pPid = 0;
|
|
304
|
+
try { pName = p.name(); } catch(e) {}
|
|
305
|
+
try { pPid = p.unixId(); } catch(e) {}
|
|
306
|
+
try {
|
|
307
|
+
var wins = p.windows();
|
|
308
|
+
for (var j = 0; j < wins.length; j++) {
|
|
309
|
+
var w = wins[j];
|
|
310
|
+
var pos, sz;
|
|
311
|
+
try { pos = w.position(); } catch(e) { pos = [0, 0]; }
|
|
312
|
+
try { sz = w.size(); } catch(e) { sz = [0, 0]; }
|
|
313
|
+
if (sz[0] === 0 && sz[1] === 0) continue;
|
|
314
|
+
var title = '';
|
|
315
|
+
try { title = w.name() || ''; } catch(e) {}
|
|
316
|
+
result.push({
|
|
317
|
+
id: pName + '/win' + j,
|
|
318
|
+
title: title,
|
|
319
|
+
processName: pName,
|
|
320
|
+
pid: pPid,
|
|
321
|
+
bounds: { x: pos[0], y: pos[1], width: sz[0], height: sz[1] },
|
|
322
|
+
isMinimized: false,
|
|
323
|
+
isOnScreen: true
|
|
324
|
+
});
|
|
325
|
+
}
|
|
326
|
+
} catch(e) {}
|
|
260
327
|
}
|
|
261
328
|
JSON.stringify(result);
|
|
262
329
|
`;
|
|
@@ -264,7 +331,15 @@ export class MacOSPlatform {
|
|
|
264
331
|
"-l", "JavaScript",
|
|
265
332
|
"-e", jxaScript
|
|
266
333
|
], { encoding: "utf-8", timeout: 15000 });
|
|
267
|
-
|
|
334
|
+
const windows = JSON.parse(jxaOut.trim());
|
|
335
|
+
this.windowCache = {
|
|
336
|
+
cachedAt: Date.now(),
|
|
337
|
+
windows: windows.map((window) => ({
|
|
338
|
+
...window,
|
|
339
|
+
bounds: { ...window.bounds },
|
|
340
|
+
})),
|
|
341
|
+
};
|
|
342
|
+
return windows;
|
|
268
343
|
}
|
|
269
344
|
catch {
|
|
270
345
|
// Fallback: return empty list if JXA fails
|
|
@@ -274,7 +349,7 @@ export class MacOSPlatform {
|
|
|
274
349
|
async getWindowState(windowId, depth, includeBounds = true) {
|
|
275
350
|
const resolvedWindowId = windowId || this.activeTarget?.windowId;
|
|
276
351
|
if (!resolvedWindowId) {
|
|
277
|
-
throw new
|
|
352
|
+
throw new WindowNotFoundError("active target");
|
|
278
353
|
}
|
|
279
354
|
const maxDepth = Math.min(depth || 3, 10);
|
|
280
355
|
const maxElements = 50;
|
|
@@ -285,6 +360,11 @@ export class MacOSPlatform {
|
|
|
285
360
|
const jxaScript = `
|
|
286
361
|
ObjC.import('AppKit');
|
|
287
362
|
var se = Application('System Events');
|
|
363
|
+
function childElements(elem) {
|
|
364
|
+
try { return elem.uiElements(); } catch(e1) {
|
|
365
|
+
try { return elem.elements(); } catch(e2) { return []; }
|
|
366
|
+
}
|
|
367
|
+
}
|
|
288
368
|
var result = {window: null, focusedElement: null, tree: null, error: null};
|
|
289
369
|
var target = ${targetJson};
|
|
290
370
|
var includeBounds = ${includeBounds ? "true" : "false"};
|
|
@@ -319,25 +399,47 @@ export class MacOSPlatform {
|
|
|
319
399
|
return false;
|
|
320
400
|
}
|
|
321
401
|
|
|
402
|
+
var foundWin = null;
|
|
403
|
+
var foundProc = null;
|
|
404
|
+
|
|
405
|
+
// Fast path: resolve "ProcessName/winN" format directly
|
|
406
|
+
var idParts = "${escapedWindowId}".split('/');
|
|
407
|
+
if (idParts.length >= 2 && idParts[0]) {
|
|
408
|
+
var procName = idParts[0];
|
|
409
|
+
var winIdx = 0;
|
|
410
|
+
var winMatch = idParts[1].match(/^win(\d+)$/);
|
|
411
|
+
if (winMatch) winIdx = parseInt(winMatch[1]);
|
|
412
|
+
try {
|
|
413
|
+
var proc = se.processes[procName]();
|
|
414
|
+
var ws = proc.windows();
|
|
415
|
+
if (winIdx < ws.length) {
|
|
416
|
+
foundWin = ws[winIdx];
|
|
417
|
+
foundProc = proc;
|
|
418
|
+
}
|
|
419
|
+
} catch(e) {}
|
|
420
|
+
}
|
|
421
|
+
|
|
322
422
|
try {
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
423
|
+
if (!foundWin) {
|
|
424
|
+
var procs = se.processes();
|
|
425
|
+
for (var p = 0; p < procs.length; p++) {
|
|
426
|
+
var proc = procs[p];
|
|
427
|
+
try {
|
|
428
|
+
var wins = proc.windows();
|
|
429
|
+
for (var w = 0; w < wins.length; w++) {
|
|
430
|
+
if (windowMatches(wins[w], proc)) {
|
|
431
|
+
foundWin = wins[w];
|
|
432
|
+
foundProc = proc;
|
|
433
|
+
break;
|
|
434
|
+
}
|
|
335
435
|
}
|
|
336
|
-
}
|
|
337
|
-
|
|
338
|
-
|
|
436
|
+
} catch(e) {}
|
|
437
|
+
if (foundWin) break;
|
|
438
|
+
}
|
|
339
439
|
}
|
|
340
|
-
if (!foundWin) {
|
|
440
|
+
if (!foundWin) {
|
|
441
|
+
result.error = 'Window not found';
|
|
442
|
+
} else {
|
|
341
443
|
|
|
342
444
|
var winPos = foundWin.position();
|
|
343
445
|
var winSize = foundWin.size();
|
|
@@ -423,7 +525,7 @@ export class MacOSPlatform {
|
|
|
423
525
|
|
|
424
526
|
if (currentDepth < ${maxDepth}) {
|
|
425
527
|
try {
|
|
426
|
-
var kids = axElem
|
|
528
|
+
var kids = childElements(axElem);
|
|
427
529
|
for (var k = 0; k < kids.length && elemCount[0] < ${maxElements}; k++) {
|
|
428
530
|
var child = extractElement(kids[k], currentDepth + 1);
|
|
429
531
|
if (child) info.children.push(child);
|
|
@@ -433,7 +535,8 @@ export class MacOSPlatform {
|
|
|
433
535
|
return info;
|
|
434
536
|
}
|
|
435
537
|
|
|
436
|
-
|
|
538
|
+
result.tree = extractElement(foundWin, 0);
|
|
539
|
+
}
|
|
437
540
|
} catch(e) {
|
|
438
541
|
result.error = String(e.message || e);
|
|
439
542
|
}
|
|
@@ -445,7 +548,7 @@ export class MacOSPlatform {
|
|
|
445
548
|
], { encoding: "utf-8", timeout: 15000 }).trim();
|
|
446
549
|
const parsed = JSON.parse(out);
|
|
447
550
|
if (parsed.error && !parsed.window) {
|
|
448
|
-
throw new
|
|
551
|
+
throw new WindowNotFoundError(resolvedWindowId);
|
|
449
552
|
}
|
|
450
553
|
const windowInfo = parsed.window || {
|
|
451
554
|
id: resolvedWindowId,
|
|
@@ -463,31 +566,48 @@ export class MacOSPlatform {
|
|
|
463
566
|
};
|
|
464
567
|
}
|
|
465
568
|
catch (error) {
|
|
466
|
-
if (
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
throw new Error(`Accessibility permission required: grant System Events access in System Preferences > Privacy & Accessibility`);
|
|
470
|
-
}
|
|
471
|
-
throw new Error(`Window ${resolvedWindowId} not found or Accessibility permission missing`);
|
|
569
|
+
if (error instanceof WindowNotFoundError)
|
|
570
|
+
throw error;
|
|
571
|
+
rethrowAccessibilityError(error, "get_window_state");
|
|
472
572
|
}
|
|
473
573
|
}
|
|
474
574
|
// ── Mouse ───────────────────────────────────────────────────────────────
|
|
475
575
|
async click(x, y, button, doubleClick) {
|
|
476
|
-
|
|
477
|
-
|
|
576
|
+
try {
|
|
577
|
+
if (doubleClick) {
|
|
578
|
+
await inputDoubleClick(x, y, button);
|
|
579
|
+
}
|
|
580
|
+
else {
|
|
581
|
+
await inputClick(x, y, button);
|
|
582
|
+
}
|
|
478
583
|
}
|
|
479
|
-
|
|
480
|
-
|
|
584
|
+
catch (error) {
|
|
585
|
+
rethrowInputError(error, doubleClick ? "double_click" : "click");
|
|
481
586
|
}
|
|
482
587
|
}
|
|
483
588
|
async move(x, y) {
|
|
484
|
-
|
|
589
|
+
try {
|
|
590
|
+
await inputMove(x, y);
|
|
591
|
+
}
|
|
592
|
+
catch (error) {
|
|
593
|
+
rethrowInputError(error, "move");
|
|
594
|
+
}
|
|
485
595
|
}
|
|
486
596
|
async drag(startX, startY, endX, endY, button, duration) {
|
|
487
|
-
|
|
597
|
+
try {
|
|
598
|
+
await inputDrag(startX, startY, endX, endY, button, duration);
|
|
599
|
+
}
|
|
600
|
+
catch (error) {
|
|
601
|
+
rethrowInputError(error, "drag");
|
|
602
|
+
}
|
|
488
603
|
}
|
|
489
604
|
async scroll(x, y, deltaX, deltaY) {
|
|
490
|
-
|
|
605
|
+
try {
|
|
606
|
+
await inputScroll(x, y, deltaX, deltaY);
|
|
607
|
+
}
|
|
608
|
+
catch (error) {
|
|
609
|
+
rethrowInputError(error, "scroll");
|
|
610
|
+
}
|
|
491
611
|
}
|
|
492
612
|
// ── Cursor ──────────────────────────────────────────────────────────────
|
|
493
613
|
getCursorPosition() {
|
|
@@ -502,14 +622,12 @@ export class MacOSPlatform {
|
|
|
502
622
|
return JSON.parse(out);
|
|
503
623
|
}
|
|
504
624
|
catch (error) {
|
|
505
|
-
throw new
|
|
625
|
+
throw new PlatformError(`get_cursor_position failed: ${errorMessage(error)}`);
|
|
506
626
|
}
|
|
507
627
|
}
|
|
508
628
|
// ── OCR ──────────────────────────────────────────────────────────────────
|
|
509
629
|
async ocr(display, region) {
|
|
510
|
-
// Take a screenshot first (reuse existing logic)
|
|
511
630
|
const buf = await this.screenshot(display, region);
|
|
512
|
-
// Write screenshot to a temp file so Vision framework can read it
|
|
513
631
|
const { writeFile, unlink } = await import("node:fs/promises");
|
|
514
632
|
const { join } = await import("node:path");
|
|
515
633
|
const { tmpdir } = await import("node:os");
|
|
@@ -518,93 +636,46 @@ export class MacOSPlatform {
|
|
|
518
636
|
try {
|
|
519
637
|
const screenSize = this.getScreenSize(display);
|
|
520
638
|
const scaleFactor = screenSize.scaleFactor ?? 2;
|
|
521
|
-
//
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
request.usesLanguageCorrection = true;
|
|
548
|
-
|
|
549
|
-
var handler = $.VNImageRequestHandler.alloc.initWithCGImageOptions(cgImage, null);
|
|
550
|
-
var performError = $();
|
|
551
|
-
|
|
552
|
-
var success = handler.performRequestsError([request], performError);
|
|
553
|
-
if (!success) {
|
|
554
|
-
return JSON.stringify({error: "OCR request failed", elements: [], fullText: ""});
|
|
555
|
-
}
|
|
556
|
-
|
|
557
|
-
var results = request.results;
|
|
558
|
-
var elements = [];
|
|
559
|
-
var fullTextParts = [];
|
|
560
|
-
|
|
561
|
-
var imgWidth = cgImage.width;
|
|
562
|
-
var imgHeight = cgImage.height;
|
|
563
|
-
|
|
564
|
-
for (var i = 0; i < results.count; i++) {
|
|
565
|
-
var obs = $(results).objectAtIndex(i);
|
|
566
|
-
var candidates = obs.topCandidates(1);
|
|
567
|
-
if (candidates && candidates.count > 0) {
|
|
568
|
-
var candidate = $(candidates).objectAtIndex(0);
|
|
569
|
-
var text = candidate.string.toString();
|
|
570
|
-
var confidence = candidate.confidence;
|
|
571
|
-
var bbox = obs.boundingBox;
|
|
572
|
-
|
|
573
|
-
// Vision boundingBox is normalized (0-1) with origin at bottom-left
|
|
574
|
-
// Convert to screen coordinates (origin at top-left)
|
|
575
|
-
var bx = bbox.origin.x * imgWidth;
|
|
576
|
-
var by = (1 - bbox.origin.y - bbox.size.height) * imgHeight;
|
|
577
|
-
var bw = bbox.size.width * imgWidth;
|
|
578
|
-
var bh = bbox.size.height * imgHeight;
|
|
579
|
-
|
|
580
|
-
elements.push({
|
|
581
|
-
text: text,
|
|
582
|
-
x: Math.round(bx),
|
|
583
|
-
y: Math.round(by),
|
|
584
|
-
width: Math.round(bw),
|
|
585
|
-
height: Math.round(bh),
|
|
586
|
-
confidence: confidence
|
|
587
|
-
});
|
|
588
|
-
fullTextParts.push(text);
|
|
639
|
+
// Try native Swift OCR helper first (avoids JXA ObjC bridge bugs on macOS Sequoia+)
|
|
640
|
+
const nativeResult = await this.ocrNative(tmpPath, scaleFactor, region);
|
|
641
|
+
if (nativeResult)
|
|
642
|
+
return nativeResult;
|
|
643
|
+
// Fallback to JXA Vision framework
|
|
644
|
+
return await this.ocrJxa(tmpPath, screenSize, scaleFactor, region, buf);
|
|
645
|
+
}
|
|
646
|
+
finally {
|
|
647
|
+
await unlink(tmpPath).catch(() => { });
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
async ocrNative(tmpPath, scaleFactor, region) {
|
|
651
|
+
const { existsSync } = await import("node:fs");
|
|
652
|
+
const { join, dirname } = await import("node:path");
|
|
653
|
+
const { fileURLToPath } = await import("node:url");
|
|
654
|
+
// Resolve native binary path (same pattern as input.ts CGEvent helper)
|
|
655
|
+
const candidates = [
|
|
656
|
+
join(dirname(fileURLToPath(import.meta.url)), "..", "..", "native", "ocr", "ocr-helper"),
|
|
657
|
+
join(dirname(fileURLToPath(import.meta.url)), "..", "native", "ocr", "ocr-helper"),
|
|
658
|
+
join(process.cwd(), "native", "ocr", "ocr-helper"),
|
|
659
|
+
];
|
|
660
|
+
let binaryPath;
|
|
661
|
+
for (const p of candidates) {
|
|
662
|
+
if (existsSync(p)) {
|
|
663
|
+
binaryPath = p;
|
|
664
|
+
break;
|
|
589
665
|
}
|
|
590
|
-
}
|
|
591
|
-
|
|
592
|
-
return JSON.stringify({elements: elements, fullText: fullTextParts.join("\\n"), error: null});
|
|
593
666
|
}
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
667
|
+
if (!binaryPath)
|
|
668
|
+
return null;
|
|
669
|
+
try {
|
|
670
|
+
const input = JSON.stringify({ imagePath: tmpPath });
|
|
671
|
+
const out = execFileSync(binaryPath, [], {
|
|
672
|
+
input,
|
|
673
|
+
encoding: "utf-8",
|
|
674
|
+
timeout: 30000,
|
|
675
|
+
}).trim();
|
|
600
676
|
const parsed = JSON.parse(out);
|
|
601
|
-
if (parsed.error)
|
|
602
|
-
|
|
603
|
-
}
|
|
604
|
-
// Scale coordinates from image space to screen space
|
|
605
|
-
// The screenshot may be taken at a different resolution than screen coordinates
|
|
606
|
-
const imgWidth = buf.readUInt32BE(16); // PNG width at offset 16
|
|
607
|
-
const scaleFactorX = screenSize.width / (region ? region.width : (imgWidth / scaleFactor));
|
|
677
|
+
if (parsed.error)
|
|
678
|
+
return null;
|
|
608
679
|
const elements = parsed.elements.map((el) => ({
|
|
609
680
|
text: el.text,
|
|
610
681
|
x: Math.round(el.x / scaleFactor) + (region ? region.x : 0),
|
|
@@ -613,15 +684,81 @@ export class MacOSPlatform {
|
|
|
613
684
|
height: Math.round(el.height / scaleFactor),
|
|
614
685
|
confidence: el.confidence,
|
|
615
686
|
}));
|
|
616
|
-
return {
|
|
617
|
-
elements,
|
|
618
|
-
fullText: parsed.fullText,
|
|
619
|
-
};
|
|
687
|
+
return { elements, fullText: parsed.fullText };
|
|
620
688
|
}
|
|
621
|
-
|
|
622
|
-
|
|
689
|
+
catch {
|
|
690
|
+
return null;
|
|
623
691
|
}
|
|
624
692
|
}
|
|
693
|
+
async ocrJxa(tmpPath, screenSize, scaleFactor, region, buf) {
|
|
694
|
+
const escapedPath = tmpPath.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/`/g, "\\`").replace(/$/g, "\\$");
|
|
695
|
+
const jxaScript = `
|
|
696
|
+
function run() {
|
|
697
|
+
ObjC.import('Vision');
|
|
698
|
+
ObjC.import('AppKit');
|
|
699
|
+
ObjC.import('Foundation');
|
|
700
|
+
var app = Application.currentApplication();
|
|
701
|
+
app.includeStandardAdditions = true;
|
|
702
|
+
var path = "${escapedPath}";
|
|
703
|
+
var url = $.NSURL.fileURLWithPath(path);
|
|
704
|
+
var image = $.NSImage.alloc.initWithContentsOfURL(url);
|
|
705
|
+
if (!image || !image.isValid) {
|
|
706
|
+
return JSON.stringify({error: "Failed to load screenshot image", elements: [], fullText: ""});
|
|
707
|
+
}
|
|
708
|
+
var cgImage = image.CGImageForProposedRectContextHints(null, null, null);
|
|
709
|
+
if (!cgImage) {
|
|
710
|
+
return JSON.stringify({error: "Failed to get CGImage from screenshot", elements: [], fullText: ""});
|
|
711
|
+
}
|
|
712
|
+
var request = $.VNRecognizeTextRequest.alloc.init;
|
|
713
|
+
request.recognitionLevel = $.VNRequestTextRecognitionLevelAccurate;
|
|
714
|
+
request.usesLanguageCorrection = true;
|
|
715
|
+
var handler = $.VNImageRequestHandler.alloc.initWithCGImageOptions(cgImage, null);
|
|
716
|
+
var performError = $();
|
|
717
|
+
var success = handler.performRequestsError([request], performError);
|
|
718
|
+
if (!success) {
|
|
719
|
+
return JSON.stringify({error: "OCR request failed", elements: [], fullText: ""});
|
|
720
|
+
}
|
|
721
|
+
var results = request.results;
|
|
722
|
+
var elements = [];
|
|
723
|
+
var fullTextParts = [];
|
|
724
|
+
var imgWidth = cgImage.width;
|
|
725
|
+
var imgHeight = cgImage.height;
|
|
726
|
+
for (var i = 0; i < results.count; i++) {
|
|
727
|
+
var obs = $(results).objectAtIndex(i);
|
|
728
|
+
var candidates = obs.topCandidates(1);
|
|
729
|
+
if (candidates && candidates.count > 0) {
|
|
730
|
+
var candidate = $(candidates).objectAtIndex(0);
|
|
731
|
+
var text = candidate.string.toString();
|
|
732
|
+
var confidence = candidate.confidence;
|
|
733
|
+
var bbox = obs.boundingBox;
|
|
734
|
+
var bx = bbox.origin.x * imgWidth;
|
|
735
|
+
var by = (1 - bbox.origin.y - bbox.size.height) * imgHeight;
|
|
736
|
+
var bw = bbox.size.width * imgWidth;
|
|
737
|
+
var bh = bbox.size.height * imgHeight;
|
|
738
|
+
elements.push({text:text,x:Math.round(bx),y:Math.round(by),width:Math.round(bw),height:Math.round(bh),confidence:confidence});
|
|
739
|
+
fullTextParts.push(text);
|
|
740
|
+
}
|
|
741
|
+
}
|
|
742
|
+
return JSON.stringify({elements:elements,fullText:fullTextParts.join("\\n"),error:null});
|
|
743
|
+
}
|
|
744
|
+
run();
|
|
745
|
+
`;
|
|
746
|
+
const out = execFileSync("osascript", ["-l", "JavaScript", "-e", jxaScript], { encoding: "utf-8", timeout: 30000 }).trim();
|
|
747
|
+
const parsed = JSON.parse(out);
|
|
748
|
+
if (parsed.error)
|
|
749
|
+
throw new CaptureError(`ocr failed: ${parsed.error}`);
|
|
750
|
+
const imgWidth = buf.readUInt32BE(16);
|
|
751
|
+
const scaleFactorX = screenSize.width / (region ? region.width : (imgWidth / scaleFactor));
|
|
752
|
+
const elements = parsed.elements.map((el) => ({
|
|
753
|
+
text: el.text,
|
|
754
|
+
x: Math.round(el.x / scaleFactor) + (region ? region.x : 0),
|
|
755
|
+
y: Math.round(el.y / scaleFactor) + (region ? region.y : 0),
|
|
756
|
+
width: Math.round(el.width / scaleFactor),
|
|
757
|
+
height: Math.round(el.height / scaleFactor),
|
|
758
|
+
confidence: el.confidence,
|
|
759
|
+
}));
|
|
760
|
+
return { elements, fullText: parsed.fullText };
|
|
761
|
+
}
|
|
625
762
|
// ── Keyboard ────────────────────────────────────────────────────────────
|
|
626
763
|
async type(text, delay) {
|
|
627
764
|
await typeText(text, delay);
|
|
@@ -640,8 +777,13 @@ export class MacOSPlatform {
|
|
|
640
777
|
const escapedText = text ? text.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/`/g, '\\`').replace(/\$/g, '\\$') : "";
|
|
641
778
|
const escapedRole = role ? role.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/`/g, '\\`').replace(/\$/g, '\\$') : "";
|
|
642
779
|
const jxaScript = `
|
|
643
|
-
|
|
644
|
-
|
|
780
|
+
var se = Application('System Events');
|
|
781
|
+
function childElements(elem) {
|
|
782
|
+
try { return elem.uiElements(); } catch(e1) {
|
|
783
|
+
try { return elem.elements(); } catch(e2) { return []; }
|
|
784
|
+
}
|
|
785
|
+
}
|
|
786
|
+
var results = [];
|
|
645
787
|
var resultCount = [0];
|
|
646
788
|
var maxResults = ${maxResults};
|
|
647
789
|
var includeBounds = ${includeBounds ? "true" : "false"};
|
|
@@ -723,7 +865,7 @@ export class MacOSPlatform {
|
|
|
723
865
|
|
|
724
866
|
if (currentDepth < ${maxDepth}) {
|
|
725
867
|
try {
|
|
726
|
-
var kids = elem
|
|
868
|
+
var kids = childElements(elem);
|
|
727
869
|
for (var k = 0; k < kids.length && resultCount[0] < maxResults; k++) {
|
|
728
870
|
traverse(kids[k], path + '/' + k, currentDepth + 1);
|
|
729
871
|
}
|
|
@@ -736,7 +878,7 @@ export class MacOSPlatform {
|
|
|
736
878
|
var proc = se.processes["${escapedApp}"]();
|
|
737
879
|
var wins = proc.windows();
|
|
738
880
|
for (var w = 0; w < wins.length && resultCount[0] < maxResults; w++) {
|
|
739
|
-
traverse(wins[w], "win" + w, 0);
|
|
881
|
+
traverse(wins[w], "${escapedApp}/win" + w, 0);
|
|
740
882
|
}
|
|
741
883
|
} else {
|
|
742
884
|
var procs = se.processes();
|
|
@@ -779,12 +921,7 @@ export class MacOSPlatform {
|
|
|
779
921
|
return results;
|
|
780
922
|
}
|
|
781
923
|
catch (error) {
|
|
782
|
-
|
|
783
|
-
String(error.message || error).includes("permission") ||
|
|
784
|
-
String(error.message || error).includes("assistive")) {
|
|
785
|
-
throw new Error("Accessibility permission required: grant System Events access in System Preferences > Privacy & Accessibility");
|
|
786
|
-
}
|
|
787
|
-
throw new Error(`find_element failed: ${error.message || error}`);
|
|
924
|
+
rethrowAccessibilityError(error, "find_element");
|
|
788
925
|
}
|
|
789
926
|
}
|
|
790
927
|
async clickElement(elementId, app) {
|
|
@@ -799,6 +936,11 @@ export class MacOSPlatform {
|
|
|
799
936
|
const cachedJson = JSON.stringify(this.elementCache.get(elementId) ?? null);
|
|
800
937
|
const jxaScript = `
|
|
801
938
|
var se = Application('System Events');
|
|
939
|
+
function childElements(elem) {
|
|
940
|
+
try { return elem.uiElements(); } catch(e1) {
|
|
941
|
+
try { return elem.elements(); } catch(e2) { return []; }
|
|
942
|
+
}
|
|
943
|
+
}
|
|
802
944
|
var elemPath = "${escapedElementId}";
|
|
803
945
|
var appName = "${escapedApp}";
|
|
804
946
|
var cached = ${cachedJson};
|
|
@@ -825,7 +967,7 @@ export class MacOSPlatform {
|
|
|
825
967
|
var idx = parseInt(parts[i]);
|
|
826
968
|
if (isNaN(idx)) return null;
|
|
827
969
|
try {
|
|
828
|
-
var kids = current
|
|
970
|
+
var kids = childElements(current);
|
|
829
971
|
if (idx >= kids.length) return null;
|
|
830
972
|
current = kids[idx];
|
|
831
973
|
} catch(e) { return null; }
|
|
@@ -910,7 +1052,7 @@ export class MacOSPlatform {
|
|
|
910
1052
|
bestScore = score;
|
|
911
1053
|
}
|
|
912
1054
|
try {
|
|
913
|
-
var kids = elem
|
|
1055
|
+
var kids = childElements(elem);
|
|
914
1056
|
for (var i = 0; i < kids.length; i++) visit(kids[i], depth + 1);
|
|
915
1057
|
} catch(e) {}
|
|
916
1058
|
}
|
|
@@ -948,7 +1090,7 @@ export class MacOSPlatform {
|
|
|
948
1090
|
var idx = parseInt(parts[i]);
|
|
949
1091
|
if (isNaN(idx)) break;
|
|
950
1092
|
try {
|
|
951
|
-
var kids = current
|
|
1093
|
+
var kids = childElements(current);
|
|
952
1094
|
if (idx >= kids.length) break;
|
|
953
1095
|
current = kids[idx];
|
|
954
1096
|
} catch(e) { break; }
|
|
@@ -1003,17 +1145,13 @@ export class MacOSPlatform {
|
|
|
1003
1145
|
], { encoding: "utf-8", timeout: 15000 }).trim();
|
|
1004
1146
|
const result = JSON.parse(out);
|
|
1005
1147
|
if (!result.success) {
|
|
1006
|
-
throw
|
|
1148
|
+
throw result.error
|
|
1149
|
+
? new Error(result.error)
|
|
1150
|
+
: new ElementNotFoundError(elementId);
|
|
1007
1151
|
}
|
|
1008
1152
|
}
|
|
1009
1153
|
catch (error) {
|
|
1010
|
-
|
|
1011
|
-
throw error;
|
|
1012
|
-
if (String(error.message || error).includes("not allowed") ||
|
|
1013
|
-
String(error.message || error).includes("permission")) {
|
|
1014
|
-
throw new Error("Accessibility permission required: grant System Events access in System Preferences > Privacy & Accessibility");
|
|
1015
|
-
}
|
|
1016
|
-
throw new Error(`click_element failed: ${error.message || error}`);
|
|
1154
|
+
rethrowElementActionError(error, "click_element", elementId);
|
|
1017
1155
|
}
|
|
1018
1156
|
}
|
|
1019
1157
|
async typeInElement(elementId, text, app, clearFirst) {
|
|
@@ -1029,6 +1167,11 @@ export class MacOSPlatform {
|
|
|
1029
1167
|
const cachedJson = JSON.stringify(this.elementCache.get(elementId) ?? null);
|
|
1030
1168
|
const jxaScript = `
|
|
1031
1169
|
var se = Application('System Events');
|
|
1170
|
+
function childElements(elem) {
|
|
1171
|
+
try { return elem.uiElements(); } catch(e1) {
|
|
1172
|
+
try { return elem.elements(); } catch(e2) { return []; }
|
|
1173
|
+
}
|
|
1174
|
+
}
|
|
1032
1175
|
var elemPath = "${escapedElementId}";
|
|
1033
1176
|
var appName = "${escapedApp}";
|
|
1034
1177
|
var textToType = "${escapedText}";
|
|
@@ -1057,7 +1200,7 @@ export class MacOSPlatform {
|
|
|
1057
1200
|
var idx = parseInt(parts[i]);
|
|
1058
1201
|
if (isNaN(idx)) return null;
|
|
1059
1202
|
try {
|
|
1060
|
-
var kids = current
|
|
1203
|
+
var kids = childElements(current);
|
|
1061
1204
|
if (idx >= kids.length) return null;
|
|
1062
1205
|
current = kids[idx];
|
|
1063
1206
|
} catch(e) { return null; }
|
|
@@ -1142,7 +1285,7 @@ export class MacOSPlatform {
|
|
|
1142
1285
|
bestScore = score;
|
|
1143
1286
|
}
|
|
1144
1287
|
try {
|
|
1145
|
-
var kids = elem
|
|
1288
|
+
var kids = childElements(elem);
|
|
1146
1289
|
for (var i = 0; i < kids.length; i++) visit(kids[i], depth + 1);
|
|
1147
1290
|
} catch(e) {}
|
|
1148
1291
|
}
|
|
@@ -1180,7 +1323,7 @@ export class MacOSPlatform {
|
|
|
1180
1323
|
var idx = parseInt(parts[i]);
|
|
1181
1324
|
if (isNaN(idx)) break;
|
|
1182
1325
|
try {
|
|
1183
|
-
var kids = current
|
|
1326
|
+
var kids = childElements(current);
|
|
1184
1327
|
if (idx >= kids.length) break;
|
|
1185
1328
|
current = kids[idx];
|
|
1186
1329
|
} catch(e) { break; }
|
|
@@ -1245,17 +1388,13 @@ export class MacOSPlatform {
|
|
|
1245
1388
|
], { encoding: "utf-8", timeout: 15000 }).trim();
|
|
1246
1389
|
const result = JSON.parse(out);
|
|
1247
1390
|
if (!result.success) {
|
|
1248
|
-
throw
|
|
1391
|
+
throw result.error
|
|
1392
|
+
? new Error(result.error)
|
|
1393
|
+
: new ElementNotFoundError(elementId);
|
|
1249
1394
|
}
|
|
1250
1395
|
}
|
|
1251
1396
|
catch (error) {
|
|
1252
|
-
|
|
1253
|
-
throw error;
|
|
1254
|
-
if (String(error.message || error).includes("not allowed") ||
|
|
1255
|
-
String(error.message || error).includes("permission")) {
|
|
1256
|
-
throw new Error("Accessibility permission required: grant System Events access in System Preferences > Privacy & Accessibility");
|
|
1257
|
-
}
|
|
1258
|
-
throw new Error(`type_in_element failed: ${error.message || error}`);
|
|
1397
|
+
rethrowElementActionError(error, "type_in_element", elementId);
|
|
1259
1398
|
}
|
|
1260
1399
|
}
|
|
1261
1400
|
async setElementValue(elementId, value, app) {
|
|
@@ -1271,6 +1410,11 @@ export class MacOSPlatform {
|
|
|
1271
1410
|
const cachedJson = JSON.stringify(this.elementCache.get(elementId) ?? null);
|
|
1272
1411
|
const jxaScript = `
|
|
1273
1412
|
var se = Application('System Events');
|
|
1413
|
+
function childElements(elem) {
|
|
1414
|
+
try { return elem.uiElements(); } catch(e1) {
|
|
1415
|
+
try { return elem.elements(); } catch(e2) { return []; }
|
|
1416
|
+
}
|
|
1417
|
+
}
|
|
1274
1418
|
var elemPath = ${elementIdLiteral};
|
|
1275
1419
|
var appName = ${appLiteral};
|
|
1276
1420
|
var valueToSet = ${valueLiteral};
|
|
@@ -1296,7 +1440,7 @@ export class MacOSPlatform {
|
|
|
1296
1440
|
var idx = parseInt(parts[i]);
|
|
1297
1441
|
if (isNaN(idx)) return null;
|
|
1298
1442
|
try {
|
|
1299
|
-
var kids = current
|
|
1443
|
+
var kids = childElements(current);
|
|
1300
1444
|
if (idx >= kids.length) return null;
|
|
1301
1445
|
current = kids[idx];
|
|
1302
1446
|
} catch(e) { return null; }
|
|
@@ -1323,7 +1467,7 @@ export class MacOSPlatform {
|
|
|
1323
1467
|
var idx = parseInt(parts[i]);
|
|
1324
1468
|
if (isNaN(idx)) return null;
|
|
1325
1469
|
try {
|
|
1326
|
-
var kids = current
|
|
1470
|
+
var kids = childElements(current);
|
|
1327
1471
|
if (idx >= kids.length) return null;
|
|
1328
1472
|
current = kids[idx];
|
|
1329
1473
|
} catch(e) { return null; }
|
|
@@ -1408,7 +1552,7 @@ export class MacOSPlatform {
|
|
|
1408
1552
|
bestScore = score;
|
|
1409
1553
|
}
|
|
1410
1554
|
try {
|
|
1411
|
-
var kids = elem
|
|
1555
|
+
var kids = childElements(elem);
|
|
1412
1556
|
for (var i = 0; i < kids.length; i++) visit(kids[i], depth + 1);
|
|
1413
1557
|
} catch(e) {}
|
|
1414
1558
|
}
|
|
@@ -1456,7 +1600,9 @@ export class MacOSPlatform {
|
|
|
1456
1600
|
], { encoding: "utf-8", timeout: 15000 }).trim();
|
|
1457
1601
|
const result = JSON.parse(out);
|
|
1458
1602
|
if (!result.success) {
|
|
1459
|
-
throw
|
|
1603
|
+
throw result.error
|
|
1604
|
+
? new Error(result.error)
|
|
1605
|
+
: new ElementNotFoundError(elementId);
|
|
1460
1606
|
}
|
|
1461
1607
|
const currentCached = this.elementCache.get(elementId);
|
|
1462
1608
|
if (currentCached) {
|
|
@@ -1464,13 +1610,7 @@ export class MacOSPlatform {
|
|
|
1464
1610
|
}
|
|
1465
1611
|
}
|
|
1466
1612
|
catch (error) {
|
|
1467
|
-
|
|
1468
|
-
throw error;
|
|
1469
|
-
if (String(error.message || error).includes("not allowed") ||
|
|
1470
|
-
String(error.message || error).includes("permission")) {
|
|
1471
|
-
throw new Error("Accessibility permission required: grant System Events access in System Preferences > Privacy & Accessibility");
|
|
1472
|
-
}
|
|
1473
|
-
throw new Error(`set_value failed: ${error.message || error}`);
|
|
1613
|
+
rethrowElementActionError(error, "set_value", elementId);
|
|
1474
1614
|
}
|
|
1475
1615
|
}
|
|
1476
1616
|
}
|