ucu-mcp 0.4.0 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +50 -4
- package/dist/bin/ucu-mcp.js +1 -1
- package/dist/src/index.d.ts +2 -2
- package/dist/src/index.js +2 -2
- package/dist/src/mcp/server.js +1 -1
- package/dist/src/mcp/tools/app-tools.d.ts +2 -0
- package/dist/src/mcp/tools/app-tools.js +225 -0
- package/dist/src/mcp/tools/element-tools.d.ts +23 -0
- package/dist/src/mcp/tools/element-tools.js +59 -0
- package/dist/src/mcp/tools/helpers.d.ts +84 -0
- package/dist/src/mcp/tools/helpers.js +247 -0
- package/dist/src/mcp/tools/index.d.ts +19 -0
- package/dist/src/mcp/tools/index.js +55 -0
- package/dist/src/mcp/tools/input-tools.d.ts +2 -0
- package/dist/src/mcp/tools/input-tools.js +66 -0
- package/dist/src/mcp/tools/keyboard-tools.d.ts +2 -0
- package/dist/src/mcp/tools/keyboard-tools.js +35 -0
- package/dist/src/mcp/tools/screen-tools.d.ts +2 -0
- package/dist/src/mcp/tools/screen-tools.js +69 -0
- package/dist/src/mcp/tools.d.ts +9 -0
- package/dist/src/mcp/tools.js +87 -23
- package/dist/src/platform/base.d.ts +3 -0
- package/dist/src/platform/jxa-helpers.d.ts +11 -0
- package/dist/src/platform/jxa-helpers.js +206 -0
- package/dist/src/platform/macos/ax-tree.d.ts +4 -0
- package/dist/src/platform/macos/ax-tree.js +462 -0
- package/dist/src/platform/macos/base.d.ts +57 -0
- package/dist/src/platform/macos/base.js +92 -0
- package/dist/src/platform/macos/clipboard.d.ts +3 -0
- package/dist/src/platform/macos/clipboard.js +20 -0
- package/dist/src/platform/macos/element.d.ts +4 -0
- package/dist/src/platform/macos/element.js +212 -0
- package/dist/src/platform/macos/focus.d.ts +3 -0
- package/dist/src/platform/macos/focus.js +33 -0
- package/dist/src/platform/macos/helpers.d.ts +35 -0
- package/dist/src/platform/macos/helpers.js +54 -0
- package/dist/src/platform/macos/index.d.ts +2 -0
- package/dist/src/platform/macos/index.js +1 -0
- package/dist/src/platform/macos/input.d.ts +9 -0
- package/dist/src/platform/macos/input.js +62 -0
- package/dist/src/platform/macos/screen.d.ts +7 -0
- package/dist/src/platform/macos/screen.js +197 -0
- package/dist/src/platform/macos/window.d.ts +6 -0
- package/dist/src/platform/macos/window.js +251 -0
- package/dist/src/platform/macos.js +71 -563
- package/dist/src/util/errors.d.ts +7 -2
- package/dist/src/util/errors.js +7 -3
- package/native/cgevent/cgevent-helper +0 -0
- package/native/ocr/ocr-helper +0 -0
- package/native/windowlist/windowlist-helper +0 -0
- package/package.json +1 -1
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import { execFileSync } from "node:child_process";
|
|
2
|
+
import { ElementNotFoundError } from "../../util/errors.js";
|
|
3
|
+
import { rethrowElementActionError } from "./helpers.js";
|
|
4
|
+
import { jxaElementActionHelpers } from "../jxa-helpers.js";
|
|
5
|
+
function prepareCache(elementId) {
|
|
6
|
+
this.evictExpiredCacheEntries();
|
|
7
|
+
const cached = this.elementCache.get(elementId);
|
|
8
|
+
if (cached && this.isCacheEntryExpired(cached)) {
|
|
9
|
+
this.elementCache.delete(elementId);
|
|
10
|
+
}
|
|
11
|
+
return this.elementCache.get(elementId) ?? null;
|
|
12
|
+
}
|
|
13
|
+
/**
 * Clicks an accessibility element by running a JXA script through `osascript`.
 *
 * The script resolves the element from `elementId` (scoped to `app`, or to
 * `this.activeTarget.appName` when `app` is falsy), falls back to
 * `refetchEquivalent()` when the path no longer resolves or no longer matches
 * the cached descriptor, then performs `AXPress`. If `AXPress` throws, it
 * posts a synthetic left mouse down/up at the element's centre instead.
 *
 * The resolver functions used by the script are presumably defined by the
 * text returned from `jxaElementActionHelpers()` — confirm in jxa-helpers.js.
 *
 * @param elementId Element path/id; embedded in the script as a JSON literal.
 * @param app Optional application name.
 * @throws Whatever `rethrowElementActionError` maps the failure to.
 */
export async function clickElement(elementId, app) {
    // JSON.stringify is used as a safe JS-literal encoder for the script text.
    const pathLiteral = JSON.stringify(elementId);
    const targetApp = app || this.activeTarget?.appName;
    const appLiteral = JSON.stringify(targetApp || "");
    const descriptor = prepareCache.call(this, elementId);
    const descriptorLiteral = JSON.stringify(descriptor);
    const script = `
      var se = Application('System Events');
      var _result = null;
      ${jxaElementActionHelpers()}
      var elemPath = ${pathLiteral};
      var appName = ${appLiteral};
      var cached = ${descriptorLiteral};

      var elem = resolveElementInApp(elemPath, appName) || resolveElementByFullPath(elemPath);
      if (elem && !descriptorMatches(elem)) {
        elem = refetchEquivalent() || elem;
      }
      if (!elem) {
        elem = refetchEquivalent();
      }

      if (!elem) {
        _result = {success: false, error: "Element not found: " + elemPath};
      } else {
        try {
          elem.actions.AXPress.perform();
          _result = {success: true};
        } catch(e) {
          try {
            var pos = elem.position();
            var sz = elem.size();
            var cx = pos[0] + sz[0] / 2;
            var cy = pos[1] + sz[1] / 2;
            ObjC.import('CoreGraphics');
            var src = $.CGEventSourceCreate($.kCGEventSourceStateHIDSystemState);
            var pt = $.CGPointMake(cx, cy);
            var down = $.CGEventCreateMouseEvent(src, $.kCGEventLeftMouseDown, pt, $.kCGMouseButtonLeft);
            $.CGEventPost($.kCGHIDEventTap, down);
            var up = $.CGEventCreateMouseEvent(src, $.kCGEventLeftMouseUp, pt, $.kCGMouseButtonLeft);
            $.CGEventPost($.kCGHIDEventTap, up);
            _result = {success: true};
          } catch(e2) {
            _result = {success: false, error: "Could not click element: " + String(e2.message || e2)};
          }
        }
      }
      JSON.stringify(_result);
    `;
    try {
        const osascriptArgs = ["-l", "JavaScript", "-e", script];
        const stdout = execFileSync("osascript", osascriptArgs, { encoding: "utf-8", timeout: 15000 });
        const outcome = JSON.parse(stdout.trim());
        if (outcome.success) {
            return;
        }
        // A failure without a message is treated as "element not found".
        if (outcome.error) {
            throw new Error(outcome.error);
        }
        throw new ElementNotFoundError(elementId);
    }
    catch (failure) {
        rethrowElementActionError(failure, "click_element", elementId);
    }
}
|
|
78
|
+
/**
 * Puts `text` into an accessibility element via a JXA script run by `osascript`.
 *
 * Script steps, in order:
 *  1. resolve the element (same resolution/refetch logic as the other element actions);
 *  2. try to focus it (errors ignored);
 *  3. when `clearFirst` is truthy, set its value to "" and, if that throws,
 *     fall back to Cmd+A followed by a delete key press (errors ignored);
 *  4. set the element's value to `text`; if that throws, type it with
 *     System Events `keystroke` instead.
 *
 * @param elementId Element path/id; embedded in the script as a JSON literal.
 * @param text Text to enter.
 * @param app Optional application name; defaults to `this.activeTarget.appName`.
 * @param clearFirst Clear the existing value before typing.
 * @throws Whatever `rethrowElementActionError` maps the failure to.
 */
export async function typeInElement(elementId, text, app, clearFirst) {
    const textLiteral = JSON.stringify(text);
    const targetApp = app || this.activeTarget?.appName;
    const appLiteral = JSON.stringify(targetApp || "");
    const pathLiteral = JSON.stringify(elementId);
    const descriptor = prepareCache.call(this, elementId);
    const descriptorLiteral = JSON.stringify(descriptor);
    const clearLiteral = clearFirst ? "true" : "false";
    const script = `
      var se = Application('System Events');
      var _result = null;
      ${jxaElementActionHelpers()}
      var elemPath = ${pathLiteral};
      var appName = ${appLiteral};
      var textToType = ${textLiteral};
      var shouldClear = ${clearLiteral};
      var cached = ${descriptorLiteral};

      var elem = resolveElementInApp(elemPath, appName) || resolveElementByFullPath(elemPath);
      if (elem && !descriptorMatches(elem)) {
        elem = refetchEquivalent() || elem;
      }
      if (!elem) {
        elem = refetchEquivalent();
      }

      if (!elem) {
        _result = {success: false, error: "Element not found: " + elemPath};
      } else {
        try {
          elem.focused = true;
        } catch(e) {}

        if (shouldClear) {
          try {
            elem.value = "";
          } catch(e) {
            try {
              se.keystroke("a", {command: true});
              se.keyDown("delete");
              se.keyUp("delete");
            } catch(e2) {}
          }
        }

        var didSet = false;
        try {
          elem.value = textToType;
          didSet = true;
        } catch(e) {}

        if (!didSet) {
          try {
            se.keystroke(textToType);
            _result = {success: true};
          } catch(e) {
            _result = {success: false, error: "Could not type into element: " + String(e.message || e)};
          }
        } else {
          _result = {success: true};
        }
      }
      JSON.stringify(_result);
    `;
    try {
        const osascriptArgs = ["-l", "JavaScript", "-e", script];
        const stdout = execFileSync("osascript", osascriptArgs, { encoding: "utf-8", timeout: 15000 });
        const outcome = JSON.parse(stdout.trim());
        if (outcome.success) {
            return;
        }
        // A failure without a message is treated as "element not found".
        if (outcome.error) {
            throw new Error(outcome.error);
        }
        throw new ElementNotFoundError(elementId);
    }
    catch (failure) {
        rethrowElementActionError(failure, "type_in_element", elementId);
    }
}
|
|
157
|
+
/**
 * Sets the value of an accessibility element via a JXA script run by `osascript`.
 *
 * On success, if `this.elementCache` still holds an entry for `elementId`,
 * that entry is rewritten with the new `value` and a fresh `cachedAt`
 * timestamp (`Date.now()`).
 *
 * @param elementId Element path/id; embedded in the script as a JSON literal.
 * @param value Value to assign to the element.
 * @param app Optional application name; defaults to `this.activeTarget.appName`.
 * @throws Whatever `rethrowElementActionError` maps the failure to.
 */
export async function setElementValue(elementId, value, app) {
    const targetApp = app || this.activeTarget?.appName;
    const valueLiteral = JSON.stringify(value);
    const appLiteral = JSON.stringify(targetApp || "");
    const pathLiteral = JSON.stringify(elementId);
    const descriptor = prepareCache.call(this, elementId);
    const descriptorLiteral = JSON.stringify(descriptor);
    const script = `
      var se = Application('System Events');
      var _result = null;
      ${jxaElementActionHelpers()}
      var elemPath = ${pathLiteral};
      var appName = ${appLiteral};
      var valueToSet = ${valueLiteral};
      var cached = ${descriptorLiteral};

      var elem = resolveElementInApp(elemPath, appName) || resolveElementByFullPath(elemPath);
      if (elem && !descriptorMatches(elem)) {
        elem = refetchEquivalent() || elem;
      }
      if (!elem) {
        elem = refetchEquivalent();
      }

      if (!elem) {
        _result = {success: false, error: "Element not found: " + elemPath};
      } else {
        try {
          elem.value = valueToSet;
          _result = {success: true};
        } catch(e) {
          _result = {success: false, error: "Could not set AX value: " + String(e.message || e)};
        }
      }
      JSON.stringify(_result);
    `;
    try {
        const osascriptArgs = ["-l", "JavaScript", "-e", script];
        const stdout = execFileSync("osascript", osascriptArgs, { encoding: "utf-8", timeout: 15000 });
        const outcome = JSON.parse(stdout.trim());
        if (!outcome.success) {
            // A failure without a message is treated as "element not found".
            if (outcome.error) {
                throw new Error(outcome.error);
            }
            throw new ElementNotFoundError(elementId);
        }
        // Keep the cached descriptor in step with what was just written.
        const existing = this.elementCache.get(elementId);
        if (existing) {
            const refreshed = { ...existing, value, cachedAt: Date.now() };
            this.elementCache.set(elementId, refreshed);
        }
    }
    catch (failure) {
        rethrowElementActionError(failure, "set_value", elementId);
    }
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { execFileSync } from "node:child_process";
|
|
2
|
+
export async function saveFocus() {
|
|
3
|
+
try {
|
|
4
|
+
const apps = await this.listApps();
|
|
5
|
+
const front = apps.find((a) => a.isFrontmost);
|
|
6
|
+
if (front) {
|
|
7
|
+
const windows = await this.listWindows();
|
|
8
|
+
const win = windows.find((w) => w.processName === front.name && w.isOnScreen);
|
|
9
|
+
this.savedFocus = {
|
|
10
|
+
appName: front.name,
|
|
11
|
+
windowTitle: win?.title ?? "",
|
|
12
|
+
};
|
|
13
|
+
}
|
|
14
|
+
}
|
|
15
|
+
catch {
|
|
16
|
+
this.savedFocus = undefined;
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
export async function restoreFocus() {
|
|
20
|
+
if (!this.savedFocus)
|
|
21
|
+
return;
|
|
22
|
+
try {
|
|
23
|
+
const { appName } = this.savedFocus;
|
|
24
|
+
const appNameLiteral = JSON.stringify(appName);
|
|
25
|
+
execFileSync("osascript", [
|
|
26
|
+
"-e", `tell application ${appNameLiteral} to activate`,
|
|
27
|
+
], { timeout: 5000 });
|
|
28
|
+
}
|
|
29
|
+
catch {
|
|
30
|
+
// Best effort — don't fail the action if restore fails
|
|
31
|
+
}
|
|
32
|
+
this.savedFocus = undefined;
|
|
33
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/** Returns `error.message` for `Error` instances, otherwise `String(error)`. */
export declare function errorMessage(error: unknown): string;
|
|
2
|
+
/**
 * Heuristic: true when the error's message contains "not allowed",
 * "permission", "assistive" or "accessibility" (case-insensitive).
 */
export declare function isAccessibilityPermissionError(error: unknown): boolean;
|
|
3
|
+
/**
 * Always throws. `UcuError` instances are rethrown unchanged; anything else
 * becomes a `CaptureError` with message `"<operation> failed: <message>"`.
 */
export declare function rethrowCaptureError(error: unknown, operation: string): never;
|
|
4
|
+
/**
 * Always throws. `UcuError` instances are rethrown unchanged; errors that
 * look like accessibility-permission failures become a `PermissionError`;
 * everything else becomes a `PlatformError` with message
 * `"<operation> failed: <message>"`.
 */
export declare function rethrowAccessibilityError(error: unknown, operation: string): never;
|
|
5
|
+
/**
 * Always throws. Checked in this order: `UcuError` (rethrown unchanged),
 * accessibility-permission failure (`PermissionError`), message containing
 * "element not found" (`ElementNotFoundError` for `elementId`), otherwise a
 * `PlatformError` with message `"<operation> failed: <message>"`.
 */
export declare function rethrowElementActionError(error: unknown, operation: string, elementId: string): never;
|
|
6
|
+
/**
 * Always throws. `UcuError` instances are rethrown unchanged; anything else
 * becomes an `InputSynthesisError` with message `"<operation> failed: <message>"`.
 */
export declare function rethrowInputError(error: unknown, operation: string): never;
|
|
7
|
+
/** Trims surrounding whitespace and lower-cases `name` for comparison. */
export declare function normalizeAppName(name: string): string;
|
|
8
|
+
/**
 * Compares normalized names. Matches when the process name equals the
 * requested name, starts with it followed by a space or hyphen, or contains
 * it surrounded by spaces. Returns false if either name normalizes to "".
 */
export declare function appNameMatches(processName: string, requestedApp: string): boolean;
|
|
9
|
+
/**
 * Picks the first window whose normalized `processName` equals the requested
 * app; if there is none, the first window accepted by `appNameMatches`.
 * Returns `undefined` when nothing matches.
 */
export declare function selectWindowForApp(windows: import("../base.js").WindowInfo[], requestedApp: string): import("../base.js").WindowInfo | undefined;
|
|
10
|
+
/** Construction-time options for the macOS platform implementation. */
export interface MacOSPlatformOptions {
    /**
     * Overrides how native helper binaries are located.
     *
     * Keys are helper folder names. A string value is the absolute path of
     * the binary to use for that helper; `null` disables the helper so the
     * JXA fallback is used instead. Intended for tests that need to control
     * native-helper behavior without touching the filesystem.
     */
    nativeHelperPaths?: Record<string, string | null>;
}
|
|
19
|
+
/**
 * Snapshot of an accessibility element kept in the platform's element cache.
 * It is serialized into the JXA element-action scripts as `cached`.
 */
export interface CachedElementDescriptor {
    /** Cache key / element path the descriptor was stored under. */
    elementId: string;
    /** Name of the owning application. */
    appName: string;
    /** Element role — presumably the AX role string; confirm against ax-tree. */
    role: string;
    /** Element name/title. */
    name: string;
    /** Last known value; refreshed after a successful `setElementValue`. */
    value?: string;
    description?: string;
    subrole?: string;
    identifier?: string;
    /** Last known frame. Units/origin are not visible here — TODO confirm. */
    bounds?: {
        x: number;
        y: number;
        width: number;
        height: number;
    };
    /** `Date.now()` timestamp (ms) of when the entry was stored or refreshed. */
    cachedAt: number;
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { UcuError, CaptureError, PermissionError, PlatformError, InputSynthesisError, ElementNotFoundError } from "../../util/errors.js";
|
|
2
|
+
/**
 * Extracts a human-readable message from an arbitrary thrown value.
 *
 * @param error Any value, typically from a `catch` clause.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
export function errorMessage(error) {
    if (error instanceof Error) {
        return error.message;
    }
    return String(error);
}
|
|
5
|
+
/**
 * Heuristically detects accessibility-permission failures by keyword.
 *
 * @param error Any thrown value.
 * @returns True when its message contains "not allowed", "permission",
 *   "assistive" or "accessibility", ignoring case.
 */
export function isAccessibilityPermissionError(error) {
    const text = errorMessage(error);
    const permissionKeywords = /not allowed|permission|assistive|accessibility/i;
    return permissionKeywords.test(text);
}
|
|
8
|
+
/**
 * Normalizes a screen-capture failure and throws it; never returns.
 *
 * @param error The caught value.
 * @param operation Short operation label used as the message prefix.
 * @throws The original error when it is already a `UcuError`, otherwise a
 *   `CaptureError`.
 */
export function rethrowCaptureError(error, operation) {
    if (!(error instanceof UcuError)) {
        const detail = errorMessage(error);
        throw new CaptureError(`${operation} failed: ${detail}`);
    }
    throw error;
}
|
|
13
|
+
/**
 * Normalizes an accessibility-API failure and throws it; never returns.
 *
 * @param error The caught value.
 * @param operation Short operation label used as the message prefix.
 * @throws The original `UcuError`, a `PermissionError` when the message
 *   looks like a permission problem, otherwise a `PlatformError`.
 */
export function rethrowAccessibilityError(error, operation) {
    if (error instanceof UcuError) {
        throw error;
    }
    const permissionDenied = isAccessibilityPermissionError(error);
    if (!permissionDenied) {
        throw new PlatformError(`${operation} failed: ${errorMessage(error)}`);
    }
    throw new PermissionError("accessibility", "darwin");
}
|
|
21
|
+
/**
 * Normalizes a failure from an element action and throws it; never returns.
 *
 * The checks run in a fixed order — already-typed error, permission
 * keywords, "element not found" — and the first hit decides the error type.
 *
 * @param error The caught value.
 * @param operation Short operation label used as the message prefix.
 * @param elementId Element the action targeted.
 * @throws The original `UcuError`, `PermissionError`, `ElementNotFoundError`
 *   or `PlatformError`.
 */
export function rethrowElementActionError(error, operation, elementId) {
    if (error instanceof UcuError) {
        throw error;
    }
    if (isAccessibilityPermissionError(error)) {
        throw new PermissionError("accessibility", "darwin");
    }
    const mentionsMissingElement = /element not found/i.test(errorMessage(error));
    if (mentionsMissingElement) {
        throw new ElementNotFoundError(elementId);
    }
    const detail = errorMessage(error);
    throw new PlatformError(`${operation} failed: ${detail}`);
}
|
|
32
|
+
/**
 * Normalizes an input-synthesis failure and throws it; never returns.
 *
 * @param error The caught value.
 * @param operation Short operation label used as the message prefix.
 * @throws The original error when it is already a `UcuError`, otherwise an
 *   `InputSynthesisError`.
 */
export function rethrowInputError(error, operation) {
    if (!(error instanceof UcuError)) {
        const detail = errorMessage(error);
        throw new InputSynthesisError(`${operation} failed: ${detail}`);
    }
    throw error;
}
|
|
37
|
+
/**
 * Canonical form of an application name for comparisons.
 *
 * @param name Raw application or process name.
 * @returns `name` with surrounding whitespace removed, lower-cased.
 */
export function normalizeAppName(name) {
    const trimmed = name.trim();
    return trimmed.toLowerCase();
}
|
|
40
|
+
/**
 * Decides whether a process name refers to the requested application.
 *
 * Both names are normalized first. A match is: equality, the process name
 * starting with the requested name plus a space or hyphen, or the requested
 * name appearing inside the process name surrounded by spaces.
 *
 * @param processName Name reported for the running process.
 * @param requestedApp Name supplied by the caller.
 * @returns False when either name normalizes to an empty string.
 */
export function appNameMatches(processName, requestedApp) {
    const candidate = normalizeAppName(processName);
    const wanted = normalizeAppName(requestedApp);
    if (!candidate || !wanted) {
        return false;
    }
    if (candidate === wanted) {
        return true;
    }
    const acceptedPrefixes = [`${wanted} `, `${wanted}-`];
    if (acceptedPrefixes.some((prefix) => candidate.startsWith(prefix))) {
        return true;
    }
    return candidate.includes(` ${wanted} `);
}
|
|
50
|
+
/**
 * Chooses the window that best corresponds to `requestedApp`.
 *
 * An exact (normalized) process-name match wins; only when there is none is
 * the looser `appNameMatches` rule applied.
 *
 * @param windows Candidate windows, each with a `processName`.
 * @param requestedApp Name supplied by the caller.
 * @returns The chosen window, or `undefined` when nothing matches.
 */
export function selectWindowForApp(windows, requestedApp) {
    const wanted = normalizeAppName(requestedApp);
    const exact = windows.find((candidate) => normalizeAppName(candidate.processName) === wanted);
    if (exact != null) {
        return exact;
    }
    return windows.find((candidate) => appNameMatches(candidate.processName, requestedApp));
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { MacOSPlatform } from "./base.js";
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { MacOSPlatform } from "./base.js";
|
|
2
|
+
import type { CursorPosition } from "../base.js";
|
|
3
|
+
/**
 * Clicks at (`x`, `y`); performs a double click when `doubleClick` is truthy.
 * `button` is forwarded unchanged to the input utility. Failures are
 * rethrown through `rethrowInputError` ("double_click" or "click").
 */
export declare function click(this: MacOSPlatform, x: number, y: number, button?: "left" | "right" | "middle", doubleClick?: boolean): Promise<void>;
|
|
4
|
+
/** Moves the pointer to (`x`, `y`); failures are rethrown through `rethrowInputError` ("move"). */
export declare function move(this: MacOSPlatform, x: number, y: number): Promise<void>;
|
|
5
|
+
/**
 * Drags from (`startX`, `startY`) to (`endX`, `endY`). `button` and
 * `duration` are forwarded unchanged to the input utility (duration units
 * are defined there — not visible here). Failures are rethrown through
 * `rethrowInputError` ("drag").
 */
export declare function drag(this: MacOSPlatform, startX: number, startY: number, endX: number, endY: number, button?: "left" | "right" | "middle", duration?: number): Promise<void>;
|
|
6
|
+
/** Scrolls by (`deltaX`, `deltaY`) at (`x`, `y`); failures are rethrown through `rethrowInputError` ("scroll"). */
export declare function scroll(this: MacOSPlatform, x: number, y: number, deltaX: number, deltaY: number): Promise<void>;
|
|
7
|
+
/**
 * Synchronously reads the pointer location by running a JXA snippet through
 * `osascript`; the Y axis is flipped so it grows downward. Throws a
 * `PlatformError` when the lookup fails.
 */
export declare function getCursorPosition(this: MacOSPlatform): CursorPosition;
|
|
8
|
+
/** Types `text` via the input utility's `typeText`; `delay` is forwarded unchanged. */
export declare function type(this: MacOSPlatform, text: string, delay?: number): Promise<void>;
|
|
9
|
+
/** Presses the key combination `keys` via the input utility's `pressShortcut`. */
export declare function key(this: MacOSPlatform, keys: string[]): Promise<void>;
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { execFileSync } from "node:child_process";
|
|
2
|
+
import { click as inputClick, doubleClick as inputDoubleClick, move as inputMove, drag as inputDrag, scroll as inputScroll, typeText, pressShortcut } from "../../utils/input.js";
|
|
3
|
+
import { PlatformError } from "../../util/errors.js";
|
|
4
|
+
import { rethrowInputError, errorMessage } from "./helpers.js";
|
|
5
|
+
/**
 * Clicks (or double-clicks) at the given screen coordinates.
 *
 * @param x Horizontal coordinate.
 * @param y Vertical coordinate.
 * @param button Mouse button, forwarded to the input utility.
 * @param doubleClick When truthy a double click is performed instead.
 * @throws Via `rethrowInputError`, labelled "double_click" or "click".
 */
export async function click(x, y, button, doubleClick) {
    try {
        const perform = doubleClick ? inputDoubleClick : inputClick;
        await perform(x, y, button);
    }
    catch (failure) {
        const operation = doubleClick ? "double_click" : "click";
        rethrowInputError(failure, operation);
    }
}
|
|
18
|
+
/**
 * Moves the pointer to (`x`, `y`).
 *
 * @throws Via `rethrowInputError`, labelled "move".
 */
export async function move(x, y) {
    try {
        await inputMove(x, y);
    }
    catch (failure) {
        rethrowInputError(failure, "move");
    }
}
|
|
26
|
+
/**
 * Drags from the start point to the end point.
 *
 * All arguments are forwarded unchanged to the input utility.
 *
 * @throws Via `rethrowInputError`, labelled "drag".
 */
export async function drag(startX, startY, endX, endY, button, duration) {
    try {
        await inputDrag(startX, startY, endX, endY, button, duration);
    }
    catch (failure) {
        rethrowInputError(failure, "drag");
    }
}
|
|
34
|
+
/**
 * Scrolls by (`deltaX`, `deltaY`) with the pointer at (`x`, `y`).
 *
 * @throws Via `rethrowInputError`, labelled "scroll".
 */
export async function scroll(x, y, deltaX, deltaY) {
    try {
        await inputScroll(x, y, deltaX, deltaY);
    }
    catch (failure) {
        rethrowInputError(failure, "scroll");
    }
}
|
|
42
|
+
/**
 * Synchronously reads the current pointer location.
 *
 * `NSEvent.mouseLocation` is expressed in global screen coordinates whose
 * origin is the bottom-left corner of the primary display, so the Y value is
 * flipped against the height of the primary display (`NSScreen.screens[0]`).
 * `NSScreen.mainScreen` must not be used for the flip: it is the screen that
 * holds the key window and may be a secondary display of a different height.
 *
 * @returns `{ x, y }` rounded to integers, with Y growing downward.
 * @throws PlatformError when `osascript` fails or its output cannot be parsed.
 */
export function getCursorPosition() {
    try {
        const out = execFileSync("osascript", [
            "-l", "JavaScript",
            "-e",
            `ObjC.import('AppKit');
var pt = $.NSEvent.mouseLocation;
var screens = $.NSScreen.screens;
var primary = $(screens).objectAtIndex(0);
JSON.stringify({x:Math.round(pt.x),y:Math.round(primary.frame.size.height - pt.y)});`,
        ], { encoding: "utf-8", timeout: 5000 }).trim();
        return JSON.parse(out);
    }
    catch (error) {
        throw new PlatformError(`get_cursor_position failed: ${errorMessage(error)}`);
    }
}
|
|
57
|
+
/**
 * Types `text` through the input utility.
 *
 * Failures are normalized the same way as the pointer actions in this
 * module, so callers see an `InputSynthesisError` (or the original
 * `UcuError`) rather than a raw error.
 *
 * @param text Text to type.
 * @param delay Forwarded unchanged to `typeText`.
 * @throws Via `rethrowInputError`, labelled "type".
 */
export async function type(text, delay) {
    try {
        await typeText(text, delay);
    }
    catch (error) {
        rethrowInputError(error, "type");
    }
}
|
|
60
|
+
/**
 * Presses a key combination through the input utility.
 *
 * Failures are normalized the same way as the pointer actions in this
 * module, so callers see an `InputSynthesisError` (or the original
 * `UcuError`) rather than a raw error.
 *
 * @param keys Keys making up the shortcut, forwarded to `pressShortcut`.
 * @throws Via `rethrowInputError`, labelled "key".
 */
export async function key(keys) {
    try {
        await pressShortcut(keys);
    }
    catch (error) {
        rethrowInputError(error, "key");
    }
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { MacOSPlatform } from "./base.js";
|
|
2
|
+
import type { ScreenRegion, ScreenSize, ScreenshotOptions, OcrResult } from "../base.js";
|
|
3
|
+
/**
 * Captures `region` when given, otherwise the full screen, and resolves to
 * the image bytes. The `_display` argument is accepted but not used by the
 * implementation. Failures are rethrown through `rethrowCaptureError`.
 */
export declare function screenshot(this: MacOSPlatform, _display?: number, region?: ScreenRegion, options?: ScreenshotOptions): Promise<Buffer>;
|
|
4
|
+
/**
 * Captures the bounds of the window whose `id` equals `windowId`, looked up
 * via `listWindows(true)`. Throws `WindowNotFoundError` when no such window
 * exists.
 */
export declare function screenshotWindow(this: MacOSPlatform, windowId: string, options?: ScreenshotOptions): Promise<Buffer>;
|
|
5
|
+
/**
 * Synchronously reports width, height and backing scale factor of the screen
 * at index `display` (index 0 when omitted or out of range). Does not throw:
 * on failure it returns 1920x1080 at scale 2 with `estimated: true`.
 */
export declare function getScreenSize(this: MacOSPlatform, display?: number): ScreenSize;
|
|
6
|
+
/**
 * Synchronously checks `ioreg` output for `"IOConsoleLocked" = Yes`.
 * Fails closed: returns `true` when the check itself cannot be run.
 */
export declare function isScreenLocked(this: MacOSPlatform): boolean;
|
|
7
|
+
/**
 * Takes a screenshot (optionally limited to `region`) and runs text
 * recognition on it, preferring the native OCR helper and falling back to a
 * JXA/Vision script. Element coordinates are divided by the scale factor and
 * offset by the region origin.
 */
export declare function ocr(this: MacOSPlatform, display?: number, region?: ScreenRegion): Promise<OcrResult>;
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
import { execFileSync } from "node:child_process";
|
|
2
|
+
import { randomUUID } from "node:crypto";
|
|
3
|
+
import { existsSync } from "node:fs";
|
|
4
|
+
import { join, dirname } from "node:path";
|
|
5
|
+
import { fileURLToPath } from "node:url";
|
|
6
|
+
import { captureFullScreen, captureRegion } from "../../utils/screenshot.js";
|
|
7
|
+
import { WindowNotFoundError, CaptureError } from "../../util/errors.js";
|
|
8
|
+
import { logger } from "../../util/logger.js";
|
|
9
|
+
import { rethrowCaptureError, errorMessage } from "./helpers.js";
|
|
10
|
+
/**
 * Captures the screen, or just `region` when one is supplied.
 *
 * @param _display Accepted for interface compatibility; not used here.
 * @param region Optional `{ x, y, width, height }` to capture.
 * @param options Forwarded unchanged to the capture utility.
 * @returns The image bytes, decoded from the utility's base64 output.
 * @throws Via `rethrowCaptureError`, labelled "capture region" or
 *   "capture full screen".
 */
export async function screenshot(_display, region, options) {
    try {
        let encoded;
        if (region) {
            encoded = await captureRegion(region.x, region.y, region.width, region.height, options);
        }
        else {
            encoded = await captureFullScreen(options);
        }
        return Buffer.from(encoded, "base64");
    }
    catch (failure) {
        const operation = region ? "capture region" : "capture full screen";
        rethrowCaptureError(failure, operation);
    }
}
|
|
21
|
+
export async function screenshotWindow(windowId, options) {
|
|
22
|
+
const win = (await this.listWindows(true)).find((w) => w.id === windowId);
|
|
23
|
+
if (!win) {
|
|
24
|
+
throw new WindowNotFoundError(windowId);
|
|
25
|
+
}
|
|
26
|
+
return this.screenshot(undefined, win.bounds, options);
|
|
27
|
+
}
|
|
28
|
+
/**
 * Synchronously reports the size and backing scale factor of a display.
 *
 * `display` is interpolated into the JXA script, so it is reduced to a
 * non-negative integer first; anything else (undefined, NaN, fractions,
 * negatives, non-numbers) selects display 0. The script itself also falls
 * back to index 0 when the index exceeds the number of screens.
 *
 * Never throws: when `osascript` fails or its output cannot be parsed, a
 * warning is logged and a 1920x1080 @2x estimate flagged `estimated: true`
 * is returned.
 *
 * @param display Zero-based screen index.
 * @returns `{ width, height, scaleFactor }` (plus `estimated` on fallback).
 */
export function getScreenSize(display) {
    try {
        const idx = Number.isInteger(display) && display >= 0 ? display : 0;
        const out = execFileSync("osascript", [
            "-l", "JavaScript",
            "-e",
            `ObjC.import('AppKit');
var screens = $.NSScreen.screens;
var idx = ${idx};
if (idx < 0 || idx >= screens.count) idx = 0;
var screen = $(screens).objectAtIndex(idx);
var frame = screen.frame;
var scaleFactor = screen.backingScaleFactor;
JSON.stringify({width:Math.round(frame.size.width),height:Math.round(frame.size.height),scaleFactor:scaleFactor})`,
        ], { encoding: "utf-8", timeout: 5000 }).trim();
        return JSON.parse(out);
    }
    catch (error) {
        logger.warn("getScreenSize failed, using fallback", { error: errorMessage(error) });
        return { width: 1920, height: 1080, scaleFactor: 2, estimated: true };
    }
}
|
|
50
|
+
/**
 * Synchronously checks whether the console is locked.
 *
 * Runs `/usr/sbin/ioreg -n Root -d1` and looks for `"IOConsoleLocked" = Yes`
 * in its output.
 *
 * @returns True when locked, and also when the check cannot be performed.
 */
export function isScreenLocked() {
    let registryDump;
    try {
        registryDump = execFileSync("/usr/sbin/ioreg", ["-n", "Root", "-d1"], {
            encoding: "utf-8",
            timeout: 5000,
        });
    }
    catch {
        // Fail-closed: if we can't determine lock state, assume locked
        logger.warn("isScreenLocked check failed, assuming locked");
        return true;
    }
    const lockedMarker = /"IOConsoleLocked"\s*=\s*Yes/;
    return lockedMarker.test(registryDump);
}
|
|
64
|
+
/**
 * Runs text recognition on a screenshot of `display` (optionally limited to
 * `region`).
 *
 * The screenshot is written to a uniquely named PNG in the OS temp
 * directory, handed to the native OCR helper and, if that yields nothing,
 * to the JXA/Vision fallback. The temp file is removed afterwards on every
 * path, including a failed write.
 *
 * @param display Display index, forwarded to `screenshot` and `getScreenSize`.
 * @param region Optional capture region; OCR coordinates are offset by it.
 * @returns Recognized elements plus the concatenated text.
 */
export async function ocr(display, region) {
    const buf = await this.screenshot(display, region);
    const { writeFile, unlink } = await import("node:fs/promises");
    const { tmpdir } = await import("node:os");
    // `join` comes from the module's static node:path import.
    const tmpPath = join(tmpdir(), `ucu-ocr-${randomUUID()}.png`);
    try {
        // Inside the try so a partially written file is still cleaned up.
        await writeFile(tmpPath, buf);
        const screenSize = this.getScreenSize(display);
        const scaleFactor = screenSize.scaleFactor ?? 2;
        const nativeResult = await ocrNative(tmpPath, scaleFactor, region);
        if (nativeResult)
            return nativeResult;
        return await ocrJxa(tmpPath, screenSize, scaleFactor, region, buf);
    }
    finally {
        // Best-effort cleanup; a missing file is not an error.
        await unlink(tmpPath).catch(() => { });
    }
}
|
|
83
|
+
/**
 * Attempts OCR with the bundled native helper binary.
 *
 * The helper is searched for in three places relative to this module and
 * the current working directory; the first existing path is used. It gets
 * `{ imagePath }` as JSON on stdin and must print JSON with `elements` and
 * `fullText` (or `error`).
 *
 * @param tmpPath Path of the PNG to analyze.
 * @param scaleFactor Divisor applied to the helper's coordinates and sizes.
 * @param region Optional capture region whose origin is added back.
 * @returns The OCR result, or `null` when the helper is missing, reports an
 *   error, or fails in any way (so the caller can fall back).
 */
async function ocrNative(tmpPath, scaleFactor, region) {
    const moduleDir = dirname(fileURLToPath(import.meta.url));
    const helperTail = ["native", "ocr", "ocr-helper"];
    const searchPaths = [
        join(moduleDir, "..", "..", "..", ...helperTail),
        join(moduleDir, "..", "..", ...helperTail),
        join(process.cwd(), ...helperTail),
    ];
    const binaryPath = searchPaths.find((candidate) => existsSync(candidate));
    if (!binaryPath) {
        return null;
    }
    try {
        const request = JSON.stringify({ imagePath: tmpPath });
        const stdout = execFileSync(binaryPath, [], {
            input: request,
            encoding: "utf-8",
            timeout: 30000,
        });
        const parsed = JSON.parse(stdout.trim());
        if (parsed.error) {
            return null;
        }
        const offsetX = region ? region.x : 0;
        const offsetY = region ? region.y : 0;
        const elements = parsed.elements.map((el) => {
            return {
                text: el.text,
                x: Math.round(el.x / scaleFactor) + offsetX,
                y: Math.round(el.y / scaleFactor) + offsetY,
                width: Math.round(el.width / scaleFactor),
                height: Math.round(el.height / scaleFactor),
                confidence: el.confidence,
            };
        });
        return { elements, fullText: parsed.fullText };
    }
    catch {
        return null;
    }
}
|
|
123
|
+
/**
 * OCR fallback that runs Apple's Vision framework through a JXA script.
 *
 * The script loads the PNG, runs a `VNRecognizeTextRequest` at the accurate
 * recognition level with language correction, and reports each top
 * candidate with a pixel bounding box whose Y axis is flipped to grow
 * downward. Coordinates are then divided by `scaleFactor` and offset by the
 * region origin.
 *
 * @param tmpPath Path of the PNG to analyze.
 * @param screenSize Unused; kept so the call signature stays unchanged.
 * @param scaleFactor Divisor applied to the script's coordinates and sizes.
 * @param region Optional capture region whose origin is added back.
 * @param buf Unused; kept so the call signature stays unchanged.
 * @returns Recognized elements plus the newline-joined text.
 * @throws CaptureError when the script reports an error; the message adds a
 *   Screen Recording hint for the two image-loading failures.
 */
async function ocrJxa(tmpPath, screenSize, scaleFactor, region, buf) {
    const pathLiteral = JSON.stringify(tmpPath);
    const jxaScript = `
      function run() {
        ObjC.import('Vision');
        ObjC.import('AppKit');
        ObjC.import('Foundation');
        var app = Application.currentApplication();
        app.includeStandardAdditions = true;
        var path = ${pathLiteral};
        var url = $.NSURL.fileURLWithPath(path);
        var image = $.NSImage.alloc.initWithContentsOfURL(url);
        if (!image || !image.isValid) {
          return JSON.stringify({error: "Failed to load screenshot image", elements: [], fullText: ""});
        }
        var cgImage = image.CGImageForProposedRectContextHints(null, null, null);
        if (!cgImage) {
          return JSON.stringify({error: "Failed to get CGImage from screenshot", elements: [], fullText: ""});
        }
        var request = $.VNRecognizeTextRequest.alloc.init;
        request.recognitionLevel = $.VNRequestTextRecognitionLevelAccurate;
        request.usesLanguageCorrection = true;
        var handler = $.VNImageRequestHandler.alloc.initWithCGImageOptions(cgImage, null);
        var performError = $();
        var success = handler.performRequestsError([request], performError);
        if (!success) {
          return JSON.stringify({error: "OCR request failed", elements: [], fullText: ""});
        }
        var results = request.results;
        var elements = [];
        var fullTextParts = [];
        var imgWidth = cgImage.width;
        var imgHeight = cgImage.height;
        for (var i = 0; i < results.count; i++) {
          var obs = $(results).objectAtIndex(i);
          var candidates = obs.topCandidates(1);
          if (candidates && candidates.count > 0) {
            var candidate = $(candidates).objectAtIndex(0);
            var text = candidate.string.toString();
            var confidence = candidate.confidence;
            var bbox = obs.boundingBox;
            var bx = bbox.origin.x * imgWidth;
            var by = (1 - bbox.origin.y - bbox.size.height) * imgHeight;
            var bw = bbox.size.width * imgWidth;
            var bh = bbox.size.height * imgHeight;
            elements.push({text:text,x:Math.round(bx),y:Math.round(by),width:Math.round(bw),height:Math.round(bh),confidence:confidence});
            fullTextParts.push(text);
          }
        }
        return JSON.stringify({elements:elements,fullText:fullTextParts.join("\\n"),error:null});
      }
      run();
    `;
    const out = execFileSync("osascript", ["-l", "JavaScript", "-e", jxaScript], { encoding: "utf-8", timeout: 30000 }).trim();
    const parsed = JSON.parse(out);
    if (parsed.error) {
        const hint = parsed.error === "Failed to load screenshot image"
            ? " (the screenshot file is empty or unreadable — Screen Recording permission is most likely missing; run `doctor` and grant Screen Recording to the host terminal, then retry)"
            : parsed.error === "Failed to get CGImage from screenshot"
                ? " (the screenshot could not be decoded — likely an empty capture; check Screen Recording permission)"
                : "";
        throw new CaptureError(`ocr failed: ${parsed.error}${hint}`);
    }
    // Script coordinates are in image pixels; convert to points and add the
    // region origin back so results are in screen coordinates.
    const elements = parsed.elements.map((el) => ({
        text: el.text,
        x: Math.round(el.x / scaleFactor) + (region ? region.x : 0),
        y: Math.round(el.y / scaleFactor) + (region ? region.y : 0),
        width: Math.round(el.width / scaleFactor),
        height: Math.round(el.height / scaleFactor),
        confidence: el.confidence,
    }));
    return { elements, fullText: parsed.fullText };
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { MacOSPlatform } from "./base.js";
|
|
2
|
+
import type { AppInfo, AppTarget, WindowInfo, BrowserContext } from "../base.js";
|
|
3
|
+
/**
 * Lists applications. Callers in this package read `name` and `isFrontmost`
 * from each entry. NOTE(review): implementation not visible here — confirm
 * which applications are included.
 */
export declare function listApps(this: MacOSPlatform): Promise<AppInfo[]>;
|
|
4
|
+
/**
 * Presumably brings `app` to the foreground and resolves to the resulting
 * target. NOTE(review): implementation not visible here — confirm matching
 * rules and error behavior.
 */
export declare function focusApp(this: MacOSPlatform, app: string): Promise<AppTarget>;
|
|
5
|
+
/**
 * Resolves to browser context information, or `undefined` when none is
 * available. NOTE(review): implementation not visible here — confirm what
 * `app` defaults to and which browsers are supported.
 */
export declare function getActiveBrowserContext(this: MacOSPlatform, app?: string): Promise<BrowserContext | undefined>;
|
|
6
|
+
/**
 * Lists windows. Callers in this package read `id`, `processName`, `title`,
 * `isOnScreen` and `bounds` from each entry. NOTE(review): the leading
 * underscore suggests `_includeMinimized` may be ignored — confirm against
 * the implementation.
 */
export declare function listWindows(this: MacOSPlatform, _includeMinimized?: boolean): Promise<WindowInfo[]>;
|