ucu-mcp 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +36 -1
- package/README.md +68 -12
- package/dist/src/mcp/server.js +18 -2
- package/dist/src/mcp/tools.d.ts +1 -0
- package/dist/src/mcp/tools.js +173 -65
- package/dist/src/platform/macos.d.ts +4 -0
- package/dist/src/platform/macos.js +355 -215
- package/dist/src/util/errors.d.ts +6 -0
- package/dist/src/util/errors.js +8 -0
- package/dist/src/utils/input.js +88 -18
- package/native/cgevent/cgevent-helper +0 -0
- package/native/cgevent/main.swift +126 -0
- package/native/ocr/main.swift +89 -0
- package/native/ocr/ocr-helper +0 -0
- package/package.json +6 -3
package/dist/src/mcp/tools.js
CHANGED
|
@@ -10,7 +10,7 @@ import { SafetyGuard } from "../safety/guard.js";
|
|
|
10
10
|
import { checkPermission } from "../safety/permissions.js";
|
|
11
11
|
import { retry } from "../util/retry.js";
|
|
12
12
|
import { createLogger } from "../util/logger.js";
|
|
13
|
-
import { SafetyError, PermissionError, UnsupportedParameterError } from "../util/errors.js";
|
|
13
|
+
import { SafetyError, PermissionError, UnsupportedParameterError, UcuError, WindowNotFoundError } from "../util/errors.js";
|
|
14
14
|
const log = createLogger("tools");
|
|
15
15
|
let _platform;
|
|
16
16
|
function getPlatform() {
|
|
@@ -31,16 +31,89 @@ const captureAfterFields = {
|
|
|
31
31
|
async function resolvePoint(x, y, windowId) {
|
|
32
32
|
if (!windowId)
|
|
33
33
|
return { x, y };
|
|
34
|
+
const win = (await getPlatform().listWindows()).find(w => w.id === windowId);
|
|
35
|
+
if (!win)
|
|
36
|
+
throw new WindowNotFoundError(windowId);
|
|
37
|
+
return { x: win.bounds.x + x, y: win.bounds.y + y };
|
|
38
|
+
}
|
|
39
|
+
function jsonText(value) {
|
|
40
|
+
return { type: "text", text: JSON.stringify(value, null, 2) };
|
|
41
|
+
}
|
|
42
|
+
function recoveryHint(code) {
|
|
43
|
+
switch (code) {
|
|
44
|
+
case "WINDOW_NOT_FOUND":
|
|
45
|
+
return "Run list_windows again, then retry with a fresh windowId or omit windowId for screen coordinates.";
|
|
46
|
+
case "ELEMENT_NOT_FOUND":
|
|
47
|
+
return "Run find_element again, then retry with a fresh elementId.";
|
|
48
|
+
case "PERMISSION_DENIED":
|
|
49
|
+
return "Run doctor and grant the missing macOS permission, then restart the launching client.";
|
|
50
|
+
case "UNSUPPORTED_PARAMETER":
|
|
51
|
+
return "Remove or replace the unsupported parameter; inspect tools/list for this tool schema.";
|
|
52
|
+
case "SAFETY_BLOCKED":
|
|
53
|
+
return "Choose a less risky action or ask the user to perform it manually.";
|
|
54
|
+
case "INPUT_FAILED":
|
|
55
|
+
return "Observe current state with screenshot or get_window_state before retrying manually.";
|
|
56
|
+
case "CAPTURE_FAILED":
|
|
57
|
+
return "Run doctor to check Screen Recording permission, then retry screenshot or ocr.";
|
|
58
|
+
case "COORDINATE_OUT_OF_BOUNDS":
|
|
59
|
+
return "Run get_screen_size or list_windows, then retry with coordinates inside the active display or window bounds.";
|
|
60
|
+
default:
|
|
61
|
+
return "Inspect the error message, observe the current UI state, and retry only if the operation is safe.";
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
function mcpErrorResponse(error) {
|
|
65
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
66
|
+
const code = error instanceof UcuError ? error.code : "UNKNOWN_ERROR";
|
|
67
|
+
const retryable = error instanceof UcuError ? error.retryable : false;
|
|
68
|
+
return {
|
|
69
|
+
isError: true,
|
|
70
|
+
content: [
|
|
71
|
+
jsonText({
|
|
72
|
+
error: {
|
|
73
|
+
name: err.name,
|
|
74
|
+
code,
|
|
75
|
+
retryable,
|
|
76
|
+
message: err.message,
|
|
77
|
+
recovery: recoveryHint(code),
|
|
78
|
+
},
|
|
79
|
+
}),
|
|
80
|
+
],
|
|
81
|
+
};
|
|
82
|
+
}
|
|
83
|
+
async function actionResponse(result, captureAfter, captureFormat = "jpeg", captureMaxWidth = 1280) {
|
|
84
|
+
if (!captureAfter)
|
|
85
|
+
return { content: [jsonText(result)] };
|
|
34
86
|
try {
|
|
35
|
-
const
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
87
|
+
const buf = await getPlatform().screenshot(undefined, undefined, {
|
|
88
|
+
format: captureFormat,
|
|
89
|
+
maxWidth: captureMaxWidth,
|
|
90
|
+
});
|
|
91
|
+
return {
|
|
92
|
+
content: [
|
|
93
|
+
jsonText({ actionResult: result }),
|
|
94
|
+
{
|
|
95
|
+
type: "image",
|
|
96
|
+
data: buf.toString("base64"),
|
|
97
|
+
mimeType: `image/${captureFormat}`,
|
|
98
|
+
},
|
|
99
|
+
],
|
|
100
|
+
};
|
|
39
101
|
}
|
|
40
102
|
catch {
|
|
41
|
-
return {
|
|
103
|
+
return { content: [jsonText(result)] };
|
|
42
104
|
}
|
|
43
105
|
}
|
|
106
|
+
const retryableActions = new Set([
|
|
107
|
+
"screenshot",
|
|
108
|
+
"list_windows",
|
|
109
|
+
"list_apps",
|
|
110
|
+
"get_window_state",
|
|
111
|
+
"get_cursor_position",
|
|
112
|
+
"get_screen_size",
|
|
113
|
+
"ocr",
|
|
114
|
+
"doctor",
|
|
115
|
+
"find_element",
|
|
116
|
+
]);
|
|
44
117
|
async function withSafety(sa) {
|
|
45
118
|
const platform = getPlatform();
|
|
46
119
|
if (platform.isScreenLocked?.())
|
|
@@ -64,27 +137,24 @@ async function withSafety(sa) {
|
|
|
64
137
|
if (shouldManageFocus)
|
|
65
138
|
await platform.saveFocus?.();
|
|
66
139
|
try {
|
|
67
|
-
return
|
|
140
|
+
return retryableActions.has(sa.action)
|
|
141
|
+
? await retry(() => sa.execute())
|
|
142
|
+
: await sa.execute();
|
|
68
143
|
}
|
|
69
144
|
finally {
|
|
70
145
|
if (shouldManageFocus)
|
|
71
146
|
await platform.restoreFocus?.();
|
|
72
147
|
}
|
|
73
148
|
}
|
|
74
|
-
|
|
75
|
-
if (
|
|
76
|
-
return
|
|
149
|
+
export function startUserActivityMonitor() {
|
|
150
|
+
if (userActivityInterval)
|
|
151
|
+
return;
|
|
77
152
|
try {
|
|
78
|
-
|
|
79
|
-
return { actionResult: result, screenshot: { type: "image", data: buf.toString("base64"), mimeType: "image/png" } };
|
|
153
|
+
lastCursorPos = getPlatform().getCursorPosition();
|
|
80
154
|
}
|
|
81
155
|
catch {
|
|
82
|
-
|
|
156
|
+
// Keep the default when the cursor cannot be queried during startup.
|
|
83
157
|
}
|
|
84
|
-
}
|
|
85
|
-
export function startUserActivityMonitor() {
|
|
86
|
-
if (userActivityInterval)
|
|
87
|
-
return;
|
|
88
158
|
userActivityInterval = setInterval(() => {
|
|
89
159
|
try {
|
|
90
160
|
const pos = getPlatform().getCursorPosition();
|
|
@@ -95,8 +165,9 @@ export function startUserActivityMonitor() {
|
|
|
95
165
|
}
|
|
96
166
|
catch { /* can't check cursor */ }
|
|
97
167
|
}, 250);
|
|
168
|
+
userActivityInterval.unref?.();
|
|
98
169
|
}
|
|
99
|
-
function stopUserActivityMonitor() {
|
|
170
|
+
export function stopUserActivityMonitor() {
|
|
100
171
|
if (userActivityInterval) {
|
|
101
172
|
clearInterval(userActivityInterval);
|
|
102
173
|
userActivityInterval = undefined;
|
|
@@ -104,43 +175,66 @@ function stopUserActivityMonitor() {
|
|
|
104
175
|
}
|
|
105
176
|
export function registerTools(server) {
|
|
106
177
|
const registry = ToolRegistry.instance;
|
|
107
|
-
|
|
178
|
+
const registerTool = (name, description, schema, handler) => {
|
|
179
|
+
server.tool(name, description, schema, async (params) => {
|
|
180
|
+
try {
|
|
181
|
+
return await handler(params);
|
|
182
|
+
}
|
|
183
|
+
catch (error) {
|
|
184
|
+
return mcpErrorResponse(error);
|
|
185
|
+
}
|
|
186
|
+
});
|
|
187
|
+
};
|
|
188
|
+
registerTool("screenshot", "Capture a screenshot of the entire screen or a region", {
|
|
108
189
|
display: z.number().optional().describe("Display index (default 0)"),
|
|
190
|
+
windowId: z.string().optional().describe("Window ID from list_windows; when set, captures that window"),
|
|
109
191
|
region: z.object({ x: z.number(), y: z.number(), width: z.number(), height: z.number() }).optional().describe("Region to capture"),
|
|
110
192
|
format: z.enum(["png", "jpeg"]).default("png").describe("Image format"),
|
|
111
193
|
maxWidth: z.number().default(1280).describe("Maximum output width in pixels. Aspect ratio is preserved."),
|
|
112
194
|
}, async (params) => {
|
|
113
|
-
|
|
195
|
+
if (params.windowId && params.region)
|
|
196
|
+
throw new UnsupportedParameterError("screenshot windowId cannot be combined with region");
|
|
197
|
+
const options = { format: params.format, maxWidth: params.maxWidth };
|
|
198
|
+
const buf = await withSafety({
|
|
199
|
+
action: "screenshot",
|
|
200
|
+
params,
|
|
201
|
+
requiresScreenRecording: true,
|
|
202
|
+
execute: () => params.windowId
|
|
203
|
+
? getPlatform().screenshotWindow
|
|
204
|
+
? getPlatform().screenshotWindow(params.windowId, options)
|
|
205
|
+
: Promise.reject(new UnsupportedParameterError("window screenshots are not implemented on this platform"))
|
|
206
|
+
: getPlatform().screenshot(params.display, params.region, options),
|
|
207
|
+
});
|
|
114
208
|
return { content: [{ type: "image", data: buf.toString("base64"), mimeType: `image/${params.format}` }] };
|
|
115
209
|
});
|
|
116
210
|
registry.register("screenshot");
|
|
117
|
-
|
|
211
|
+
registerTool("list_windows", "List all visible windows on screen", {
|
|
118
212
|
includeMinimized: z.boolean().optional().describe("Include minimized windows"),
|
|
119
213
|
}, async (params) => {
|
|
120
214
|
const windows = await withSafety({ action: "list_windows", params: {}, requiresAccessibility: true, execute: () => getPlatform().listWindows(params.includeMinimized) });
|
|
121
215
|
return { content: [{ type: "text", text: JSON.stringify(windows, null, 2) }] };
|
|
122
216
|
});
|
|
123
217
|
registry.register("list_windows");
|
|
124
|
-
|
|
218
|
+
registerTool("list_apps", "List all running applications", {}, async () => {
|
|
125
219
|
const apps = await withSafety({ action: "list_apps", params: {}, requiresAccessibility: true, execute: async () => getPlatform().listApps() });
|
|
126
220
|
return { content: [{ type: "text", text: JSON.stringify(apps, null, 2) }] };
|
|
127
221
|
});
|
|
128
222
|
registry.register("list_apps");
|
|
129
|
-
|
|
223
|
+
registerTool("focus_app", "Select an application/window as the active target context", {
|
|
130
224
|
app: z.string().describe("Application name to focus"),
|
|
131
225
|
}, async (params) => {
|
|
132
226
|
const target = await withSafety({ action: "focus_app", params: {}, requiresAccessibility: true, execute: () => getPlatform().focusApp(params.app) });
|
|
133
227
|
return { content: [{ type: "text", text: JSON.stringify(target, null, 2) }] };
|
|
134
228
|
});
|
|
135
229
|
registry.register("focus_app");
|
|
136
|
-
|
|
230
|
+
registerTool("get_window_state", "Get detailed state of a window including accessibility tree", {
|
|
137
231
|
windowId: z.string().optional().describe("Window ID"), depth: z.number().optional().describe("AX tree depth"), includeBounds: z.boolean().optional().describe("Include element bounds"),
|
|
138
232
|
}, async (params) => {
|
|
139
233
|
const state = await withSafety({ action: "get_window_state", params: {}, requiresAccessibility: true, execute: () => getPlatform().getWindowState(params.windowId, params.depth, params.includeBounds) });
|
|
140
234
|
return { content: [{ type: "text", text: JSON.stringify(state, null, 2) }] };
|
|
141
235
|
});
|
|
142
236
|
registry.register("get_window_state");
|
|
143
|
-
|
|
237
|
+
registerTool("click", "Click at screen coordinates", {
|
|
144
238
|
x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
|
|
145
239
|
button: z.enum(["left", "right", "middle"]).optional().describe("Mouse button"),
|
|
146
240
|
windowId: z.string().optional().describe("If set, x/y are relative to this window"),
|
|
@@ -148,10 +242,10 @@ export function registerTools(server) {
|
|
|
148
242
|
}, async (params) => {
|
|
149
243
|
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
150
244
|
await withSafety({ action: "click", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().click(pt.x, pt.y, params.button) });
|
|
151
|
-
return
|
|
245
|
+
return actionResponse({ clicked: true, x: pt.x, y: pt.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
152
246
|
});
|
|
153
247
|
registry.register("click");
|
|
154
|
-
|
|
248
|
+
registerTool("double_click", "Double-click at screen coordinates", {
|
|
155
249
|
x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
|
|
156
250
|
button: z.enum(["left", "right", "middle"]).optional().describe("Mouse button"),
|
|
157
251
|
windowId: z.string().optional().describe("If set, x/y are relative to this window"),
|
|
@@ -159,10 +253,10 @@ export function registerTools(server) {
|
|
|
159
253
|
}, async (params) => {
|
|
160
254
|
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
161
255
|
await withSafety({ action: "click", params: { x: pt.x, y: pt.y, doubleClick: true }, requiresAccessibility: true, execute: () => getPlatform().click(pt.x, pt.y, params.button, true) });
|
|
162
|
-
return
|
|
256
|
+
return actionResponse({ doubleClicked: true, x: pt.x, y: pt.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
163
257
|
});
|
|
164
258
|
registry.register("double_click");
|
|
165
|
-
|
|
259
|
+
registerTool("type_text", "Type text at the current cursor position", {
|
|
166
260
|
text: z.string().describe("Text to type"), delay: z.number().optional().describe("Delay between keystrokes in ms"),
|
|
167
261
|
windowId: z.string().optional().describe("UNSUPPORTED: windowId-targeted keyboard typing is not implemented"),
|
|
168
262
|
...captureAfterFields,
|
|
@@ -170,47 +264,55 @@ export function registerTools(server) {
|
|
|
170
264
|
if (params.windowId)
|
|
171
265
|
throw new UnsupportedParameterError("windowId-targeted keyboard typing is not implemented");
|
|
172
266
|
await withSafety({ action: "type_text", params: { text: params.text }, requiresAccessibility: true, execute: () => getPlatform().type(params.text, params.delay) });
|
|
173
|
-
return
|
|
267
|
+
return actionResponse({ typed: true, charCount: params.text.length }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
174
268
|
});
|
|
175
269
|
registry.register("type_text");
|
|
176
|
-
|
|
270
|
+
registerTool("press_key", "Press a keyboard shortcut", {
|
|
177
271
|
keys: z.array(z.string()).optional().describe("Keys to press simultaneously"),
|
|
178
272
|
key: z.string().optional().describe("Single key to press (alias for keys)"),
|
|
273
|
+
modifiers: z.array(z.string()).optional().describe("Modifier keys used with key, such as cmd, shift, alt, or ctrl"),
|
|
179
274
|
windowId: z.string().optional().describe("UNSUPPORTED: windowId-targeted key events are not implemented"),
|
|
180
275
|
...captureAfterFields,
|
|
181
276
|
}, async (params) => {
|
|
182
277
|
if (params.windowId)
|
|
183
278
|
throw new UnsupportedParameterError("windowId-targeted key events are not implemented");
|
|
184
|
-
const keys = params.keys ??
|
|
279
|
+
const keys = params.keys ?? [
|
|
280
|
+
...(params.modifiers ?? []),
|
|
281
|
+
...(params.key ? [params.key] : []),
|
|
282
|
+
];
|
|
185
283
|
if (keys.length === 0)
|
|
186
|
-
throw new
|
|
284
|
+
throw new UnsupportedParameterError("press_key requires at least one key");
|
|
187
285
|
await withSafety({ action: "press_key", params: { keys }, requiresAccessibility: true, execute: () => getPlatform().key(keys) });
|
|
188
|
-
return
|
|
286
|
+
return actionResponse({ pressed: true, keys }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
189
287
|
});
|
|
190
288
|
registry.register("press_key");
|
|
191
|
-
|
|
289
|
+
registerTool("scroll", "Scroll at coordinates", {
|
|
192
290
|
x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
|
|
193
|
-
deltaX: z.number().describe("Horizontal scroll"), deltaY: z.number().describe("Vertical scroll (negative = up)"),
|
|
291
|
+
deltaX: z.number().default(0).describe("Horizontal scroll"), deltaY: z.number().describe("Vertical scroll (negative = up)"),
|
|
194
292
|
windowId: z.string().optional().describe("If set, x/y are relative to this window"),
|
|
195
293
|
...captureAfterFields,
|
|
196
294
|
}, async (params) => {
|
|
197
295
|
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
198
|
-
|
|
199
|
-
|
|
296
|
+
const deltaX = params.deltaX ?? 0;
|
|
297
|
+
await withSafety({ action: "scroll", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().scroll(pt.x, pt.y, deltaX, params.deltaY) });
|
|
298
|
+
return actionResponse({ scrolled: true, x: pt.x, y: pt.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
200
299
|
});
|
|
201
300
|
registry.register("scroll");
|
|
202
|
-
|
|
301
|
+
registerTool("drag", "Drag from one point to another", {
|
|
203
302
|
startX: z.number().describe("Start X"), startY: z.number().describe("Start Y"),
|
|
204
303
|
endX: z.number().describe("End X"), endY: z.number().describe("End Y"),
|
|
205
304
|
button: z.enum(["left", "right", "middle"]).optional().describe("Mouse button"),
|
|
305
|
+
windowId: z.string().optional().describe("If set, start/end coordinates are relative to this window"),
|
|
206
306
|
duration: z.number().optional().describe("Drag duration in ms"),
|
|
207
307
|
...captureAfterFields,
|
|
208
308
|
}, async (params) => {
|
|
209
|
-
|
|
210
|
-
|
|
309
|
+
const start = await resolvePoint(params.startX, params.startY, params.windowId);
|
|
310
|
+
const end = await resolvePoint(params.endX, params.endY, params.windowId);
|
|
311
|
+
await withSafety({ action: "drag", params: { startX: start.x, startY: start.y, endX: end.x, endY: end.y }, requiresAccessibility: true, execute: () => getPlatform().drag(start.x, start.y, end.x, end.y, params.button, params.duration) });
|
|
312
|
+
return actionResponse({ dragged: true, startX: start.x, startY: start.y, endX: end.x, endY: end.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
211
313
|
});
|
|
212
314
|
registry.register("drag");
|
|
213
|
-
|
|
315
|
+
registerTool("doctor", "Check system permissions and diagnose common issues", {}, async () => {
|
|
214
316
|
const { checkPermissions } = await import("../safety/permissions.js");
|
|
215
317
|
const { MacOSPlatform: MacPlat } = await import("../platform/macos.js");
|
|
216
318
|
const permissions = await checkPermissions();
|
|
@@ -236,41 +338,46 @@ export function registerTools(server) {
|
|
|
236
338
|
return { content: [{ type: "text", text: JSON.stringify(report, null, 2) }] };
|
|
237
339
|
});
|
|
238
340
|
registry.register("doctor");
|
|
239
|
-
|
|
341
|
+
registerTool("wait", "Wait for a specified duration", { ms: z.number().describe("Duration in milliseconds") }, async (params) => {
|
|
240
342
|
await new Promise(r => setTimeout(r, params.ms));
|
|
241
343
|
return { content: [{ type: "text", text: JSON.stringify({ waited: params.ms }) }] };
|
|
242
344
|
});
|
|
243
345
|
registry.register("wait");
|
|
244
|
-
|
|
346
|
+
registerTool("wait_for_element", "Poll until an accessibility element matching the criteria appears", {
|
|
245
347
|
text: z.string().optional().describe("Element text"), role: z.string().optional().describe("Element role"),
|
|
246
|
-
app: z.string().optional().describe("Target app"),
|
|
348
|
+
app: z.string().optional().describe("Target app"),
|
|
349
|
+
timeout: z.number().optional().describe("Timeout ms (default 5000)"),
|
|
350
|
+
timeoutMs: z.number().optional().describe("Alias for timeout"),
|
|
351
|
+
interval: z.number().optional().describe("Poll interval ms (default 500)"),
|
|
352
|
+
intervalMs: z.number().optional().describe("Alias for interval"),
|
|
247
353
|
}, async (params) => {
|
|
248
|
-
const deadline = Date.now() + (params.timeout ?? 5000);
|
|
249
|
-
const interval = params.interval ?? 500;
|
|
354
|
+
const deadline = Date.now() + (params.timeout ?? params.timeoutMs ?? 5000);
|
|
355
|
+
const interval = params.interval ?? params.intervalMs ?? 500;
|
|
356
|
+
const query = { text: params.text, role: params.role, app: params.app, maxResults: 1 };
|
|
357
|
+
const { granted } = await checkPermission("accessibility");
|
|
358
|
+
if (!granted)
|
|
359
|
+
throw new PermissionError("accessibility", process.platform);
|
|
250
360
|
while (Date.now() < deadline) {
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
return { content: [{ type: "text", text: JSON.stringify({ found: true, element: results[0] }, null, 2) }] };
|
|
255
|
-
}
|
|
256
|
-
catch { /* retry */ }
|
|
361
|
+
const results = await getPlatform().findElement(query);
|
|
362
|
+
if (results.length > 0)
|
|
363
|
+
return { content: [{ type: "text", text: JSON.stringify({ found: true, element: results[0] }, null, 2) }] };
|
|
257
364
|
await new Promise(r => setTimeout(r, interval));
|
|
258
365
|
}
|
|
259
366
|
return { content: [{ type: "text", text: JSON.stringify({ found: false, reason: "timeout" }) }] };
|
|
260
367
|
});
|
|
261
368
|
registry.register("wait_for_element");
|
|
262
|
-
|
|
369
|
+
registerTool("get_cursor_position", "Get current cursor position", {}, async () => {
|
|
263
370
|
const pos = await withSafety({ action: "get_cursor_position", params: {}, execute: () => Promise.resolve(getPlatform().getCursorPosition()) });
|
|
264
371
|
return { content: [{ type: "text", text: JSON.stringify(pos, null, 2) }] };
|
|
265
372
|
});
|
|
266
373
|
registry.register("get_cursor_position");
|
|
267
|
-
|
|
374
|
+
registerTool("get_screen_size", "Get screen dimensions and scale factor", {
|
|
268
375
|
display: z.number().optional().describe("Display index"),
|
|
269
376
|
}, async (params) => {
|
|
270
377
|
return { content: [{ type: "text", text: JSON.stringify(getPlatform().getScreenSize(params.display), null, 2) }] };
|
|
271
378
|
});
|
|
272
379
|
registry.register("get_screen_size");
|
|
273
|
-
|
|
380
|
+
registerTool("ocr", "Perform OCR on screen region", {
|
|
274
381
|
display: z.number().optional().describe("Display index"),
|
|
275
382
|
region: z.object({ x: z.number(), y: z.number(), width: z.number(), height: z.number() }).optional().describe("Region to OCR"),
|
|
276
383
|
}, async (params) => {
|
|
@@ -278,16 +385,17 @@ export function registerTools(server) {
|
|
|
278
385
|
return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
|
|
279
386
|
});
|
|
280
387
|
registry.register("ocr");
|
|
281
|
-
|
|
388
|
+
registerTool("move", "Move cursor to coordinates", {
|
|
282
389
|
x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
|
|
283
390
|
windowId: z.string().optional().describe("If set, x/y are relative to this window"),
|
|
391
|
+
...captureAfterFields,
|
|
284
392
|
}, async (params) => {
|
|
285
393
|
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
286
394
|
await withSafety({ action: "move", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().move(pt.x, pt.y) });
|
|
287
|
-
return
|
|
395
|
+
return actionResponse({ moved: true, x: pt.x, y: pt.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
288
396
|
});
|
|
289
397
|
registry.register("move");
|
|
290
|
-
|
|
398
|
+
registerTool("find_element", "Find accessibility elements by text, role, or app", {
|
|
291
399
|
text: z.string().optional().describe("Text to search"), role: z.string().optional().describe("AX role"), app: z.string().optional().describe("Target app"),
|
|
292
400
|
depth: z.number().optional().describe("AX tree depth"), includeBounds: z.boolean().default(true).describe("Include bounds"), maxResults: z.number().min(1).max(200).default(50).describe("Max results"),
|
|
293
401
|
}, async (params) => {
|
|
@@ -296,26 +404,26 @@ export function registerTools(server) {
|
|
|
296
404
|
return { content: [{ type: "text", text: JSON.stringify(results, null, 2) }] };
|
|
297
405
|
});
|
|
298
406
|
registry.register("find_element");
|
|
299
|
-
|
|
407
|
+
registerTool("click_element", "Click an accessibility element by its ID", {
|
|
300
408
|
elementId: z.string().describe("AX element identifier"), app: z.string().optional().describe("Target app"), ...captureAfterFields,
|
|
301
409
|
}, async (params) => {
|
|
302
410
|
await withSafety({ action: "click_element", params: {}, requiresAccessibility: true, execute: () => getPlatform().clickElement(params.elementId, params.app) });
|
|
303
|
-
return
|
|
411
|
+
return actionResponse({ clicked: true, elementId: params.elementId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
304
412
|
});
|
|
305
413
|
registry.register("click_element");
|
|
306
|
-
|
|
414
|
+
registerTool("set_value", "Set the value of an accessibility element", {
|
|
307
415
|
elementId: z.string().describe("AX element identifier"), value: z.string().describe("Value to set"), app: z.string().optional().describe("Target app"), ...captureAfterFields,
|
|
308
416
|
}, async (params) => {
|
|
309
417
|
await withSafety({ action: "set_value", params: { value: params.value }, requiresAccessibility: true, execute: () => getPlatform().setElementValue(params.elementId, params.value, params.app) });
|
|
310
|
-
return
|
|
418
|
+
return actionResponse({ setValue: true, elementId: params.elementId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
311
419
|
});
|
|
312
420
|
registry.register("set_value");
|
|
313
|
-
|
|
421
|
+
registerTool("type_in_element", "Type text into an accessibility element, optionally clearing first", {
|
|
314
422
|
elementId: z.string().describe("AX element identifier"), text: z.string().describe("Text to type"),
|
|
315
423
|
app: z.string().optional().describe("Target app"), clearFirst: z.boolean().optional().describe("Clear existing text before typing"), ...captureAfterFields,
|
|
316
424
|
}, async (params) => {
|
|
317
425
|
await withSafety({ action: "type_in_element", params: { text: params.text }, requiresAccessibility: true, execute: () => getPlatform().typeInElement(params.elementId, params.text, params.app, params.clearFirst) });
|
|
318
|
-
return
|
|
426
|
+
return actionResponse({ typed: true, elementId: params.elementId, charCount: params.text.length }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
319
427
|
});
|
|
320
428
|
registry.register("type_in_element");
|
|
321
429
|
log.info("Registered tools", { count: registry.tools.length, tools: registry.tools.join(", ") });
|
|
@@ -3,6 +3,8 @@ export declare class MacOSPlatform implements Platform {
|
|
|
3
3
|
private readonly elementCache;
|
|
4
4
|
private readonly elementCacheTtlMs;
|
|
5
5
|
private readonly elementCacheMaxSize;
|
|
6
|
+
private readonly windowCacheTtlMs;
|
|
7
|
+
private windowCache;
|
|
6
8
|
private activeTarget;
|
|
7
9
|
private savedFocus;
|
|
8
10
|
/** Remove expired entries from the element cache. */
|
|
@@ -30,6 +32,8 @@ export declare class MacOSPlatform implements Platform {
|
|
|
30
32
|
scroll(x: number, y: number, deltaX: number, deltaY: number): Promise<void>;
|
|
31
33
|
getCursorPosition(): CursorPosition;
|
|
32
34
|
ocr(display?: number, region?: ScreenRegion): Promise<OcrResult>;
|
|
35
|
+
private ocrNative;
|
|
36
|
+
private ocrJxa;
|
|
33
37
|
type(text: string, delay?: number): Promise<void>;
|
|
34
38
|
key(keys: string[]): Promise<void>;
|
|
35
39
|
findElement(options: FindElementOptions): Promise<FindElementResult[]>;
|