ucu-mcp 0.1.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -20
- package/README.md +156 -14
- package/dist/src/mcp/server.js +29 -8
- package/dist/src/mcp/tools.d.ts +7 -1
- package/dist/src/mcp/tools.js +349 -82
- package/dist/src/platform/base.d.ts +26 -1
- package/dist/src/platform/linux.d.ts +4 -2
- package/dist/src/platform/linux.js +51 -0
- package/dist/src/platform/macos.d.ts +10 -2
- package/dist/src/platform/macos.js +513 -229
- package/dist/src/platform/windows.d.ts +4 -2
- package/dist/src/platform/windows.js +33 -0
- package/dist/src/safety/guard.d.ts +8 -1
- package/dist/src/safety/guard.js +43 -4
- package/dist/src/util/errors.d.ts +12 -0
- package/dist/src/util/errors.js +16 -0
- package/dist/src/utils/input.js +88 -18
- package/native/cgevent/cgevent-helper +0 -0
- package/native/cgevent/main.swift +126 -0
- package/native/ocr/main.swift +89 -0
- package/native/ocr/ocr-helper +0 -0
- package/package.json +7 -4
package/dist/src/mcp/tools.js
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Tool registry for UCU-MCP.
|
|
3
3
|
*
|
|
4
|
-
* Registers
|
|
4
|
+
* Registers 24 MCP tools on the server and dispatches each call through
|
|
5
5
|
* a shared safety/permission/retry pipeline (`withSafety`).
|
|
6
6
|
*/
|
|
7
7
|
import { z } from "zod";
|
|
8
8
|
import { MacOSPlatform } from "../platform/macos.js";
|
|
9
|
-
import { SafetyGuard } from "../safety/guard.js";
|
|
9
|
+
import { SafetyGuard, classifyAction } from "../safety/guard.js";
|
|
10
10
|
import { checkPermission } from "../safety/permissions.js";
|
|
11
11
|
import { retry } from "../util/retry.js";
|
|
12
12
|
import { createLogger } from "../util/logger.js";
|
|
13
|
-
import { SafetyError, PermissionError, UnsupportedParameterError } from "../util/errors.js";
|
|
13
|
+
import { SafetyError, PermissionError, UnsupportedParameterError, UcuError, WindowNotFoundError } from "../util/errors.js";
|
|
14
14
|
const log = createLogger("tools");
|
|
15
15
|
let _platform;
|
|
16
16
|
function getPlatform() {
|
|
@@ -20,6 +20,14 @@ function getPlatform() {
|
|
|
20
20
|
return _platform;
|
|
21
21
|
}
|
|
22
22
|
const safety = new SafetyGuard();
|
|
23
|
+
// Active target context — set by focus_app, used by AX element tools
|
|
24
|
+
let activeTargetContext;
|
|
25
|
+
/**
|
|
26
|
+
* Get the currently active target context (set by focus_app).
|
|
27
|
+
*/
|
|
28
|
+
export function getActiveTarget() {
|
|
29
|
+
return activeTargetContext;
|
|
30
|
+
}
|
|
23
31
|
// User activity monitor — pauses automation when user moves the cursor
|
|
24
32
|
let lastCursorPos = { x: 0, y: 0 };
|
|
25
33
|
let userActivityInterval;
|
|
@@ -31,21 +39,134 @@ const captureAfterFields = {
|
|
|
31
39
|
async function resolvePoint(x, y, windowId) {
|
|
32
40
|
if (!windowId)
|
|
33
41
|
return { x, y };
|
|
42
|
+
const win = (await getPlatform().listWindows()).find(w => w.id === windowId);
|
|
43
|
+
if (!win)
|
|
44
|
+
throw new WindowNotFoundError(windowId);
|
|
45
|
+
return { x: win.bounds.x + x, y: win.bounds.y + y };
|
|
46
|
+
}
|
|
47
|
+
function jsonText(value) {
|
|
48
|
+
return { type: "text", text: JSON.stringify(value, null, 2) };
|
|
49
|
+
}
|
|
50
|
+
function recoveryHint(code) {
|
|
51
|
+
switch (code) {
|
|
52
|
+
case "WINDOW_NOT_FOUND":
|
|
53
|
+
return "Run list_windows again, then retry with a fresh windowId or omit windowId for screen coordinates.";
|
|
54
|
+
case "TARGET_STALE":
|
|
55
|
+
return "Run focus_app again for the target app, or run list_windows and retry with a fresh windowId.";
|
|
56
|
+
case "ELEMENT_NOT_FOUND":
|
|
57
|
+
return "Run find_element again, then retry with a fresh elementId.";
|
|
58
|
+
case "PERMISSION_DENIED":
|
|
59
|
+
return "Run doctor and grant the missing macOS permission, then restart the launching client.";
|
|
60
|
+
case "UNSUPPORTED_PARAMETER":
|
|
61
|
+
return "Remove or replace the unsupported parameter; inspect tools/list for this tool schema.";
|
|
62
|
+
case "SAFETY_BLOCKED":
|
|
63
|
+
return "Choose a less risky action or ask the user to perform it manually.";
|
|
64
|
+
case "INPUT_FAILED":
|
|
65
|
+
return "Observe current state with screenshot or get_window_state before retrying manually.";
|
|
66
|
+
case "CAPTURE_FAILED":
|
|
67
|
+
return "Run doctor to check Screen Recording permission, then retry screenshot or ocr.";
|
|
68
|
+
case "COORDINATE_OUT_OF_BOUNDS":
|
|
69
|
+
return "Run get_screen_size or list_windows, then retry with coordinates inside the active display or window bounds.";
|
|
70
|
+
default:
|
|
71
|
+
return "Inspect the error message, observe the current UI state, and retry only if the operation is safe.";
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
function errorDetails(error) {
|
|
75
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
76
|
+
const code = error instanceof UcuError ? error.code : "UNKNOWN_ERROR";
|
|
77
|
+
const retryable = error instanceof UcuError ? error.retryable : false;
|
|
78
|
+
return {
|
|
79
|
+
name: err.name,
|
|
80
|
+
code,
|
|
81
|
+
retryable,
|
|
82
|
+
message: err.message,
|
|
83
|
+
recovery: recoveryHint(code),
|
|
84
|
+
};
|
|
85
|
+
}
|
|
86
|
+
let _actionCounter = 0;
|
|
87
|
+
function nextActionId() {
|
|
88
|
+
_actionCounter = (_actionCounter + 1) % 1_000_000;
|
|
89
|
+
return `a${Date.now().toString(36)}-${_actionCounter.toString(36)}`;
|
|
90
|
+
}
|
|
91
|
+
function buildActionReceipt(action, status, target, result, captureRequested, captureFormat, captureMaxWidth, captureError, warnings = []) {
|
|
92
|
+
const captureStatus = captureRequested
|
|
93
|
+
? captureError ? "error" : "ok"
|
|
94
|
+
: "skipped";
|
|
95
|
+
return {
|
|
96
|
+
actionId: nextActionId(),
|
|
97
|
+
action,
|
|
98
|
+
status,
|
|
99
|
+
target,
|
|
100
|
+
result,
|
|
101
|
+
capture: {
|
|
102
|
+
requested: captureRequested,
|
|
103
|
+
status: captureStatus,
|
|
104
|
+
...(captureFormat && { format: captureFormat }),
|
|
105
|
+
...(captureMaxWidth && { maxWidth: captureMaxWidth }),
|
|
106
|
+
...(captureError && { error: captureError }),
|
|
107
|
+
},
|
|
108
|
+
warnings,
|
|
109
|
+
next: captureError
|
|
110
|
+
? "screenshot"
|
|
111
|
+
: status === "partial"
|
|
112
|
+
? "get_window_state"
|
|
113
|
+
: "find_element or get_window_state",
|
|
114
|
+
};
|
|
115
|
+
}
|
|
116
|
+
function mcpErrorResponse(error) {
|
|
117
|
+
return {
|
|
118
|
+
isError: true,
|
|
119
|
+
content: [
|
|
120
|
+
jsonText({
|
|
121
|
+
error: errorDetails(error),
|
|
122
|
+
}),
|
|
123
|
+
],
|
|
124
|
+
};
|
|
125
|
+
}
|
|
126
|
+
async function actionResponse(action, result, target, captureAfter, captureFormat = "jpeg", captureMaxWidth = 1280, warnings = []) {
|
|
127
|
+
const receipt = buildActionReceipt(action, "ok", target, result, captureAfter ?? false, captureFormat, captureMaxWidth, undefined, warnings);
|
|
128
|
+
if (!captureAfter) {
|
|
129
|
+
return { content: [jsonText(receipt)] };
|
|
130
|
+
}
|
|
34
131
|
try {
|
|
35
|
-
const
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
132
|
+
const buf = await getPlatform().screenshot(undefined, undefined, {
|
|
133
|
+
format: captureFormat,
|
|
134
|
+
maxWidth: captureMaxWidth,
|
|
135
|
+
});
|
|
136
|
+
return {
|
|
137
|
+
content: [
|
|
138
|
+
jsonText(receipt),
|
|
139
|
+
{
|
|
140
|
+
type: "image",
|
|
141
|
+
data: buf.toString("base64"),
|
|
142
|
+
mimeType: `image/${captureFormat}`,
|
|
143
|
+
},
|
|
144
|
+
],
|
|
145
|
+
};
|
|
39
146
|
}
|
|
40
|
-
catch {
|
|
41
|
-
|
|
147
|
+
catch (error) {
|
|
148
|
+
const partialReceipt = buildActionReceipt(action, "partial", target, result, true, captureFormat, captureMaxWidth, errorDetails(error), [...warnings, "Post-action screenshot capture failed"]);
|
|
149
|
+
return { content: [jsonText(partialReceipt)] };
|
|
42
150
|
}
|
|
43
151
|
}
|
|
152
|
+
const retryableActions = new Set([
|
|
153
|
+
"screenshot",
|
|
154
|
+
"list_windows",
|
|
155
|
+
"list_apps",
|
|
156
|
+
"get_window_state",
|
|
157
|
+
"get_cursor_position",
|
|
158
|
+
"get_screen_size",
|
|
159
|
+
"ocr",
|
|
160
|
+
"doctor",
|
|
161
|
+
"find_element",
|
|
162
|
+
]);
|
|
44
163
|
async function withSafety(sa) {
|
|
45
164
|
const platform = getPlatform();
|
|
46
165
|
if (platform.isScreenLocked?.())
|
|
47
166
|
throw new SafetyError("Screen is locked");
|
|
48
|
-
const check = safety.checkAction(sa.action, sa.params
|
|
167
|
+
const check = safety.checkAction(sa.action, sa.params, {
|
|
168
|
+
skipUserActivityPause: sa.skipUserActivityPause ?? classifyAction(sa.action) === "observe",
|
|
169
|
+
});
|
|
49
170
|
if (!check.allowed)
|
|
50
171
|
throw new SafetyError(check.reason ?? "Action blocked by safety guard");
|
|
51
172
|
if (sa.requiresAccessibility) {
|
|
@@ -64,27 +185,24 @@ async function withSafety(sa) {
|
|
|
64
185
|
if (shouldManageFocus)
|
|
65
186
|
await platform.saveFocus?.();
|
|
66
187
|
try {
|
|
67
|
-
return
|
|
188
|
+
return retryableActions.has(sa.action)
|
|
189
|
+
? await retry(() => sa.execute())
|
|
190
|
+
: await sa.execute();
|
|
68
191
|
}
|
|
69
192
|
finally {
|
|
70
193
|
if (shouldManageFocus)
|
|
71
194
|
await platform.restoreFocus?.();
|
|
72
195
|
}
|
|
73
196
|
}
|
|
74
|
-
|
|
75
|
-
if (
|
|
76
|
-
return
|
|
197
|
+
export function startUserActivityMonitor() {
|
|
198
|
+
if (userActivityInterval)
|
|
199
|
+
return;
|
|
77
200
|
try {
|
|
78
|
-
|
|
79
|
-
return { actionResult: result, screenshot: { type: "image", data: buf.toString("base64"), mimeType: "image/png" } };
|
|
201
|
+
lastCursorPos = getPlatform().getCursorPosition();
|
|
80
202
|
}
|
|
81
203
|
catch {
|
|
82
|
-
|
|
204
|
+
// Keep the default when the cursor cannot be queried during startup.
|
|
83
205
|
}
|
|
84
|
-
}
|
|
85
|
-
export function startUserActivityMonitor() {
|
|
86
|
-
if (userActivityInterval)
|
|
87
|
-
return;
|
|
88
206
|
userActivityInterval = setInterval(() => {
|
|
89
207
|
try {
|
|
90
208
|
const pos = getPlatform().getCursorPosition();
|
|
@@ -95,8 +213,9 @@ export function startUserActivityMonitor() {
|
|
|
95
213
|
}
|
|
96
214
|
catch { /* can't check cursor */ }
|
|
97
215
|
}, 250);
|
|
216
|
+
userActivityInterval.unref?.();
|
|
98
217
|
}
|
|
99
|
-
function stopUserActivityMonitor() {
|
|
218
|
+
export function stopUserActivityMonitor() {
|
|
100
219
|
if (userActivityInterval) {
|
|
101
220
|
clearInterval(userActivityInterval);
|
|
102
221
|
userActivityInterval = undefined;
|
|
@@ -104,43 +223,68 @@ function stopUserActivityMonitor() {
|
|
|
104
223
|
}
|
|
105
224
|
export function registerTools(server) {
|
|
106
225
|
const registry = ToolRegistry.instance;
|
|
107
|
-
|
|
226
|
+
const registerTool = (name, description, schema, handler) => {
|
|
227
|
+
server.tool(name, description, schema, async (params) => {
|
|
228
|
+
try {
|
|
229
|
+
return await handler(params);
|
|
230
|
+
}
|
|
231
|
+
catch (error) {
|
|
232
|
+
return mcpErrorResponse(error);
|
|
233
|
+
}
|
|
234
|
+
});
|
|
235
|
+
};
|
|
236
|
+
registerTool("screenshot", "Capture a screenshot of the entire screen or a region", {
|
|
108
237
|
display: z.number().optional().describe("Display index (default 0)"),
|
|
238
|
+
windowId: z.string().optional().describe("Window ID from list_windows; when set, captures that window"),
|
|
109
239
|
region: z.object({ x: z.number(), y: z.number(), width: z.number(), height: z.number() }).optional().describe("Region to capture"),
|
|
110
240
|
format: z.enum(["png", "jpeg"]).default("png").describe("Image format"),
|
|
111
241
|
maxWidth: z.number().default(1280).describe("Maximum output width in pixels. Aspect ratio is preserved."),
|
|
112
242
|
}, async (params) => {
|
|
113
|
-
|
|
243
|
+
if (params.windowId && params.region)
|
|
244
|
+
throw new UnsupportedParameterError("screenshot windowId cannot be combined with region");
|
|
245
|
+
const options = { format: params.format, maxWidth: params.maxWidth };
|
|
246
|
+
const buf = await withSafety({
|
|
247
|
+
action: "screenshot",
|
|
248
|
+
params,
|
|
249
|
+
requiresScreenRecording: true,
|
|
250
|
+
execute: () => params.windowId
|
|
251
|
+
? getPlatform().screenshotWindow
|
|
252
|
+
? getPlatform().screenshotWindow(params.windowId, options)
|
|
253
|
+
: Promise.reject(new UnsupportedParameterError("window screenshots are not implemented on this platform"))
|
|
254
|
+
: getPlatform().screenshot(params.display, params.region, options),
|
|
255
|
+
});
|
|
114
256
|
return { content: [{ type: "image", data: buf.toString("base64"), mimeType: `image/${params.format}` }] };
|
|
115
257
|
});
|
|
116
258
|
registry.register("screenshot");
|
|
117
|
-
|
|
259
|
+
registerTool("list_windows", "List all visible windows on screen", {
|
|
118
260
|
includeMinimized: z.boolean().optional().describe("Include minimized windows"),
|
|
119
261
|
}, async (params) => {
|
|
120
262
|
const windows = await withSafety({ action: "list_windows", params: {}, requiresAccessibility: true, execute: () => getPlatform().listWindows(params.includeMinimized) });
|
|
121
263
|
return { content: [{ type: "text", text: JSON.stringify(windows, null, 2) }] };
|
|
122
264
|
});
|
|
123
265
|
registry.register("list_windows");
|
|
124
|
-
|
|
266
|
+
registerTool("list_apps", "List all running applications", {}, async () => {
|
|
125
267
|
const apps = await withSafety({ action: "list_apps", params: {}, requiresAccessibility: true, execute: async () => getPlatform().listApps() });
|
|
126
268
|
return { content: [{ type: "text", text: JSON.stringify(apps, null, 2) }] };
|
|
127
269
|
});
|
|
128
270
|
registry.register("list_apps");
|
|
129
|
-
|
|
271
|
+
registerTool("focus_app", "Select an application/window as the active target context", {
|
|
130
272
|
app: z.string().describe("Application name to focus"),
|
|
131
273
|
}, async (params) => {
|
|
132
274
|
const target = await withSafety({ action: "focus_app", params: {}, requiresAccessibility: true, execute: () => getPlatform().focusApp(params.app) });
|
|
275
|
+
activeTargetContext = target;
|
|
133
276
|
return { content: [{ type: "text", text: JSON.stringify(target, null, 2) }] };
|
|
134
277
|
});
|
|
135
278
|
registry.register("focus_app");
|
|
136
|
-
|
|
279
|
+
registerTool("get_window_state", "Get detailed state of a window including accessibility tree", {
|
|
137
280
|
windowId: z.string().optional().describe("Window ID"), depth: z.number().optional().describe("AX tree depth"), includeBounds: z.boolean().optional().describe("Include element bounds"),
|
|
138
281
|
}, async (params) => {
|
|
139
|
-
const
|
|
282
|
+
const effectiveWindowId = params.windowId || getActiveTarget()?.windowId;
|
|
283
|
+
const state = await withSafety({ action: "get_window_state", params: {}, requiresAccessibility: true, execute: () => getPlatform().getWindowState(effectiveWindowId, params.depth, params.includeBounds) });
|
|
140
284
|
return { content: [{ type: "text", text: JSON.stringify(state, null, 2) }] };
|
|
141
285
|
});
|
|
142
286
|
registry.register("get_window_state");
|
|
143
|
-
|
|
287
|
+
registerTool("click", "Click at screen coordinates", {
|
|
144
288
|
x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
|
|
145
289
|
button: z.enum(["left", "right", "middle"]).optional().describe("Mouse button"),
|
|
146
290
|
windowId: z.string().optional().describe("If set, x/y are relative to this window"),
|
|
@@ -148,10 +292,10 @@ export function registerTools(server) {
|
|
|
148
292
|
}, async (params) => {
|
|
149
293
|
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
150
294
|
await withSafety({ action: "click", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().click(pt.x, pt.y, params.button) });
|
|
151
|
-
return {
|
|
295
|
+
return actionResponse("click", { clicked: true, x: pt.x, y: pt.y }, { x: pt.x, y: pt.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
152
296
|
});
|
|
153
297
|
registry.register("click");
|
|
154
|
-
|
|
298
|
+
registerTool("double_click", "Double-click at screen coordinates", {
|
|
155
299
|
x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
|
|
156
300
|
button: z.enum(["left", "right", "middle"]).optional().describe("Mouse button"),
|
|
157
301
|
windowId: z.string().optional().describe("If set, x/y are relative to this window"),
|
|
@@ -159,10 +303,10 @@ export function registerTools(server) {
|
|
|
159
303
|
}, async (params) => {
|
|
160
304
|
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
161
305
|
await withSafety({ action: "click", params: { x: pt.x, y: pt.y, doubleClick: true }, requiresAccessibility: true, execute: () => getPlatform().click(pt.x, pt.y, params.button, true) });
|
|
162
|
-
return {
|
|
306
|
+
return actionResponse("double_click", { doubleClicked: true, x: pt.x, y: pt.y }, { x: pt.x, y: pt.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
163
307
|
});
|
|
164
308
|
registry.register("double_click");
|
|
165
|
-
|
|
309
|
+
registerTool("type_text", "Type text at the current cursor position", {
|
|
166
310
|
text: z.string().describe("Text to type"), delay: z.number().optional().describe("Delay between keystrokes in ms"),
|
|
167
311
|
windowId: z.string().optional().describe("UNSUPPORTED: windowId-targeted keyboard typing is not implemented"),
|
|
168
312
|
...captureAfterFields,
|
|
@@ -170,107 +314,208 @@ export function registerTools(server) {
|
|
|
170
314
|
if (params.windowId)
|
|
171
315
|
throw new UnsupportedParameterError("windowId-targeted keyboard typing is not implemented");
|
|
172
316
|
await withSafety({ action: "type_text", params: { text: params.text }, requiresAccessibility: true, execute: () => getPlatform().type(params.text, params.delay) });
|
|
173
|
-
return
|
|
317
|
+
return actionResponse("type_text", { typed: true, charCount: params.text.length }, {}, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
174
318
|
});
|
|
175
319
|
registry.register("type_text");
|
|
176
|
-
|
|
320
|
+
registerTool("press_key", "Press a keyboard shortcut", {
|
|
177
321
|
keys: z.array(z.string()).optional().describe("Keys to press simultaneously"),
|
|
178
322
|
key: z.string().optional().describe("Single key to press (alias for keys)"),
|
|
323
|
+
modifiers: z.array(z.string()).optional().describe("Modifier keys used with key, such as cmd, shift, alt, or ctrl"),
|
|
179
324
|
windowId: z.string().optional().describe("UNSUPPORTED: windowId-targeted key events are not implemented"),
|
|
180
325
|
...captureAfterFields,
|
|
181
326
|
}, async (params) => {
|
|
182
327
|
if (params.windowId)
|
|
183
328
|
throw new UnsupportedParameterError("windowId-targeted key events are not implemented");
|
|
184
|
-
const keys = params.keys ??
|
|
329
|
+
const keys = params.keys ?? [
|
|
330
|
+
...(params.modifiers ?? []),
|
|
331
|
+
...(params.key ? [params.key] : []),
|
|
332
|
+
];
|
|
185
333
|
if (keys.length === 0)
|
|
186
|
-
throw new
|
|
334
|
+
throw new UnsupportedParameterError("press_key requires at least one key");
|
|
187
335
|
await withSafety({ action: "press_key", params: { keys }, requiresAccessibility: true, execute: () => getPlatform().key(keys) });
|
|
188
|
-
return
|
|
336
|
+
return actionResponse("press_key", { pressed: true, keys }, {}, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
189
337
|
});
|
|
190
338
|
registry.register("press_key");
|
|
191
|
-
|
|
339
|
+
registerTool("scroll", "Scroll at coordinates", {
|
|
192
340
|
x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
|
|
193
|
-
deltaX: z.number().describe("Horizontal scroll"), deltaY: z.number().describe("Vertical scroll (negative = up)"),
|
|
341
|
+
deltaX: z.number().default(0).describe("Horizontal scroll"), deltaY: z.number().describe("Vertical scroll (negative = up)"),
|
|
194
342
|
windowId: z.string().optional().describe("If set, x/y are relative to this window"),
|
|
195
343
|
...captureAfterFields,
|
|
196
344
|
}, async (params) => {
|
|
197
345
|
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
198
|
-
|
|
199
|
-
|
|
346
|
+
const deltaX = params.deltaX ?? 0;
|
|
347
|
+
await withSafety({ action: "scroll", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().scroll(pt.x, pt.y, deltaX, params.deltaY) });
|
|
348
|
+
return actionResponse("scroll", { scrolled: true, x: pt.x, y: pt.y }, { x: pt.x, y: pt.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
200
349
|
});
|
|
201
350
|
registry.register("scroll");
|
|
202
|
-
|
|
351
|
+
registerTool("drag", "Drag from one point to another", {
|
|
203
352
|
startX: z.number().describe("Start X"), startY: z.number().describe("Start Y"),
|
|
204
353
|
endX: z.number().describe("End X"), endY: z.number().describe("End Y"),
|
|
205
354
|
button: z.enum(["left", "right", "middle"]).optional().describe("Mouse button"),
|
|
355
|
+
windowId: z.string().optional().describe("If set, start/end coordinates are relative to this window"),
|
|
206
356
|
duration: z.number().optional().describe("Drag duration in ms"),
|
|
207
357
|
...captureAfterFields,
|
|
208
358
|
}, async (params) => {
|
|
209
|
-
|
|
210
|
-
|
|
359
|
+
const start = await resolvePoint(params.startX, params.startY, params.windowId);
|
|
360
|
+
const end = await resolvePoint(params.endX, params.endY, params.windowId);
|
|
361
|
+
await withSafety({ action: "drag", params: { startX: start.x, startY: start.y, endX: end.x, endY: end.y }, requiresAccessibility: true, execute: () => getPlatform().drag(start.x, start.y, end.x, end.y, params.button, params.duration) });
|
|
362
|
+
return actionResponse("drag", { dragged: true, startX: start.x, startY: start.y, endX: end.x, endY: end.y }, { startX: start.x, startY: start.y, endX: end.x, endY: end.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
211
363
|
});
|
|
212
364
|
registry.register("drag");
|
|
213
|
-
|
|
365
|
+
registerTool("doctor", "Check system permissions, native helpers, and client readiness", {}, async () => {
|
|
214
366
|
const { checkPermissions } = await import("../safety/permissions.js");
|
|
215
367
|
const { MacOSPlatform: MacPlat } = await import("../platform/macos.js");
|
|
368
|
+
const { existsSync } = await import("node:fs");
|
|
369
|
+
const { join, dirname } = await import("node:path");
|
|
370
|
+
const { fileURLToPath } = await import("node:url");
|
|
371
|
+
const { execFileSync } = await import("node:child_process");
|
|
216
372
|
const permissions = await checkPermissions();
|
|
217
373
|
const screenLocked = process.platform === "darwin" ? new MacPlat().isScreenLocked?.() ?? false : false;
|
|
374
|
+
let nativeHelpers;
|
|
375
|
+
if (process.platform === "darwin") {
|
|
376
|
+
const moduleDir = dirname(fileURLToPath(import.meta.url));
|
|
377
|
+
const checkPaths = (subdirs) => {
|
|
378
|
+
const paths = [
|
|
379
|
+
join(process.cwd(), ...subdirs),
|
|
380
|
+
join(moduleDir, "..", ...subdirs),
|
|
381
|
+
join(moduleDir, "..", "..", ...subdirs),
|
|
382
|
+
];
|
|
383
|
+
return paths.some(p => { try {
|
|
384
|
+
return existsSync(p);
|
|
385
|
+
}
|
|
386
|
+
catch {
|
|
387
|
+
return false;
|
|
388
|
+
} });
|
|
389
|
+
};
|
|
390
|
+
nativeHelpers = {
|
|
391
|
+
cgevent: checkPaths(["native", "cgevent", "cgevent-helper"]),
|
|
392
|
+
ocr: checkPaths(["native", "ocr", "ocr-helper"]),
|
|
393
|
+
};
|
|
394
|
+
}
|
|
395
|
+
let readiness = "ready";
|
|
396
|
+
const issues = [];
|
|
397
|
+
if (!permissions.granted) {
|
|
398
|
+
readiness = "blocked";
|
|
399
|
+
issues.push("Missing macOS permissions: " + permissions.missing.join(", "));
|
|
400
|
+
}
|
|
401
|
+
if (screenLocked) {
|
|
402
|
+
readiness = "blocked";
|
|
403
|
+
issues.push("Screen is locked");
|
|
404
|
+
}
|
|
405
|
+
if (process.platform === "darwin" && nativeHelpers) {
|
|
406
|
+
if (!nativeHelpers.cgevent) {
|
|
407
|
+
readiness = readiness === "ready" ? "degraded" : readiness;
|
|
408
|
+
issues.push("Native CGEvent helper not found (input synthesis may crash on macOS Sequoia+)");
|
|
409
|
+
}
|
|
410
|
+
if (!nativeHelpers.ocr) {
|
|
411
|
+
readiness = readiness === "ready" ? "degraded" : readiness;
|
|
412
|
+
issues.push("Native OCR helper not found (OCR may fail on macOS Sequoia+)");
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
const clients = {};
|
|
416
|
+
for (const bin of ["claude", "codex", "opencode", "npx"]) {
|
|
417
|
+
try {
|
|
418
|
+
const path = execFileSync("which", [bin], { encoding: "utf-8", timeout: 2000 }).trim();
|
|
419
|
+
clients[bin] = path || "not found";
|
|
420
|
+
}
|
|
421
|
+
catch {
|
|
422
|
+
clients[bin] = "not found";
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
const recommendations = [];
|
|
426
|
+
if (readiness === "blocked") {
|
|
427
|
+
recommendations.push("Grant missing permissions in System Settings > Privacy & Security, then restart the MCP client.");
|
|
428
|
+
}
|
|
429
|
+
else if (readiness === "degraded") {
|
|
430
|
+
if (nativeHelpers && (!nativeHelpers.cgevent || !nativeHelpers.ocr)) {
|
|
431
|
+
recommendations.push("Run 'npm run build' to compile native Swift helpers.");
|
|
432
|
+
}
|
|
433
|
+
}
|
|
434
|
+
else {
|
|
435
|
+
recommendations.push("All checks passed. MCP client can proceed with automation.");
|
|
436
|
+
}
|
|
218
437
|
const report = {
|
|
219
|
-
|
|
438
|
+
readiness,
|
|
439
|
+
issues: issues.length > 0 ? issues : undefined,
|
|
440
|
+
recommendations,
|
|
220
441
|
platform: process.platform,
|
|
221
442
|
node: process.version,
|
|
222
443
|
permissions,
|
|
223
444
|
screenLocked,
|
|
445
|
+
nativeHelpers,
|
|
446
|
+
clients,
|
|
224
447
|
safety: {
|
|
225
448
|
urlBlocklist: true,
|
|
226
449
|
lockScreenGuard: process.platform === "darwin",
|
|
227
450
|
typedTextInjectionScan: true,
|
|
228
451
|
},
|
|
229
452
|
stdioCommand: "ucu-mcp",
|
|
230
|
-
clients: {
|
|
231
|
-
claudeCodeCli: "Run ucu-mcp as an MCP stdio server.",
|
|
232
|
-
claudeCodeDesktop: "Configure ucu-mcp as a local MCP stdio server.",
|
|
233
|
-
openCode: "Configure ucu-mcp as a local MCP stdio server.",
|
|
234
|
-
},
|
|
235
453
|
};
|
|
236
454
|
return { content: [{ type: "text", text: JSON.stringify(report, null, 2) }] };
|
|
237
455
|
});
|
|
238
456
|
registry.register("doctor");
|
|
239
|
-
|
|
457
|
+
registerTool("wait", "Wait for a specified duration", { ms: z.number().describe("Duration in milliseconds") }, async (params) => {
|
|
240
458
|
await new Promise(r => setTimeout(r, params.ms));
|
|
241
459
|
return { content: [{ type: "text", text: JSON.stringify({ waited: params.ms }) }] };
|
|
242
460
|
});
|
|
243
461
|
registry.register("wait");
|
|
244
|
-
|
|
462
|
+
registerTool("wait_for_element", "Poll until an accessibility element matching the criteria reaches the desired state", {
|
|
245
463
|
text: z.string().optional().describe("Element text"), role: z.string().optional().describe("Element role"),
|
|
246
|
-
app: z.string().optional().describe("Target app"),
|
|
464
|
+
app: z.string().optional().describe("Target app"),
|
|
465
|
+
timeout: z.number().optional().describe("Timeout ms (default 5000)"),
|
|
466
|
+
timeoutMs: z.number().optional().describe("Alias for timeout"),
|
|
467
|
+
interval: z.number().optional().describe("Poll interval ms (default 500)"),
|
|
468
|
+
intervalMs: z.number().optional().describe("Alias for interval"),
|
|
469
|
+
until: z.enum(["appear", "disappear", "value_change"]).default("appear").describe("Wait condition: 'appear' (default) waits for a match, 'disappear' waits until no match, 'value_change' waits until first match's value changes"),
|
|
247
470
|
}, async (params) => {
|
|
248
|
-
const deadline = Date.now() + (params.timeout ?? 5000);
|
|
249
|
-
const interval = params.interval ?? 500;
|
|
471
|
+
const deadline = Date.now() + (params.timeout ?? params.timeoutMs ?? 5000);
|
|
472
|
+
const interval = params.interval ?? params.intervalMs ?? 500;
|
|
473
|
+
const until = params.until ?? "appear";
|
|
474
|
+
const effectiveApp = params.app || getActiveTarget()?.appName;
|
|
475
|
+
const query = { text: params.text, role: params.role, app: effectiveApp, maxResults: 1 };
|
|
476
|
+
const { granted } = await checkPermission("accessibility");
|
|
477
|
+
if (!granted)
|
|
478
|
+
throw new PermissionError("accessibility", process.platform);
|
|
479
|
+
let initialValue;
|
|
250
480
|
while (Date.now() < deadline) {
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
481
|
+
const response = await getPlatform().findElement(query);
|
|
482
|
+
const matched = response.results[0];
|
|
483
|
+
if (until === "appear") {
|
|
484
|
+
if (matched)
|
|
485
|
+
return { content: [{ type: "text", text: JSON.stringify({ found: true, element: matched }, null, 2) }] };
|
|
486
|
+
}
|
|
487
|
+
else if (until === "disappear") {
|
|
488
|
+
if (!matched)
|
|
489
|
+
return { content: [{ type: "text", text: JSON.stringify({ found: true, reason: "disappeared" }, null, 2) }] };
|
|
490
|
+
}
|
|
491
|
+
else {
|
|
492
|
+
// value_change: capture the initial value of the first match, then wait for it to differ
|
|
493
|
+
if (matched) {
|
|
494
|
+
if (initialValue === undefined) {
|
|
495
|
+
initialValue = matched.value;
|
|
496
|
+
}
|
|
497
|
+
else if (matched.value !== initialValue) {
|
|
498
|
+
return { content: [{ type: "text", text: JSON.stringify({ found: true, oldValue: initialValue, newValue: matched.value }, null, 2) }] };
|
|
499
|
+
}
|
|
500
|
+
}
|
|
255
501
|
}
|
|
256
|
-
catch { /* retry */ }
|
|
257
502
|
await new Promise(r => setTimeout(r, interval));
|
|
258
503
|
}
|
|
259
|
-
return { content: [{ type: "text", text: JSON.stringify({ found: false, reason: "timeout" }) }] };
|
|
504
|
+
return { content: [{ type: "text", text: JSON.stringify({ found: false, reason: "timeout" }, null, 2) }] };
|
|
260
505
|
});
|
|
261
506
|
registry.register("wait_for_element");
|
|
262
|
-
|
|
507
|
+
registerTool("get_cursor_position", "Get current cursor position", {}, async () => {
|
|
263
508
|
const pos = await withSafety({ action: "get_cursor_position", params: {}, execute: () => Promise.resolve(getPlatform().getCursorPosition()) });
|
|
264
509
|
return { content: [{ type: "text", text: JSON.stringify(pos, null, 2) }] };
|
|
265
510
|
});
|
|
266
511
|
registry.register("get_cursor_position");
|
|
267
|
-
|
|
512
|
+
registerTool("get_screen_size", "Get screen dimensions and scale factor", {
|
|
268
513
|
display: z.number().optional().describe("Display index"),
|
|
269
514
|
}, async (params) => {
|
|
270
515
|
return { content: [{ type: "text", text: JSON.stringify(getPlatform().getScreenSize(params.display), null, 2) }] };
|
|
271
516
|
});
|
|
272
517
|
registry.register("get_screen_size");
|
|
273
|
-
|
|
518
|
+
registerTool("ocr", "Perform OCR on screen region", {
|
|
274
519
|
display: z.number().optional().describe("Display index"),
|
|
275
520
|
region: z.object({ x: z.number(), y: z.number(), width: z.number(), height: z.number() }).optional().describe("Region to OCR"),
|
|
276
521
|
}, async (params) => {
|
|
@@ -278,46 +523,68 @@ export function registerTools(server) {
|
|
|
278
523
|
return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
|
|
279
524
|
});
|
|
280
525
|
registry.register("ocr");
|
|
281
|
-
|
|
526
|
+
registerTool("move", "Move cursor to coordinates", {
|
|
282
527
|
x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
|
|
283
528
|
windowId: z.string().optional().describe("If set, x/y are relative to this window"),
|
|
529
|
+
...captureAfterFields,
|
|
284
530
|
}, async (params) => {
|
|
285
531
|
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
286
532
|
await withSafety({ action: "move", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().move(pt.x, pt.y) });
|
|
287
|
-
return {
|
|
533
|
+
return actionResponse("move", { moved: true, x: pt.x, y: pt.y }, { x: pt.x, y: pt.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
288
534
|
});
|
|
289
535
|
registry.register("move");
|
|
290
|
-
|
|
536
|
+
registerTool("find_element", "Find accessibility elements by text, role, or app", {
|
|
291
537
|
text: z.string().optional().describe("Text to search"), role: z.string().optional().describe("AX role"), app: z.string().optional().describe("Target app"),
|
|
292
538
|
depth: z.number().optional().describe("AX tree depth"), includeBounds: z.boolean().default(true).describe("Include bounds"), maxResults: z.number().min(1).max(200).default(50).describe("Max results"),
|
|
539
|
+
textMode: z.enum(["contains", "exact", "regex"]).default("contains").describe("Text matching mode: contains (default), exact, or regex"),
|
|
540
|
+
visibleOnly: z.boolean().default(false).describe("Only return elements with valid on-screen bounds"),
|
|
541
|
+
value: z.string().optional().describe("Filter by AX element value (respects textMode)"),
|
|
542
|
+
index: z.number().int().nonnegative().optional().describe("Return only the Nth match (0-based) after all other filtering and sorting"),
|
|
543
|
+
near: z.object({ x: z.number(), y: z.number() }).optional().describe("Sort results by ascending distance to this point and return closest first"),
|
|
293
544
|
}, async (params) => {
|
|
294
|
-
const
|
|
295
|
-
|
|
296
|
-
|
|
545
|
+
const effectiveApp = params.app || getActiveTarget()?.appName;
|
|
546
|
+
const response = await withSafety({ action: "find_element", params: {}, requiresAccessibility: true,
|
|
547
|
+
execute: () => getPlatform().findElement({ text: params.text, role: params.role, app: effectiveApp, depth: params.depth, includeBounds: params.includeBounds, maxResults: params.maxResults, textMode: params.textMode, visibleOnly: params.visibleOnly, value: params.value, index: params.index, near: params.near }) });
|
|
548
|
+
return { content: [{ type: "text", text: JSON.stringify({ results: response.results, metrics: response.metrics }, null, 2) }] };
|
|
297
549
|
});
|
|
298
550
|
registry.register("find_element");
|
|
299
|
-
|
|
551
|
+
registerTool("click_element", "Click an accessibility element by its ID", {
|
|
300
552
|
elementId: z.string().describe("AX element identifier"), app: z.string().optional().describe("Target app"), ...captureAfterFields,
|
|
301
553
|
}, async (params) => {
|
|
302
|
-
|
|
303
|
-
|
|
554
|
+
const effectiveApp = params.app || getActiveTarget()?.appName;
|
|
555
|
+
await withSafety({ action: "click_element", params: {}, requiresAccessibility: true, execute: () => getPlatform().clickElement(params.elementId, effectiveApp) });
|
|
556
|
+
return actionResponse("click_element", { clicked: true, elementId: params.elementId }, { elementId: params.elementId, app: effectiveApp }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
304
557
|
});
|
|
305
558
|
registry.register("click_element");
|
|
306
|
-
|
|
559
|
+
registerTool("set_value", "Set the value of an accessibility element", {
|
|
307
560
|
elementId: z.string().describe("AX element identifier"), value: z.string().describe("Value to set"), app: z.string().optional().describe("Target app"), ...captureAfterFields,
|
|
308
561
|
}, async (params) => {
|
|
309
|
-
|
|
310
|
-
|
|
562
|
+
const effectiveApp = params.app || getActiveTarget()?.appName;
|
|
563
|
+
await withSafety({ action: "set_value", params: { value: params.value }, requiresAccessibility: true, execute: () => getPlatform().setElementValue(params.elementId, params.value, effectiveApp) });
|
|
564
|
+
return actionResponse("set_value", { setValue: true, elementId: params.elementId }, { elementId: params.elementId, app: effectiveApp }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
311
565
|
});
|
|
312
566
|
registry.register("set_value");
|
|
313
|
-
|
|
567
|
+
registerTool("type_in_element", "Type text into an accessibility element, optionally clearing first", {
|
|
314
568
|
elementId: z.string().describe("AX element identifier"), text: z.string().describe("Text to type"),
|
|
315
569
|
app: z.string().optional().describe("Target app"), clearFirst: z.boolean().optional().describe("Clear existing text before typing"), ...captureAfterFields,
|
|
316
570
|
}, async (params) => {
|
|
317
|
-
|
|
318
|
-
|
|
571
|
+
const effectiveApp = params.app || getActiveTarget()?.appName;
|
|
572
|
+
await withSafety({ action: "type_in_element", params: { text: params.text }, requiresAccessibility: true, execute: () => getPlatform().typeInElement(params.elementId, params.text, effectiveApp, params.clearFirst) });
|
|
573
|
+
return actionResponse("type_in_element", { typed: true, elementId: params.elementId, charCount: params.text.length }, { elementId: params.elementId, app: effectiveApp }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
319
574
|
});
|
|
320
575
|
registry.register("type_in_element");
|
|
576
|
+
registerTool("clipboard_read", "Read the current contents of the system clipboard", {}, async () => {
|
|
577
|
+
const text = await withSafety({ action: "clipboard_read", params: {}, execute: () => getPlatform().readClipboard() });
|
|
578
|
+
return { content: [{ type: "text", text: JSON.stringify({ text }, null, 2) }] };
|
|
579
|
+
});
|
|
580
|
+
registry.register("clipboard_read");
|
|
581
|
+
registerTool("clipboard_write", "Write text to the system clipboard (text injection patterns are blocked)", {
|
|
582
|
+
text: z.string().describe("Text to place on the clipboard"),
|
|
583
|
+
}, async (params) => {
|
|
584
|
+
await withSafety({ action: "clipboard_write", params: { text: params.text }, execute: () => getPlatform().writeClipboard(params.text) });
|
|
585
|
+
return { content: [{ type: "text", text: JSON.stringify({ written: true }, null, 2) }] };
|
|
586
|
+
});
|
|
587
|
+
registry.register("clipboard_write");
|
|
321
588
|
log.info("Registered tools", { count: registry.tools.length, tools: registry.tools.join(", ") });
|
|
322
589
|
}
|
|
323
590
|
export class ToolRegistry {
|