ucu-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +55 -0
- package/README.md +393 -0
- package/dist/bin/ucu-mcp.d.ts +2 -0
- package/dist/bin/ucu-mcp.js +47 -0
- package/dist/src/index.d.ts +7 -0
- package/dist/src/index.js +6 -0
- package/dist/src/mcp/server.d.ts +1 -0
- package/dist/src/mcp/server.js +26 -0
- package/dist/src/mcp/tools.d.ts +17 -0
- package/dist/src/mcp/tools.js +340 -0
- package/dist/src/mcp/transport.d.ts +2 -0
- package/dist/src/mcp/transport.js +4 -0
- package/dist/src/platform/base.d.ts +127 -0
- package/dist/src/platform/base.js +1 -0
- package/dist/src/platform/linux.d.ts +22 -0
- package/dist/src/platform/linux.js +62 -0
- package/dist/src/platform/macos.d.ts +39 -0
- package/dist/src/platform/macos.js +1478 -0
- package/dist/src/platform/windows.d.ts +18 -0
- package/dist/src/platform/windows.js +48 -0
- package/dist/src/safety/guard.d.ts +50 -0
- package/dist/src/safety/guard.js +220 -0
- package/dist/src/safety/permissions.d.ts +17 -0
- package/dist/src/safety/permissions.js +184 -0
- package/dist/src/util/errors.d.ts +64 -0
- package/dist/src/util/errors.js +109 -0
- package/dist/src/util/logger.d.ts +41 -0
- package/dist/src/util/logger.js +92 -0
- package/dist/src/util/retry.d.ts +30 -0
- package/dist/src/util/retry.js +53 -0
- package/dist/src/utils/input.d.ts +23 -0
- package/dist/src/utils/input.js +425 -0
- package/dist/src/utils/screenshot.d.ts +20 -0
- package/dist/src/utils/screenshot.js +157 -0
- package/package.json +50 -0
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool registry for UCU-MCP.
|
|
3
|
+
*
|
|
4
|
+
* Registers 22 MCP tools on the server and dispatches each call through
|
|
5
|
+
* a shared safety/permission/retry pipeline (`withSafety`).
|
|
6
|
+
*/
|
|
7
|
+
import { z } from "zod";
|
|
8
|
+
import { MacOSPlatform } from "../platform/macos.js";
|
|
9
|
+
import { SafetyGuard } from "../safety/guard.js";
|
|
10
|
+
import { checkPermission } from "../safety/permissions.js";
|
|
11
|
+
import { retry } from "../util/retry.js";
|
|
12
|
+
import { createLogger } from "../util/logger.js";
|
|
13
|
+
import { SafetyError, PermissionError, UnsupportedParameterError } from "../util/errors.js";
|
|
14
|
+
const log = createLogger("tools");
|
|
15
|
+
let _platform;
|
|
16
|
+
function getPlatform() {
|
|
17
|
+
if (!_platform) {
|
|
18
|
+
_platform = process.platform === "darwin" ? new MacOSPlatform() : undefined;
|
|
19
|
+
}
|
|
20
|
+
return _platform;
|
|
21
|
+
}
|
|
22
|
+
// Guard instance consulted before every action runs.
const safety = new SafetyGuard();
// User activity monitor — pauses automation when user moves the cursor.
// `lastCursorPos` is the most recent position seen by the poller and
// `userActivityInterval` is the polling timer handle (undefined while stopped).
let lastCursorPos = { x: 0, y: 0 };
let userActivityInterval;
// Optional post-action screenshot parameters. Tools that can attach a capture
// to their response spread these fields into their input schema.
const captureAfterFields = {
    captureAfter: z
        .boolean()
        .default(false)
        .describe("Take a screenshot after the action completes and include it in the response"),
    captureMaxWidth: z
        .number()
        .default(1280)
        .describe("Maximum width for the post-action screenshot"),
    captureFormat: z
        .enum(["png", "jpeg"])
        .default("jpeg")
        .describe("Format for the post-action screenshot"),
};
|
|
31
|
+
async function resolvePoint(x, y, windowId) {
|
|
32
|
+
if (!windowId)
|
|
33
|
+
return { x, y };
|
|
34
|
+
try {
|
|
35
|
+
const win = (await getPlatform().listWindows()).find(w => w.id === windowId);
|
|
36
|
+
if (!win)
|
|
37
|
+
return { x, y };
|
|
38
|
+
return { x: win.bounds.x + x, y: win.bounds.y + y };
|
|
39
|
+
}
|
|
40
|
+
catch {
|
|
41
|
+
return { x, y };
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
async function withSafety(sa) {
|
|
45
|
+
const platform = getPlatform();
|
|
46
|
+
if (platform.isScreenLocked?.())
|
|
47
|
+
throw new SafetyError("Screen is locked");
|
|
48
|
+
const check = safety.checkAction(sa.action, sa.params);
|
|
49
|
+
if (!check.allowed)
|
|
50
|
+
throw new SafetyError(check.reason ?? "Action blocked by safety guard");
|
|
51
|
+
if (sa.requiresAccessibility) {
|
|
52
|
+
const { granted } = await checkPermission("accessibility");
|
|
53
|
+
if (!granted)
|
|
54
|
+
throw new PermissionError("accessibility", process.platform);
|
|
55
|
+
}
|
|
56
|
+
if (sa.requiresScreenRecording) {
|
|
57
|
+
const { granted } = await checkPermission("screenRecording");
|
|
58
|
+
if (!granted)
|
|
59
|
+
throw new PermissionError("screenRecording", process.platform);
|
|
60
|
+
}
|
|
61
|
+
if (sa.dryRun)
|
|
62
|
+
return `[DRY-RUN] ${await sa.dryRun()}`;
|
|
63
|
+
const shouldManageFocus = sa.requiresAccessibility && !["screenshot", "list_windows", "list_apps", "get_window_state", "get_cursor_position", "get_screen_size", "ocr", "doctor", "wait", "wait_for_element", "find_element", "focus_app"].includes(sa.action);
|
|
64
|
+
if (shouldManageFocus)
|
|
65
|
+
await platform.saveFocus?.();
|
|
66
|
+
try {
|
|
67
|
+
return await retry(() => sa.execute());
|
|
68
|
+
}
|
|
69
|
+
finally {
|
|
70
|
+
if (shouldManageFocus)
|
|
71
|
+
await platform.restoreFocus?.();
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
async function appendCaptureAfter(result, captureAfter) {
|
|
75
|
+
if (!captureAfter)
|
|
76
|
+
return result;
|
|
77
|
+
try {
|
|
78
|
+
const buf = await getPlatform().screenshot();
|
|
79
|
+
return { actionResult: result, screenshot: { type: "image", data: buf.toString("base64"), mimeType: "image/png" } };
|
|
80
|
+
}
|
|
81
|
+
catch {
|
|
82
|
+
return result;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
export function startUserActivityMonitor() {
|
|
86
|
+
if (userActivityInterval)
|
|
87
|
+
return;
|
|
88
|
+
userActivityInterval = setInterval(() => {
|
|
89
|
+
try {
|
|
90
|
+
const pos = getPlatform().getCursorPosition();
|
|
91
|
+
if (pos.x !== lastCursorPos.x || pos.y !== lastCursorPos.y) {
|
|
92
|
+
safety.recordUserActivity();
|
|
93
|
+
lastCursorPos = pos;
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
catch { /* can't check cursor */ }
|
|
97
|
+
}, 250);
|
|
98
|
+
}
|
|
99
|
+
function stopUserActivityMonitor() {
|
|
100
|
+
if (userActivityInterval) {
|
|
101
|
+
clearInterval(userActivityInterval);
|
|
102
|
+
userActivityInterval = undefined;
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
export function registerTools(server) {
|
|
106
|
+
const registry = ToolRegistry.instance;
|
|
107
|
+
server.tool("screenshot", "Capture a screenshot of the entire screen or a region", {
|
|
108
|
+
display: z.number().optional().describe("Display index (default 0)"),
|
|
109
|
+
region: z.object({ x: z.number(), y: z.number(), width: z.number(), height: z.number() }).optional().describe("Region to capture"),
|
|
110
|
+
format: z.enum(["png", "jpeg"]).default("png").describe("Image format"),
|
|
111
|
+
maxWidth: z.number().default(1280).describe("Maximum output width in pixels. Aspect ratio is preserved."),
|
|
112
|
+
}, async (params) => {
|
|
113
|
+
const buf = await withSafety({ action: "screenshot", params: {}, requiresScreenRecording: true, execute: () => getPlatform().screenshot(params.display, params.region, { format: params.format }) });
|
|
114
|
+
return { content: [{ type: "image", data: buf.toString("base64"), mimeType: `image/${params.format}` }] };
|
|
115
|
+
});
|
|
116
|
+
registry.register("screenshot");
|
|
117
|
+
server.tool("list_windows", "List all visible windows on screen", {
|
|
118
|
+
includeMinimized: z.boolean().optional().describe("Include minimized windows"),
|
|
119
|
+
}, async (params) => {
|
|
120
|
+
const windows = await withSafety({ action: "list_windows", params: {}, requiresAccessibility: true, execute: () => getPlatform().listWindows(params.includeMinimized) });
|
|
121
|
+
return { content: [{ type: "text", text: JSON.stringify(windows, null, 2) }] };
|
|
122
|
+
});
|
|
123
|
+
registry.register("list_windows");
|
|
124
|
+
server.tool("list_apps", "List all running applications", {}, async () => {
|
|
125
|
+
const apps = await withSafety({ action: "list_apps", params: {}, requiresAccessibility: true, execute: async () => getPlatform().listApps() });
|
|
126
|
+
return { content: [{ type: "text", text: JSON.stringify(apps, null, 2) }] };
|
|
127
|
+
});
|
|
128
|
+
registry.register("list_apps");
|
|
129
|
+
server.tool("focus_app", "Bring an application to the foreground", {
|
|
130
|
+
app: z.string().describe("Application name to focus"),
|
|
131
|
+
}, async (params) => {
|
|
132
|
+
const target = await withSafety({ action: "focus_app", params: {}, requiresAccessibility: true, execute: () => getPlatform().focusApp(params.app) });
|
|
133
|
+
return { content: [{ type: "text", text: JSON.stringify(target, null, 2) }] };
|
|
134
|
+
});
|
|
135
|
+
registry.register("focus_app");
|
|
136
|
+
server.tool("get_window_state", "Get detailed state of a window including accessibility tree", {
|
|
137
|
+
windowId: z.string().optional().describe("Window ID"), depth: z.number().optional().describe("AX tree depth"), includeBounds: z.boolean().optional().describe("Include element bounds"),
|
|
138
|
+
}, async (params) => {
|
|
139
|
+
const state = await withSafety({ action: "get_window_state", params: {}, requiresAccessibility: true, execute: () => getPlatform().getWindowState(params.windowId, params.depth, params.includeBounds) });
|
|
140
|
+
return { content: [{ type: "text", text: JSON.stringify(state, null, 2) }] };
|
|
141
|
+
});
|
|
142
|
+
registry.register("get_window_state");
|
|
143
|
+
server.tool("click", "Click at screen coordinates", {
|
|
144
|
+
x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
|
|
145
|
+
button: z.enum(["left", "right", "middle"]).optional().describe("Mouse button"),
|
|
146
|
+
windowId: z.string().optional().describe("If set, x/y are relative to this window"),
|
|
147
|
+
...captureAfterFields,
|
|
148
|
+
}, async (params) => {
|
|
149
|
+
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
150
|
+
await withSafety({ action: "click", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().click(pt.x, pt.y, params.button) });
|
|
151
|
+
return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ clicked: true, x: pt.x, y: pt.y }, params.captureAfter), null, 2) }] };
|
|
152
|
+
});
|
|
153
|
+
registry.register("click");
|
|
154
|
+
server.tool("double_click", "Double-click at screen coordinates", {
|
|
155
|
+
x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
|
|
156
|
+
button: z.enum(["left", "right", "middle"]).optional().describe("Mouse button"),
|
|
157
|
+
windowId: z.string().optional().describe("If set, x/y are relative to this window"),
|
|
158
|
+
...captureAfterFields,
|
|
159
|
+
}, async (params) => {
|
|
160
|
+
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
161
|
+
await withSafety({ action: "click", params: { x: pt.x, y: pt.y, doubleClick: true }, requiresAccessibility: true, execute: () => getPlatform().click(pt.x, pt.y, params.button, true) });
|
|
162
|
+
return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ doubleClicked: true, x: pt.x, y: pt.y }, params.captureAfter), null, 2) }] };
|
|
163
|
+
});
|
|
164
|
+
registry.register("double_click");
|
|
165
|
+
server.tool("type_text", "Type text at the current cursor position", {
|
|
166
|
+
text: z.string().describe("Text to type"), delay: z.number().optional().describe("Delay between keystrokes in ms"),
|
|
167
|
+
windowId: z.string().optional().describe("UNSUPPORTED: windowId-targeted keyboard typing is not implemented"),
|
|
168
|
+
...captureAfterFields,
|
|
169
|
+
}, async (params) => {
|
|
170
|
+
if (params.windowId)
|
|
171
|
+
throw new UnsupportedParameterError("windowId-targeted keyboard typing is not implemented");
|
|
172
|
+
await withSafety({ action: "type_text", params: { text: params.text }, requiresAccessibility: true, execute: () => getPlatform().type(params.text, params.delay) });
|
|
173
|
+
return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ typed: true, charCount: params.text.length }, params.captureAfter), null, 2) }] };
|
|
174
|
+
});
|
|
175
|
+
registry.register("type_text");
|
|
176
|
+
server.tool("press_key", "Press a keyboard shortcut", {
|
|
177
|
+
keys: z.array(z.string()).optional().describe("Keys to press simultaneously"),
|
|
178
|
+
key: z.string().optional().describe("Single key to press (alias for keys)"),
|
|
179
|
+
windowId: z.string().optional().describe("UNSUPPORTED: windowId-targeted key events are not implemented"),
|
|
180
|
+
...captureAfterFields,
|
|
181
|
+
}, async (params) => {
|
|
182
|
+
if (params.windowId)
|
|
183
|
+
throw new UnsupportedParameterError("windowId-targeted key events are not implemented");
|
|
184
|
+
const keys = params.keys ?? (params.key ? [params.key] : []);
|
|
185
|
+
if (keys.length === 0)
|
|
186
|
+
throw new Error("press_key requires at least one key");
|
|
187
|
+
await withSafety({ action: "press_key", params: { keys }, requiresAccessibility: true, execute: () => getPlatform().key(keys) });
|
|
188
|
+
return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ pressed: true, keys: params.keys }, params.captureAfter), null, 2) }] };
|
|
189
|
+
});
|
|
190
|
+
registry.register("press_key");
|
|
191
|
+
server.tool("scroll", "Scroll at coordinates", {
|
|
192
|
+
x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
|
|
193
|
+
deltaX: z.number().describe("Horizontal scroll"), deltaY: z.number().describe("Vertical scroll (negative = up)"),
|
|
194
|
+
windowId: z.string().optional().describe("If set, x/y are relative to this window"),
|
|
195
|
+
...captureAfterFields,
|
|
196
|
+
}, async (params) => {
|
|
197
|
+
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
198
|
+
await withSafety({ action: "scroll", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().scroll(pt.x, pt.y, params.deltaX, params.deltaY) });
|
|
199
|
+
return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ scrolled: true, x: pt.x, y: pt.y }, params.captureAfter), null, 2) }] };
|
|
200
|
+
});
|
|
201
|
+
registry.register("scroll");
|
|
202
|
+
server.tool("drag", "Drag from one point to another", {
|
|
203
|
+
startX: z.number().describe("Start X"), startY: z.number().describe("Start Y"),
|
|
204
|
+
endX: z.number().describe("End X"), endY: z.number().describe("End Y"),
|
|
205
|
+
button: z.enum(["left", "right", "middle"]).optional().describe("Mouse button"),
|
|
206
|
+
duration: z.number().optional().describe("Drag duration in ms"),
|
|
207
|
+
...captureAfterFields,
|
|
208
|
+
}, async (params) => {
|
|
209
|
+
await withSafety({ action: "drag", params: {}, requiresAccessibility: true, execute: () => getPlatform().drag(params.startX, params.startY, params.endX, params.endY, params.button, params.duration) });
|
|
210
|
+
return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ dragged: true }, params.captureAfter), null, 2) }] };
|
|
211
|
+
});
|
|
212
|
+
registry.register("drag");
|
|
213
|
+
server.tool("doctor", "Check system permissions and diagnose common issues", {}, async () => {
|
|
214
|
+
const { checkPermissions } = await import("../safety/permissions.js");
|
|
215
|
+
const { MacOSPlatform: MacPlat } = await import("../platform/macos.js");
|
|
216
|
+
const permissions = await checkPermissions();
|
|
217
|
+
const screenLocked = process.platform === "darwin" ? new MacPlat().isScreenLocked?.() ?? false : false;
|
|
218
|
+
const report = {
|
|
219
|
+
ok: permissions.granted && !screenLocked,
|
|
220
|
+
platform: process.platform,
|
|
221
|
+
node: process.version,
|
|
222
|
+
permissions,
|
|
223
|
+
screenLocked,
|
|
224
|
+
safety: {
|
|
225
|
+
urlBlocklist: true,
|
|
226
|
+
lockScreenGuard: process.platform === "darwin",
|
|
227
|
+
typedTextInjectionScan: true,
|
|
228
|
+
},
|
|
229
|
+
stdioCommand: "ucu-mcp",
|
|
230
|
+
clients: {
|
|
231
|
+
claudeCodeCli: "Run ucu-mcp as an MCP stdio server.",
|
|
232
|
+
claudeCodeDesktop: "Configure ucu-mcp as a local MCP stdio server.",
|
|
233
|
+
openCode: "Configure ucu-mcp as a local MCP stdio server.",
|
|
234
|
+
},
|
|
235
|
+
};
|
|
236
|
+
return { content: [{ type: "text", text: JSON.stringify(report, null, 2) }] };
|
|
237
|
+
});
|
|
238
|
+
registry.register("doctor");
|
|
239
|
+
server.tool("wait", "Wait for a specified duration", { ms: z.number().describe("Duration in milliseconds") }, async (params) => {
|
|
240
|
+
await new Promise(r => setTimeout(r, params.ms));
|
|
241
|
+
return { content: [{ type: "text", text: JSON.stringify({ waited: params.ms }) }] };
|
|
242
|
+
});
|
|
243
|
+
registry.register("wait");
|
|
244
|
+
server.tool("wait_for_element", "Poll until an accessibility element matching the criteria appears", {
|
|
245
|
+
text: z.string().optional().describe("Element text"), role: z.string().optional().describe("Element role"),
|
|
246
|
+
app: z.string().optional().describe("Target app"), timeout: z.number().optional().describe("Timeout ms (default 5000)"), interval: z.number().optional().describe("Poll interval ms (default 500)"),
|
|
247
|
+
}, async (params) => {
|
|
248
|
+
const deadline = Date.now() + (params.timeout ?? 5000);
|
|
249
|
+
const interval = params.interval ?? 500;
|
|
250
|
+
while (Date.now() < deadline) {
|
|
251
|
+
try {
|
|
252
|
+
const results = await getPlatform().findElement({ text: params.text, role: params.role, app: params.app, maxResults: 1 });
|
|
253
|
+
if (results.length > 0)
|
|
254
|
+
return { content: [{ type: "text", text: JSON.stringify({ found: true, element: results[0] }, null, 2) }] };
|
|
255
|
+
}
|
|
256
|
+
catch { /* retry */ }
|
|
257
|
+
await new Promise(r => setTimeout(r, interval));
|
|
258
|
+
}
|
|
259
|
+
return { content: [{ type: "text", text: JSON.stringify({ found: false, reason: "timeout" }) }] };
|
|
260
|
+
});
|
|
261
|
+
registry.register("wait_for_element");
|
|
262
|
+
server.tool("get_cursor_position", "Get current cursor position", {}, async () => {
|
|
263
|
+
const pos = await withSafety({ action: "get_cursor_position", params: {}, execute: () => Promise.resolve(getPlatform().getCursorPosition()) });
|
|
264
|
+
return { content: [{ type: "text", text: JSON.stringify(pos, null, 2) }] };
|
|
265
|
+
});
|
|
266
|
+
registry.register("get_cursor_position");
|
|
267
|
+
server.tool("get_screen_size", "Get screen dimensions and scale factor", {
|
|
268
|
+
display: z.number().optional().describe("Display index"),
|
|
269
|
+
}, async (params) => {
|
|
270
|
+
return { content: [{ type: "text", text: JSON.stringify(getPlatform().getScreenSize(params.display), null, 2) }] };
|
|
271
|
+
});
|
|
272
|
+
registry.register("get_screen_size");
|
|
273
|
+
server.tool("ocr", "Perform OCR on screen region", {
|
|
274
|
+
display: z.number().optional().describe("Display index"),
|
|
275
|
+
region: z.object({ x: z.number(), y: z.number(), width: z.number(), height: z.number() }).optional().describe("Region to OCR"),
|
|
276
|
+
}, async (params) => {
|
|
277
|
+
const result = await withSafety({ action: "ocr", params: {}, requiresScreenRecording: true, execute: () => getPlatform().ocr(params.display, params.region) });
|
|
278
|
+
return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
|
|
279
|
+
});
|
|
280
|
+
registry.register("ocr");
|
|
281
|
+
server.tool("move", "Move cursor to coordinates", {
|
|
282
|
+
x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
|
|
283
|
+
windowId: z.string().optional().describe("If set, x/y are relative to this window"),
|
|
284
|
+
}, async (params) => {
|
|
285
|
+
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
286
|
+
await withSafety({ action: "move", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().move(pt.x, pt.y) });
|
|
287
|
+
return { content: [{ type: "text", text: JSON.stringify({ moved: true, x: pt.x, y: pt.y }, null, 2) }] };
|
|
288
|
+
});
|
|
289
|
+
registry.register("move");
|
|
290
|
+
server.tool("find_element", "Find accessibility elements by text, role, or app", {
|
|
291
|
+
text: z.string().optional().describe("Text to search"), role: z.string().optional().describe("AX role"), app: z.string().optional().describe("Target app"),
|
|
292
|
+
depth: z.number().optional().describe("AX tree depth"), includeBounds: z.boolean().default(true).describe("Include bounds"), maxResults: z.number().min(1).max(200).default(50).describe("Max results"),
|
|
293
|
+
}, async (params) => {
|
|
294
|
+
const results = await withSafety({ action: "find_element", params: {}, requiresAccessibility: true,
|
|
295
|
+
execute: () => getPlatform().findElement({ text: params.text, role: params.role, app: params.app, depth: params.depth, includeBounds: params.includeBounds, maxResults: params.maxResults }) });
|
|
296
|
+
return { content: [{ type: "text", text: JSON.stringify(results, null, 2) }] };
|
|
297
|
+
});
|
|
298
|
+
registry.register("find_element");
|
|
299
|
+
server.tool("click_element", "Click an accessibility element by its ID", {
|
|
300
|
+
elementId: z.string().describe("AX element identifier"), app: z.string().optional().describe("Target app"), ...captureAfterFields,
|
|
301
|
+
}, async (params) => {
|
|
302
|
+
await withSafety({ action: "click_element", params: {}, requiresAccessibility: true, execute: () => getPlatform().clickElement(params.elementId, params.app) });
|
|
303
|
+
return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ clicked: true, elementId: params.elementId }, params.captureAfter), null, 2) }] };
|
|
304
|
+
});
|
|
305
|
+
registry.register("click_element");
|
|
306
|
+
server.tool("set_value", "Set the value of an accessibility element", {
|
|
307
|
+
elementId: z.string().describe("AX element identifier"), value: z.string().describe("Value to set"), app: z.string().optional().describe("Target app"), ...captureAfterFields,
|
|
308
|
+
}, async (params) => {
|
|
309
|
+
await withSafety({ action: "set_value", params: { value: params.value }, requiresAccessibility: true, execute: () => getPlatform().setElementValue(params.elementId, params.value, params.app) });
|
|
310
|
+
return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ setValue: true, elementId: params.elementId }, params.captureAfter), null, 2) }] };
|
|
311
|
+
});
|
|
312
|
+
registry.register("set_value");
|
|
313
|
+
server.tool("type_in_element", "Type text into an accessibility element, optionally clearing first", {
|
|
314
|
+
elementId: z.string().describe("AX element identifier"), text: z.string().describe("Text to type"),
|
|
315
|
+
app: z.string().optional().describe("Target app"), clearFirst: z.boolean().optional().describe("Clear existing text before typing"), ...captureAfterFields,
|
|
316
|
+
}, async (params) => {
|
|
317
|
+
await withSafety({ action: "type_in_element", params: { text: params.text }, requiresAccessibility: true, execute: () => getPlatform().typeInElement(params.elementId, params.text, params.app, params.clearFirst) });
|
|
318
|
+
return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ typed: true, elementId: params.elementId, charCount: params.text.length }, params.captureAfter), null, 2) }] };
|
|
319
|
+
});
|
|
320
|
+
registry.register("type_in_element");
|
|
321
|
+
log.info("Registered tools", { count: registry.tools.length, tools: registry.tools.join(", ") });
|
|
322
|
+
}
|
|
323
|
+
export class ToolRegistry {
|
|
324
|
+
static _instance;
|
|
325
|
+
tools = [];
|
|
326
|
+
_handlers = new Map();
|
|
327
|
+
static get instance() { if (!ToolRegistry._instance)
|
|
328
|
+
ToolRegistry._instance = new ToolRegistry(); return ToolRegistry._instance; }
|
|
329
|
+
register(name, handler) {
|
|
330
|
+
this.tools.push(name);
|
|
331
|
+
if (handler)
|
|
332
|
+
this._handlers.set(name, handler);
|
|
333
|
+
}
|
|
334
|
+
async dispatch(name, args) {
|
|
335
|
+
const handler = this._handlers.get(name);
|
|
336
|
+
if (!handler)
|
|
337
|
+
return { isError: true, content: [{ type: "text", text: `Unknown tool: ${name}` }] };
|
|
338
|
+
return handler(args);
|
|
339
|
+
}
|
|
340
|
+
}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/** Rectangle given by its origin and size; used for capture/OCR regions and bounds. */
export interface ScreenRegion {
    x: number;
    y: number;
    width: number;
    height: number;
}

/** Result of `Platform.getScreenSize`. */
export interface ScreenSize {
    width: number;
    height: number;
    scaleFactor?: number;
}

/** Optional settings accepted by the screenshot methods. */
export interface ScreenshotOptions {
    format?: "png" | "jpeg";
    maxWidth?: number;
}

/** Result of `Platform.getCursorPosition`. */
export interface CursorPosition {
    x: number;
    y: number;
}

/** One entry of the list returned by `Platform.listWindows`. */
export interface WindowInfo {
    id: string;
    title: string;
    processName: string;
    pid: number;
    bounds: ScreenRegion;
    isMinimized: boolean;
    isOnScreen: boolean;
}

/** One entry of the list returned by `Platform.listApps`. */
export interface AppInfo {
    name: string;
    pid: number;
    isFrontmost: boolean;
    windowCount: number;
}

/** Result of `Platform.focusApp`. */
export interface AppTarget {
    appName: string;
    pid: number;
    windowId?: string;
    title?: string;
}

/** Result of `Platform.getActiveBrowserContext`. */
export interface BrowserContext {
    appName: string;
    url?: string;
    title?: string;
}

/** Node of an element tree; `children` nests further nodes of the same shape. */
export interface ElementInfo {
    role: string;
    name: string;
    description?: string;
    value?: string;
    bounds?: ScreenRegion;
    children?: ElementInfo[];
    states: string[];
}

/** One recognised piece of text inside an `OcrResult`. */
export interface OcrElement {
    text: string;
    x: number;
    y: number;
    width: number;
    height: number;
    confidence: number;
}

/** Result of `Platform.ocr`. */
export interface OcrResult {
    elements: OcrElement[];
    fullText: string;
}

/** Search criteria accepted by `Platform.findElement`; every field is optional. */
export interface FindElementOptions {
    text?: string;
    role?: string;
    app?: string;
    depth?: number;
    includeBounds?: boolean;
    maxResults?: number;
}

/** One match returned by `Platform.findElement`. */
export interface FindElementResult {
    id: string;
    role: string;
    name: string;
    value?: string;
    bounds?: ScreenRegion;
    description?: string;
}

/** Result of `Platform.getWindowState`. */
export interface WindowState {
    window: WindowInfo;
    focusedElement?: ElementInfo;
    tree?: ElementInfo;
}

/**
 * Contract implemented by each OS adapter. Members marked optional (`?`) may
 * be absent on an adapter, so callers must check before invoking them.
 */
export interface Platform {
    // Screen capture and geometry
    screenshot(display?: number, region?: ScreenRegion, options?: ScreenshotOptions): Promise<Buffer>;
    screenshotWindow?(windowId: string, options?: ScreenshotOptions): Promise<Buffer>;
    getScreenSize(display?: number): ScreenSize;
    // Applications and windows
    listApps?(): Promise<AppInfo[]>;
    focusApp?(app: string): Promise<AppTarget>;
    getActiveBrowserContext?(app?: string): Promise<BrowserContext | undefined>;
    listWindows(includeMinimized?: boolean): Promise<WindowInfo[]>;
    getWindowState(windowId?: string, depth?: number, includeBounds?: boolean): Promise<WindowState>;
    // Pointer
    click(x: number, y: number, button?: "left" | "right" | "middle", doubleClick?: boolean): Promise<void>;
    move(x: number, y: number): Promise<void>;
    drag(startX: number, startY: number, endX: number, endY: number, button?: "left" | "right" | "middle", duration?: number): Promise<void>;
    scroll(x: number, y: number, deltaX: number, deltaY: number): Promise<void>;
    getCursorPosition(): CursorPosition;
    // Text recognition and keyboard
    ocr(display?: number, region?: ScreenRegion): Promise<OcrResult>;
    type(text: string, delay?: number): Promise<void>;
    key(keys: string[]): Promise<void>;
    // Element-level operations
    findElement(options: FindElementOptions): Promise<FindElementResult[]>;
    clickElement(elementId: string, app?: string): Promise<void>;
    typeInElement(elementId: string, text: string, app?: string, clearFirst?: boolean): Promise<void>;
    setElementValue?(elementId: string, value: string, app?: string): Promise<void>;
    // Session state
    isScreenLocked?(): boolean;
    saveFocus?(): Promise<void>;
    restoreFocus?(): Promise<void>;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions, FindElementResult } from "./base.js";
|
|
2
|
+
/**
 * Linux platform adapter (AT-SPI2 + xdotool fallback).
 *
 * TODO: Implement with D-Bus AT-SPI2 bindings.
 *
 * Declares the required members of `Platform`; none of the optional ones
 * are declared here.
 */
export declare class LinuxPlatform implements Platform {
    // Screen capture and geometry
    screenshot(display?: number, region?: ScreenRegion): Promise<Buffer>;
    getScreenSize(display?: number): ScreenSize;
    // Windows
    listWindows(_includeMinimized?: boolean): Promise<WindowInfo[]>;
    getWindowState(_windowId?: string, _depth?: number, _includeBounds?: boolean): Promise<WindowState>;
    // Pointer
    click(x: number, y: number, button?: "left" | "right" | "middle", doubleClick?: boolean): Promise<void>;
    move(x: number, y: number): Promise<void>;
    drag(startX: number, startY: number, endX: number, endY: number, button?: "left" | "right" | "middle", duration?: number): Promise<void>;
    scroll(x: number, y: number, deltaX: number, deltaY: number): Promise<void>;
    getCursorPosition(): CursorPosition;
    // Keyboard and text recognition
    type(text: string, delay?: number): Promise<void>;
    key(keys: string[]): Promise<void>;
    ocr(_display?: number, _region?: ScreenRegion): Promise<OcrResult>;
    // Element-level operations
    findElement(_options: FindElementOptions): Promise<FindElementResult[]>;
    clickElement(_elementId: string, _app?: string): Promise<void>;
    typeInElement(_elementId: string, _text: string, _app?: string, _clearFirst?: boolean): Promise<void>;
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Linux platform adapter (AT-SPI2 + xdotool fallback)
|
|
3
|
+
* TODO: Implement with D-Bus AT-SPI2 bindings
|
|
4
|
+
*/
|
|
5
|
+
export class LinuxPlatform {
|
|
6
|
+
async screenshot(display, region) {
|
|
7
|
+
// TODO: Use scrot or grim (Wayland)
|
|
8
|
+
throw new Error("Linux adapter not yet implemented");
|
|
9
|
+
}
|
|
10
|
+
getScreenSize(display) {
|
|
11
|
+
// TODO: Use xrandr or xdpyinfo
|
|
12
|
+
throw new Error("Linux adapter not yet implemented");
|
|
13
|
+
}
|
|
14
|
+
async listWindows(_includeMinimized) {
|
|
15
|
+
throw new Error("Not implemented: Linux listWindows");
|
|
16
|
+
}
|
|
17
|
+
async getWindowState(_windowId, _depth, _includeBounds) {
|
|
18
|
+
// TODO: Implement using AT-SPI2 D-Bus bindings
|
|
19
|
+
throw new Error("Not implemented: Linux getWindowState");
|
|
20
|
+
}
|
|
21
|
+
async click(x, y, button = "left", doubleClick = false) {
|
|
22
|
+
// TODO: Use xdotool click
|
|
23
|
+
throw new Error("Linux adapter not yet implemented");
|
|
24
|
+
}
|
|
25
|
+
async move(x, y) {
|
|
26
|
+
// TODO: Use xdotool mousemove
|
|
27
|
+
throw new Error("Linux adapter not yet implemented");
|
|
28
|
+
}
|
|
29
|
+
async drag(startX, startY, endX, endY, button = "left", duration) {
|
|
30
|
+
// TODO: Use xdotool mousedown + mousemove + mouseup
|
|
31
|
+
throw new Error("Linux adapter not yet implemented");
|
|
32
|
+
}
|
|
33
|
+
async scroll(x, y, deltaX, deltaY) {
|
|
34
|
+
// TODO: Use xdotool mousewheel
|
|
35
|
+
throw new Error("Linux adapter not yet implemented");
|
|
36
|
+
}
|
|
37
|
+
getCursorPosition() {
|
|
38
|
+
// TODO: Use xdotool getmouselocation
|
|
39
|
+
throw new Error("Linux adapter not yet implemented");
|
|
40
|
+
}
|
|
41
|
+
async type(text, delay) {
|
|
42
|
+
// TODO: Use xdotool type
|
|
43
|
+
throw new Error("Linux adapter not yet implemented");
|
|
44
|
+
}
|
|
45
|
+
async key(keys) {
|
|
46
|
+
// TODO: Use xdotool key
|
|
47
|
+
throw new Error("Linux adapter not yet implemented");
|
|
48
|
+
}
|
|
49
|
+
async ocr(_display, _region) {
|
|
50
|
+
// TODO: Use tesseract or similar
|
|
51
|
+
throw new Error("Linux OCR not yet implemented");
|
|
52
|
+
}
|
|
53
|
+
async findElement(_options) {
|
|
54
|
+
throw new Error("Not implemented: Linux findElement");
|
|
55
|
+
}
|
|
56
|
+
async clickElement(_elementId, _app) {
|
|
57
|
+
throw new Error("Not implemented: Linux clickElement");
|
|
58
|
+
}
|
|
59
|
+
async typeInElement(_elementId, _text, _app, _clearFirst) {
|
|
60
|
+
throw new Error("Not implemented: Linux typeInElement");
|
|
61
|
+
}
|
|
62
|
+
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions, FindElementResult, AppInfo, AppTarget, BrowserContext, ScreenshotOptions } from "./base.js";
|
|
2
|
+
/**
 * Ambient declaration of the macOS implementation of the `Platform` contract.
 *
 * Only the type surface is declared here; runtime behaviour lives in the
 * accompanying JavaScript module. Private members are listed without types,
 * as is usual for emitted declarations.
 */
export declare class MacOSPlatform implements Platform {
    // ---- private state -------------------------------------------------
    private readonly elementCache;
    private readonly elementCacheTtlMs;
    private readonly elementCacheMaxSize;
    private activeTarget;
    private savedFocus;

    // ---- element-cache maintenance (private) ---------------------------
    /** Drops element-cache entries that have expired. */
    private evictExpiredCacheEntries;
    /** Evicts the oldest entries once the cache grows past its maximum size (LRU-style). */
    private evictOverflowCacheEntries;
    /** Reports whether a cached element descriptor has expired. */
    private isCacheEntryExpired;

    // ---- focus bookkeeping ---------------------------------------------
    /** Records the current frontmost app/window so it can be restored after an action. */
    saveFocus(): Promise<void>;
    /** Brings back the frontmost app/window recorded by a previous save. */
    restoreFocus(): Promise<void>;

    // ---- capture, geometry, OCR ----------------------------------------
    /** Resolves with image bytes. `_display` is underscore-prefixed, presumably unused - confirm in the implementation. */
    screenshot(_display?: number, region?: ScreenRegion, options?: ScreenshotOptions): Promise<Buffer>;
    /** Resolves with image bytes for the window identified by `windowId`. */
    screenshotWindow(windowId: string, options?: ScreenshotOptions): Promise<Buffer>;
    /** Synchronous: returns a `ScreenSize` directly. */
    getScreenSize(display?: number): ScreenSize;
    /** Synchronous: returns a boolean lock-state flag. */
    isScreenLocked(): boolean;
    /** Resolves with an `OcrResult`; both parameters are optional. */
    ocr(display?: number, region?: ScreenRegion): Promise<OcrResult>;

    // ---- applications and windows --------------------------------------
    /** Resolves with a list of `AppInfo`. */
    listApps(): Promise<AppInfo[]>;
    /** Resolves with the `AppTarget` for `app`. */
    focusApp(app: string): Promise<AppTarget>;
    /** Resolves with a `BrowserContext`, or `undefined` when none is available. */
    getActiveBrowserContext(app?: string): Promise<BrowserContext | undefined>;
    /** Resolves with a list of `WindowInfo`. `_includeMinimized` is underscore-prefixed, presumably unused - confirm. */
    listWindows(_includeMinimized?: boolean): Promise<WindowInfo[]>;
    /** Resolves with a `WindowState`; all parameters are optional. */
    getWindowState(windowId?: string, depth?: number, includeBounds?: boolean): Promise<WindowState>;

    // ---- pointer -------------------------------------------------------
    /** Click at (x, y); `button` and `doubleClick` are optional. */
    click(x: number, y: number, button?: "left" | "right" | "middle", doubleClick?: boolean): Promise<void>;
    /** Move to (x, y). */
    move(x: number, y: number): Promise<void>;
    /** Drag from (startX, startY) to (endX, endY); `button` and `duration` are optional. */
    drag(startX: number, startY: number, endX: number, endY: number, button?: "left" | "right" | "middle", duration?: number): Promise<void>;
    /** Scroll by (deltaX, deltaY) at (x, y). */
    scroll(x: number, y: number, deltaX: number, deltaY: number): Promise<void>;
    /** Synchronous: returns a `CursorPosition` directly. */
    getCursorPosition(): CursorPosition;

    // ---- keyboard ------------------------------------------------------
    /** Type `text`; `delay` is optional (unit not stated in this declaration). */
    type(text: string, delay?: number): Promise<void>;
    /** Send the given key names. */
    key(keys: string[]): Promise<void>;

    // ---- element-level actions -----------------------------------------
    /** Resolves with an array of `FindElementResult` for `options`. */
    findElement(options: FindElementOptions): Promise<FindElementResult[]>;
    /** Click the element identified by `elementId`; `app` is optional. */
    clickElement(elementId: string, app?: string): Promise<void>;
    /** Type `text` into the element identified by `elementId`; `app` and `clearFirst` are optional. */
    typeInElement(elementId: string, text: string, app?: string, clearFirst?: boolean): Promise<void>;
    /** Set the element identified by `elementId` to `value`; `app` is optional. */
    setElementValue(elementId: string, value: string, app?: string): Promise<void>;
}
|