ucu-mcp 0.3.9 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +67 -3
- package/dist/bin/ucu-mcp.js +1 -1
- package/dist/src/index.d.ts +2 -2
- package/dist/src/index.js +2 -2
- package/dist/src/mcp/server.js +1 -1
- package/dist/src/mcp/tools/app-tools.d.ts +2 -0
- package/dist/src/mcp/tools/app-tools.js +220 -0
- package/dist/src/mcp/tools/element-tools.d.ts +23 -0
- package/dist/src/mcp/tools/element-tools.js +59 -0
- package/dist/src/mcp/tools/helpers.d.ts +82 -0
- package/dist/src/mcp/tools/helpers.js +243 -0
- package/dist/src/mcp/tools/index.d.ts +19 -0
- package/dist/src/mcp/tools/index.js +54 -0
- package/dist/src/mcp/tools/input-tools.d.ts +2 -0
- package/dist/src/mcp/tools/input-tools.js +66 -0
- package/dist/src/mcp/tools/keyboard-tools.d.ts +2 -0
- package/dist/src/mcp/tools/keyboard-tools.js +35 -0
- package/dist/src/mcp/tools/screen-tools.d.ts +2 -0
- package/dist/src/mcp/tools/screen-tools.js +69 -0
- package/dist/src/mcp/tools.d.ts +9 -0
- package/dist/src/mcp/tools.js +96 -25
- package/dist/src/platform/base.d.ts +3 -0
- package/dist/src/platform/jxa-helpers.d.ts +11 -0
- package/dist/src/platform/jxa-helpers.js +206 -0
- package/dist/src/platform/macos/ax-tree.d.ts +4 -0
- package/dist/src/platform/macos/ax-tree.js +462 -0
- package/dist/src/platform/macos/base.d.ts +57 -0
- package/dist/src/platform/macos/base.js +92 -0
- package/dist/src/platform/macos/clipboard.d.ts +3 -0
- package/dist/src/platform/macos/clipboard.js +20 -0
- package/dist/src/platform/macos/element.d.ts +4 -0
- package/dist/src/platform/macos/element.js +212 -0
- package/dist/src/platform/macos/focus.d.ts +3 -0
- package/dist/src/platform/macos/focus.js +33 -0
- package/dist/src/platform/macos/helpers.d.ts +35 -0
- package/dist/src/platform/macos/helpers.js +54 -0
- package/dist/src/platform/macos/index.d.ts +2 -0
- package/dist/src/platform/macos/index.js +1 -0
- package/dist/src/platform/macos/input.d.ts +9 -0
- package/dist/src/platform/macos/input.js +62 -0
- package/dist/src/platform/macos/screen.d.ts +7 -0
- package/dist/src/platform/macos/screen.js +197 -0
- package/dist/src/platform/macos/window.d.ts +6 -0
- package/dist/src/platform/macos/window.js +251 -0
- package/dist/src/platform/macos.d.ts +1 -0
- package/dist/src/platform/macos.js +114 -583
- package/dist/src/safety/guard.js +1 -1
- package/dist/src/util/errors.d.ts +7 -2
- package/dist/src/util/errors.js +7 -3
- package/native/cgevent/cgevent-helper +0 -0
- package/native/ocr/ocr-helper +0 -0
- package/native/windowlist/windowlist-helper +0 -0
- package/package.json +1 -1
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { MacOSPlatform } from "../../platform/macos/index.js";
|
|
3
|
+
import { SafetyGuard, classifyAction } from "../../safety/guard.js";
|
|
4
|
+
import { checkPermission } from "../../safety/permissions.js";
|
|
5
|
+
import { retry } from "../../util/retry.js";
|
|
6
|
+
import { metrics } from "../../util/metrics.js";
|
|
7
|
+
import { SafetyError, PermissionError, UcuError, WindowNotFoundError } from "../../util/errors.js";
|
|
8
|
+
/** Lazily created platform backend, cached for the lifetime of the module. */
let _platform;
/**
 * Return the cached platform backend, creating it on first use.
 *
 * A `MacOSPlatform` is constructed only when `process.platform` is
 * `"darwin"`. On any other platform nothing is cached and the result is
 * `undefined`, so the check is repeated on every call.
 *
 * @returns {MacOSPlatform | undefined} The platform backend, if one exists for this OS.
 */
export function getPlatform() {
    if (_platform) {
        return _platform;
    }
    if (process.platform === "darwin") {
        _platform = new MacOSPlatform();
    }
    else {
        _platform = undefined;
    }
    return _platform;
}
|
|
15
|
+
// Module-level SafetyGuard consulted by withSafety() and notified by the user-activity monitor.
export const safety = new SafetyGuard();
|
|
16
|
+
/** Target context most recently stored through `setActiveTarget`; `undefined` until then. */
let activeTargetContext;
/**
 * Read the stored target context.
 *
 * @returns The value last passed to `setActiveTarget`, or `undefined` when none was stored.
 */
export function getActiveTarget() {
    return activeTargetContext;
}
/**
 * Replace the stored target context.
 *
 * @param target The new target context; passing `undefined` clears it.
 */
export function setActiveTarget(target) {
    activeTargetContext = target;
}
|
|
23
|
+
// Cursor position recorded by the user-activity monitor; starts at the origin.
let lastCursorPos = { x: 0, y: 0 };
// Timer handle of the running user-activity monitor; undefined while it is stopped.
let userActivityInterval;
|
|
25
|
+
/**
 * Zod schema fields that action tools spread into their input schema so a
 * caller can request a screenshot after the action.
 */
export const captureAfterFields = {
    // Defaults to false, so no screenshot is attached unless the caller asks for one.
    captureAfter: z.boolean().default(false).describe("Take a screenshot after the action completes and include it in the response"),
    // Defaults to 1280.
    captureMaxWidth: z.number().default(1280).describe("Maximum width for the post-action screenshot"),
    // Defaults to "jpeg".
    captureFormat: z.enum(["png", "jpeg"]).default("jpeg").describe("Format for the post-action screenshot"),
};
|
|
30
|
+
/**
 * Translate a point into screen coordinates.
 *
 * Without a `windowId` the point is returned unchanged. With one, the
 * window is looked up in the platform's window list and the point is
 * offset by that window's `bounds.x` / `bounds.y`.
 *
 * @param x X coordinate (window-relative when `windowId` is given).
 * @param y Y coordinate (window-relative when `windowId` is given).
 * @param windowId Optional id of the window the point is relative to.
 * @returns {Promise<{x: number, y: number}>} The resolved point.
 * @throws {WindowNotFoundError} When no listed window has the given id.
 */
export async function resolvePoint(x, y, windowId) {
    if (windowId) {
        const windows = await getPlatform().listWindows();
        const match = windows.find((candidate) => candidate.id === windowId);
        if (match) {
            return { x: match.bounds.x + x, y: match.bounds.y + y };
        }
        throw new WindowNotFoundError(windowId);
    }
    return { x, y };
}
|
|
38
|
+
/**
 * Collect the optional context (window title and browser URL) that is
 * passed along with an action's parameters to the safety check.
 *
 * Both lookups are best effort: a failure inside either one is swallowed
 * and the corresponding field is left `undefined`.
 *
 * @param windowId Optional window id; falls back to the stored active target's `windowId`.
 * @returns {Promise<{windowTitle: (string|undefined), url: (string|undefined)}>}
 */
export async function getSafetyContext(windowId) {
    const target = activeTargetContext;
    const lookupId = windowId ?? target?.windowId;
    let windowTitle;
    if (lookupId) {
        try {
            const listed = await getPlatform().listWindows();
            windowTitle = listed.find((candidate) => candidate.id === lookupId)?.title;
        }
        catch {
            // Best effort: the title is optional context.
        }
    }
    // Fall back to the title remembered on the active target.
    if (!windowTitle && target?.title) {
        windowTitle = target.title;
    }
    let url;
    const platform = getPlatform();
    if (platform.getActiveBrowserContext) {
        try {
            const browser = await platform.getActiveBrowserContext(target?.appName);
            url = browser?.url;
        }
        catch {
            // Best effort: the URL is optional context.
        }
    }
    return { windowTitle, url };
}
|
|
65
|
+
/**
 * Wrap a value as a text content item holding its pretty-printed JSON.
 *
 * @param value Any JSON-serialisable value.
 * @returns {{type: "text", text: string}} Content item with 2-space indented JSON.
 */
export function jsonText(value) {
    const text = JSON.stringify(value, null, 2);
    return { type: "text", text };
}
|
|
68
|
+
/** Recovery advice keyed by error code; codes not listed here get the generic advice. */
const recoveryHintsByCode = new Map([
    ["WINDOW_NOT_FOUND", "Run list_windows again, then retry with a fresh windowId or omit windowId for screen coordinates."],
    ["TARGET_STALE", "Run focus_app again for the target app, or run list_windows and retry with a fresh windowId."],
    ["ELEMENT_NOT_FOUND", "Run find_element again, then retry with a fresh elementId."],
    ["PERMISSION_DENIED", "Run doctor and grant the missing macOS permission, then restart the launching client."],
    ["UNSUPPORTED_PARAMETER", "Remove or replace the unsupported parameter; inspect tools/list for this tool schema."],
    ["SAFETY_BLOCKED", "Choose a less risky action or ask the user to perform it manually."],
    ["INPUT_FAILED", "Observe current state with screenshot or get_window_state before retrying manually."],
    ["CAPTURE_FAILED", "Run doctor to check Screen Recording permission, then retry screenshot or ocr."],
    ["COORDINATE_OUT_OF_BOUNDS", "Run get_screen_size or list_windows, then retry with coordinates inside the active display or window bounds."],
]);
/**
 * Map an error code to a short recovery instruction for the caller.
 *
 * @param code Error code such as `"WINDOW_NOT_FOUND"`.
 * @returns {string} The advice for that code, or generic advice for any other value.
 */
export function recoveryHint(code) {
    return recoveryHintsByCode.get(code)
        ?? "Inspect the error message, observe the current UI state, and retry only if the operation is safe.";
}
|
|
92
|
+
/**
 * Convert any thrown value into a plain, serialisable description.
 *
 * Non-`Error` values are wrapped in `new Error(String(value))`. `code` and
 * `retryable` are taken from `UcuError` instances; every other value is
 * reported as `"UNKNOWN_ERROR"` and not retryable. A `hint` key is added
 * only when the error carries a truthy `hint`.
 *
 * @param error The thrown value.
 * @returns {{name: string, code: string, retryable: boolean, message: string, recovery: string, hint?: string}}
 */
export function errorDetails(error) {
    const isUcuError = error instanceof UcuError;
    const err = error instanceof Error ? error : new Error(String(error));
    const code = isUcuError ? error.code : "UNKNOWN_ERROR";
    const retryable = isUcuError ? error.retryable : false;
    const inlineHint = err instanceof UcuError ? err.hint : undefined;
    const details = {
        name: err.name,
        code,
        retryable,
        message: err.message,
        recovery: recoveryHint(code),
    };
    if (!inlineHint) {
        return details;
    }
    details.hint = inlineHint;
    return details;
}
|
|
109
|
+
/** Rolling sequence number used by `nextActionId`; wraps at one million. */
let _actionCounter = 0;
/**
 * Produce an action id of the form `a<time>-<sequence>`, with both the
 * current `Date.now()` value and the rolling counter written in base 36.
 *
 * @returns {string} The new id.
 */
function nextActionId() {
    _actionCounter += 1;
    _actionCounter %= 1_000_000;
    const stamp = Date.now().toString(36);
    const sequence = _actionCounter.toString(36);
    return `a${stamp}-${sequence}`;
}
|
|
114
|
+
/**
 * Assemble the receipt object returned to the caller after an action.
 *
 * @param action Tool/action name.
 * @param status Outcome of the action; `"partial"` changes the suggested next step.
 * @param target Description of what the action was aimed at.
 * @param result Action-specific result payload.
 * @param captureRequested Whether a post-action screenshot was asked for.
 * @param captureFormat Screenshot format; included in `capture` only when truthy.
 * @param captureMaxWidth Screenshot max width; included in `capture` only when truthy.
 * @param captureError Details of a failed capture; included in `capture` only when truthy.
 * @param warnings Warning strings to attach (defaults to an empty list).
 * @returns The receipt, including a fresh `actionId` and a `next` suggestion.
 */
export function buildActionReceipt(action, status, target, result, captureRequested, captureFormat, captureMaxWidth, captureError, warnings = []) {
    let captureStatus = "skipped";
    if (captureRequested) {
        captureStatus = captureError ? "error" : "ok";
    }
    const capture = { requested: captureRequested, status: captureStatus };
    if (captureFormat) {
        capture.format = captureFormat;
    }
    if (captureMaxWidth) {
        capture.maxWidth = captureMaxWidth;
    }
    if (captureError) {
        capture.error = captureError;
    }
    // A failed capture takes priority over a partial status when suggesting the next step.
    let next = "find_element or get_window_state";
    if (captureError) {
        next = "screenshot";
    }
    else if (status === "partial") {
        next = "get_window_state";
    }
    return {
        actionId: nextActionId(),
        action,
        status,
        target,
        result,
        capture,
        warnings,
        next,
    };
}
|
|
139
|
+
/**
 * Build the error-shaped tool response for a thrown value.
 *
 * @param error The thrown value.
 * @returns {{isError: true, content: Array}} Response whose single text item is `{ error: errorDetails(error) }` as JSON.
 */
export function mcpErrorResponse(error) {
    const payload = { error: errorDetails(error) };
    return { isError: true, content: [jsonText(payload)] };
}
|
|
149
|
+
/**
 * Build the tool response for a completed action, optionally attaching a
 * post-action screenshot.
 *
 * When `captureAfter` is falsy, only the receipt is returned. Otherwise a
 * screenshot is taken and appended as an image item. If that capture
 * throws, a `"partial"` receipt describing the failure is returned instead
 * of raising.
 *
 * @param action Tool/action name.
 * @param result Action-specific result payload.
 * @param target Description of what the action was aimed at.
 * @param captureAfter Whether to take a screenshot after the action.
 * @param captureFormat Screenshot format (default `"jpeg"`).
 * @param captureMaxWidth Screenshot max width (default `1280`).
 * @param warnings Warning strings to include in the receipt.
 * @returns {Promise<{content: Array}>} The response content.
 */
export async function actionResponse(action, result, target, captureAfter, captureFormat = "jpeg", captureMaxWidth = 1280, warnings = []) {
    const receipt = buildActionReceipt(action, "ok", target, result, captureAfter ?? false, captureFormat, captureMaxWidth, undefined, warnings);
    if (!captureAfter) {
        return { content: [jsonText(receipt)] };
    }
    try {
        const shotOptions = { format: captureFormat, maxWidth: captureMaxWidth };
        const shot = await getPlatform().screenshot(undefined, undefined, shotOptions);
        const image = {
            type: "image",
            data: shot.toString("base64"),
            mimeType: `image/${captureFormat}`,
        };
        return { content: [jsonText(receipt), image] };
    }
    catch (error) {
        // The action itself succeeded; only the follow-up capture failed.
        const captureFailure = errorDetails(error);
        const allWarnings = [...warnings, "Post-action screenshot capture failed"];
        const partialReceipt = buildActionReceipt(action, "partial", target, result, true, captureFormat, captureMaxWidth, captureFailure, allWarnings);
        return { content: [jsonText(partialReceipt)] };
    }
}
|
|
175
|
+
// Action names that withSafety() runs through retry(); any other action is executed once.
const retryableActions = new Set([
    "screenshot",
    "list_windows",
    "list_apps",
    "get_window_state",
    "get_cursor_position",
    "get_screen_size",
    "ocr",
    "doctor",
    "find_element",
]);
|
|
186
|
+
/**
 * Run an action through the shared safety pipeline.
 *
 * Steps, in order: refuse when the screen is locked; ask the safety guard;
 * verify the permissions the action declares; return a dry-run description
 * if one is provided; otherwise execute (with `retry` for actions listed in
 * `retryableActions`) and record the elapsed time in `metrics`.
 *
 * @param sa Action descriptor: `action`, `params`, `execute`, and optionally
 *   `skipUserActivityPause`, `requiresAccessibility`,
 *   `requiresScreenRecording` and `dryRun`.
 * @returns The value produced by `sa.execute()`, or a `[DRY-RUN] …` string.
 * @throws {SafetyError} When the screen is locked or the guard blocks the action.
 * @throws {PermissionError} When a required permission is not granted.
 */
export async function withSafety(sa) {
    const platform = getPlatform();
    if (platform.isScreenLocked?.()) {
        throw new SafetyError("Screen is locked");
    }
    // Unless the caller says otherwise, actions classified as "observe" skip the user-activity pause.
    const skipUserActivityPause = sa.skipUserActivityPause ?? (classifyAction(sa.action) === "observe");
    const verdict = safety.checkAction(sa.action, sa.params, { skipUserActivityPause });
    if (!verdict.allowed) {
        throw new SafetyError(verdict.reason ?? "Action blocked by safety guard");
    }
    const requiredPermissions = [];
    if (sa.requiresAccessibility) {
        requiredPermissions.push("accessibility");
    }
    if (sa.requiresScreenRecording) {
        requiredPermissions.push("screenRecording");
    }
    for (const permission of requiredPermissions) {
        const { granted } = await checkPermission(permission);
        if (!granted) {
            throw new PermissionError(permission, process.platform);
        }
    }
    if (sa.dryRun) {
        return `[DRY-RUN] ${await sa.dryRun()}`;
    }
    const startedAt = Date.now();
    try {
        if (retryableActions.has(sa.action)) {
            return await retry(() => sa.execute());
        }
        return await sa.execute();
    }
    finally {
        // Recorded whether the action succeeded or threw.
        metrics.record(sa.action, Date.now() - startedAt);
    }
}
|
|
217
|
+
/**
 * Start polling the cursor position every 500 ms and report any movement
 * to the safety guard as user activity. Does nothing when the monitor is
 * already running.
 */
export function startUserActivityMonitor() {
    if (userActivityInterval) {
        return;
    }
    try {
        lastCursorPos = getPlatform().getCursorPosition();
    }
    catch {
        // Keep the default when the cursor cannot be queried during startup.
    }
    const pollCursor = () => {
        try {
            const current = getPlatform().getCursorPosition();
            const unchanged = current.x === lastCursorPos.x && current.y === lastCursorPos.y;
            if (unchanged) {
                return;
            }
            safety.recordUserActivity();
            lastCursorPos = current;
        }
        catch {
            // The cursor could not be read on this tick; try again on the next one.
        }
    };
    userActivityInterval = setInterval(pollCursor, 500);
    // Where the timer supports it, do not let it keep the process alive.
    userActivityInterval.unref?.();
}
|
|
238
|
+
/**
 * Stop the cursor polling started by `startUserActivityMonitor`.
 * Does nothing when the monitor is not running.
 */
export function stopUserActivityMonitor() {
    if (!userActivityInterval) {
        return;
    }
    clearInterval(userActivityInterval);
    userActivityInterval = undefined;
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool registry for UCU-MCP.
|
|
3
|
+
*
|
|
4
|
+
* Registers 24 MCP tools on the server and dispatches each call through
|
|
5
|
+
* a shared safety/permission/retry pipeline (`withSafety`).
|
|
6
|
+
*/
|
|
7
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
8
|
+
export { getActiveTarget } from "./helpers.js";
|
|
9
|
+
export { startUserActivityMonitor, stopUserActivityMonitor } from "./helpers.js";
|
|
10
|
+
export { findElementInputSchema } from "./element-tools.js";
|
|
11
|
+
/**
 * Registry of tool names and their optional handlers, reachable through a
 * single shared instance.
 */
export declare class ToolRegistry {
    /** Backing field for the `instance` accessor. */
    private static _instance;
    /** Names passed to `register`. */
    readonly tools: string[];
    /** Handlers passed to `register`, keyed by tool name. */
    private readonly _handlers;
    /** The shared registry, created on first access. */
    static get instance(): ToolRegistry;
    /** Record a tool name and, when given, the handler `dispatch` will call for it. */
    register(name: string, handler?: (args: Record<string, unknown>) => Promise<unknown>): void;
    /** Call the handler registered under `name` with `args`; unknown names yield an error-shaped result. */
    dispatch(name: string, args: Record<string, unknown>): Promise<any>;
}
|
|
19
|
+
/** Register the screen, input, keyboard, element and app tool groups on `server`. */
export declare function registerTools(server: McpServer): void;
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool registry for UCU-MCP.
|
|
3
|
+
*
|
|
4
|
+
* Registers 24 MCP tools on the server and dispatches each call through
|
|
5
|
+
* a shared safety/permission/retry pipeline (`withSafety`).
|
|
6
|
+
*/
|
|
7
|
+
import { createLogger } from "../../util/logger.js";
|
|
8
|
+
import { mcpErrorResponse } from "./helpers.js";
|
|
9
|
+
import { registerScreenTools } from "./screen-tools.js";
|
|
10
|
+
import { registerInputTools } from "./input-tools.js";
|
|
11
|
+
import { registerKeyboardTools } from "./keyboard-tools.js";
|
|
12
|
+
import { registerElementTools } from "./element-tools.js";
|
|
13
|
+
import { registerAppTools } from "./app-tools.js";
|
|
14
|
+
export { getActiveTarget } from "./helpers.js";
|
|
15
|
+
export { startUserActivityMonitor, stopUserActivityMonitor } from "./helpers.js";
|
|
16
|
+
export { findElementInputSchema } from "./element-tools.js";
|
|
17
|
+
// Logger for this module, created with the "tools" label.
const log = createLogger("tools");
|
|
18
|
+
/**
 * Registry of tool names and their optional handlers.
 *
 * `tools` lists each registered name once; `_handlers` maps a name to the
 * handler that `dispatch` invokes. A shared instance is available through
 * `ToolRegistry.instance`.
 */
export class ToolRegistry {
    /** Backing field for the `instance` accessor. */
    static _instance;
    /** Registered tool names, in first-registration order, without duplicates. */
    tools = [];
    /** Handlers keyed by tool name. */
    _handlers = new Map();
    /**
     * The shared registry, created on first access.
     * @returns {ToolRegistry}
     */
    static get instance() {
        ToolRegistry._instance ??= new ToolRegistry();
        return ToolRegistry._instance;
    }
    /**
     * Record a tool name and, when given, its handler.
     *
     * Registering a name again replaces its handler but does not add a
     * second entry to `tools`, so the name list and the handler map stay
     * in agreement.
     *
     * @param {string} name Tool name.
     * @param {(args: Record<string, unknown>) => Promise<unknown>} [handler] Handler for `dispatch`.
     */
    register(name, handler) {
        if (!this.tools.includes(name)) {
            this.tools.push(name);
        }
        if (handler) {
            this._handlers.set(name, handler);
        }
    }
    /**
     * Invoke the handler registered under `name`.
     *
     * @param {string} name Tool name.
     * @param {Record<string, unknown>} args Arguments forwarded to the handler.
     * @returns {Promise<any>} The handler's result, or an error-shaped response for an unknown name.
     */
    async dispatch(name, args) {
        const handler = this._handlers.get(name);
        if (!handler) {
            return { isError: true, content: [{ type: "text", text: `Unknown tool: ${name}` }] };
        }
        return handler(args);
    }
}
|
|
36
|
+
/**
 * Register every tool group on the MCP server.
 *
 * Each handler is wrapped so that a thrown error is turned into an
 * error-shaped response by `mcpErrorResponse` instead of propagating. The
 * wrapped handler is also recorded in the shared `ToolRegistry`, so that
 * `registry.tools` (logged below) and `ToolRegistry.dispatch` reflect the
 * tools that were registered on the server.
 *
 * @param server MCP server exposing a `tool(name, description, schema, handler)` method.
 */
export function registerTools(server) {
    const registry = ToolRegistry.instance;
    const registerTool = (name, description, schema, handler) => {
        const guardedHandler = async (params) => {
            try {
                return await handler(params);
            }
            catch (error) {
                return mcpErrorResponse(error);
            }
        };
        server.tool(name, description, schema, guardedHandler);
        // Record the tool only after the server accepted it.
        registry.register(name, guardedHandler);
    };
    registerScreenTools(registerTool);
    registerInputTools(registerTool);
    registerKeyboardTools(registerTool);
    registerElementTools(registerTool);
    registerAppTools(registerTool);
    log.info("Registered tools", { count: registry.tools.length, tools: registry.tools.join(", ") });
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { getPlatform, resolvePoint, getSafetyContext, withSafety, actionResponse, captureAfterFields, } from "./helpers.js";
|
|
3
|
+
/**
 * Register the pointer tools: click, double_click, scroll, drag, move and
 * get_cursor_position.
 *
 * Every coordinate-taking tool first resolves its point(s) to screen
 * coordinates, gathers the safety context, runs the platform call through
 * `withSafety`, and answers with `actionResponse`.
 *
 * @param registerTool Callback `(name, description, schema, handler)` that registers one tool.
 */
export function registerInputTools(registerTool) {
    // Schema factories for the fields several tools share.
    const pointFields = () => ({
        x: z.number().describe("X coordinate"),
        y: z.number().describe("Y coordinate"),
    });
    const buttonField = () => z.enum(["left", "right", "middle"]).optional().describe("Mouse button");
    const windowIdField = (description = "If set, x/y are relative to this window") => z.string().optional().describe(description);
    // Trailing capture arguments forwarded to actionResponse.
    const captureArgs = (params) => [params.captureAfter, params.captureFormat, params.captureMaxWidth];

    registerTool("click", "Click at screen coordinates", {
        ...pointFields(),
        button: buttonField(),
        windowId: windowIdField(),
        ...captureAfterFields,
    }, async (params) => {
        const { windowId } = params;
        const { x, y } = await resolvePoint(params.x, params.y, windowId);
        const safetyCtx = await getSafetyContext(windowId);
        await withSafety({
            action: "click",
            params: { x, y, ...safetyCtx },
            requiresAccessibility: true,
            execute: () => getPlatform().click(x, y, params.button),
        });
        return actionResponse("click", { clicked: true, x, y }, { x, y, windowId }, ...captureArgs(params));
    });

    registerTool("double_click", "Double-click at screen coordinates", {
        ...pointFields(),
        button: buttonField(),
        windowId: windowIdField(),
        ...captureAfterFields,
    }, async (params) => {
        const { windowId } = params;
        const { x, y } = await resolvePoint(params.x, params.y, windowId);
        const safetyCtx = await getSafetyContext(windowId);
        await withSafety({
            action: "double_click",
            params: { x, y, doubleClick: true, ...safetyCtx },
            requiresAccessibility: true,
            execute: () => getPlatform().click(x, y, params.button, true),
        });
        return actionResponse("double_click", { doubleClicked: true, x, y }, { x, y, windowId }, ...captureArgs(params));
    });

    registerTool("scroll", "Scroll at coordinates", {
        ...pointFields(),
        deltaX: z.number().default(0).describe("Horizontal scroll"),
        deltaY: z.number().describe("Vertical scroll (negative = up)"),
        windowId: windowIdField(),
        ...captureAfterFields,
    }, async (params) => {
        const { windowId } = params;
        const { x, y } = await resolvePoint(params.x, params.y, windowId);
        const deltaX = params.deltaX ?? 0;
        const safetyCtx = await getSafetyContext(windowId);
        await withSafety({
            action: "scroll",
            params: { x, y, ...safetyCtx },
            requiresAccessibility: true,
            execute: () => getPlatform().scroll(x, y, deltaX, params.deltaY),
        });
        return actionResponse("scroll", { scrolled: true, x, y }, { x, y, windowId }, ...captureArgs(params));
    });

    registerTool("drag", "Drag from one point to another", {
        startX: z.number().describe("Start X"),
        startY: z.number().describe("Start Y"),
        endX: z.number().describe("End X"),
        endY: z.number().describe("End Y"),
        button: buttonField(),
        windowId: windowIdField("If set, start/end coordinates are relative to this window"),
        duration: z.number().optional().describe("Drag duration in ms"),
        ...captureAfterFields,
    }, async (params) => {
        const { windowId } = params;
        const from = await resolvePoint(params.startX, params.startY, windowId);
        const to = await resolvePoint(params.endX, params.endY, windowId);
        const safetyCtx = await getSafetyContext(windowId);
        const path = { startX: from.x, startY: from.y, endX: to.x, endY: to.y };
        await withSafety({
            action: "drag",
            params: { ...path, ...safetyCtx },
            requiresAccessibility: true,
            execute: () => getPlatform().drag(from.x, from.y, to.x, to.y, params.button, params.duration),
        });
        return actionResponse("drag", { dragged: true, ...path }, { ...path, windowId }, ...captureArgs(params));
    });

    registerTool("move", "Move cursor to coordinates", {
        ...pointFields(),
        windowId: windowIdField(),
        ...captureAfterFields,
    }, async (params) => {
        const { windowId } = params;
        const { x, y } = await resolvePoint(params.x, params.y, windowId);
        const safetyCtx = await getSafetyContext(windowId);
        await withSafety({
            action: "move",
            params: { x, y, ...safetyCtx },
            requiresAccessibility: true,
            execute: () => getPlatform().move(x, y),
        });
        return actionResponse("move", { moved: true, x, y }, { x, y, windowId }, ...captureArgs(params));
    });

    registerTool("get_cursor_position", "Get current cursor position", {}, async () => {
        const position = await withSafety({
            action: "get_cursor_position",
            params: {},
            execute: () => Promise.resolve(getPlatform().getCursorPosition()),
        });
        const text = JSON.stringify(position, null, 2);
        return { content: [{ type: "text", text }] };
    });
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { UnsupportedParameterError } from "../../util/errors.js";
|
|
3
|
+
import { getPlatform, getSafetyContext, withSafety, actionResponse, captureAfterFields, } from "./helpers.js";
|
|
4
|
+
/**
 * Register the keyboard tools: type_text and press_key.
 *
 * Both tools reject a `windowId` (window-targeted keyboard input is not
 * implemented), gather the safety context, run the platform call through
 * `withSafety`, and answer with `actionResponse`.
 *
 * @param registerTool Callback `(name, description, schema, handler)` that registers one tool.
 */
export function registerKeyboardTools(registerTool) {
    registerTool("type_text", "Type text at the current cursor position", {
        text: z.string().describe("Text to type"), delay: z.number().optional().describe("Delay between keystrokes in ms"),
        windowId: z.string().optional().describe("UNSUPPORTED: windowId-targeted keyboard typing is not implemented"),
        ...captureAfterFields,
    }, async (params) => {
        if (params.windowId)
            throw new UnsupportedParameterError("windowId-targeted keyboard typing is not implemented");
        const safetyCtx = await getSafetyContext();
        await withSafety({ action: "type_text", params: { text: params.text, ...safetyCtx }, requiresAccessibility: true, execute: () => getPlatform().type(params.text, params.delay) });
        return actionResponse("type_text", { typed: true, charCount: params.text.length }, {}, params.captureAfter, params.captureFormat, params.captureMaxWidth);
    });
    registerTool("press_key", "Press a keyboard shortcut", {
        keys: z.array(z.string()).optional().describe("Keys to press simultaneously"),
        key: z.string().optional().describe("Single key to press (alias for keys)"),
        modifiers: z.array(z.string()).optional().describe("Modifier keys used with key, such as cmd, shift, alt, or ctrl"),
        windowId: z.string().optional().describe("UNSUPPORTED: windowId-targeted key events are not implemented"),
        ...captureAfterFields,
    }, async (params) => {
        if (params.windowId)
            throw new UnsupportedParameterError("windowId-targeted key events are not implemented");
        // An empty `keys` array carries no keys, so treat it like an omitted one
        // and fall back to `modifiers` + `key` instead of rejecting the call.
        const explicitKeys = params.keys?.length ? params.keys : undefined;
        const keys = explicitKeys ?? [
            ...(params.modifiers ?? []),
            ...(params.key ? [params.key] : []),
        ];
        if (keys.length === 0)
            throw new UnsupportedParameterError("press_key requires at least one key");
        const safetyCtx = await getSafetyContext();
        await withSafety({ action: "press_key", params: { keys, ...safetyCtx }, requiresAccessibility: true, execute: () => getPlatform().key(keys) });
        return actionResponse("press_key", { pressed: true, keys }, {}, params.captureAfter, params.captureFormat, params.captureMaxWidth);
    });
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { checkPermission } from "../../safety/permissions.js";
|
|
3
|
+
import { UnsupportedParameterError } from "../../util/errors.js";
|
|
4
|
+
import { getPlatform, getActiveTarget, withSafety, } from "./helpers.js";
|
|
5
|
+
/**
 * Register the observation tools: screenshot, list_windows,
 * get_window_state, get_screen_size and ocr.
 *
 * Every platform call goes through `withSafety`, declaring the permission
 * it needs where one applies.
 *
 * @param registerTool Callback `(name, description, schema, handler)` that registers one tool.
 */
export function registerScreenTools(registerTool) {
    // Schema factories for fields used by more than one tool.
    const displayField = (description) => z.number().optional().describe(description);
    const regionField = (description) => z.object({ x: z.number(), y: z.number(), width: z.number(), height: z.number() }).optional().describe(description);
    // Wrap a value as a single pretty-printed JSON text item.
    const textResponse = (value) => ({ content: [{ type: "text", text: JSON.stringify(value, null, 2) }] });

    registerTool("screenshot", "Capture a screenshot of the entire screen or a region", {
        display: displayField("Display index (default 0)"),
        windowId: z.string().optional().describe("Window ID from list_windows; when set, captures that window"),
        region: regionField("Region to capture"),
        format: z.enum(["png", "jpeg"]).default("png").describe("Image format"),
        maxWidth: z.number().default(1280).describe("Maximum output width in pixels. Aspect ratio is preserved."),
    }, async (params) => {
        if (params.windowId && params.region) {
            throw new UnsupportedParameterError("screenshot windowId cannot be combined with region");
        }
        const options = { format: params.format, maxWidth: params.maxWidth };
        const capture = () => {
            if (!params.windowId) {
                return getPlatform().screenshot(params.display, params.region, options);
            }
            if (getPlatform().screenshotWindow) {
                return getPlatform().screenshotWindow(params.windowId, options);
            }
            return Promise.reject(new UnsupportedParameterError("window screenshots are not implemented on this platform"));
        };
        const image = await withSafety({
            action: "screenshot",
            params,
            requiresScreenRecording: true,
            execute: capture,
        });
        return { content: [{ type: "image", data: image.toString("base64"), mimeType: `image/${params.format}` }] };
    });

    registerTool("list_windows", "List all visible windows on screen", {
        includeMinimized: z.boolean().optional().describe("Include minimized windows"),
    }, async (params) => {
        const windows = await withSafety({
            action: "list_windows",
            params: {},
            requiresAccessibility: true,
            execute: () => getPlatform().listWindows(params.includeMinimized),
        });
        if (windows.length !== 0) {
            return textResponse(windows);
        }
        // An empty list is ambiguous, so explain the likely cause alongside it.
        let accessibility = "unknown";
        try {
            const { granted } = await checkPermission("accessibility");
            accessibility = granted ? "granted" : "denied";
        }
        catch {
            // Keep "unknown" when the permission cannot be checked.
        }
        const notesByStatus = {
            denied: "Accessibility is currently denied to this terminal — grant it via System Settings > Privacy & Security > Accessibility, then retry.",
            granted: "Accessibility is granted. If you expected a specific app to appear here, it is likely an Electron app whose AX tree is not exposed to System Events. Pixel-level workaround: call screenshot, then ocr to locate the target UI text and get its bounding box, then click(x, y) at those screen coordinates. Alternatively, modify the app's config file or database directly.",
            unknown: "Accessibility status is unknown. Run `doctor` first to verify.",
        };
        const axNote = notesByStatus[accessibility];
        const diagnostics = { hint: `list_windows returned 0 windows. ${axNote}`, accessibility };
        return textResponse({ windows, diagnostics });
    });

    registerTool("get_window_state", "Get detailed state of a window including accessibility tree", {
        windowId: z.string().optional().describe("Window ID"),
        depth: z.number().optional().describe("AX tree depth"),
        includeBounds: z.boolean().optional().describe("Include element bounds"),
    }, async (params) => {
        // Fall back to the active target's window when none is given.
        const effectiveWindowId = params.windowId || getActiveTarget()?.windowId;
        const state = await withSafety({
            action: "get_window_state",
            params: {},
            requiresAccessibility: true,
            execute: () => getPlatform().getWindowState(effectiveWindowId, params.depth, params.includeBounds),
        });
        return textResponse(state);
    });

    registerTool("get_screen_size", "Get screen dimensions and scale factor", {
        display: displayField("Display index"),
    }, async (params) => {
        const size = await withSafety({
            action: "get_screen_size",
            params: {},
            execute: () => Promise.resolve(getPlatform().getScreenSize(params.display)),
        });
        return textResponse(size);
    });

    registerTool("ocr", "Perform OCR on screen region", {
        display: displayField("Display index"),
        region: regionField("Region to OCR"),
    }, async (params) => {
        const recognized = await withSafety({
            action: "ocr",
            params: {},
            requiresScreenRecording: true,
            execute: () => getPlatform().ocr(params.display, params.region),
        });
        return textResponse(recognized);
    });
}
|
package/dist/src/mcp/tools.d.ts
CHANGED
|
@@ -11,6 +11,15 @@ import type { AppTarget } from "../platform/base.js";
|
|
|
11
11
|
* Get the currently active target context (set by focus_app).
|
|
12
12
|
*/
|
|
13
13
|
export declare function getActiveTarget(): AppTarget | undefined;
|
|
14
|
+
/**
|
|
15
|
+
* Exported so unit tests can pin the schema constraint directly instead
|
|
16
|
+
* of going through the McpServer wrapper (which `handler()` calls
|
|
17
|
+
* bypass). (Herschel review Major: 0.3.5's value='' test was a
|
|
18
|
+
* tautology because it re-created a local zod schema instead of
|
|
19
|
+
* asserting against this one.)
|
|
20
|
+
*
|
|
21
|
+
* @internal Not part of the public API — may change without a semver bump.
|
|
22
|
+
*/
|
|
14
23
|
export declare const findElementInputSchema: {
|
|
15
24
|
text: z.ZodOptional<z.ZodString>;
|
|
16
25
|
role: z.ZodOptional<z.ZodString>;
|