ucu-mcp 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +31 -53
- package/README.md +90 -4
- package/dist/src/mcp/server.js +11 -6
- package/dist/src/mcp/tools.d.ts +6 -1
- package/dist/src/mcp/tools.js +219 -46
- package/dist/src/platform/base.d.ts +26 -1
- package/dist/src/platform/linux.d.ts +4 -2
- package/dist/src/platform/linux.js +51 -0
- package/dist/src/platform/macos.d.ts +6 -2
- package/dist/src/platform/macos.js +160 -16
- package/dist/src/platform/windows.d.ts +4 -2
- package/dist/src/platform/windows.js +33 -0
- package/dist/src/safety/guard.d.ts +8 -1
- package/dist/src/safety/guard.js +43 -4
- package/dist/src/util/errors.d.ts +26 -1
- package/dist/src/util/errors.js +43 -11
- package/dist/src/util/metrics.d.ts +37 -0
- package/dist/src/util/metrics.js +97 -0
- package/native/cgevent/cgevent-helper +0 -0
- package/native/ocr/ocr-helper +0 -0
- package/package.json +2 -2
package/dist/src/mcp/tools.js
CHANGED
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Tool registry for UCU-MCP.
|
|
3
3
|
*
|
|
4
|
-
* Registers
|
|
4
|
+
* Registers 24 MCP tools on the server and dispatches each call through
|
|
5
5
|
* a shared safety/permission/retry pipeline (`withSafety`).
|
|
6
6
|
*/
|
|
7
7
|
import { z } from "zod";
|
|
8
8
|
import { MacOSPlatform } from "../platform/macos.js";
|
|
9
|
-
import { SafetyGuard } from "../safety/guard.js";
|
|
9
|
+
import { SafetyGuard, classifyAction } from "../safety/guard.js";
|
|
10
10
|
import { checkPermission } from "../safety/permissions.js";
|
|
11
11
|
import { retry } from "../util/retry.js";
|
|
12
12
|
import { createLogger } from "../util/logger.js";
|
|
13
|
+
import { metrics } from "../util/metrics.js";
|
|
13
14
|
import { SafetyError, PermissionError, UnsupportedParameterError, UcuError, WindowNotFoundError } from "../util/errors.js";
|
|
14
15
|
const log = createLogger("tools");
|
|
15
16
|
let _platform;
|
|
@@ -20,6 +21,14 @@ function getPlatform() {
|
|
|
20
21
|
return _platform;
|
|
21
22
|
}
|
|
22
23
|
const safety = new SafetyGuard();
|
|
24
|
+
// Active target context — set by focus_app, used by AX element tools
|
|
25
|
+
let activeTargetContext;
|
|
26
|
+
/**
|
|
27
|
+
* Get the currently active target context (set by focus_app).
|
|
28
|
+
*/
|
|
29
|
+
export function getActiveTarget() {
|
|
30
|
+
return activeTargetContext;
|
|
31
|
+
}
|
|
23
32
|
// User activity monitor — pauses automation when user moves the cursor
|
|
24
33
|
let lastCursorPos = { x: 0, y: 0 };
|
|
25
34
|
let userActivityInterval;
|
|
@@ -43,6 +52,8 @@ function recoveryHint(code) {
|
|
|
43
52
|
switch (code) {
|
|
44
53
|
case "WINDOW_NOT_FOUND":
|
|
45
54
|
return "Run list_windows again, then retry with a fresh windowId or omit windowId for screen coordinates.";
|
|
55
|
+
case "TARGET_STALE":
|
|
56
|
+
return "Run focus_app again for the target app, or run list_windows and retry with a fresh windowId.";
|
|
46
57
|
case "ELEMENT_NOT_FOUND":
|
|
47
58
|
return "Run find_element again, then retry with a fresh elementId.";
|
|
48
59
|
case "PERMISSION_DENIED":
|
|
@@ -61,28 +72,63 @@ function recoveryHint(code) {
|
|
|
61
72
|
return "Inspect the error message, observe the current UI state, and retry only if the operation is safe.";
|
|
62
73
|
}
|
|
63
74
|
}
|
|
64
|
-
function
|
|
75
|
+
function errorDetails(error) {
|
|
65
76
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
66
77
|
const code = error instanceof UcuError ? error.code : "UNKNOWN_ERROR";
|
|
67
78
|
const retryable = error instanceof UcuError ? error.retryable : false;
|
|
79
|
+
return {
|
|
80
|
+
name: err.name,
|
|
81
|
+
code,
|
|
82
|
+
retryable,
|
|
83
|
+
message: err.message,
|
|
84
|
+
recovery: recoveryHint(code),
|
|
85
|
+
};
|
|
86
|
+
}
|
|
87
|
+
let _actionCounter = 0;
|
|
88
|
+
function nextActionId() {
|
|
89
|
+
_actionCounter = (_actionCounter + 1) % 1_000_000;
|
|
90
|
+
return `a${Date.now().toString(36)}-${_actionCounter.toString(36)}`;
|
|
91
|
+
}
|
|
92
|
+
function buildActionReceipt(action, status, target, result, captureRequested, captureFormat, captureMaxWidth, captureError, warnings = []) {
|
|
93
|
+
const captureStatus = captureRequested
|
|
94
|
+
? captureError ? "error" : "ok"
|
|
95
|
+
: "skipped";
|
|
96
|
+
return {
|
|
97
|
+
actionId: nextActionId(),
|
|
98
|
+
action,
|
|
99
|
+
status,
|
|
100
|
+
target,
|
|
101
|
+
result,
|
|
102
|
+
capture: {
|
|
103
|
+
requested: captureRequested,
|
|
104
|
+
status: captureStatus,
|
|
105
|
+
...(captureFormat && { format: captureFormat }),
|
|
106
|
+
...(captureMaxWidth && { maxWidth: captureMaxWidth }),
|
|
107
|
+
...(captureError && { error: captureError }),
|
|
108
|
+
},
|
|
109
|
+
warnings,
|
|
110
|
+
next: captureError
|
|
111
|
+
? "screenshot"
|
|
112
|
+
: status === "partial"
|
|
113
|
+
? "get_window_state"
|
|
114
|
+
: "find_element or get_window_state",
|
|
115
|
+
};
|
|
116
|
+
}
|
|
117
|
+
function mcpErrorResponse(error) {
|
|
68
118
|
return {
|
|
69
119
|
isError: true,
|
|
70
120
|
content: [
|
|
71
121
|
jsonText({
|
|
72
|
-
error:
|
|
73
|
-
name: err.name,
|
|
74
|
-
code,
|
|
75
|
-
retryable,
|
|
76
|
-
message: err.message,
|
|
77
|
-
recovery: recoveryHint(code),
|
|
78
|
-
},
|
|
122
|
+
error: errorDetails(error),
|
|
79
123
|
}),
|
|
80
124
|
],
|
|
81
125
|
};
|
|
82
126
|
}
|
|
83
|
-
async function actionResponse(result, captureAfter, captureFormat = "jpeg", captureMaxWidth = 1280) {
|
|
84
|
-
|
|
85
|
-
|
|
127
|
+
async function actionResponse(action, result, target, captureAfter, captureFormat = "jpeg", captureMaxWidth = 1280, warnings = []) {
|
|
128
|
+
const receipt = buildActionReceipt(action, "ok", target, result, captureAfter ?? false, captureFormat, captureMaxWidth, undefined, warnings);
|
|
129
|
+
if (!captureAfter) {
|
|
130
|
+
return { content: [jsonText(receipt)] };
|
|
131
|
+
}
|
|
86
132
|
try {
|
|
87
133
|
const buf = await getPlatform().screenshot(undefined, undefined, {
|
|
88
134
|
format: captureFormat,
|
|
@@ -90,7 +136,7 @@ async function actionResponse(result, captureAfter, captureFormat = "jpeg", capt
|
|
|
90
136
|
});
|
|
91
137
|
return {
|
|
92
138
|
content: [
|
|
93
|
-
jsonText(
|
|
139
|
+
jsonText(receipt),
|
|
94
140
|
{
|
|
95
141
|
type: "image",
|
|
96
142
|
data: buf.toString("base64"),
|
|
@@ -99,8 +145,9 @@ async function actionResponse(result, captureAfter, captureFormat = "jpeg", capt
|
|
|
99
145
|
],
|
|
100
146
|
};
|
|
101
147
|
}
|
|
102
|
-
catch {
|
|
103
|
-
|
|
148
|
+
catch (error) {
|
|
149
|
+
const partialReceipt = buildActionReceipt(action, "partial", target, result, true, captureFormat, captureMaxWidth, errorDetails(error), [...warnings, "Post-action screenshot capture failed"]);
|
|
150
|
+
return { content: [jsonText(partialReceipt)] };
|
|
104
151
|
}
|
|
105
152
|
}
|
|
106
153
|
const retryableActions = new Set([
|
|
@@ -118,7 +165,9 @@ async function withSafety(sa) {
|
|
|
118
165
|
const platform = getPlatform();
|
|
119
166
|
if (platform.isScreenLocked?.())
|
|
120
167
|
throw new SafetyError("Screen is locked");
|
|
121
|
-
const check = safety.checkAction(sa.action, sa.params
|
|
168
|
+
const check = safety.checkAction(sa.action, sa.params, {
|
|
169
|
+
skipUserActivityPause: sa.skipUserActivityPause ?? classifyAction(sa.action) === "observe",
|
|
170
|
+
});
|
|
122
171
|
if (!check.allowed)
|
|
123
172
|
throw new SafetyError(check.reason ?? "Action blocked by safety guard");
|
|
124
173
|
if (sa.requiresAccessibility) {
|
|
@@ -136,12 +185,14 @@ async function withSafety(sa) {
|
|
|
136
185
|
const shouldManageFocus = sa.requiresAccessibility && !["screenshot", "list_windows", "list_apps", "get_window_state", "get_cursor_position", "get_screen_size", "ocr", "doctor", "wait", "wait_for_element", "find_element", "focus_app"].includes(sa.action);
|
|
137
186
|
if (shouldManageFocus)
|
|
138
187
|
await platform.saveFocus?.();
|
|
188
|
+
const start = Date.now();
|
|
139
189
|
try {
|
|
140
190
|
return retryableActions.has(sa.action)
|
|
141
191
|
? await retry(() => sa.execute())
|
|
142
192
|
: await sa.execute();
|
|
143
193
|
}
|
|
144
194
|
finally {
|
|
195
|
+
metrics.record(sa.action, Date.now() - start);
|
|
145
196
|
if (shouldManageFocus)
|
|
146
197
|
await platform.restoreFocus?.();
|
|
147
198
|
}
|
|
@@ -224,13 +275,15 @@ export function registerTools(server) {
|
|
|
224
275
|
app: z.string().describe("Application name to focus"),
|
|
225
276
|
}, async (params) => {
|
|
226
277
|
const target = await withSafety({ action: "focus_app", params: {}, requiresAccessibility: true, execute: () => getPlatform().focusApp(params.app) });
|
|
278
|
+
activeTargetContext = target;
|
|
227
279
|
return { content: [{ type: "text", text: JSON.stringify(target, null, 2) }] };
|
|
228
280
|
});
|
|
229
281
|
registry.register("focus_app");
|
|
230
282
|
registerTool("get_window_state", "Get detailed state of a window including accessibility tree", {
|
|
231
283
|
windowId: z.string().optional().describe("Window ID"), depth: z.number().optional().describe("AX tree depth"), includeBounds: z.boolean().optional().describe("Include element bounds"),
|
|
232
284
|
}, async (params) => {
|
|
233
|
-
const
|
|
285
|
+
const effectiveWindowId = params.windowId || getActiveTarget()?.windowId;
|
|
286
|
+
const state = await withSafety({ action: "get_window_state", params: {}, requiresAccessibility: true, execute: () => getPlatform().getWindowState(effectiveWindowId, params.depth, params.includeBounds) });
|
|
234
287
|
return { content: [{ type: "text", text: JSON.stringify(state, null, 2) }] };
|
|
235
288
|
});
|
|
236
289
|
registry.register("get_window_state");
|
|
@@ -242,7 +295,7 @@ export function registerTools(server) {
|
|
|
242
295
|
}, async (params) => {
|
|
243
296
|
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
244
297
|
await withSafety({ action: "click", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().click(pt.x, pt.y, params.button) });
|
|
245
|
-
return actionResponse({ clicked: true, x: pt.x, y: pt.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
298
|
+
return actionResponse("click", { clicked: true, x: pt.x, y: pt.y }, { x: pt.x, y: pt.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
246
299
|
});
|
|
247
300
|
registry.register("click");
|
|
248
301
|
registerTool("double_click", "Double-click at screen coordinates", {
|
|
@@ -253,7 +306,7 @@ export function registerTools(server) {
|
|
|
253
306
|
}, async (params) => {
|
|
254
307
|
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
255
308
|
await withSafety({ action: "click", params: { x: pt.x, y: pt.y, doubleClick: true }, requiresAccessibility: true, execute: () => getPlatform().click(pt.x, pt.y, params.button, true) });
|
|
256
|
-
return actionResponse({ doubleClicked: true, x: pt.x, y: pt.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
309
|
+
return actionResponse("double_click", { doubleClicked: true, x: pt.x, y: pt.y }, { x: pt.x, y: pt.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
257
310
|
});
|
|
258
311
|
registry.register("double_click");
|
|
259
312
|
registerTool("type_text", "Type text at the current cursor position", {
|
|
@@ -264,7 +317,7 @@ export function registerTools(server) {
|
|
|
264
317
|
if (params.windowId)
|
|
265
318
|
throw new UnsupportedParameterError("windowId-targeted keyboard typing is not implemented");
|
|
266
319
|
await withSafety({ action: "type_text", params: { text: params.text }, requiresAccessibility: true, execute: () => getPlatform().type(params.text, params.delay) });
|
|
267
|
-
return actionResponse({ typed: true, charCount: params.text.length }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
320
|
+
return actionResponse("type_text", { typed: true, charCount: params.text.length }, {}, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
268
321
|
});
|
|
269
322
|
registry.register("type_text");
|
|
270
323
|
registerTool("press_key", "Press a keyboard shortcut", {
|
|
@@ -283,7 +336,7 @@ export function registerTools(server) {
|
|
|
283
336
|
if (keys.length === 0)
|
|
284
337
|
throw new UnsupportedParameterError("press_key requires at least one key");
|
|
285
338
|
await withSafety({ action: "press_key", params: { keys }, requiresAccessibility: true, execute: () => getPlatform().key(keys) });
|
|
286
|
-
return actionResponse({ pressed: true, keys }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
339
|
+
return actionResponse("press_key", { pressed: true, keys }, {}, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
287
340
|
});
|
|
288
341
|
registry.register("press_key");
|
|
289
342
|
registerTool("scroll", "Scroll at coordinates", {
|
|
@@ -295,7 +348,7 @@ export function registerTools(server) {
|
|
|
295
348
|
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
296
349
|
const deltaX = params.deltaX ?? 0;
|
|
297
350
|
await withSafety({ action: "scroll", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().scroll(pt.x, pt.y, deltaX, params.deltaY) });
|
|
298
|
-
return actionResponse({ scrolled: true, x: pt.x, y: pt.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
351
|
+
return actionResponse("scroll", { scrolled: true, x: pt.x, y: pt.y }, { x: pt.x, y: pt.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
299
352
|
});
|
|
300
353
|
registry.register("scroll");
|
|
301
354
|
registerTool("drag", "Drag from one point to another", {
|
|
@@ -309,30 +362,100 @@ export function registerTools(server) {
|
|
|
309
362
|
const start = await resolvePoint(params.startX, params.startY, params.windowId);
|
|
310
363
|
const end = await resolvePoint(params.endX, params.endY, params.windowId);
|
|
311
364
|
await withSafety({ action: "drag", params: { startX: start.x, startY: start.y, endX: end.x, endY: end.y }, requiresAccessibility: true, execute: () => getPlatform().drag(start.x, start.y, end.x, end.y, params.button, params.duration) });
|
|
312
|
-
return actionResponse({ dragged: true, startX: start.x, startY: start.y, endX: end.x, endY: end.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
365
|
+
return actionResponse("drag", { dragged: true, startX: start.x, startY: start.y, endX: end.x, endY: end.y }, { startX: start.x, startY: start.y, endX: end.x, endY: end.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
313
366
|
});
|
|
314
367
|
registry.register("drag");
|
|
315
|
-
registerTool("doctor", "Check system permissions and
|
|
368
|
+
registerTool("doctor", "Check system permissions, native helpers, and client readiness", {}, async () => {
|
|
316
369
|
const { checkPermissions } = await import("../safety/permissions.js");
|
|
317
370
|
const { MacOSPlatform: MacPlat } = await import("../platform/macos.js");
|
|
371
|
+
const { existsSync } = await import("node:fs");
|
|
372
|
+
const { join, dirname } = await import("node:path");
|
|
373
|
+
const { fileURLToPath } = await import("node:url");
|
|
374
|
+
const { execFileSync } = await import("node:child_process");
|
|
318
375
|
const permissions = await checkPermissions();
|
|
319
376
|
const screenLocked = process.platform === "darwin" ? new MacPlat().isScreenLocked?.() ?? false : false;
|
|
377
|
+
let nativeHelpers;
|
|
378
|
+
if (process.platform === "darwin") {
|
|
379
|
+
const moduleDir = dirname(fileURLToPath(import.meta.url));
|
|
380
|
+
const checkPaths = (subdirs) => {
|
|
381
|
+
const paths = [
|
|
382
|
+
join(process.cwd(), ...subdirs),
|
|
383
|
+
join(moduleDir, "..", ...subdirs),
|
|
384
|
+
join(moduleDir, "..", "..", ...subdirs),
|
|
385
|
+
];
|
|
386
|
+
return paths.some(p => { try {
|
|
387
|
+
return existsSync(p);
|
|
388
|
+
}
|
|
389
|
+
catch {
|
|
390
|
+
return false;
|
|
391
|
+
} });
|
|
392
|
+
};
|
|
393
|
+
nativeHelpers = {
|
|
394
|
+
cgevent: checkPaths(["native", "cgevent", "cgevent-helper"]),
|
|
395
|
+
ocr: checkPaths(["native", "ocr", "ocr-helper"]),
|
|
396
|
+
};
|
|
397
|
+
}
|
|
398
|
+
let readiness = "ready";
|
|
399
|
+
const issues = [];
|
|
400
|
+
if (!permissions.granted) {
|
|
401
|
+
readiness = "blocked";
|
|
402
|
+
issues.push("Missing macOS permissions: " + permissions.missing.join(", "));
|
|
403
|
+
}
|
|
404
|
+
if (screenLocked) {
|
|
405
|
+
readiness = "blocked";
|
|
406
|
+
issues.push("Screen is locked");
|
|
407
|
+
}
|
|
408
|
+
if (process.platform === "darwin" && nativeHelpers) {
|
|
409
|
+
if (!nativeHelpers.cgevent) {
|
|
410
|
+
readiness = readiness === "ready" ? "degraded" : readiness;
|
|
411
|
+
issues.push("Native CGEvent helper not found (input synthesis may crash on macOS Sequoia+)");
|
|
412
|
+
}
|
|
413
|
+
if (!nativeHelpers.ocr) {
|
|
414
|
+
readiness = readiness === "ready" ? "degraded" : readiness;
|
|
415
|
+
issues.push("Native OCR helper not found (OCR may fail on macOS Sequoia+)");
|
|
416
|
+
}
|
|
417
|
+
}
|
|
418
|
+
const clients = {};
|
|
419
|
+
for (const bin of ["claude", "codex", "opencode", "npx"]) {
|
|
420
|
+
try {
|
|
421
|
+
const path = execFileSync("which", [bin], { encoding: "utf-8", timeout: 2000 }).trim();
|
|
422
|
+
clients[bin] = path || "not found";
|
|
423
|
+
}
|
|
424
|
+
catch {
|
|
425
|
+
clients[bin] = "not found";
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
const recommendations = [];
|
|
429
|
+
if (readiness === "blocked") {
|
|
430
|
+
recommendations.push("Grant missing permissions in System Settings > Privacy & Security, then restart the MCP client.");
|
|
431
|
+
}
|
|
432
|
+
else if (readiness === "degraded") {
|
|
433
|
+
if (nativeHelpers && (!nativeHelpers.cgevent || !nativeHelpers.ocr)) {
|
|
434
|
+
recommendations.push("Run 'npm run build' to compile native Swift helpers.");
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
else {
|
|
438
|
+
recommendations.push("All checks passed. MCP client can proceed with automation.");
|
|
439
|
+
}
|
|
320
440
|
const report = {
|
|
321
|
-
|
|
441
|
+
readiness,
|
|
442
|
+
issues: issues.length > 0 ? issues : undefined,
|
|
443
|
+
recommendations,
|
|
322
444
|
platform: process.platform,
|
|
323
445
|
node: process.version,
|
|
324
446
|
permissions,
|
|
325
447
|
screenLocked,
|
|
448
|
+
nativeHelpers,
|
|
449
|
+
clients,
|
|
326
450
|
safety: {
|
|
327
451
|
urlBlocklist: true,
|
|
328
452
|
lockScreenGuard: process.platform === "darwin",
|
|
329
453
|
typedTextInjectionScan: true,
|
|
330
454
|
},
|
|
331
455
|
stdioCommand: "ucu-mcp",
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
openCode: "Configure ucu-mcp as a local MCP stdio server.",
|
|
456
|
+
metrics: {
|
|
457
|
+
global: metrics.stats(),
|
|
458
|
+
byTool: metrics.byTool(),
|
|
336
459
|
},
|
|
337
460
|
};
|
|
338
461
|
return { content: [{ type: "text", text: JSON.stringify(report, null, 2) }] };
|
|
@@ -343,27 +466,56 @@ export function registerTools(server) {
|
|
|
343
466
|
return { content: [{ type: "text", text: JSON.stringify({ waited: params.ms }) }] };
|
|
344
467
|
});
|
|
345
468
|
registry.register("wait");
|
|
346
|
-
registerTool("wait_for_element", "Poll until an accessibility element matching the criteria
|
|
469
|
+
registerTool("wait_for_element", "Poll until an accessibility element matching the criteria reaches the desired state", {
|
|
347
470
|
text: z.string().optional().describe("Element text"), role: z.string().optional().describe("Element role"),
|
|
348
471
|
app: z.string().optional().describe("Target app"),
|
|
349
472
|
timeout: z.number().optional().describe("Timeout ms (default 5000)"),
|
|
350
473
|
timeoutMs: z.number().optional().describe("Alias for timeout"),
|
|
351
474
|
interval: z.number().optional().describe("Poll interval ms (default 500)"),
|
|
352
475
|
intervalMs: z.number().optional().describe("Alias for interval"),
|
|
476
|
+
until: z.enum(["appear", "disappear", "value_change"]).default("appear").describe("Wait condition: 'appear' (default) waits for a match, 'disappear' waits until no match, 'value_change' waits until first match's value changes"),
|
|
353
477
|
}, async (params) => {
|
|
354
478
|
const deadline = Date.now() + (params.timeout ?? params.timeoutMs ?? 5000);
|
|
355
479
|
const interval = params.interval ?? params.intervalMs ?? 500;
|
|
356
|
-
const
|
|
480
|
+
const until = params.until ?? "appear";
|
|
481
|
+
const effectiveApp = params.app || getActiveTarget()?.appName;
|
|
482
|
+
const query = { text: params.text, role: params.role, app: effectiveApp, maxResults: 1 };
|
|
357
483
|
const { granted } = await checkPermission("accessibility");
|
|
358
484
|
if (!granted)
|
|
359
485
|
throw new PermissionError("accessibility", process.platform);
|
|
486
|
+
let initialValue;
|
|
487
|
+
let hasInitial = false;
|
|
360
488
|
while (Date.now() < deadline) {
|
|
361
|
-
const
|
|
362
|
-
|
|
363
|
-
|
|
489
|
+
const response = await getPlatform().findElement(query);
|
|
490
|
+
const matched = response.results[0];
|
|
491
|
+
if (until === "appear") {
|
|
492
|
+
if (matched)
|
|
493
|
+
return { content: [{ type: "text", text: JSON.stringify({ found: true, element: matched }, null, 2) }] };
|
|
494
|
+
}
|
|
495
|
+
else if (until === "disappear") {
|
|
496
|
+
if (!matched)
|
|
497
|
+
return { content: [{ type: "text", text: JSON.stringify({ found: true, reason: "disappeared" }, null, 2) }] };
|
|
498
|
+
}
|
|
499
|
+
else {
|
|
500
|
+
// value_change: capture the initial value of the first match, then wait for it to differ.
|
|
501
|
+
// A separate `hasInitial` flag is required because the first match's `value` may itself be
|
|
502
|
+
// undefined; using `initialValue === undefined` to mean "not yet captured" would loop
|
|
503
|
+
// forever in that case. On timeout, distinguish "element never appeared" from "value stayed
|
|
504
|
+
// the same" so the model can branch on the result.
|
|
505
|
+
if (matched) {
|
|
506
|
+
if (!hasInitial) {
|
|
507
|
+
initialValue = matched.value;
|
|
508
|
+
hasInitial = true;
|
|
509
|
+
}
|
|
510
|
+
else if (matched.value !== initialValue) {
|
|
511
|
+
return { content: [{ type: "text", text: JSON.stringify({ found: true, oldValue: initialValue, newValue: matched.value }, null, 2) }] };
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
}
|
|
364
515
|
await new Promise(r => setTimeout(r, interval));
|
|
365
516
|
}
|
|
366
|
-
|
|
517
|
+
const reason = until === "value_change" ? (hasInitial ? "value_unchanged" : "never_appeared") : "timeout";
|
|
518
|
+
return { content: [{ type: "text", text: JSON.stringify({ found: false, reason }, null, 2) }] };
|
|
367
519
|
});
|
|
368
520
|
registry.register("wait_for_element");
|
|
369
521
|
registerTool("get_cursor_position", "Get current cursor position", {}, async () => {
|
|
@@ -392,40 +544,61 @@ export function registerTools(server) {
|
|
|
392
544
|
}, async (params) => {
|
|
393
545
|
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
394
546
|
await withSafety({ action: "move", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().move(pt.x, pt.y) });
|
|
395
|
-
return actionResponse({ moved: true, x: pt.x, y: pt.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
547
|
+
return actionResponse("move", { moved: true, x: pt.x, y: pt.y }, { x: pt.x, y: pt.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
396
548
|
});
|
|
397
549
|
registry.register("move");
|
|
398
550
|
registerTool("find_element", "Find accessibility elements by text, role, or app", {
|
|
399
551
|
text: z.string().optional().describe("Text to search"), role: z.string().optional().describe("AX role"), app: z.string().optional().describe("Target app"),
|
|
400
552
|
depth: z.number().optional().describe("AX tree depth"), includeBounds: z.boolean().default(true).describe("Include bounds"), maxResults: z.number().min(1).max(200).default(50).describe("Max results"),
|
|
553
|
+
textMode: z.enum(["contains", "exact", "regex"]).default("contains").describe("Text matching mode: contains (default), exact, or regex"),
|
|
554
|
+
visibleOnly: z.boolean().default(false).describe("Only return elements with valid on-screen bounds"),
|
|
555
|
+
value: z.string().optional().describe("Filter by AX element value (respects textMode)"),
|
|
556
|
+
index: z.number().int().nonnegative().optional().describe("Return only the Nth match (0-based) after all other filtering and sorting"),
|
|
557
|
+
near: z.object({ x: z.number(), y: z.number() }).optional().describe("Sort results by ascending distance to this point and return closest first"),
|
|
401
558
|
}, async (params) => {
|
|
402
|
-
const
|
|
403
|
-
|
|
404
|
-
|
|
559
|
+
const effectiveApp = params.app || getActiveTarget()?.appName;
|
|
560
|
+
const response = await withSafety({ action: "find_element", params: {}, requiresAccessibility: true,
|
|
561
|
+
execute: () => getPlatform().findElement({ text: params.text, role: params.role, app: effectiveApp, depth: params.depth, includeBounds: params.includeBounds, maxResults: params.maxResults, textMode: params.textMode, visibleOnly: params.visibleOnly, value: params.value, index: params.index, near: params.near }) });
|
|
562
|
+
return { content: [{ type: "text", text: JSON.stringify({ results: response.results, metrics: response.metrics }, null, 2) }] };
|
|
405
563
|
});
|
|
406
564
|
registry.register("find_element");
|
|
407
565
|
registerTool("click_element", "Click an accessibility element by its ID", {
|
|
408
566
|
elementId: z.string().describe("AX element identifier"), app: z.string().optional().describe("Target app"), ...captureAfterFields,
|
|
409
567
|
}, async (params) => {
|
|
410
|
-
|
|
411
|
-
|
|
568
|
+
const effectiveApp = params.app || getActiveTarget()?.appName;
|
|
569
|
+
await withSafety({ action: "click_element", params: {}, requiresAccessibility: true, execute: () => getPlatform().clickElement(params.elementId, effectiveApp) });
|
|
570
|
+
return actionResponse("click_element", { clicked: true, elementId: params.elementId }, { elementId: params.elementId, app: effectiveApp }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
412
571
|
});
|
|
413
572
|
registry.register("click_element");
|
|
414
573
|
registerTool("set_value", "Set the value of an accessibility element", {
|
|
415
574
|
elementId: z.string().describe("AX element identifier"), value: z.string().describe("Value to set"), app: z.string().optional().describe("Target app"), ...captureAfterFields,
|
|
416
575
|
}, async (params) => {
|
|
417
|
-
|
|
418
|
-
|
|
576
|
+
const effectiveApp = params.app || getActiveTarget()?.appName;
|
|
577
|
+
await withSafety({ action: "set_value", params: { value: params.value }, requiresAccessibility: true, execute: () => getPlatform().setElementValue(params.elementId, params.value, effectiveApp) });
|
|
578
|
+
return actionResponse("set_value", { setValue: true, elementId: params.elementId }, { elementId: params.elementId, app: effectiveApp }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
419
579
|
});
|
|
420
580
|
registry.register("set_value");
|
|
421
581
|
registerTool("type_in_element", "Type text into an accessibility element, optionally clearing first", {
|
|
422
582
|
elementId: z.string().describe("AX element identifier"), text: z.string().describe("Text to type"),
|
|
423
583
|
app: z.string().optional().describe("Target app"), clearFirst: z.boolean().optional().describe("Clear existing text before typing"), ...captureAfterFields,
|
|
424
584
|
}, async (params) => {
|
|
425
|
-
|
|
426
|
-
|
|
585
|
+
const effectiveApp = params.app || getActiveTarget()?.appName;
|
|
586
|
+
await withSafety({ action: "type_in_element", params: { text: params.text }, requiresAccessibility: true, execute: () => getPlatform().typeInElement(params.elementId, params.text, effectiveApp, params.clearFirst) });
|
|
587
|
+
return actionResponse("type_in_element", { typed: true, elementId: params.elementId, charCount: params.text.length }, { elementId: params.elementId, app: effectiveApp }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
427
588
|
});
|
|
428
589
|
registry.register("type_in_element");
|
|
590
|
+
registerTool("clipboard_read", "Read the current contents of the system clipboard", {}, async () => {
|
|
591
|
+
const text = await withSafety({ action: "clipboard_read", params: {}, execute: () => getPlatform().readClipboard() });
|
|
592
|
+
return { content: [{ type: "text", text: JSON.stringify({ text }, null, 2) }] };
|
|
593
|
+
});
|
|
594
|
+
registry.register("clipboard_read");
|
|
595
|
+
registerTool("clipboard_write", "Write text to the system clipboard (text injection patterns are blocked)", {
|
|
596
|
+
text: z.string().describe("Text to place on the clipboard"),
|
|
597
|
+
}, async (params) => {
|
|
598
|
+
await withSafety({ action: "clipboard_write", params: { text: params.text }, execute: () => getPlatform().writeClipboard(params.text) });
|
|
599
|
+
return { content: [{ type: "text", text: JSON.stringify({ written: true }, null, 2) }] };
|
|
600
|
+
});
|
|
601
|
+
registry.register("clipboard_write");
|
|
429
602
|
log.info("Registered tools", { count: registry.tools.length, tools: registry.tools.join(", ") });
|
|
430
603
|
}
|
|
431
604
|
export class ToolRegistry {
|
|
@@ -38,10 +38,12 @@ export interface AppInfo {
|
|
|
38
38
|
windowCount: number;
|
|
39
39
|
}
|
|
40
40
|
export interface AppTarget {
|
|
41
|
+
targetId: string;
|
|
41
42
|
appName: string;
|
|
42
43
|
pid: number;
|
|
43
44
|
windowId?: string;
|
|
44
45
|
title?: string;
|
|
46
|
+
capturedAt: string;
|
|
45
47
|
}
|
|
46
48
|
export interface BrowserContext {
|
|
47
49
|
appName: string;
|
|
@@ -81,6 +83,17 @@ export interface FindElementOptions {
|
|
|
81
83
|
depth?: number;
|
|
82
84
|
includeBounds?: boolean;
|
|
83
85
|
maxResults?: number;
|
|
86
|
+
textMode?: "contains" | "exact" | "regex";
|
|
87
|
+
visibleOnly?: boolean;
|
|
88
|
+
/** Match against the AX element's current value attribute (respects textMode). */
|
|
89
|
+
value?: string;
|
|
90
|
+
/** Return only the Nth match (0-based) after all other filtering and sorting. */
|
|
91
|
+
index?: number;
|
|
92
|
+
/** Sort results by ascending distance to this point and return closest first. */
|
|
93
|
+
near?: {
|
|
94
|
+
x: number;
|
|
95
|
+
y: number;
|
|
96
|
+
};
|
|
84
97
|
}
|
|
85
98
|
export interface FindElementResult {
|
|
86
99
|
id: string;
|
|
@@ -95,6 +108,16 @@ export interface FindElementResult {
|
|
|
95
108
|
};
|
|
96
109
|
description?: string;
|
|
97
110
|
}
|
|
111
|
+
export interface FindElementMetrics {
|
|
112
|
+
scannedCount: number;
|
|
113
|
+
matchedCount: number;
|
|
114
|
+
durationMs: number;
|
|
115
|
+
truncated: boolean;
|
|
116
|
+
}
|
|
117
|
+
export interface FindElementResponse {
|
|
118
|
+
results: FindElementResult[];
|
|
119
|
+
metrics: FindElementMetrics;
|
|
120
|
+
}
|
|
98
121
|
export interface WindowState {
|
|
99
122
|
window: WindowInfo;
|
|
100
123
|
focusedElement?: ElementInfo;
|
|
@@ -117,11 +140,13 @@ export interface Platform {
|
|
|
117
140
|
ocr(display?: number, region?: ScreenRegion): Promise<OcrResult>;
|
|
118
141
|
type(text: string, delay?: number): Promise<void>;
|
|
119
142
|
key(keys: string[]): Promise<void>;
|
|
120
|
-
findElement(options: FindElementOptions): Promise<
|
|
143
|
+
findElement(options: FindElementOptions): Promise<FindElementResponse>;
|
|
121
144
|
clickElement(elementId: string, app?: string): Promise<void>;
|
|
122
145
|
typeInElement(elementId: string, text: string, app?: string, clearFirst?: boolean): Promise<void>;
|
|
123
146
|
setElementValue?(elementId: string, value: string, app?: string): Promise<void>;
|
|
124
147
|
isScreenLocked?(): boolean;
|
|
125
148
|
saveFocus?(): Promise<void>;
|
|
126
149
|
restoreFocus?(): Promise<void>;
|
|
150
|
+
/** Read the current clipboard contents as text. Required on every platform adapter. */
readClipboard(): Promise<string>;
|
|
151
|
+
/** Replace the clipboard contents with `text`. Required on every platform adapter. */
writeClipboard(text: string): Promise<void>;
|
|
127
152
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions,
|
|
1
|
+
import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions, FindElementResponse } from "./base.js";
|
|
2
2
|
/**
|
|
3
3
|
* Linux platform adapter (AT-SPI2 + xdotool fallback)
|
|
4
4
|
* TODO: Implement with D-Bus AT-SPI2 bindings
|
|
@@ -16,7 +16,9 @@ export declare class LinuxPlatform implements Platform {
|
|
|
16
16
|
type(text: string, delay?: number): Promise<void>;
|
|
17
17
|
key(keys: string[]): Promise<void>;
|
|
18
18
|
ocr(_display?: number, _region?: ScreenRegion): Promise<OcrResult>;
|
|
19
|
-
findElement(_options: FindElementOptions): Promise<
|
|
19
|
+
findElement(_options: FindElementOptions): Promise<FindElementResponse>;
|
|
20
20
|
clickElement(_elementId: string, _app?: string): Promise<void>;
|
|
21
21
|
typeInElement(_elementId: string, _text: string, _app?: string, _clearFirst?: boolean): Promise<void>;
|
|
22
|
+
/** Read the clipboard as text. The Linux implementation shells out to xclip or xsel. */
readClipboard(): Promise<string>;
|
|
23
|
+
/** Write `text` to the clipboard. The Linux implementation shells out to xclip or xsel. */
writeClipboard(text: string): Promise<void>;
|
|
22
24
|
}
|
|
@@ -1,3 +1,27 @@
|
|
|
1
|
+
import { execFileSync } from "node:child_process";
|
|
2
|
+
import { existsSync } from "node:fs";
|
|
3
|
+
import { PlatformError } from "../util/errors.js";
|
|
4
|
+
/**
 * Choose the clipboard command-line utility to use, preferring xclip over xsel.
 *
 * For each tool the well-known absolute install locations are checked on disk
 * first; only if neither exists is the bare name looked up through `which`.
 *
 * @returns "xclip" or "xsel" (always the bare command name, never a path),
 *          or undefined when neither utility could be found.
 */
function pickClipboardTool() {
    // Ordered by preference; each candidate list is probed left to right.
    const candidatesByTool = [
        ["xclip", ["/usr/bin/xclip", "/usr/local/bin/xclip", "xclip"]],
        ["xsel", ["/usr/bin/xsel", "/usr/local/bin/xsel", "xsel"]],
    ];
    // Absolute paths are tested on disk; bare names go through `which`.
    const isAvailable = (candidate) => {
        if (candidate.startsWith("/")) {
            return existsSync(candidate);
        }
        return which(candidate);
    };
    for (const [toolName, candidates] of candidatesByTool) {
        if (candidates.some(isAvailable)) {
            return toolName;
        }
    }
    return undefined;
}
|
|
16
|
+
/**
 * Report whether the external `which` command can resolve `bin`.
 *
 * @param bin - Command name to look up.
 * @returns true when `which` exits successfully; false when it exits non-zero,
 *          cannot be started, or does not finish within the 2 second timeout.
 */
function which(bin) {
    let resolved = true;
    try {
        // Output is discarded; only the exit status matters.
        execFileSync("which", [bin], { encoding: "utf-8", timeout: 2000, stdio: "ignore" });
    }
    catch {
        resolved = false;
    }
    return resolved;
}
|
|
1
25
|
/**
|
|
2
26
|
* Linux platform adapter (AT-SPI2 + xdotool fallback)
|
|
3
27
|
* TODO: Implement with D-Bus AT-SPI2 bindings
|
|
@@ -59,4 +83,31 @@ export class LinuxPlatform {
|
|
|
59
83
|
/**
 * Placeholder: typing into an accessibility element is not implemented on Linux.
 * All parameters are unused. Because the method is async, the throw below
 * surfaces to callers as a rejected promise.
 */
async typeInElement(_elementId, _text, _app, _clearFirst) {
|
|
60
84
|
// Plain Error (not PlatformError) with a fixed "Not implemented" message.
throw new Error("Not implemented: Linux typeInElement");
|
|
61
85
|
}
|
|
86
|
+
async readClipboard() {
|
|
87
|
+
const tool = pickClipboardTool();
|
|
88
|
+
if (!tool) {
|
|
89
|
+
throw new PlatformError("readClipboard requires xclip or xsel on PATH", false);
|
|
90
|
+
}
|
|
91
|
+
try {
|
|
92
|
+
const args = tool === "xclip" ? ["-selection", "clipboard", "-o"] : ["--clipboard", "--output"];
|
|
93
|
+
const out = execFileSync(tool, args, { encoding: "utf-8", timeout: 5000 });
|
|
94
|
+
return out;
|
|
95
|
+
}
|
|
96
|
+
catch (error) {
|
|
97
|
+
throw new PlatformError(`read_clipboard failed: ${error.message}`);
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
async writeClipboard(text) {
|
|
101
|
+
const tool = pickClipboardTool();
|
|
102
|
+
if (!tool) {
|
|
103
|
+
throw new PlatformError("writeClipboard requires xclip or xsel on PATH", false);
|
|
104
|
+
}
|
|
105
|
+
try {
|
|
106
|
+
const args = tool === "xclip" ? ["-selection", "clipboard"] : ["--clipboard", "--input"];
|
|
107
|
+
execFileSync(tool, args, { input: text, encoding: "utf-8", timeout: 5000 });
|
|
108
|
+
}
|
|
109
|
+
catch (error) {
|
|
110
|
+
throw new PlatformError(`write_clipboard failed: ${error.message}`);
|
|
111
|
+
}
|
|
112
|
+
}
|
|
62
113
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions,
|
|
1
|
+
import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions, FindElementResponse, AppInfo, AppTarget, BrowserContext, ScreenshotOptions } from "./base.js";
|
|
2
2
|
export declare class MacOSPlatform implements Platform {
|
|
3
3
|
private readonly elementCache;
|
|
4
4
|
private readonly elementCacheTtlMs;
|
|
@@ -13,6 +13,8 @@ export declare class MacOSPlatform implements Platform {
|
|
|
13
13
|
private evictOverflowCacheEntries;
|
|
14
14
|
/** Check whether a cached element descriptor has expired. */
|
|
15
15
|
private isCacheEntryExpired;
|
|
16
|
+
/** Validate that the active target window still exists. */
|
|
17
|
+
validateActiveTarget(): Promise<void>;
|
|
16
18
|
/** Save the current frontmost app/window so we can restore after an action. */
|
|
17
19
|
saveFocus(): Promise<void>;
|
|
18
20
|
/** Restore the previously saved frontmost app/window. */
|
|
@@ -36,8 +38,10 @@ export declare class MacOSPlatform implements Platform {
|
|
|
36
38
|
private ocrJxa;
|
|
37
39
|
type(text: string, delay?: number): Promise<void>;
|
|
38
40
|
key(keys: string[]): Promise<void>;
|
|
39
|
-
findElement(options: FindElementOptions): Promise<
|
|
41
|
+
findElement(options: FindElementOptions): Promise<FindElementResponse>;
|
|
40
42
|
clickElement(elementId: string, app?: string): Promise<void>;
|
|
41
43
|
typeInElement(elementId: string, text: string, app?: string, clearFirst?: boolean): Promise<void>;
|
|
44
|
+
readClipboard(): Promise<string>;
|
|
45
|
+
writeClipboard(text: string): Promise<void>;
|
|
42
46
|
setElementValue(elementId: string, value: string, app?: string): Promise<void>;
|
|
43
47
|
}
|