ucu-mcp 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -54
- package/README.md +90 -4
- package/dist/src/mcp/server.js +11 -6
- package/dist/src/mcp/tools.d.ts +6 -1
- package/dist/src/mcp/tools.js +206 -47
- package/dist/src/platform/base.d.ts +26 -1
- package/dist/src/platform/linux.d.ts +4 -2
- package/dist/src/platform/linux.js +51 -0
- package/dist/src/platform/macos.d.ts +6 -2
- package/dist/src/platform/macos.js +160 -16
- package/dist/src/platform/windows.d.ts +4 -2
- package/dist/src/platform/windows.js +33 -0
- package/dist/src/safety/guard.d.ts +8 -1
- package/dist/src/safety/guard.js +43 -4
- package/dist/src/util/errors.d.ts +6 -0
- package/dist/src/util/errors.js +8 -0
- package/package.json +2 -2
package/dist/src/mcp/tools.js
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Tool registry for UCU-MCP.
|
|
3
3
|
*
|
|
4
|
-
* Registers
|
|
4
|
+
* Registers 24 MCP tools on the server and dispatches each call through
|
|
5
5
|
* a shared safety/permission/retry pipeline (`withSafety`).
|
|
6
6
|
*/
|
|
7
7
|
import { z } from "zod";
|
|
8
8
|
import { MacOSPlatform } from "../platform/macos.js";
|
|
9
|
-
import { SafetyGuard } from "../safety/guard.js";
|
|
9
|
+
import { SafetyGuard, classifyAction } from "../safety/guard.js";
|
|
10
10
|
import { checkPermission } from "../safety/permissions.js";
|
|
11
11
|
import { retry } from "../util/retry.js";
|
|
12
12
|
import { createLogger } from "../util/logger.js";
|
|
@@ -20,6 +20,14 @@ function getPlatform() {
|
|
|
20
20
|
return _platform;
|
|
21
21
|
}
|
|
22
22
|
const safety = new SafetyGuard();
|
|
23
|
+
// Active target context — set by focus_app, used by AX element tools
|
|
24
|
+
let activeTargetContext;
|
|
25
|
+
/**
|
|
26
|
+
* Get the currently active target context (set by focus_app).
|
|
27
|
+
*/
|
|
28
|
+
export function getActiveTarget() {
|
|
29
|
+
return activeTargetContext;
|
|
30
|
+
}
|
|
23
31
|
// User activity monitor — pauses automation when user moves the cursor
|
|
24
32
|
let lastCursorPos = { x: 0, y: 0 };
|
|
25
33
|
let userActivityInterval;
|
|
@@ -43,6 +51,8 @@ function recoveryHint(code) {
|
|
|
43
51
|
switch (code) {
|
|
44
52
|
case "WINDOW_NOT_FOUND":
|
|
45
53
|
return "Run list_windows again, then retry with a fresh windowId or omit windowId for screen coordinates.";
|
|
54
|
+
case "TARGET_STALE":
|
|
55
|
+
return "Run focus_app again for the target app, or run list_windows and retry with a fresh windowId.";
|
|
46
56
|
case "ELEMENT_NOT_FOUND":
|
|
47
57
|
return "Run find_element again, then retry with a fresh elementId.";
|
|
48
58
|
case "PERMISSION_DENIED":
|
|
@@ -61,28 +71,63 @@ function recoveryHint(code) {
|
|
|
61
71
|
return "Inspect the error message, observe the current UI state, and retry only if the operation is safe.";
|
|
62
72
|
}
|
|
63
73
|
}
|
|
64
|
-
function
|
|
74
|
+
function errorDetails(error) {
|
|
65
75
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
66
76
|
const code = error instanceof UcuError ? error.code : "UNKNOWN_ERROR";
|
|
67
77
|
const retryable = error instanceof UcuError ? error.retryable : false;
|
|
78
|
+
return {
|
|
79
|
+
name: err.name,
|
|
80
|
+
code,
|
|
81
|
+
retryable,
|
|
82
|
+
message: err.message,
|
|
83
|
+
recovery: recoveryHint(code),
|
|
84
|
+
};
|
|
85
|
+
}
|
|
86
|
+
let _actionCounter = 0;
|
|
87
|
+
function nextActionId() {
|
|
88
|
+
_actionCounter = (_actionCounter + 1) % 1_000_000;
|
|
89
|
+
return `a${Date.now().toString(36)}-${_actionCounter.toString(36)}`;
|
|
90
|
+
}
|
|
91
|
+
function buildActionReceipt(action, status, target, result, captureRequested, captureFormat, captureMaxWidth, captureError, warnings = []) {
|
|
92
|
+
const captureStatus = captureRequested
|
|
93
|
+
? captureError ? "error" : "ok"
|
|
94
|
+
: "skipped";
|
|
95
|
+
return {
|
|
96
|
+
actionId: nextActionId(),
|
|
97
|
+
action,
|
|
98
|
+
status,
|
|
99
|
+
target,
|
|
100
|
+
result,
|
|
101
|
+
capture: {
|
|
102
|
+
requested: captureRequested,
|
|
103
|
+
status: captureStatus,
|
|
104
|
+
...(captureFormat && { format: captureFormat }),
|
|
105
|
+
...(captureMaxWidth && { maxWidth: captureMaxWidth }),
|
|
106
|
+
...(captureError && { error: captureError }),
|
|
107
|
+
},
|
|
108
|
+
warnings,
|
|
109
|
+
next: captureError
|
|
110
|
+
? "screenshot"
|
|
111
|
+
: status === "partial"
|
|
112
|
+
? "get_window_state"
|
|
113
|
+
: "find_element or get_window_state",
|
|
114
|
+
};
|
|
115
|
+
}
|
|
116
|
+
function mcpErrorResponse(error) {
|
|
68
117
|
return {
|
|
69
118
|
isError: true,
|
|
70
119
|
content: [
|
|
71
120
|
jsonText({
|
|
72
|
-
error:
|
|
73
|
-
name: err.name,
|
|
74
|
-
code,
|
|
75
|
-
retryable,
|
|
76
|
-
message: err.message,
|
|
77
|
-
recovery: recoveryHint(code),
|
|
78
|
-
},
|
|
121
|
+
error: errorDetails(error),
|
|
79
122
|
}),
|
|
80
123
|
],
|
|
81
124
|
};
|
|
82
125
|
}
|
|
83
|
-
async function actionResponse(result, captureAfter, captureFormat = "jpeg", captureMaxWidth = 1280) {
|
|
84
|
-
|
|
85
|
-
|
|
126
|
+
async function actionResponse(action, result, target, captureAfter, captureFormat = "jpeg", captureMaxWidth = 1280, warnings = []) {
|
|
127
|
+
const receipt = buildActionReceipt(action, "ok", target, result, captureAfter ?? false, captureFormat, captureMaxWidth, undefined, warnings);
|
|
128
|
+
if (!captureAfter) {
|
|
129
|
+
return { content: [jsonText(receipt)] };
|
|
130
|
+
}
|
|
86
131
|
try {
|
|
87
132
|
const buf = await getPlatform().screenshot(undefined, undefined, {
|
|
88
133
|
format: captureFormat,
|
|
@@ -90,7 +135,7 @@ async function actionResponse(result, captureAfter, captureFormat = "jpeg", capt
|
|
|
90
135
|
});
|
|
91
136
|
return {
|
|
92
137
|
content: [
|
|
93
|
-
jsonText(
|
|
138
|
+
jsonText(receipt),
|
|
94
139
|
{
|
|
95
140
|
type: "image",
|
|
96
141
|
data: buf.toString("base64"),
|
|
@@ -99,8 +144,9 @@ async function actionResponse(result, captureAfter, captureFormat = "jpeg", capt
|
|
|
99
144
|
],
|
|
100
145
|
};
|
|
101
146
|
}
|
|
102
|
-
catch {
|
|
103
|
-
|
|
147
|
+
catch (error) {
|
|
148
|
+
const partialReceipt = buildActionReceipt(action, "partial", target, result, true, captureFormat, captureMaxWidth, errorDetails(error), [...warnings, "Post-action screenshot capture failed"]);
|
|
149
|
+
return { content: [jsonText(partialReceipt)] };
|
|
104
150
|
}
|
|
105
151
|
}
|
|
106
152
|
const retryableActions = new Set([
|
|
@@ -118,7 +164,9 @@ async function withSafety(sa) {
|
|
|
118
164
|
const platform = getPlatform();
|
|
119
165
|
if (platform.isScreenLocked?.())
|
|
120
166
|
throw new SafetyError("Screen is locked");
|
|
121
|
-
const check = safety.checkAction(sa.action, sa.params
|
|
167
|
+
const check = safety.checkAction(sa.action, sa.params, {
|
|
168
|
+
skipUserActivityPause: sa.skipUserActivityPause ?? classifyAction(sa.action) === "observe",
|
|
169
|
+
});
|
|
122
170
|
if (!check.allowed)
|
|
123
171
|
throw new SafetyError(check.reason ?? "Action blocked by safety guard");
|
|
124
172
|
if (sa.requiresAccessibility) {
|
|
@@ -224,13 +272,15 @@ export function registerTools(server) {
|
|
|
224
272
|
app: z.string().describe("Application name to focus"),
|
|
225
273
|
}, async (params) => {
|
|
226
274
|
const target = await withSafety({ action: "focus_app", params: {}, requiresAccessibility: true, execute: () => getPlatform().focusApp(params.app) });
|
|
275
|
+
activeTargetContext = target;
|
|
227
276
|
return { content: [{ type: "text", text: JSON.stringify(target, null, 2) }] };
|
|
228
277
|
});
|
|
229
278
|
registry.register("focus_app");
|
|
230
279
|
registerTool("get_window_state", "Get detailed state of a window including accessibility tree", {
|
|
231
280
|
windowId: z.string().optional().describe("Window ID"), depth: z.number().optional().describe("AX tree depth"), includeBounds: z.boolean().optional().describe("Include element bounds"),
|
|
232
281
|
}, async (params) => {
|
|
233
|
-
const
|
|
282
|
+
const effectiveWindowId = params.windowId || getActiveTarget()?.windowId;
|
|
283
|
+
const state = await withSafety({ action: "get_window_state", params: {}, requiresAccessibility: true, execute: () => getPlatform().getWindowState(effectiveWindowId, params.depth, params.includeBounds) });
|
|
234
284
|
return { content: [{ type: "text", text: JSON.stringify(state, null, 2) }] };
|
|
235
285
|
});
|
|
236
286
|
registry.register("get_window_state");
|
|
@@ -242,7 +292,7 @@ export function registerTools(server) {
|
|
|
242
292
|
}, async (params) => {
|
|
243
293
|
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
244
294
|
await withSafety({ action: "click", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().click(pt.x, pt.y, params.button) });
|
|
245
|
-
return actionResponse({ clicked: true, x: pt.x, y: pt.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
295
|
+
return actionResponse("click", { clicked: true, x: pt.x, y: pt.y }, { x: pt.x, y: pt.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
246
296
|
});
|
|
247
297
|
registry.register("click");
|
|
248
298
|
registerTool("double_click", "Double-click at screen coordinates", {
|
|
@@ -253,7 +303,7 @@ export function registerTools(server) {
|
|
|
253
303
|
}, async (params) => {
|
|
254
304
|
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
255
305
|
await withSafety({ action: "click", params: { x: pt.x, y: pt.y, doubleClick: true }, requiresAccessibility: true, execute: () => getPlatform().click(pt.x, pt.y, params.button, true) });
|
|
256
|
-
return actionResponse({ doubleClicked: true, x: pt.x, y: pt.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
306
|
+
return actionResponse("double_click", { doubleClicked: true, x: pt.x, y: pt.y }, { x: pt.x, y: pt.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
257
307
|
});
|
|
258
308
|
registry.register("double_click");
|
|
259
309
|
registerTool("type_text", "Type text at the current cursor position", {
|
|
@@ -264,7 +314,7 @@ export function registerTools(server) {
|
|
|
264
314
|
if (params.windowId)
|
|
265
315
|
throw new UnsupportedParameterError("windowId-targeted keyboard typing is not implemented");
|
|
266
316
|
await withSafety({ action: "type_text", params: { text: params.text }, requiresAccessibility: true, execute: () => getPlatform().type(params.text, params.delay) });
|
|
267
|
-
return actionResponse({ typed: true, charCount: params.text.length }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
317
|
+
return actionResponse("type_text", { typed: true, charCount: params.text.length }, {}, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
268
318
|
});
|
|
269
319
|
registry.register("type_text");
|
|
270
320
|
registerTool("press_key", "Press a keyboard shortcut", {
|
|
@@ -283,7 +333,7 @@ export function registerTools(server) {
|
|
|
283
333
|
if (keys.length === 0)
|
|
284
334
|
throw new UnsupportedParameterError("press_key requires at least one key");
|
|
285
335
|
await withSafety({ action: "press_key", params: { keys }, requiresAccessibility: true, execute: () => getPlatform().key(keys) });
|
|
286
|
-
return actionResponse({ pressed: true, keys }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
336
|
+
return actionResponse("press_key", { pressed: true, keys }, {}, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
287
337
|
});
|
|
288
338
|
registry.register("press_key");
|
|
289
339
|
registerTool("scroll", "Scroll at coordinates", {
|
|
@@ -295,7 +345,7 @@ export function registerTools(server) {
|
|
|
295
345
|
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
296
346
|
const deltaX = params.deltaX ?? 0;
|
|
297
347
|
await withSafety({ action: "scroll", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().scroll(pt.x, pt.y, deltaX, params.deltaY) });
|
|
298
|
-
return actionResponse({ scrolled: true, x: pt.x, y: pt.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
348
|
+
return actionResponse("scroll", { scrolled: true, x: pt.x, y: pt.y }, { x: pt.x, y: pt.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
299
349
|
});
|
|
300
350
|
registry.register("scroll");
|
|
301
351
|
registerTool("drag", "Drag from one point to another", {
|
|
@@ -309,31 +359,97 @@ export function registerTools(server) {
|
|
|
309
359
|
const start = await resolvePoint(params.startX, params.startY, params.windowId);
|
|
310
360
|
const end = await resolvePoint(params.endX, params.endY, params.windowId);
|
|
311
361
|
await withSafety({ action: "drag", params: { startX: start.x, startY: start.y, endX: end.x, endY: end.y }, requiresAccessibility: true, execute: () => getPlatform().drag(start.x, start.y, end.x, end.y, params.button, params.duration) });
|
|
312
|
-
return actionResponse({ dragged: true, startX: start.x, startY: start.y, endX: end.x, endY: end.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
362
|
+
return actionResponse("drag", { dragged: true, startX: start.x, startY: start.y, endX: end.x, endY: end.y }, { startX: start.x, startY: start.y, endX: end.x, endY: end.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
313
363
|
});
|
|
314
364
|
registry.register("drag");
|
|
315
|
-
registerTool("doctor", "Check system permissions and
|
|
365
|
+
registerTool("doctor", "Check system permissions, native helpers, and client readiness", {}, async () => {
|
|
316
366
|
const { checkPermissions } = await import("../safety/permissions.js");
|
|
317
367
|
const { MacOSPlatform: MacPlat } = await import("../platform/macos.js");
|
|
368
|
+
const { existsSync } = await import("node:fs");
|
|
369
|
+
const { join, dirname } = await import("node:path");
|
|
370
|
+
const { fileURLToPath } = await import("node:url");
|
|
371
|
+
const { execFileSync } = await import("node:child_process");
|
|
318
372
|
const permissions = await checkPermissions();
|
|
319
373
|
const screenLocked = process.platform === "darwin" ? new MacPlat().isScreenLocked?.() ?? false : false;
|
|
374
|
+
let nativeHelpers;
|
|
375
|
+
if (process.platform === "darwin") {
|
|
376
|
+
const moduleDir = dirname(fileURLToPath(import.meta.url));
|
|
377
|
+
const checkPaths = (subdirs) => {
|
|
378
|
+
const paths = [
|
|
379
|
+
join(process.cwd(), ...subdirs),
|
|
380
|
+
join(moduleDir, "..", ...subdirs),
|
|
381
|
+
join(moduleDir, "..", "..", ...subdirs),
|
|
382
|
+
];
|
|
383
|
+
return paths.some(p => { try {
|
|
384
|
+
return existsSync(p);
|
|
385
|
+
}
|
|
386
|
+
catch {
|
|
387
|
+
return false;
|
|
388
|
+
} });
|
|
389
|
+
};
|
|
390
|
+
nativeHelpers = {
|
|
391
|
+
cgevent: checkPaths(["native", "cgevent", "cgevent-helper"]),
|
|
392
|
+
ocr: checkPaths(["native", "ocr", "ocr-helper"]),
|
|
393
|
+
};
|
|
394
|
+
}
|
|
395
|
+
let readiness = "ready";
|
|
396
|
+
const issues = [];
|
|
397
|
+
if (!permissions.granted) {
|
|
398
|
+
readiness = "blocked";
|
|
399
|
+
issues.push("Missing macOS permissions: " + permissions.missing.join(", "));
|
|
400
|
+
}
|
|
401
|
+
if (screenLocked) {
|
|
402
|
+
readiness = "blocked";
|
|
403
|
+
issues.push("Screen is locked");
|
|
404
|
+
}
|
|
405
|
+
if (process.platform === "darwin" && nativeHelpers) {
|
|
406
|
+
if (!nativeHelpers.cgevent) {
|
|
407
|
+
readiness = readiness === "ready" ? "degraded" : readiness;
|
|
408
|
+
issues.push("Native CGEvent helper not found (input synthesis may crash on macOS Sequoia+)");
|
|
409
|
+
}
|
|
410
|
+
if (!nativeHelpers.ocr) {
|
|
411
|
+
readiness = readiness === "ready" ? "degraded" : readiness;
|
|
412
|
+
issues.push("Native OCR helper not found (OCR may fail on macOS Sequoia+)");
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
const clients = {};
|
|
416
|
+
for (const bin of ["claude", "codex", "opencode", "npx"]) {
|
|
417
|
+
try {
|
|
418
|
+
const path = execFileSync("which", [bin], { encoding: "utf-8", timeout: 2000 }).trim();
|
|
419
|
+
clients[bin] = path || "not found";
|
|
420
|
+
}
|
|
421
|
+
catch {
|
|
422
|
+
clients[bin] = "not found";
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
const recommendations = [];
|
|
426
|
+
if (readiness === "blocked") {
|
|
427
|
+
recommendations.push("Grant missing permissions in System Settings > Privacy & Security, then restart the MCP client.");
|
|
428
|
+
}
|
|
429
|
+
else if (readiness === "degraded") {
|
|
430
|
+
if (nativeHelpers && (!nativeHelpers.cgevent || !nativeHelpers.ocr)) {
|
|
431
|
+
recommendations.push("Run 'npm run build' to compile native Swift helpers.");
|
|
432
|
+
}
|
|
433
|
+
}
|
|
434
|
+
else {
|
|
435
|
+
recommendations.push("All checks passed. MCP client can proceed with automation.");
|
|
436
|
+
}
|
|
320
437
|
const report = {
|
|
321
|
-
|
|
438
|
+
readiness,
|
|
439
|
+
issues: issues.length > 0 ? issues : undefined,
|
|
440
|
+
recommendations,
|
|
322
441
|
platform: process.platform,
|
|
323
442
|
node: process.version,
|
|
324
443
|
permissions,
|
|
325
444
|
screenLocked,
|
|
445
|
+
nativeHelpers,
|
|
446
|
+
clients,
|
|
326
447
|
safety: {
|
|
327
448
|
urlBlocklist: true,
|
|
328
449
|
lockScreenGuard: process.platform === "darwin",
|
|
329
450
|
typedTextInjectionScan: true,
|
|
330
451
|
},
|
|
331
452
|
stdioCommand: "ucu-mcp",
|
|
332
|
-
clients: {
|
|
333
|
-
claudeCodeCli: "Run ucu-mcp as an MCP stdio server.",
|
|
334
|
-
claudeCodeDesktop: "Configure ucu-mcp as a local MCP stdio server.",
|
|
335
|
-
openCode: "Configure ucu-mcp as a local MCP stdio server.",
|
|
336
|
-
},
|
|
337
453
|
};
|
|
338
454
|
return { content: [{ type: "text", text: JSON.stringify(report, null, 2) }] };
|
|
339
455
|
});
|
|
@@ -343,27 +459,49 @@ export function registerTools(server) {
|
|
|
343
459
|
return { content: [{ type: "text", text: JSON.stringify({ waited: params.ms }) }] };
|
|
344
460
|
});
|
|
345
461
|
registry.register("wait");
|
|
346
|
-
registerTool("wait_for_element", "Poll until an accessibility element matching the criteria
|
|
462
|
+
registerTool("wait_for_element", "Poll until an accessibility element matching the criteria reaches the desired state", {
|
|
347
463
|
text: z.string().optional().describe("Element text"), role: z.string().optional().describe("Element role"),
|
|
348
464
|
app: z.string().optional().describe("Target app"),
|
|
349
465
|
timeout: z.number().optional().describe("Timeout ms (default 5000)"),
|
|
350
466
|
timeoutMs: z.number().optional().describe("Alias for timeout"),
|
|
351
467
|
interval: z.number().optional().describe("Poll interval ms (default 500)"),
|
|
352
468
|
intervalMs: z.number().optional().describe("Alias for interval"),
|
|
469
|
+
until: z.enum(["appear", "disappear", "value_change"]).default("appear").describe("Wait condition: 'appear' (default) waits for a match, 'disappear' waits until no match, 'value_change' waits until first match's value changes"),
|
|
353
470
|
}, async (params) => {
|
|
354
471
|
const deadline = Date.now() + (params.timeout ?? params.timeoutMs ?? 5000);
|
|
355
472
|
const interval = params.interval ?? params.intervalMs ?? 500;
|
|
356
|
-
const
|
|
473
|
+
const until = params.until ?? "appear";
|
|
474
|
+
const effectiveApp = params.app || getActiveTarget()?.appName;
|
|
475
|
+
const query = { text: params.text, role: params.role, app: effectiveApp, maxResults: 1 };
|
|
357
476
|
const { granted } = await checkPermission("accessibility");
|
|
358
477
|
if (!granted)
|
|
359
478
|
throw new PermissionError("accessibility", process.platform);
|
|
479
|
+
let initialValue;
|
|
360
480
|
while (Date.now() < deadline) {
|
|
361
|
-
const
|
|
362
|
-
|
|
363
|
-
|
|
481
|
+
const response = await getPlatform().findElement(query);
|
|
482
|
+
const matched = response.results[0];
|
|
483
|
+
if (until === "appear") {
|
|
484
|
+
if (matched)
|
|
485
|
+
return { content: [{ type: "text", text: JSON.stringify({ found: true, element: matched }, null, 2) }] };
|
|
486
|
+
}
|
|
487
|
+
else if (until === "disappear") {
|
|
488
|
+
if (!matched)
|
|
489
|
+
return { content: [{ type: "text", text: JSON.stringify({ found: true, reason: "disappeared" }, null, 2) }] };
|
|
490
|
+
}
|
|
491
|
+
else {
|
|
492
|
+
// value_change: capture the initial value of the first match, then wait for it to differ
|
|
493
|
+
if (matched) {
|
|
494
|
+
if (initialValue === undefined) {
|
|
495
|
+
initialValue = matched.value;
|
|
496
|
+
}
|
|
497
|
+
else if (matched.value !== initialValue) {
|
|
498
|
+
return { content: [{ type: "text", text: JSON.stringify({ found: true, oldValue: initialValue, newValue: matched.value }, null, 2) }] };
|
|
499
|
+
}
|
|
500
|
+
}
|
|
501
|
+
}
|
|
364
502
|
await new Promise(r => setTimeout(r, interval));
|
|
365
503
|
}
|
|
366
|
-
return { content: [{ type: "text", text: JSON.stringify({ found: false, reason: "timeout" }) }] };
|
|
504
|
+
return { content: [{ type: "text", text: JSON.stringify({ found: false, reason: "timeout" }, null, 2) }] };
|
|
367
505
|
});
|
|
368
506
|
registry.register("wait_for_element");
|
|
369
507
|
registerTool("get_cursor_position", "Get current cursor position", {}, async () => {
|
|
@@ -392,40 +530,61 @@ export function registerTools(server) {
|
|
|
392
530
|
}, async (params) => {
|
|
393
531
|
const pt = await resolvePoint(params.x, params.y, params.windowId);
|
|
394
532
|
await withSafety({ action: "move", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().move(pt.x, pt.y) });
|
|
395
|
-
return actionResponse({ moved: true, x: pt.x, y: pt.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
533
|
+
return actionResponse("move", { moved: true, x: pt.x, y: pt.y }, { x: pt.x, y: pt.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
396
534
|
});
|
|
397
535
|
registry.register("move");
|
|
398
536
|
registerTool("find_element", "Find accessibility elements by text, role, or app", {
|
|
399
537
|
text: z.string().optional().describe("Text to search"), role: z.string().optional().describe("AX role"), app: z.string().optional().describe("Target app"),
|
|
400
538
|
depth: z.number().optional().describe("AX tree depth"), includeBounds: z.boolean().default(true).describe("Include bounds"), maxResults: z.number().min(1).max(200).default(50).describe("Max results"),
|
|
539
|
+
textMode: z.enum(["contains", "exact", "regex"]).default("contains").describe("Text matching mode: contains (default), exact, or regex"),
|
|
540
|
+
visibleOnly: z.boolean().default(false).describe("Only return elements with valid on-screen bounds"),
|
|
541
|
+
value: z.string().optional().describe("Filter by AX element value (respects textMode)"),
|
|
542
|
+
index: z.number().int().nonnegative().optional().describe("Return only the Nth match (0-based) after all other filtering and sorting"),
|
|
543
|
+
near: z.object({ x: z.number(), y: z.number() }).optional().describe("Sort results by ascending distance to this point and return closest first"),
|
|
401
544
|
}, async (params) => {
|
|
402
|
-
const
|
|
403
|
-
|
|
404
|
-
|
|
545
|
+
const effectiveApp = params.app || getActiveTarget()?.appName;
|
|
546
|
+
const response = await withSafety({ action: "find_element", params: {}, requiresAccessibility: true,
|
|
547
|
+
execute: () => getPlatform().findElement({ text: params.text, role: params.role, app: effectiveApp, depth: params.depth, includeBounds: params.includeBounds, maxResults: params.maxResults, textMode: params.textMode, visibleOnly: params.visibleOnly, value: params.value, index: params.index, near: params.near }) });
|
|
548
|
+
return { content: [{ type: "text", text: JSON.stringify({ results: response.results, metrics: response.metrics }, null, 2) }] };
|
|
405
549
|
});
|
|
406
550
|
registry.register("find_element");
|
|
407
551
|
registerTool("click_element", "Click an accessibility element by its ID", {
|
|
408
552
|
elementId: z.string().describe("AX element identifier"), app: z.string().optional().describe("Target app"), ...captureAfterFields,
|
|
409
553
|
}, async (params) => {
|
|
410
|
-
|
|
411
|
-
|
|
554
|
+
const effectiveApp = params.app || getActiveTarget()?.appName;
|
|
555
|
+
await withSafety({ action: "click_element", params: {}, requiresAccessibility: true, execute: () => getPlatform().clickElement(params.elementId, effectiveApp) });
|
|
556
|
+
return actionResponse("click_element", { clicked: true, elementId: params.elementId }, { elementId: params.elementId, app: effectiveApp }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
412
557
|
});
|
|
413
558
|
registry.register("click_element");
|
|
414
559
|
registerTool("set_value", "Set the value of an accessibility element", {
|
|
415
560
|
elementId: z.string().describe("AX element identifier"), value: z.string().describe("Value to set"), app: z.string().optional().describe("Target app"), ...captureAfterFields,
|
|
416
561
|
}, async (params) => {
|
|
417
|
-
|
|
418
|
-
|
|
562
|
+
const effectiveApp = params.app || getActiveTarget()?.appName;
|
|
563
|
+
await withSafety({ action: "set_value", params: { value: params.value }, requiresAccessibility: true, execute: () => getPlatform().setElementValue(params.elementId, params.value, effectiveApp) });
|
|
564
|
+
return actionResponse("set_value", { setValue: true, elementId: params.elementId }, { elementId: params.elementId, app: effectiveApp }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
419
565
|
});
|
|
420
566
|
registry.register("set_value");
|
|
421
567
|
registerTool("type_in_element", "Type text into an accessibility element, optionally clearing first", {
|
|
422
568
|
elementId: z.string().describe("AX element identifier"), text: z.string().describe("Text to type"),
|
|
423
569
|
app: z.string().optional().describe("Target app"), clearFirst: z.boolean().optional().describe("Clear existing text before typing"), ...captureAfterFields,
|
|
424
570
|
}, async (params) => {
|
|
425
|
-
|
|
426
|
-
|
|
571
|
+
const effectiveApp = params.app || getActiveTarget()?.appName;
|
|
572
|
+
await withSafety({ action: "type_in_element", params: { text: params.text }, requiresAccessibility: true, execute: () => getPlatform().typeInElement(params.elementId, params.text, effectiveApp, params.clearFirst) });
|
|
573
|
+
return actionResponse("type_in_element", { typed: true, elementId: params.elementId, charCount: params.text.length }, { elementId: params.elementId, app: effectiveApp }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
|
|
427
574
|
});
|
|
428
575
|
registry.register("type_in_element");
|
|
576
|
+
registerTool("clipboard_read", "Read the current contents of the system clipboard", {}, async () => {
|
|
577
|
+
const text = await withSafety({ action: "clipboard_read", params: {}, execute: () => getPlatform().readClipboard() });
|
|
578
|
+
return { content: [{ type: "text", text: JSON.stringify({ text }, null, 2) }] };
|
|
579
|
+
});
|
|
580
|
+
registry.register("clipboard_read");
|
|
581
|
+
registerTool("clipboard_write", "Write text to the system clipboard (text injection patterns are blocked)", {
|
|
582
|
+
text: z.string().describe("Text to place on the clipboard"),
|
|
583
|
+
}, async (params) => {
|
|
584
|
+
await withSafety({ action: "clipboard_write", params: { text: params.text }, execute: () => getPlatform().writeClipboard(params.text) });
|
|
585
|
+
return { content: [{ type: "text", text: JSON.stringify({ written: true }, null, 2) }] };
|
|
586
|
+
});
|
|
587
|
+
registry.register("clipboard_write");
|
|
429
588
|
log.info("Registered tools", { count: registry.tools.length, tools: registry.tools.join(", ") });
|
|
430
589
|
}
|
|
431
590
|
export class ToolRegistry {
|
|
@@ -38,10 +38,12 @@ export interface AppInfo {
|
|
|
38
38
|
windowCount: number;
|
|
39
39
|
}
|
|
40
40
|
export interface AppTarget {
|
|
41
|
+
targetId: string;
|
|
41
42
|
appName: string;
|
|
42
43
|
pid: number;
|
|
43
44
|
windowId?: string;
|
|
44
45
|
title?: string;
|
|
46
|
+
capturedAt: string;
|
|
45
47
|
}
|
|
46
48
|
export interface BrowserContext {
|
|
47
49
|
appName: string;
|
|
@@ -81,6 +83,17 @@ export interface FindElementOptions {
|
|
|
81
83
|
depth?: number;
|
|
82
84
|
includeBounds?: boolean;
|
|
83
85
|
maxResults?: number;
|
|
86
|
+
textMode?: "contains" | "exact" | "regex";
|
|
87
|
+
visibleOnly?: boolean;
|
|
88
|
+
/** Match against the AX element's current value attribute (respects textMode). */
|
|
89
|
+
value?: string;
|
|
90
|
+
/** Return only the Nth match (0-based) after all other filtering and sorting. */
|
|
91
|
+
index?: number;
|
|
92
|
+
/** Sort results by ascending distance to this point and return closest first. */
|
|
93
|
+
near?: {
|
|
94
|
+
x: number;
|
|
95
|
+
y: number;
|
|
96
|
+
};
|
|
84
97
|
}
|
|
85
98
|
export interface FindElementResult {
|
|
86
99
|
id: string;
|
|
@@ -95,6 +108,16 @@ export interface FindElementResult {
|
|
|
95
108
|
};
|
|
96
109
|
description?: string;
|
|
97
110
|
}
|
|
111
|
+
export interface FindElementMetrics {
|
|
112
|
+
scannedCount: number;
|
|
113
|
+
matchedCount: number;
|
|
114
|
+
durationMs: number;
|
|
115
|
+
truncated: boolean;
|
|
116
|
+
}
|
|
117
|
+
export interface FindElementResponse {
|
|
118
|
+
results: FindElementResult[];
|
|
119
|
+
metrics: FindElementMetrics;
|
|
120
|
+
}
|
|
98
121
|
export interface WindowState {
|
|
99
122
|
window: WindowInfo;
|
|
100
123
|
focusedElement?: ElementInfo;
|
|
@@ -117,11 +140,13 @@ export interface Platform {
|
|
|
117
140
|
ocr(display?: number, region?: ScreenRegion): Promise<OcrResult>;
|
|
118
141
|
type(text: string, delay?: number): Promise<void>;
|
|
119
142
|
key(keys: string[]): Promise<void>;
|
|
120
|
-
findElement(options: FindElementOptions): Promise<
|
|
143
|
+
findElement(options: FindElementOptions): Promise<FindElementResponse>;
|
|
121
144
|
clickElement(elementId: string, app?: string): Promise<void>;
|
|
122
145
|
typeInElement(elementId: string, text: string, app?: string, clearFirst?: boolean): Promise<void>;
|
|
123
146
|
setElementValue?(elementId: string, value: string, app?: string): Promise<void>;
|
|
124
147
|
isScreenLocked?(): boolean;
|
|
125
148
|
saveFocus?(): Promise<void>;
|
|
126
149
|
restoreFocus?(): Promise<void>;
|
|
150
|
+
readClipboard(): Promise<string>;
|
|
151
|
+
writeClipboard(text: string): Promise<void>;
|
|
127
152
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions,
|
|
1
|
+
import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions, FindElementResponse } from "./base.js";
|
|
2
2
|
/**
|
|
3
3
|
* Linux platform adapter (AT-SPI2 + xdotool fallback)
|
|
4
4
|
* TODO: Implement with D-Bus AT-SPI2 bindings
|
|
@@ -16,7 +16,9 @@ export declare class LinuxPlatform implements Platform {
|
|
|
16
16
|
type(text: string, delay?: number): Promise<void>;
|
|
17
17
|
key(keys: string[]): Promise<void>;
|
|
18
18
|
ocr(_display?: number, _region?: ScreenRegion): Promise<OcrResult>;
|
|
19
|
-
findElement(_options: FindElementOptions): Promise<
|
|
19
|
+
findElement(_options: FindElementOptions): Promise<FindElementResponse>;
|
|
20
20
|
clickElement(_elementId: string, _app?: string): Promise<void>;
|
|
21
21
|
typeInElement(_elementId: string, _text: string, _app?: string, _clearFirst?: boolean): Promise<void>;
|
|
22
|
+
readClipboard(): Promise<string>;
|
|
23
|
+
writeClipboard(text: string): Promise<void>;
|
|
22
24
|
}
|
|
@@ -1,3 +1,27 @@
|
|
|
1
|
+
import { execFileSync } from "node:child_process";
|
|
2
|
+
import { existsSync } from "node:fs";
|
|
3
|
+
import { PlatformError } from "../util/errors.js";
|
|
4
|
+
/**
 * Choose which clipboard utility to shell out to, trying xclip before xsel.
 *
 * Each utility is probed at two fixed absolute locations (checked with
 * `existsSync`) and then by bare name (checked with `which`). The first
 * probe that succeeds decides the result.
 *
 * @returns `"xclip"` or `"xsel"` (always the bare tool name, never the
 *          probed path), or `undefined` when no probe succeeds.
 */
function pickClipboardTool() {
    // Absolute paths are tested on disk; bare names go through `which`.
    const isAvailable = (candidate) => candidate.startsWith("/") ? existsSync(candidate) : which(candidate);
    // Ordered by preference: every xclip location is tried before any xsel one.
    const toolCandidates = [
        ["xclip", ["/usr/bin/xclip", "/usr/local/bin/xclip", "xclip"]],
        ["xsel", ["/usr/bin/xsel", "/usr/local/bin/xsel", "xsel"]],
    ];
    for (const [toolName, locations] of toolCandidates) {
        // `some` short-circuits, so later locations are not probed after a hit.
        if (locations.some((location) => isAvailable(location))) {
            return toolName;
        }
    }
    return undefined;
}
|
|
16
|
+
/**
 * Report whether running `which <bin>` succeeds.
 *
 * @param bin command name handed to `which` as its single argument.
 * @returns `true` when the `which` invocation completes without throwing,
 *          `false` on any failure (non-zero exit, the 2 s timeout, or
 *          `which` itself being unavailable).
 */
function which(bin) {
    let found = true;
    try {
        // Output is discarded; only success or failure of the call matters.
        execFileSync("which", [bin], { encoding: "utf-8", timeout: 2000, stdio: "ignore" });
    }
    catch {
        found = false;
    }
    return found;
}
|
|
1
25
|
/**
|
|
2
26
|
* Linux platform adapter (AT-SPI2 + xdotool fallback)
|
|
3
27
|
* TODO: Implement with D-Bus AT-SPI2 bindings
|
|
@@ -59,4 +83,31 @@ export class LinuxPlatform {
|
|
|
59
83
|
async typeInElement(_elementId, _text, _app, _clearFirst) {
|
|
60
84
|
throw new Error("Not implemented: Linux typeInElement");
|
|
61
85
|
}
|
|
86
|
+
async readClipboard() {
|
|
87
|
+
const tool = pickClipboardTool();
|
|
88
|
+
if (!tool) {
|
|
89
|
+
throw new PlatformError("readClipboard requires xclip or xsel on PATH", false);
|
|
90
|
+
}
|
|
91
|
+
try {
|
|
92
|
+
const args = tool === "xclip" ? ["-selection", "clipboard", "-o"] : ["--clipboard", "--output"];
|
|
93
|
+
const out = execFileSync(tool, args, { encoding: "utf-8", timeout: 5000 });
|
|
94
|
+
return out;
|
|
95
|
+
}
|
|
96
|
+
catch (error) {
|
|
97
|
+
throw new PlatformError(`read_clipboard failed: ${error.message}`);
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
async writeClipboard(text) {
|
|
101
|
+
const tool = pickClipboardTool();
|
|
102
|
+
if (!tool) {
|
|
103
|
+
throw new PlatformError("writeClipboard requires xclip or xsel on PATH", false);
|
|
104
|
+
}
|
|
105
|
+
try {
|
|
106
|
+
const args = tool === "xclip" ? ["-selection", "clipboard"] : ["--clipboard", "--input"];
|
|
107
|
+
execFileSync(tool, args, { input: text, encoding: "utf-8", timeout: 5000 });
|
|
108
|
+
}
|
|
109
|
+
catch (error) {
|
|
110
|
+
throw new PlatformError(`write_clipboard failed: ${error.message}`);
|
|
111
|
+
}
|
|
112
|
+
}
|
|
62
113
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions,
|
|
1
|
+
import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions, FindElementResponse, AppInfo, AppTarget, BrowserContext, ScreenshotOptions } from "./base.js";
|
|
2
2
|
export declare class MacOSPlatform implements Platform {
|
|
3
3
|
private readonly elementCache;
|
|
4
4
|
private readonly elementCacheTtlMs;
|
|
@@ -13,6 +13,8 @@ export declare class MacOSPlatform implements Platform {
|
|
|
13
13
|
private evictOverflowCacheEntries;
|
|
14
14
|
/** Check whether a cached element descriptor has expired. */
|
|
15
15
|
private isCacheEntryExpired;
|
|
16
|
+
/** Validate that the active target window still exists. */
|
|
17
|
+
validateActiveTarget(): Promise<void>;
|
|
16
18
|
/** Save the current frontmost app/window so we can restore after an action. */
|
|
17
19
|
saveFocus(): Promise<void>;
|
|
18
20
|
/** Restore the previously saved frontmost app/window. */
|
|
@@ -36,8 +38,10 @@ export declare class MacOSPlatform implements Platform {
|
|
|
36
38
|
private ocrJxa;
|
|
37
39
|
type(text: string, delay?: number): Promise<void>;
|
|
38
40
|
key(keys: string[]): Promise<void>;
|
|
39
|
-
findElement(options: FindElementOptions): Promise<
|
|
41
|
+
findElement(options: FindElementOptions): Promise<FindElementResponse>;
|
|
40
42
|
clickElement(elementId: string, app?: string): Promise<void>;
|
|
41
43
|
typeInElement(elementId: string, text: string, app?: string, clearFirst?: boolean): Promise<void>;
|
|
44
|
+
readClipboard(): Promise<string>;
|
|
45
|
+
writeClipboard(text: string): Promise<void>;
|
|
42
46
|
setElementValue(elementId: string, value: string, app?: string): Promise<void>;
|
|
43
47
|
}
|