ucu-mcp 0.1.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,16 @@
1
1
  /**
2
2
  * Tool registry for UCU-MCP.
3
3
  *
4
- * Registers 22 MCP tools on the server and dispatches each call through
4
+ * Registers 24 MCP tools on the server and dispatches each call through
5
5
  * a shared safety/permission/retry pipeline (`withSafety`).
6
6
  */
7
7
  import { z } from "zod";
8
8
  import { MacOSPlatform } from "../platform/macos.js";
9
- import { SafetyGuard } from "../safety/guard.js";
9
+ import { SafetyGuard, classifyAction } from "../safety/guard.js";
10
10
  import { checkPermission } from "../safety/permissions.js";
11
11
  import { retry } from "../util/retry.js";
12
12
  import { createLogger } from "../util/logger.js";
13
- import { SafetyError, PermissionError, UnsupportedParameterError } from "../util/errors.js";
13
+ import { SafetyError, PermissionError, UnsupportedParameterError, UcuError, WindowNotFoundError } from "../util/errors.js";
14
14
  const log = createLogger("tools");
15
15
  let _platform;
16
16
  function getPlatform() {
@@ -20,6 +20,14 @@ function getPlatform() {
20
20
  return _platform;
21
21
  }
22
22
  const safety = new SafetyGuard();
23
+ // Active target context — set by focus_app, used by AX element tools
24
+ let activeTargetContext;
25
+ /**
26
+ * Get the currently active target context (set by focus_app).
27
+ */
28
+ export function getActiveTarget() {
29
+ return activeTargetContext;
30
+ }
23
31
  // User activity monitor — pauses automation when user moves the cursor
24
32
  let lastCursorPos = { x: 0, y: 0 };
25
33
  let userActivityInterval;
@@ -31,21 +39,134 @@ const captureAfterFields = {
31
39
  async function resolvePoint(x, y, windowId) {
32
40
  if (!windowId)
33
41
  return { x, y };
42
+ const win = (await getPlatform().listWindows()).find(w => w.id === windowId);
43
+ if (!win)
44
+ throw new WindowNotFoundError(windowId);
45
+ return { x: win.bounds.x + x, y: win.bounds.y + y };
46
+ }
47
+ function jsonText(value) {
48
+ return { type: "text", text: JSON.stringify(value, null, 2) };
49
+ }
50
+ function recoveryHint(code) {
51
+ switch (code) {
52
+ case "WINDOW_NOT_FOUND":
53
+ return "Run list_windows again, then retry with a fresh windowId or omit windowId for screen coordinates.";
54
+ case "TARGET_STALE":
55
+ return "Run focus_app again for the target app, or run list_windows and retry with a fresh windowId.";
56
+ case "ELEMENT_NOT_FOUND":
57
+ return "Run find_element again, then retry with a fresh elementId.";
58
+ case "PERMISSION_DENIED":
59
+ return "Run doctor and grant the missing macOS permission, then restart the launching client.";
60
+ case "UNSUPPORTED_PARAMETER":
61
+ return "Remove or replace the unsupported parameter; inspect tools/list for this tool schema.";
62
+ case "SAFETY_BLOCKED":
63
+ return "Choose a less risky action or ask the user to perform it manually.";
64
+ case "INPUT_FAILED":
65
+ return "Observe current state with screenshot or get_window_state before retrying manually.";
66
+ case "CAPTURE_FAILED":
67
+ return "Run doctor to check Screen Recording permission, then retry screenshot or ocr.";
68
+ case "COORDINATE_OUT_OF_BOUNDS":
69
+ return "Run get_screen_size or list_windows, then retry with coordinates inside the active display or window bounds.";
70
+ default:
71
+ return "Inspect the error message, observe the current UI state, and retry only if the operation is safe.";
72
+ }
73
+ }
74
+ function errorDetails(error) {
75
+ const err = error instanceof Error ? error : new Error(String(error));
76
+ const code = error instanceof UcuError ? error.code : "UNKNOWN_ERROR";
77
+ const retryable = error instanceof UcuError ? error.retryable : false;
78
+ return {
79
+ name: err.name,
80
+ code,
81
+ retryable,
82
+ message: err.message,
83
+ recovery: recoveryHint(code),
84
+ };
85
+ }
86
+ let _actionCounter = 0;
87
+ function nextActionId() {
88
+ _actionCounter = (_actionCounter + 1) % 1_000_000;
89
+ return `a${Date.now().toString(36)}-${_actionCounter.toString(36)}`;
90
+ }
91
+ function buildActionReceipt(action, status, target, result, captureRequested, captureFormat, captureMaxWidth, captureError, warnings = []) {
92
+ const captureStatus = captureRequested
93
+ ? captureError ? "error" : "ok"
94
+ : "skipped";
95
+ return {
96
+ actionId: nextActionId(),
97
+ action,
98
+ status,
99
+ target,
100
+ result,
101
+ capture: {
102
+ requested: captureRequested,
103
+ status: captureStatus,
104
+ ...(captureFormat && { format: captureFormat }),
105
+ ...(captureMaxWidth && { maxWidth: captureMaxWidth }),
106
+ ...(captureError && { error: captureError }),
107
+ },
108
+ warnings,
109
+ next: captureError
110
+ ? "screenshot"
111
+ : status === "partial"
112
+ ? "get_window_state"
113
+ : "find_element or get_window_state",
114
+ };
115
+ }
116
+ function mcpErrorResponse(error) {
117
+ return {
118
+ isError: true,
119
+ content: [
120
+ jsonText({
121
+ error: errorDetails(error),
122
+ }),
123
+ ],
124
+ };
125
+ }
126
+ async function actionResponse(action, result, target, captureAfter, captureFormat = "jpeg", captureMaxWidth = 1280, warnings = []) {
127
+ const receipt = buildActionReceipt(action, "ok", target, result, captureAfter ?? false, captureFormat, captureMaxWidth, undefined, warnings);
128
+ if (!captureAfter) {
129
+ return { content: [jsonText(receipt)] };
130
+ }
34
131
  try {
35
- const win = (await getPlatform().listWindows()).find(w => w.id === windowId);
36
- if (!win)
37
- return { x, y };
38
- return { x: win.bounds.x + x, y: win.bounds.y + y };
132
+ const buf = await getPlatform().screenshot(undefined, undefined, {
133
+ format: captureFormat,
134
+ maxWidth: captureMaxWidth,
135
+ });
136
+ return {
137
+ content: [
138
+ jsonText(receipt),
139
+ {
140
+ type: "image",
141
+ data: buf.toString("base64"),
142
+ mimeType: `image/${captureFormat}`,
143
+ },
144
+ ],
145
+ };
39
146
  }
40
- catch {
41
- return { x, y };
147
+ catch (error) {
148
+ const partialReceipt = buildActionReceipt(action, "partial", target, result, true, captureFormat, captureMaxWidth, errorDetails(error), [...warnings, "Post-action screenshot capture failed"]);
149
+ return { content: [jsonText(partialReceipt)] };
42
150
  }
43
151
  }
152
+ const retryableActions = new Set([
153
+ "screenshot",
154
+ "list_windows",
155
+ "list_apps",
156
+ "get_window_state",
157
+ "get_cursor_position",
158
+ "get_screen_size",
159
+ "ocr",
160
+ "doctor",
161
+ "find_element",
162
+ ]);
44
163
  async function withSafety(sa) {
45
164
  const platform = getPlatform();
46
165
  if (platform.isScreenLocked?.())
47
166
  throw new SafetyError("Screen is locked");
48
- const check = safety.checkAction(sa.action, sa.params);
167
+ const check = safety.checkAction(sa.action, sa.params, {
168
+ skipUserActivityPause: sa.skipUserActivityPause ?? classifyAction(sa.action) === "observe",
169
+ });
49
170
  if (!check.allowed)
50
171
  throw new SafetyError(check.reason ?? "Action blocked by safety guard");
51
172
  if (sa.requiresAccessibility) {
@@ -64,27 +185,24 @@ async function withSafety(sa) {
64
185
  if (shouldManageFocus)
65
186
  await platform.saveFocus?.();
66
187
  try {
67
- return await retry(() => sa.execute());
188
+ return retryableActions.has(sa.action)
189
+ ? await retry(() => sa.execute())
190
+ : await sa.execute();
68
191
  }
69
192
  finally {
70
193
  if (shouldManageFocus)
71
194
  await platform.restoreFocus?.();
72
195
  }
73
196
  }
74
- async function appendCaptureAfter(result, captureAfter) {
75
- if (!captureAfter)
76
- return result;
197
+ export function startUserActivityMonitor() {
198
+ if (userActivityInterval)
199
+ return;
77
200
  try {
78
- const buf = await getPlatform().screenshot();
79
- return { actionResult: result, screenshot: { type: "image", data: buf.toString("base64"), mimeType: "image/png" } };
201
+ lastCursorPos = getPlatform().getCursorPosition();
80
202
  }
81
203
  catch {
82
- return result;
204
+ // Keep the default when the cursor cannot be queried during startup.
83
205
  }
84
- }
85
- export function startUserActivityMonitor() {
86
- if (userActivityInterval)
87
- return;
88
206
  userActivityInterval = setInterval(() => {
89
207
  try {
90
208
  const pos = getPlatform().getCursorPosition();
@@ -95,8 +213,9 @@ export function startUserActivityMonitor() {
95
213
  }
96
214
  catch { /* can't check cursor */ }
97
215
  }, 250);
216
+ userActivityInterval.unref?.();
98
217
  }
99
- function stopUserActivityMonitor() {
218
+ export function stopUserActivityMonitor() {
100
219
  if (userActivityInterval) {
101
220
  clearInterval(userActivityInterval);
102
221
  userActivityInterval = undefined;
@@ -104,43 +223,68 @@ function stopUserActivityMonitor() {
104
223
  }
105
224
  export function registerTools(server) {
106
225
  const registry = ToolRegistry.instance;
107
- server.tool("screenshot", "Capture a screenshot of the entire screen or a region", {
226
+ const registerTool = (name, description, schema, handler) => {
227
+ server.tool(name, description, schema, async (params) => {
228
+ try {
229
+ return await handler(params);
230
+ }
231
+ catch (error) {
232
+ return mcpErrorResponse(error);
233
+ }
234
+ });
235
+ };
236
+ registerTool("screenshot", "Capture a screenshot of the entire screen or a region", {
108
237
  display: z.number().optional().describe("Display index (default 0)"),
238
+ windowId: z.string().optional().describe("Window ID from list_windows; when set, captures that window"),
109
239
  region: z.object({ x: z.number(), y: z.number(), width: z.number(), height: z.number() }).optional().describe("Region to capture"),
110
240
  format: z.enum(["png", "jpeg"]).default("png").describe("Image format"),
111
241
  maxWidth: z.number().default(1280).describe("Maximum output width in pixels. Aspect ratio is preserved."),
112
242
  }, async (params) => {
113
- const buf = await withSafety({ action: "screenshot", params: {}, requiresScreenRecording: true, execute: () => getPlatform().screenshot(params.display, params.region, { format: params.format }) });
243
+ if (params.windowId && params.region)
244
+ throw new UnsupportedParameterError("screenshot windowId cannot be combined with region");
245
+ const options = { format: params.format, maxWidth: params.maxWidth };
246
+ const buf = await withSafety({
247
+ action: "screenshot",
248
+ params,
249
+ requiresScreenRecording: true,
250
+ execute: () => params.windowId
251
+ ? getPlatform().screenshotWindow
252
+ ? getPlatform().screenshotWindow(params.windowId, options)
253
+ : Promise.reject(new UnsupportedParameterError("window screenshots are not implemented on this platform"))
254
+ : getPlatform().screenshot(params.display, params.region, options),
255
+ });
114
256
  return { content: [{ type: "image", data: buf.toString("base64"), mimeType: `image/${params.format}` }] };
115
257
  });
116
258
  registry.register("screenshot");
117
- server.tool("list_windows", "List all visible windows on screen", {
259
+ registerTool("list_windows", "List all visible windows on screen", {
118
260
  includeMinimized: z.boolean().optional().describe("Include minimized windows"),
119
261
  }, async (params) => {
120
262
  const windows = await withSafety({ action: "list_windows", params: {}, requiresAccessibility: true, execute: () => getPlatform().listWindows(params.includeMinimized) });
121
263
  return { content: [{ type: "text", text: JSON.stringify(windows, null, 2) }] };
122
264
  });
123
265
  registry.register("list_windows");
124
- server.tool("list_apps", "List all running applications", {}, async () => {
266
+ registerTool("list_apps", "List all running applications", {}, async () => {
125
267
  const apps = await withSafety({ action: "list_apps", params: {}, requiresAccessibility: true, execute: async () => getPlatform().listApps() });
126
268
  return { content: [{ type: "text", text: JSON.stringify(apps, null, 2) }] };
127
269
  });
128
270
  registry.register("list_apps");
129
- server.tool("focus_app", "Bring an application to the foreground", {
271
+ registerTool("focus_app", "Select an application/window as the active target context", {
130
272
  app: z.string().describe("Application name to focus"),
131
273
  }, async (params) => {
132
274
  const target = await withSafety({ action: "focus_app", params: {}, requiresAccessibility: true, execute: () => getPlatform().focusApp(params.app) });
275
+ activeTargetContext = target;
133
276
  return { content: [{ type: "text", text: JSON.stringify(target, null, 2) }] };
134
277
  });
135
278
  registry.register("focus_app");
136
- server.tool("get_window_state", "Get detailed state of a window including accessibility tree", {
279
+ registerTool("get_window_state", "Get detailed state of a window including accessibility tree", {
137
280
  windowId: z.string().optional().describe("Window ID"), depth: z.number().optional().describe("AX tree depth"), includeBounds: z.boolean().optional().describe("Include element bounds"),
138
281
  }, async (params) => {
139
- const state = await withSafety({ action: "get_window_state", params: {}, requiresAccessibility: true, execute: () => getPlatform().getWindowState(params.windowId, params.depth, params.includeBounds) });
282
+ const effectiveWindowId = params.windowId || getActiveTarget()?.windowId;
283
+ const state = await withSafety({ action: "get_window_state", params: {}, requiresAccessibility: true, execute: () => getPlatform().getWindowState(effectiveWindowId, params.depth, params.includeBounds) });
140
284
  return { content: [{ type: "text", text: JSON.stringify(state, null, 2) }] };
141
285
  });
142
286
  registry.register("get_window_state");
143
- server.tool("click", "Click at screen coordinates", {
287
+ registerTool("click", "Click at screen coordinates", {
144
288
  x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
145
289
  button: z.enum(["left", "right", "middle"]).optional().describe("Mouse button"),
146
290
  windowId: z.string().optional().describe("If set, x/y are relative to this window"),
@@ -148,10 +292,10 @@ export function registerTools(server) {
148
292
  }, async (params) => {
149
293
  const pt = await resolvePoint(params.x, params.y, params.windowId);
150
294
  await withSafety({ action: "click", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().click(pt.x, pt.y, params.button) });
151
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ clicked: true, x: pt.x, y: pt.y }, params.captureAfter), null, 2) }] };
295
+ return actionResponse("click", { clicked: true, x: pt.x, y: pt.y }, { x: pt.x, y: pt.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
152
296
  });
153
297
  registry.register("click");
154
- server.tool("double_click", "Double-click at screen coordinates", {
298
+ registerTool("double_click", "Double-click at screen coordinates", {
155
299
  x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
156
300
  button: z.enum(["left", "right", "middle"]).optional().describe("Mouse button"),
157
301
  windowId: z.string().optional().describe("If set, x/y are relative to this window"),
@@ -159,10 +303,10 @@ export function registerTools(server) {
159
303
  }, async (params) => {
160
304
  const pt = await resolvePoint(params.x, params.y, params.windowId);
161
305
  await withSafety({ action: "click", params: { x: pt.x, y: pt.y, doubleClick: true }, requiresAccessibility: true, execute: () => getPlatform().click(pt.x, pt.y, params.button, true) });
162
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ doubleClicked: true, x: pt.x, y: pt.y }, params.captureAfter), null, 2) }] };
306
+ return actionResponse("double_click", { doubleClicked: true, x: pt.x, y: pt.y }, { x: pt.x, y: pt.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
163
307
  });
164
308
  registry.register("double_click");
165
- server.tool("type_text", "Type text at the current cursor position", {
309
+ registerTool("type_text", "Type text at the current cursor position", {
166
310
  text: z.string().describe("Text to type"), delay: z.number().optional().describe("Delay between keystrokes in ms"),
167
311
  windowId: z.string().optional().describe("UNSUPPORTED: windowId-targeted keyboard typing is not implemented"),
168
312
  ...captureAfterFields,
@@ -170,107 +314,208 @@ export function registerTools(server) {
170
314
  if (params.windowId)
171
315
  throw new UnsupportedParameterError("windowId-targeted keyboard typing is not implemented");
172
316
  await withSafety({ action: "type_text", params: { text: params.text }, requiresAccessibility: true, execute: () => getPlatform().type(params.text, params.delay) });
173
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ typed: true, charCount: params.text.length }, params.captureAfter), null, 2) }] };
317
+ return actionResponse("type_text", { typed: true, charCount: params.text.length }, {}, params.captureAfter, params.captureFormat, params.captureMaxWidth);
174
318
  });
175
319
  registry.register("type_text");
176
- server.tool("press_key", "Press a keyboard shortcut", {
320
+ registerTool("press_key", "Press a keyboard shortcut", {
177
321
  keys: z.array(z.string()).optional().describe("Keys to press simultaneously"),
178
322
  key: z.string().optional().describe("Single key to press (alias for keys)"),
323
+ modifiers: z.array(z.string()).optional().describe("Modifier keys used with key, such as cmd, shift, alt, or ctrl"),
179
324
  windowId: z.string().optional().describe("UNSUPPORTED: windowId-targeted key events are not implemented"),
180
325
  ...captureAfterFields,
181
326
  }, async (params) => {
182
327
  if (params.windowId)
183
328
  throw new UnsupportedParameterError("windowId-targeted key events are not implemented");
184
- const keys = params.keys ?? (params.key ? [params.key] : []);
329
+ const keys = params.keys ?? [
330
+ ...(params.modifiers ?? []),
331
+ ...(params.key ? [params.key] : []),
332
+ ];
185
333
  if (keys.length === 0)
186
- throw new Error("press_key requires at least one key");
334
+ throw new UnsupportedParameterError("press_key requires at least one key");
187
335
  await withSafety({ action: "press_key", params: { keys }, requiresAccessibility: true, execute: () => getPlatform().key(keys) });
188
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ pressed: true, keys: params.keys }, params.captureAfter), null, 2) }] };
336
+ return actionResponse("press_key", { pressed: true, keys }, {}, params.captureAfter, params.captureFormat, params.captureMaxWidth);
189
337
  });
190
338
  registry.register("press_key");
191
- server.tool("scroll", "Scroll at coordinates", {
339
+ registerTool("scroll", "Scroll at coordinates", {
192
340
  x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
193
- deltaX: z.number().describe("Horizontal scroll"), deltaY: z.number().describe("Vertical scroll (negative = up)"),
341
+ deltaX: z.number().default(0).describe("Horizontal scroll"), deltaY: z.number().describe("Vertical scroll (negative = up)"),
194
342
  windowId: z.string().optional().describe("If set, x/y are relative to this window"),
195
343
  ...captureAfterFields,
196
344
  }, async (params) => {
197
345
  const pt = await resolvePoint(params.x, params.y, params.windowId);
198
- await withSafety({ action: "scroll", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().scroll(pt.x, pt.y, params.deltaX, params.deltaY) });
199
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ scrolled: true, x: pt.x, y: pt.y }, params.captureAfter), null, 2) }] };
346
+ const deltaX = params.deltaX ?? 0;
347
+ await withSafety({ action: "scroll", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().scroll(pt.x, pt.y, deltaX, params.deltaY) });
348
+ return actionResponse("scroll", { scrolled: true, x: pt.x, y: pt.y }, { x: pt.x, y: pt.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
200
349
  });
201
350
  registry.register("scroll");
202
- server.tool("drag", "Drag from one point to another", {
351
+ registerTool("drag", "Drag from one point to another", {
203
352
  startX: z.number().describe("Start X"), startY: z.number().describe("Start Y"),
204
353
  endX: z.number().describe("End X"), endY: z.number().describe("End Y"),
205
354
  button: z.enum(["left", "right", "middle"]).optional().describe("Mouse button"),
355
+ windowId: z.string().optional().describe("If set, start/end coordinates are relative to this window"),
206
356
  duration: z.number().optional().describe("Drag duration in ms"),
207
357
  ...captureAfterFields,
208
358
  }, async (params) => {
209
- await withSafety({ action: "drag", params: {}, requiresAccessibility: true, execute: () => getPlatform().drag(params.startX, params.startY, params.endX, params.endY, params.button, params.duration) });
210
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ dragged: true }, params.captureAfter), null, 2) }] };
359
+ const start = await resolvePoint(params.startX, params.startY, params.windowId);
360
+ const end = await resolvePoint(params.endX, params.endY, params.windowId);
361
+ await withSafety({ action: "drag", params: { startX: start.x, startY: start.y, endX: end.x, endY: end.y }, requiresAccessibility: true, execute: () => getPlatform().drag(start.x, start.y, end.x, end.y, params.button, params.duration) });
362
+ return actionResponse("drag", { dragged: true, startX: start.x, startY: start.y, endX: end.x, endY: end.y }, { startX: start.x, startY: start.y, endX: end.x, endY: end.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
211
363
  });
212
364
  registry.register("drag");
213
- server.tool("doctor", "Check system permissions and diagnose common issues", {}, async () => {
365
+ registerTool("doctor", "Check system permissions, native helpers, and client readiness", {}, async () => {
214
366
  const { checkPermissions } = await import("../safety/permissions.js");
215
367
  const { MacOSPlatform: MacPlat } = await import("../platform/macos.js");
368
+ const { existsSync } = await import("node:fs");
369
+ const { join, dirname } = await import("node:path");
370
+ const { fileURLToPath } = await import("node:url");
371
+ const { execFileSync } = await import("node:child_process");
216
372
  const permissions = await checkPermissions();
217
373
  const screenLocked = process.platform === "darwin" ? new MacPlat().isScreenLocked?.() ?? false : false;
374
+ let nativeHelpers;
375
+ if (process.platform === "darwin") {
376
+ const moduleDir = dirname(fileURLToPath(import.meta.url));
377
+ const checkPaths = (subdirs) => {
378
+ const paths = [
379
+ join(process.cwd(), ...subdirs),
380
+ join(moduleDir, "..", ...subdirs),
381
+ join(moduleDir, "..", "..", ...subdirs),
382
+ ];
383
+ return paths.some(p => { try {
384
+ return existsSync(p);
385
+ }
386
+ catch {
387
+ return false;
388
+ } });
389
+ };
390
+ nativeHelpers = {
391
+ cgevent: checkPaths(["native", "cgevent", "cgevent-helper"]),
392
+ ocr: checkPaths(["native", "ocr", "ocr-helper"]),
393
+ };
394
+ }
395
+ let readiness = "ready";
396
+ const issues = [];
397
+ if (!permissions.granted) {
398
+ readiness = "blocked";
399
+ issues.push("Missing macOS permissions: " + permissions.missing.join(", "));
400
+ }
401
+ if (screenLocked) {
402
+ readiness = "blocked";
403
+ issues.push("Screen is locked");
404
+ }
405
+ if (process.platform === "darwin" && nativeHelpers) {
406
+ if (!nativeHelpers.cgevent) {
407
+ readiness = readiness === "ready" ? "degraded" : readiness;
408
+ issues.push("Native CGEvent helper not found (input synthesis may crash on macOS Sequoia+)");
409
+ }
410
+ if (!nativeHelpers.ocr) {
411
+ readiness = readiness === "ready" ? "degraded" : readiness;
412
+ issues.push("Native OCR helper not found (OCR may fail on macOS Sequoia+)");
413
+ }
414
+ }
415
+ const clients = {};
416
+ for (const bin of ["claude", "codex", "opencode", "npx"]) {
417
+ try {
418
+ const path = execFileSync("which", [bin], { encoding: "utf-8", timeout: 2000 }).trim();
419
+ clients[bin] = path || "not found";
420
+ }
421
+ catch {
422
+ clients[bin] = "not found";
423
+ }
424
+ }
425
+ const recommendations = [];
426
+ if (readiness === "blocked") {
427
+ recommendations.push("Grant missing permissions in System Settings > Privacy & Security, then restart the MCP client.");
428
+ }
429
+ else if (readiness === "degraded") {
430
+ if (nativeHelpers && (!nativeHelpers.cgevent || !nativeHelpers.ocr)) {
431
+ recommendations.push("Run 'npm run build' to compile native Swift helpers.");
432
+ }
433
+ }
434
+ else {
435
+ recommendations.push("All checks passed. MCP client can proceed with automation.");
436
+ }
218
437
  const report = {
219
- ok: permissions.granted && !screenLocked,
438
+ readiness,
439
+ issues: issues.length > 0 ? issues : undefined,
440
+ recommendations,
220
441
  platform: process.platform,
221
442
  node: process.version,
222
443
  permissions,
223
444
  screenLocked,
445
+ nativeHelpers,
446
+ clients,
224
447
  safety: {
225
448
  urlBlocklist: true,
226
449
  lockScreenGuard: process.platform === "darwin",
227
450
  typedTextInjectionScan: true,
228
451
  },
229
452
  stdioCommand: "ucu-mcp",
230
- clients: {
231
- claudeCodeCli: "Run ucu-mcp as an MCP stdio server.",
232
- claudeCodeDesktop: "Configure ucu-mcp as a local MCP stdio server.",
233
- openCode: "Configure ucu-mcp as a local MCP stdio server.",
234
- },
235
453
  };
236
454
  return { content: [{ type: "text", text: JSON.stringify(report, null, 2) }] };
237
455
  });
238
456
  registry.register("doctor");
239
- server.tool("wait", "Wait for a specified duration", { ms: z.number().describe("Duration in milliseconds") }, async (params) => {
457
+ registerTool("wait", "Wait for a specified duration", { ms: z.number().describe("Duration in milliseconds") }, async (params) => {
240
458
  await new Promise(r => setTimeout(r, params.ms));
241
459
  return { content: [{ type: "text", text: JSON.stringify({ waited: params.ms }) }] };
242
460
  });
243
461
  registry.register("wait");
244
- server.tool("wait_for_element", "Poll until an accessibility element matching the criteria appears", {
462
+ registerTool("wait_for_element", "Poll until an accessibility element matching the criteria reaches the desired state", {
245
463
  text: z.string().optional().describe("Element text"), role: z.string().optional().describe("Element role"),
246
- app: z.string().optional().describe("Target app"), timeout: z.number().optional().describe("Timeout ms (default 5000)"), interval: z.number().optional().describe("Poll interval ms (default 500)"),
464
+ app: z.string().optional().describe("Target app"),
465
+ timeout: z.number().optional().describe("Timeout ms (default 5000)"),
466
+ timeoutMs: z.number().optional().describe("Alias for timeout"),
467
+ interval: z.number().optional().describe("Poll interval ms (default 500)"),
468
+ intervalMs: z.number().optional().describe("Alias for interval"),
469
+ until: z.enum(["appear", "disappear", "value_change"]).default("appear").describe("Wait condition: 'appear' (default) waits for a match, 'disappear' waits until no match, 'value_change' waits until first match's value changes"),
247
470
  }, async (params) => {
248
- const deadline = Date.now() + (params.timeout ?? 5000);
249
- const interval = params.interval ?? 500;
471
+ const deadline = Date.now() + (params.timeout ?? params.timeoutMs ?? 5000);
472
+ const interval = params.interval ?? params.intervalMs ?? 500;
473
+ const until = params.until ?? "appear";
474
+ const effectiveApp = params.app || getActiveTarget()?.appName;
475
+ const query = { text: params.text, role: params.role, app: effectiveApp, maxResults: 1 };
476
+ const { granted } = await checkPermission("accessibility");
477
+ if (!granted)
478
+ throw new PermissionError("accessibility", process.platform);
479
+ let initialValue;
250
480
  while (Date.now() < deadline) {
251
- try {
252
- const results = await getPlatform().findElement({ text: params.text, role: params.role, app: params.app, maxResults: 1 });
253
- if (results.length > 0)
254
- return { content: [{ type: "text", text: JSON.stringify({ found: true, element: results[0] }, null, 2) }] };
481
+ const response = await getPlatform().findElement(query);
482
+ const matched = response.results[0];
483
+ if (until === "appear") {
484
+ if (matched)
485
+ return { content: [{ type: "text", text: JSON.stringify({ found: true, element: matched }, null, 2) }] };
486
+ }
487
+ else if (until === "disappear") {
488
+ if (!matched)
489
+ return { content: [{ type: "text", text: JSON.stringify({ found: true, reason: "disappeared" }, null, 2) }] };
490
+ }
491
+ else {
492
+ // value_change: capture the initial value of the first match, then wait for it to differ
493
+ if (matched) {
494
+ if (initialValue === undefined) {
495
+ initialValue = matched.value;
496
+ }
497
+ else if (matched.value !== initialValue) {
498
+ return { content: [{ type: "text", text: JSON.stringify({ found: true, oldValue: initialValue, newValue: matched.value }, null, 2) }] };
499
+ }
500
+ }
255
501
  }
256
- catch { /* retry */ }
257
502
  await new Promise(r => setTimeout(r, interval));
258
503
  }
259
- return { content: [{ type: "text", text: JSON.stringify({ found: false, reason: "timeout" }) }] };
504
+ return { content: [{ type: "text", text: JSON.stringify({ found: false, reason: "timeout" }, null, 2) }] };
260
505
  });
261
506
  registry.register("wait_for_element");
262
- server.tool("get_cursor_position", "Get current cursor position", {}, async () => {
507
+ registerTool("get_cursor_position", "Get current cursor position", {}, async () => {
263
508
  const pos = await withSafety({ action: "get_cursor_position", params: {}, execute: () => Promise.resolve(getPlatform().getCursorPosition()) });
264
509
  return { content: [{ type: "text", text: JSON.stringify(pos, null, 2) }] };
265
510
  });
266
511
  registry.register("get_cursor_position");
267
- server.tool("get_screen_size", "Get screen dimensions and scale factor", {
512
+ registerTool("get_screen_size", "Get screen dimensions and scale factor", {
268
513
  display: z.number().optional().describe("Display index"),
269
514
  }, async (params) => {
270
515
  return { content: [{ type: "text", text: JSON.stringify(getPlatform().getScreenSize(params.display), null, 2) }] };
271
516
  });
272
517
  registry.register("get_screen_size");
273
- server.tool("ocr", "Perform OCR on screen region", {
518
+ registerTool("ocr", "Perform OCR on screen region", {
274
519
  display: z.number().optional().describe("Display index"),
275
520
  region: z.object({ x: z.number(), y: z.number(), width: z.number(), height: z.number() }).optional().describe("Region to OCR"),
276
521
  }, async (params) => {
@@ -278,46 +523,68 @@ export function registerTools(server) {
278
523
  return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
279
524
  });
280
525
  registry.register("ocr");
281
- server.tool("move", "Move cursor to coordinates", {
526
+ registerTool("move", "Move cursor to coordinates", {
282
527
  x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
283
528
  windowId: z.string().optional().describe("If set, x/y are relative to this window"),
529
+ ...captureAfterFields,
284
530
  }, async (params) => {
285
531
  const pt = await resolvePoint(params.x, params.y, params.windowId);
286
532
  await withSafety({ action: "move", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().move(pt.x, pt.y) });
287
- return { content: [{ type: "text", text: JSON.stringify({ moved: true, x: pt.x, y: pt.y }, null, 2) }] };
533
+ return actionResponse("move", { moved: true, x: pt.x, y: pt.y }, { x: pt.x, y: pt.y, windowId: params.windowId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
288
534
  });
289
535
  registry.register("move");
290
- server.tool("find_element", "Find accessibility elements by text, role, or app", {
536
+ registerTool("find_element", "Find accessibility elements by text, role, or app", {
291
537
  text: z.string().optional().describe("Text to search"), role: z.string().optional().describe("AX role"), app: z.string().optional().describe("Target app"),
292
538
  depth: z.number().optional().describe("AX tree depth"), includeBounds: z.boolean().default(true).describe("Include bounds"), maxResults: z.number().min(1).max(200).default(50).describe("Max results"),
539
+ textMode: z.enum(["contains", "exact", "regex"]).default("contains").describe("Text matching mode: contains (default), exact, or regex"),
540
+ visibleOnly: z.boolean().default(false).describe("Only return elements with valid on-screen bounds"),
541
+ value: z.string().optional().describe("Filter by AX element value (respects textMode)"),
542
+ index: z.number().int().nonnegative().optional().describe("Return only the Nth match (0-based) after all other filtering and sorting"),
543
+ near: z.object({ x: z.number(), y: z.number() }).optional().describe("Sort results by ascending distance to this point and return closest first"),
293
544
  }, async (params) => {
294
- const results = await withSafety({ action: "find_element", params: {}, requiresAccessibility: true,
295
- execute: () => getPlatform().findElement({ text: params.text, role: params.role, app: params.app, depth: params.depth, includeBounds: params.includeBounds, maxResults: params.maxResults }) });
296
- return { content: [{ type: "text", text: JSON.stringify(results, null, 2) }] };
545
+ const effectiveApp = params.app || getActiveTarget()?.appName;
546
+ const response = await withSafety({ action: "find_element", params: {}, requiresAccessibility: true,
547
+ execute: () => getPlatform().findElement({ text: params.text, role: params.role, app: effectiveApp, depth: params.depth, includeBounds: params.includeBounds, maxResults: params.maxResults, textMode: params.textMode, visibleOnly: params.visibleOnly, value: params.value, index: params.index, near: params.near }) });
548
+ return { content: [{ type: "text", text: JSON.stringify({ results: response.results, metrics: response.metrics }, null, 2) }] };
297
549
  });
298
550
  registry.register("find_element");
299
- server.tool("click_element", "Click an accessibility element by its ID", {
551
+ registerTool("click_element", "Click an accessibility element by its ID", {
300
552
  elementId: z.string().describe("AX element identifier"), app: z.string().optional().describe("Target app"), ...captureAfterFields,
301
553
  }, async (params) => {
302
- await withSafety({ action: "click_element", params: {}, requiresAccessibility: true, execute: () => getPlatform().clickElement(params.elementId, params.app) });
303
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ clicked: true, elementId: params.elementId }, params.captureAfter), null, 2) }] };
554
+ const effectiveApp = params.app || getActiveTarget()?.appName;
555
+ await withSafety({ action: "click_element", params: {}, requiresAccessibility: true, execute: () => getPlatform().clickElement(params.elementId, effectiveApp) });
556
+ return actionResponse("click_element", { clicked: true, elementId: params.elementId }, { elementId: params.elementId, app: effectiveApp }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
304
557
  });
305
558
  registry.register("click_element");
306
- server.tool("set_value", "Set the value of an accessibility element", {
559
+ registerTool("set_value", "Set the value of an accessibility element", {
307
560
  elementId: z.string().describe("AX element identifier"), value: z.string().describe("Value to set"), app: z.string().optional().describe("Target app"), ...captureAfterFields,
308
561
  }, async (params) => {
309
- await withSafety({ action: "set_value", params: { value: params.value }, requiresAccessibility: true, execute: () => getPlatform().setElementValue(params.elementId, params.value, params.app) });
310
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ setValue: true, elementId: params.elementId }, params.captureAfter), null, 2) }] };
562
+ const effectiveApp = params.app || getActiveTarget()?.appName;
563
+ await withSafety({ action: "set_value", params: { value: params.value }, requiresAccessibility: true, execute: () => getPlatform().setElementValue(params.elementId, params.value, effectiveApp) });
564
+ return actionResponse("set_value", { setValue: true, elementId: params.elementId }, { elementId: params.elementId, app: effectiveApp }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
311
565
  });
312
566
  registry.register("set_value");
313
- server.tool("type_in_element", "Type text into an accessibility element, optionally clearing first", {
567
+ registerTool("type_in_element", "Type text into an accessibility element, optionally clearing first", {
314
568
  elementId: z.string().describe("AX element identifier"), text: z.string().describe("Text to type"),
315
569
  app: z.string().optional().describe("Target app"), clearFirst: z.boolean().optional().describe("Clear existing text before typing"), ...captureAfterFields,
316
570
  }, async (params) => {
317
- await withSafety({ action: "type_in_element", params: { text: params.text }, requiresAccessibility: true, execute: () => getPlatform().typeInElement(params.elementId, params.text, params.app, params.clearFirst) });
318
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ typed: true, elementId: params.elementId, charCount: params.text.length }, params.captureAfter), null, 2) }] };
571
+ const effectiveApp = params.app || getActiveTarget()?.appName;
572
+ await withSafety({ action: "type_in_element", params: { text: params.text }, requiresAccessibility: true, execute: () => getPlatform().typeInElement(params.elementId, params.text, effectiveApp, params.clearFirst) });
573
+ return actionResponse("type_in_element", { typed: true, elementId: params.elementId, charCount: params.text.length }, { elementId: params.elementId, app: effectiveApp }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
319
574
  });
320
575
  registry.register("type_in_element");
576
+ registerTool("clipboard_read", "Read the current contents of the system clipboard", {}, async () => {
577
+ const text = await withSafety({ action: "clipboard_read", params: {}, execute: () => getPlatform().readClipboard() });
578
+ return { content: [{ type: "text", text: JSON.stringify({ text }, null, 2) }] };
579
+ });
580
+ registry.register("clipboard_read");
581
+ registerTool("clipboard_write", "Write text to the system clipboard (text injection patterns are blocked)", {
582
+ text: z.string().describe("Text to place on the clipboard"),
583
+ }, async (params) => {
584
+ await withSafety({ action: "clipboard_write", params: { text: params.text }, execute: () => getPlatform().writeClipboard(params.text) });
585
+ return { content: [{ type: "text", text: JSON.stringify({ written: true }, null, 2) }] };
586
+ });
587
+ registry.register("clipboard_write");
321
588
  log.info("Registered tools", { count: registry.tools.length, tools: registry.tools.join(", ") });
322
589
  }
323
590
  export class ToolRegistry {