ucu-mcp 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@ import { SafetyGuard } from "../safety/guard.js";
10
10
  import { checkPermission } from "../safety/permissions.js";
11
11
  import { retry } from "../util/retry.js";
12
12
  import { createLogger } from "../util/logger.js";
13
- import { SafetyError, PermissionError, UnsupportedParameterError } from "../util/errors.js";
13
+ import { SafetyError, PermissionError, UnsupportedParameterError, UcuError, WindowNotFoundError } from "../util/errors.js";
14
14
  const log = createLogger("tools");
15
15
  let _platform;
16
16
  function getPlatform() {
@@ -31,16 +31,89 @@ const captureAfterFields = {
31
31
  async function resolvePoint(x, y, windowId) {
32
32
  if (!windowId)
33
33
  return { x, y };
34
+ const win = (await getPlatform().listWindows()).find(w => w.id === windowId);
35
+ if (!win)
36
+ throw new WindowNotFoundError(windowId);
37
+ return { x: win.bounds.x + x, y: win.bounds.y + y };
38
+ }
39
+ function jsonText(value) {
40
+ return { type: "text", text: JSON.stringify(value, null, 2) };
41
+ }
42
+ function recoveryHint(code) {
43
+ switch (code) {
44
+ case "WINDOW_NOT_FOUND":
45
+ return "Run list_windows again, then retry with a fresh windowId or omit windowId for screen coordinates.";
46
+ case "ELEMENT_NOT_FOUND":
47
+ return "Run find_element again, then retry with a fresh elementId.";
48
+ case "PERMISSION_DENIED":
49
+ return "Run doctor and grant the missing macOS permission, then restart the launching client.";
50
+ case "UNSUPPORTED_PARAMETER":
51
+ return "Remove or replace the unsupported parameter; inspect tools/list for this tool schema.";
52
+ case "SAFETY_BLOCKED":
53
+ return "Choose a less risky action or ask the user to perform it manually.";
54
+ case "INPUT_FAILED":
55
+ return "Observe current state with screenshot or get_window_state before retrying manually.";
56
+ case "CAPTURE_FAILED":
57
+ return "Run doctor to check Screen Recording permission, then retry screenshot or ocr.";
58
+ case "COORDINATE_OUT_OF_BOUNDS":
59
+ return "Run get_screen_size or list_windows, then retry with coordinates inside the active display or window bounds.";
60
+ default:
61
+ return "Inspect the error message, observe the current UI state, and retry only if the operation is safe.";
62
+ }
63
+ }
64
+ function mcpErrorResponse(error) {
65
+ const err = error instanceof Error ? error : new Error(String(error));
66
+ const code = error instanceof UcuError ? error.code : "UNKNOWN_ERROR";
67
+ const retryable = error instanceof UcuError ? error.retryable : false;
68
+ return {
69
+ isError: true,
70
+ content: [
71
+ jsonText({
72
+ error: {
73
+ name: err.name,
74
+ code,
75
+ retryable,
76
+ message: err.message,
77
+ recovery: recoveryHint(code),
78
+ },
79
+ }),
80
+ ],
81
+ };
82
+ }
83
+ async function actionResponse(result, captureAfter, captureFormat = "jpeg", captureMaxWidth = 1280) {
84
+ if (!captureAfter)
85
+ return { content: [jsonText(result)] };
34
86
  try {
35
- const win = (await getPlatform().listWindows()).find(w => w.id === windowId);
36
- if (!win)
37
- return { x, y };
38
- return { x: win.bounds.x + x, y: win.bounds.y + y };
87
+ const buf = await getPlatform().screenshot(undefined, undefined, {
88
+ format: captureFormat,
89
+ maxWidth: captureMaxWidth,
90
+ });
91
+ return {
92
+ content: [
93
+ jsonText({ actionResult: result }),
94
+ {
95
+ type: "image",
96
+ data: buf.toString("base64"),
97
+ mimeType: `image/${captureFormat}`,
98
+ },
99
+ ],
100
+ };
39
101
  }
40
102
  catch {
41
- return { x, y };
103
+ return { content: [jsonText(result)] };
42
104
  }
43
105
  }
106
+ const retryableActions = new Set([
107
+ "screenshot",
108
+ "list_windows",
109
+ "list_apps",
110
+ "get_window_state",
111
+ "get_cursor_position",
112
+ "get_screen_size",
113
+ "ocr",
114
+ "doctor",
115
+ "find_element",
116
+ ]);
44
117
  async function withSafety(sa) {
45
118
  const platform = getPlatform();
46
119
  if (platform.isScreenLocked?.())
@@ -64,27 +137,24 @@ async function withSafety(sa) {
64
137
  if (shouldManageFocus)
65
138
  await platform.saveFocus?.();
66
139
  try {
67
- return await retry(() => sa.execute());
140
+ return retryableActions.has(sa.action)
141
+ ? await retry(() => sa.execute())
142
+ : await sa.execute();
68
143
  }
69
144
  finally {
70
145
  if (shouldManageFocus)
71
146
  await platform.restoreFocus?.();
72
147
  }
73
148
  }
74
- async function appendCaptureAfter(result, captureAfter) {
75
- if (!captureAfter)
76
- return result;
149
+ export function startUserActivityMonitor() {
150
+ if (userActivityInterval)
151
+ return;
77
152
  try {
78
- const buf = await getPlatform().screenshot();
79
- return { actionResult: result, screenshot: { type: "image", data: buf.toString("base64"), mimeType: "image/png" } };
153
+ lastCursorPos = getPlatform().getCursorPosition();
80
154
  }
81
155
  catch {
82
- return result;
156
+ // Keep the default when the cursor cannot be queried during startup.
83
157
  }
84
- }
85
- export function startUserActivityMonitor() {
86
- if (userActivityInterval)
87
- return;
88
158
  userActivityInterval = setInterval(() => {
89
159
  try {
90
160
  const pos = getPlatform().getCursorPosition();
@@ -95,8 +165,9 @@ export function startUserActivityMonitor() {
95
165
  }
96
166
  catch { /* can't check cursor */ }
97
167
  }, 250);
168
+ userActivityInterval.unref?.();
98
169
  }
99
- function stopUserActivityMonitor() {
170
+ export function stopUserActivityMonitor() {
100
171
  if (userActivityInterval) {
101
172
  clearInterval(userActivityInterval);
102
173
  userActivityInterval = undefined;
@@ -104,43 +175,66 @@ function stopUserActivityMonitor() {
104
175
  }
105
176
  export function registerTools(server) {
106
177
  const registry = ToolRegistry.instance;
107
- server.tool("screenshot", "Capture a screenshot of the entire screen or a region", {
178
+ const registerTool = (name, description, schema, handler) => {
179
+ server.tool(name, description, schema, async (params) => {
180
+ try {
181
+ return await handler(params);
182
+ }
183
+ catch (error) {
184
+ return mcpErrorResponse(error);
185
+ }
186
+ });
187
+ };
188
+ registerTool("screenshot", "Capture a screenshot of the entire screen or a region", {
108
189
  display: z.number().optional().describe("Display index (default 0)"),
190
+ windowId: z.string().optional().describe("Window ID from list_windows; when set, captures that window"),
109
191
  region: z.object({ x: z.number(), y: z.number(), width: z.number(), height: z.number() }).optional().describe("Region to capture"),
110
192
  format: z.enum(["png", "jpeg"]).default("png").describe("Image format"),
111
193
  maxWidth: z.number().default(1280).describe("Maximum output width in pixels. Aspect ratio is preserved."),
112
194
  }, async (params) => {
113
- const buf = await withSafety({ action: "screenshot", params: {}, requiresScreenRecording: true, execute: () => getPlatform().screenshot(params.display, params.region, { format: params.format }) });
195
+ if (params.windowId && params.region)
196
+ throw new UnsupportedParameterError("screenshot windowId cannot be combined with region");
197
+ const options = { format: params.format, maxWidth: params.maxWidth };
198
+ const buf = await withSafety({
199
+ action: "screenshot",
200
+ params,
201
+ requiresScreenRecording: true,
202
+ execute: () => params.windowId
203
+ ? getPlatform().screenshotWindow
204
+ ? getPlatform().screenshotWindow(params.windowId, options)
205
+ : Promise.reject(new UnsupportedParameterError("window screenshots are not implemented on this platform"))
206
+ : getPlatform().screenshot(params.display, params.region, options),
207
+ });
114
208
  return { content: [{ type: "image", data: buf.toString("base64"), mimeType: `image/${params.format}` }] };
115
209
  });
116
210
  registry.register("screenshot");
117
- server.tool("list_windows", "List all visible windows on screen", {
211
+ registerTool("list_windows", "List all visible windows on screen", {
118
212
  includeMinimized: z.boolean().optional().describe("Include minimized windows"),
119
213
  }, async (params) => {
120
214
  const windows = await withSafety({ action: "list_windows", params: {}, requiresAccessibility: true, execute: () => getPlatform().listWindows(params.includeMinimized) });
121
215
  return { content: [{ type: "text", text: JSON.stringify(windows, null, 2) }] };
122
216
  });
123
217
  registry.register("list_windows");
124
- server.tool("list_apps", "List all running applications", {}, async () => {
218
+ registerTool("list_apps", "List all running applications", {}, async () => {
125
219
  const apps = await withSafety({ action: "list_apps", params: {}, requiresAccessibility: true, execute: async () => getPlatform().listApps() });
126
220
  return { content: [{ type: "text", text: JSON.stringify(apps, null, 2) }] };
127
221
  });
128
222
  registry.register("list_apps");
129
- server.tool("focus_app", "Bring an application to the foreground", {
223
+ registerTool("focus_app", "Select an application/window as the active target context", {
130
224
  app: z.string().describe("Application name to focus"),
131
225
  }, async (params) => {
132
226
  const target = await withSafety({ action: "focus_app", params: {}, requiresAccessibility: true, execute: () => getPlatform().focusApp(params.app) });
133
227
  return { content: [{ type: "text", text: JSON.stringify(target, null, 2) }] };
134
228
  });
135
229
  registry.register("focus_app");
136
- server.tool("get_window_state", "Get detailed state of a window including accessibility tree", {
230
+ registerTool("get_window_state", "Get detailed state of a window including accessibility tree", {
137
231
  windowId: z.string().optional().describe("Window ID"), depth: z.number().optional().describe("AX tree depth"), includeBounds: z.boolean().optional().describe("Include element bounds"),
138
232
  }, async (params) => {
139
233
  const state = await withSafety({ action: "get_window_state", params: {}, requiresAccessibility: true, execute: () => getPlatform().getWindowState(params.windowId, params.depth, params.includeBounds) });
140
234
  return { content: [{ type: "text", text: JSON.stringify(state, null, 2) }] };
141
235
  });
142
236
  registry.register("get_window_state");
143
- server.tool("click", "Click at screen coordinates", {
237
+ registerTool("click", "Click at screen coordinates", {
144
238
  x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
145
239
  button: z.enum(["left", "right", "middle"]).optional().describe("Mouse button"),
146
240
  windowId: z.string().optional().describe("If set, x/y are relative to this window"),
@@ -148,10 +242,10 @@ export function registerTools(server) {
148
242
  }, async (params) => {
149
243
  const pt = await resolvePoint(params.x, params.y, params.windowId);
150
244
  await withSafety({ action: "click", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().click(pt.x, pt.y, params.button) });
151
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ clicked: true, x: pt.x, y: pt.y }, params.captureAfter), null, 2) }] };
245
+ return actionResponse({ clicked: true, x: pt.x, y: pt.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
152
246
  });
153
247
  registry.register("click");
154
- server.tool("double_click", "Double-click at screen coordinates", {
248
+ registerTool("double_click", "Double-click at screen coordinates", {
155
249
  x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
156
250
  button: z.enum(["left", "right", "middle"]).optional().describe("Mouse button"),
157
251
  windowId: z.string().optional().describe("If set, x/y are relative to this window"),
@@ -159,10 +253,10 @@ export function registerTools(server) {
159
253
  }, async (params) => {
160
254
  const pt = await resolvePoint(params.x, params.y, params.windowId);
161
255
  await withSafety({ action: "click", params: { x: pt.x, y: pt.y, doubleClick: true }, requiresAccessibility: true, execute: () => getPlatform().click(pt.x, pt.y, params.button, true) });
162
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ doubleClicked: true, x: pt.x, y: pt.y }, params.captureAfter), null, 2) }] };
256
+ return actionResponse({ doubleClicked: true, x: pt.x, y: pt.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
163
257
  });
164
258
  registry.register("double_click");
165
- server.tool("type_text", "Type text at the current cursor position", {
259
+ registerTool("type_text", "Type text at the current cursor position", {
166
260
  text: z.string().describe("Text to type"), delay: z.number().optional().describe("Delay between keystrokes in ms"),
167
261
  windowId: z.string().optional().describe("UNSUPPORTED: windowId-targeted keyboard typing is not implemented"),
168
262
  ...captureAfterFields,
@@ -170,47 +264,55 @@ export function registerTools(server) {
170
264
  if (params.windowId)
171
265
  throw new UnsupportedParameterError("windowId-targeted keyboard typing is not implemented");
172
266
  await withSafety({ action: "type_text", params: { text: params.text }, requiresAccessibility: true, execute: () => getPlatform().type(params.text, params.delay) });
173
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ typed: true, charCount: params.text.length }, params.captureAfter), null, 2) }] };
267
+ return actionResponse({ typed: true, charCount: params.text.length }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
174
268
  });
175
269
  registry.register("type_text");
176
- server.tool("press_key", "Press a keyboard shortcut", {
270
+ registerTool("press_key", "Press a keyboard shortcut", {
177
271
  keys: z.array(z.string()).optional().describe("Keys to press simultaneously"),
178
272
  key: z.string().optional().describe("Single key to press (alias for keys)"),
273
+ modifiers: z.array(z.string()).optional().describe("Modifier keys used with key, such as cmd, shift, alt, or ctrl"),
179
274
  windowId: z.string().optional().describe("UNSUPPORTED: windowId-targeted key events are not implemented"),
180
275
  ...captureAfterFields,
181
276
  }, async (params) => {
182
277
  if (params.windowId)
183
278
  throw new UnsupportedParameterError("windowId-targeted key events are not implemented");
184
- const keys = params.keys ?? (params.key ? [params.key] : []);
279
+ const keys = params.keys ?? [
280
+ ...(params.modifiers ?? []),
281
+ ...(params.key ? [params.key] : []),
282
+ ];
185
283
  if (keys.length === 0)
186
- throw new Error("press_key requires at least one key");
284
+ throw new UnsupportedParameterError("press_key requires at least one key");
187
285
  await withSafety({ action: "press_key", params: { keys }, requiresAccessibility: true, execute: () => getPlatform().key(keys) });
188
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ pressed: true, keys: params.keys }, params.captureAfter), null, 2) }] };
286
+ return actionResponse({ pressed: true, keys }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
189
287
  });
190
288
  registry.register("press_key");
191
- server.tool("scroll", "Scroll at coordinates", {
289
+ registerTool("scroll", "Scroll at coordinates", {
192
290
  x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
193
- deltaX: z.number().describe("Horizontal scroll"), deltaY: z.number().describe("Vertical scroll (negative = up)"),
291
+ deltaX: z.number().default(0).describe("Horizontal scroll"), deltaY: z.number().describe("Vertical scroll (negative = up)"),
194
292
  windowId: z.string().optional().describe("If set, x/y are relative to this window"),
195
293
  ...captureAfterFields,
196
294
  }, async (params) => {
197
295
  const pt = await resolvePoint(params.x, params.y, params.windowId);
198
- await withSafety({ action: "scroll", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().scroll(pt.x, pt.y, params.deltaX, params.deltaY) });
199
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ scrolled: true, x: pt.x, y: pt.y }, params.captureAfter), null, 2) }] };
296
+ const deltaX = params.deltaX ?? 0;
297
+ await withSafety({ action: "scroll", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().scroll(pt.x, pt.y, deltaX, params.deltaY) });
298
+ return actionResponse({ scrolled: true, x: pt.x, y: pt.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
200
299
  });
201
300
  registry.register("scroll");
202
- server.tool("drag", "Drag from one point to another", {
301
+ registerTool("drag", "Drag from one point to another", {
203
302
  startX: z.number().describe("Start X"), startY: z.number().describe("Start Y"),
204
303
  endX: z.number().describe("End X"), endY: z.number().describe("End Y"),
205
304
  button: z.enum(["left", "right", "middle"]).optional().describe("Mouse button"),
305
+ windowId: z.string().optional().describe("If set, start/end coordinates are relative to this window"),
206
306
  duration: z.number().optional().describe("Drag duration in ms"),
207
307
  ...captureAfterFields,
208
308
  }, async (params) => {
209
- await withSafety({ action: "drag", params: {}, requiresAccessibility: true, execute: () => getPlatform().drag(params.startX, params.startY, params.endX, params.endY, params.button, params.duration) });
210
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ dragged: true }, params.captureAfter), null, 2) }] };
309
+ const start = await resolvePoint(params.startX, params.startY, params.windowId);
310
+ const end = await resolvePoint(params.endX, params.endY, params.windowId);
311
+ await withSafety({ action: "drag", params: { startX: start.x, startY: start.y, endX: end.x, endY: end.y }, requiresAccessibility: true, execute: () => getPlatform().drag(start.x, start.y, end.x, end.y, params.button, params.duration) });
312
+ return actionResponse({ dragged: true, startX: start.x, startY: start.y, endX: end.x, endY: end.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
211
313
  });
212
314
  registry.register("drag");
213
- server.tool("doctor", "Check system permissions and diagnose common issues", {}, async () => {
315
+ registerTool("doctor", "Check system permissions and diagnose common issues", {}, async () => {
214
316
  const { checkPermissions } = await import("../safety/permissions.js");
215
317
  const { MacOSPlatform: MacPlat } = await import("../platform/macos.js");
216
318
  const permissions = await checkPermissions();
@@ -236,41 +338,46 @@ export function registerTools(server) {
236
338
  return { content: [{ type: "text", text: JSON.stringify(report, null, 2) }] };
237
339
  });
238
340
  registry.register("doctor");
239
- server.tool("wait", "Wait for a specified duration", { ms: z.number().describe("Duration in milliseconds") }, async (params) => {
341
+ registerTool("wait", "Wait for a specified duration", { ms: z.number().describe("Duration in milliseconds") }, async (params) => {
240
342
  await new Promise(r => setTimeout(r, params.ms));
241
343
  return { content: [{ type: "text", text: JSON.stringify({ waited: params.ms }) }] };
242
344
  });
243
345
  registry.register("wait");
244
- server.tool("wait_for_element", "Poll until an accessibility element matching the criteria appears", {
346
+ registerTool("wait_for_element", "Poll until an accessibility element matching the criteria appears", {
245
347
  text: z.string().optional().describe("Element text"), role: z.string().optional().describe("Element role"),
246
- app: z.string().optional().describe("Target app"), timeout: z.number().optional().describe("Timeout ms (default 5000)"), interval: z.number().optional().describe("Poll interval ms (default 500)"),
348
+ app: z.string().optional().describe("Target app"),
349
+ timeout: z.number().optional().describe("Timeout ms (default 5000)"),
350
+ timeoutMs: z.number().optional().describe("Alias for timeout"),
351
+ interval: z.number().optional().describe("Poll interval ms (default 500)"),
352
+ intervalMs: z.number().optional().describe("Alias for interval"),
247
353
  }, async (params) => {
248
- const deadline = Date.now() + (params.timeout ?? 5000);
249
- const interval = params.interval ?? 500;
354
+ const deadline = Date.now() + (params.timeout ?? params.timeoutMs ?? 5000);
355
+ const interval = params.interval ?? params.intervalMs ?? 500;
356
+ const query = { text: params.text, role: params.role, app: params.app, maxResults: 1 };
357
+ const { granted } = await checkPermission("accessibility");
358
+ if (!granted)
359
+ throw new PermissionError("accessibility", process.platform);
250
360
  while (Date.now() < deadline) {
251
- try {
252
- const results = await getPlatform().findElement({ text: params.text, role: params.role, app: params.app, maxResults: 1 });
253
- if (results.length > 0)
254
- return { content: [{ type: "text", text: JSON.stringify({ found: true, element: results[0] }, null, 2) }] };
255
- }
256
- catch { /* retry */ }
361
+ const results = await getPlatform().findElement(query);
362
+ if (results.length > 0)
363
+ return { content: [{ type: "text", text: JSON.stringify({ found: true, element: results[0] }, null, 2) }] };
257
364
  await new Promise(r => setTimeout(r, interval));
258
365
  }
259
366
  return { content: [{ type: "text", text: JSON.stringify({ found: false, reason: "timeout" }) }] };
260
367
  });
261
368
  registry.register("wait_for_element");
262
- server.tool("get_cursor_position", "Get current cursor position", {}, async () => {
369
+ registerTool("get_cursor_position", "Get current cursor position", {}, async () => {
263
370
  const pos = await withSafety({ action: "get_cursor_position", params: {}, execute: () => Promise.resolve(getPlatform().getCursorPosition()) });
264
371
  return { content: [{ type: "text", text: JSON.stringify(pos, null, 2) }] };
265
372
  });
266
373
  registry.register("get_cursor_position");
267
- server.tool("get_screen_size", "Get screen dimensions and scale factor", {
374
+ registerTool("get_screen_size", "Get screen dimensions and scale factor", {
268
375
  display: z.number().optional().describe("Display index"),
269
376
  }, async (params) => {
270
377
  return { content: [{ type: "text", text: JSON.stringify(getPlatform().getScreenSize(params.display), null, 2) }] };
271
378
  });
272
379
  registry.register("get_screen_size");
273
- server.tool("ocr", "Perform OCR on screen region", {
380
+ registerTool("ocr", "Perform OCR on screen region", {
274
381
  display: z.number().optional().describe("Display index"),
275
382
  region: z.object({ x: z.number(), y: z.number(), width: z.number(), height: z.number() }).optional().describe("Region to OCR"),
276
383
  }, async (params) => {
@@ -278,16 +385,17 @@ export function registerTools(server) {
278
385
  return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
279
386
  });
280
387
  registry.register("ocr");
281
- server.tool("move", "Move cursor to coordinates", {
388
+ registerTool("move", "Move cursor to coordinates", {
282
389
  x: z.number().describe("X coordinate"), y: z.number().describe("Y coordinate"),
283
390
  windowId: z.string().optional().describe("If set, x/y are relative to this window"),
391
+ ...captureAfterFields,
284
392
  }, async (params) => {
285
393
  const pt = await resolvePoint(params.x, params.y, params.windowId);
286
394
  await withSafety({ action: "move", params: { x: pt.x, y: pt.y }, requiresAccessibility: true, execute: () => getPlatform().move(pt.x, pt.y) });
287
- return { content: [{ type: "text", text: JSON.stringify({ moved: true, x: pt.x, y: pt.y }, null, 2) }] };
395
+ return actionResponse({ moved: true, x: pt.x, y: pt.y }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
288
396
  });
289
397
  registry.register("move");
290
- server.tool("find_element", "Find accessibility elements by text, role, or app", {
398
+ registerTool("find_element", "Find accessibility elements by text, role, or app", {
291
399
  text: z.string().optional().describe("Text to search"), role: z.string().optional().describe("AX role"), app: z.string().optional().describe("Target app"),
292
400
  depth: z.number().optional().describe("AX tree depth"), includeBounds: z.boolean().default(true).describe("Include bounds"), maxResults: z.number().min(1).max(200).default(50).describe("Max results"),
293
401
  }, async (params) => {
@@ -296,26 +404,26 @@ export function registerTools(server) {
296
404
  return { content: [{ type: "text", text: JSON.stringify(results, null, 2) }] };
297
405
  });
298
406
  registry.register("find_element");
299
- server.tool("click_element", "Click an accessibility element by its ID", {
407
+ registerTool("click_element", "Click an accessibility element by its ID", {
300
408
  elementId: z.string().describe("AX element identifier"), app: z.string().optional().describe("Target app"), ...captureAfterFields,
301
409
  }, async (params) => {
302
410
  await withSafety({ action: "click_element", params: {}, requiresAccessibility: true, execute: () => getPlatform().clickElement(params.elementId, params.app) });
303
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ clicked: true, elementId: params.elementId }, params.captureAfter), null, 2) }] };
411
+ return actionResponse({ clicked: true, elementId: params.elementId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
304
412
  });
305
413
  registry.register("click_element");
306
- server.tool("set_value", "Set the value of an accessibility element", {
414
+ registerTool("set_value", "Set the value of an accessibility element", {
307
415
  elementId: z.string().describe("AX element identifier"), value: z.string().describe("Value to set"), app: z.string().optional().describe("Target app"), ...captureAfterFields,
308
416
  }, async (params) => {
309
417
  await withSafety({ action: "set_value", params: { value: params.value }, requiresAccessibility: true, execute: () => getPlatform().setElementValue(params.elementId, params.value, params.app) });
310
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ setValue: true, elementId: params.elementId }, params.captureAfter), null, 2) }] };
418
+ return actionResponse({ setValue: true, elementId: params.elementId }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
311
419
  });
312
420
  registry.register("set_value");
313
- server.tool("type_in_element", "Type text into an accessibility element, optionally clearing first", {
421
+ registerTool("type_in_element", "Type text into an accessibility element, optionally clearing first", {
314
422
  elementId: z.string().describe("AX element identifier"), text: z.string().describe("Text to type"),
315
423
  app: z.string().optional().describe("Target app"), clearFirst: z.boolean().optional().describe("Clear existing text before typing"), ...captureAfterFields,
316
424
  }, async (params) => {
317
425
  await withSafety({ action: "type_in_element", params: { text: params.text }, requiresAccessibility: true, execute: () => getPlatform().typeInElement(params.elementId, params.text, params.app, params.clearFirst) });
318
- return { content: [{ type: "text", text: JSON.stringify(await appendCaptureAfter({ typed: true, elementId: params.elementId, charCount: params.text.length }, params.captureAfter), null, 2) }] };
426
+ return actionResponse({ typed: true, elementId: params.elementId, charCount: params.text.length }, params.captureAfter, params.captureFormat, params.captureMaxWidth);
319
427
  });
320
428
  registry.register("type_in_element");
321
429
  log.info("Registered tools", { count: registry.tools.length, tools: registry.tools.join(", ") });
@@ -3,6 +3,8 @@ export declare class MacOSPlatform implements Platform {
3
3
  private readonly elementCache;
4
4
  private readonly elementCacheTtlMs;
5
5
  private readonly elementCacheMaxSize;
6
+ private readonly windowCacheTtlMs;
7
+ private windowCache;
6
8
  private activeTarget;
7
9
  private savedFocus;
8
10
  /** Remove expired entries from the element cache. */
@@ -30,6 +32,8 @@ export declare class MacOSPlatform implements Platform {
30
32
  scroll(x: number, y: number, deltaX: number, deltaY: number): Promise<void>;
31
33
  getCursorPosition(): CursorPosition;
32
34
  ocr(display?: number, region?: ScreenRegion): Promise<OcrResult>;
35
+ private ocrNative;
36
+ private ocrJxa;
33
37
  type(text: string, delay?: number): Promise<void>;
34
38
  key(keys: string[]): Promise<void>;
35
39
  findElement(options: FindElementOptions): Promise<FindElementResult[]>;