ucu-mcp 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +36 -1
- package/README.md +68 -12
- package/dist/src/mcp/server.js +18 -2
- package/dist/src/mcp/tools.d.ts +1 -0
- package/dist/src/mcp/tools.js +173 -65
- package/dist/src/platform/macos.d.ts +4 -0
- package/dist/src/platform/macos.js +355 -215
- package/dist/src/util/errors.d.ts +6 -0
- package/dist/src/util/errors.js +8 -0
- package/dist/src/utils/input.js +88 -18
- package/native/cgevent/cgevent-helper +0 -0
- package/native/cgevent/main.swift +126 -0
- package/native/ocr/main.swift +89 -0
- package/native/ocr/ocr-helper +0 -0
- package/package.json +6 -3
package/CHANGELOG.md
CHANGED
|
@@ -5,7 +5,42 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
-
## [0.
|
|
8
|
+
## [0.2.0] - 2026-06-05
|
|
9
|
+
|
|
10
|
+
### Changed
|
|
11
|
+
|
|
12
|
+
- Replaced JXA keyboard/mouse input with native Swift CGEvent helper (`native/cgevent/cgevent-helper`), eliminating SIGSEGV crashes on macOS Sequoia+
|
|
13
|
+
- `listWindows` switched from `CGWindowListCopyWindowInfo` to System Events for reliable window enumeration
|
|
14
|
+
- `getWindowState` adapted to use System Events window IDs instead of CGWindow IDs
|
|
15
|
+
- Fixed OCR JXA script — `isValid` guard now correctly handles missing/broken references
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
|
|
19
|
+
- `typeInElement` now properly escapes `$` in text to prevent JXA template-literal interpolation errors
|
|
20
|
+
- AX element cache now refetches stale references instead of throwing
|
|
21
|
+
- MCP server version now resolves from `package.json` instead of advertising stale `0.1.0`
|
|
22
|
+
- `screenshot.maxWidth`, `screenshot.windowId`, and action `captureAfter` encode options now reach the execution path
|
|
23
|
+
- `captureAfter` now returns a separate MCP image content item instead of embedding screenshot bytes in JSON text
|
|
24
|
+
- Window-relative coordinate tools now reject stale `windowId` values instead of falling back to raw screen coordinates
|
|
25
|
+
- Real input actions no longer use the shared retry wrapper after a partial failure
|
|
26
|
+
- macOS AX traversal now uses `uiElements()` with `elements()` fallback, fixing TextEdit `AXTextArea` discovery
|
|
27
|
+
- User activity monitoring now starts with the MCP server and initializes the cursor baseline before polling
|
|
28
|
+
- Added client-friendly aliases and defaults: `press_key.modifiers`, `scroll.deltaX=0`, `wait_for_element.timeoutMs/intervalMs`, and `move.captureAfter`
|
|
29
|
+
- README tool tables and OCR/captureAfter response examples now match the live MCP schema
|
|
30
|
+
- macOS platform failures now use structured `UcuError` subclasses for screenshots, window lookup, AX permissions, stale elements, cursor queries, and input synthesis
|
|
31
|
+
- MCP tool failures now return `isError: true` with JSON `error.name`, `error.code`, `error.retryable`, `error.message`, and `error.recovery` instead of forcing clients to parse plain text
|
|
32
|
+
- `wait_for_element` no longer masks Accessibility/platform failures as ordinary timeouts; missing elements still time out, but real lookup failures surface through the structured MCP error response
|
|
33
|
+
- macOS `listWindows` now uses a short defensive-copy cache for repeated window lookups, reducing back-to-back window resolution calls from seconds to near-zero while `focusApp` still invalidates before activating a target app
|
|
34
|
+
- Added optional real client CLI smoke coverage for Claude Code CLI, Codex CLI, and OpenCode MCP visibility
|
|
35
|
+
- README now includes verified `claude mcp add`, `codex mcp add`, and OpenCode `opencode.json` setup paths
|
|
36
|
+
|
|
37
|
+
### Tests
|
|
38
|
+
|
|
39
|
+
- Unit test count grew from 83 to 161
|
|
40
|
+
- Optional client CLI smoke: 3/3 passing with `npm run test:client-cli`
|
|
41
|
+
- GUI smoke tests 6/6 passing (`UCU_MACOS_GUI_SMOKE=1`)
|
|
42
|
+
|
|
43
|
+
## [0.1.0] - 2026-06-02
|
|
9
44
|
|
|
10
45
|
### Added
|
|
11
46
|
|
package/README.md
CHANGED
|
@@ -78,10 +78,10 @@ UCU-MCP provides 22 tools across five categories:
|
|
|
78
78
|
|
|
79
79
|
| Tool | Description | Key Parameters |
|
|
80
80
|
|------|-------------|----------------|
|
|
81
|
-
| `screenshot` | Capture screen, window, or region as
|
|
81
|
+
| `screenshot` | Capture screen, window, or region as PNG/JPEG image content | `display?`, `windowId?`, `region?`, `maxWidth?`, `format?` |
|
|
82
82
|
| `list_windows` | List all on-screen windows with IDs, titles, bounds | `includeMinimized?` |
|
|
83
83
|
| `list_apps` | List visible macOS apps with pid, frontmost state, and window count | — |
|
|
84
|
-
| `focus_app` | Select an app/window target context
|
|
84
|
+
| `focus_app` | Select an app/window target context for later AX tools | `app` |
|
|
85
85
|
| `get_window_state` | Get accessibility tree of a window, or the prior focus_app target when windowId is omitted | `windowId?`, `depth?`, `includeBounds?` |
|
|
86
86
|
| `get_screen_size` | Get screen dimensions | `display?` |
|
|
87
87
|
| `ocr` | Perform OCR on screen or region; returns text with bounding boxes and confidence | `display?`, `region?` |
|
|
@@ -92,9 +92,9 @@ UCU-MCP provides 22 tools across five categories:
|
|
|
92
92
|
|------|-------------|----------------|
|
|
93
93
|
| `click` | Click at screen coordinates (non-invasive) | `x`, `y`, `windowId?`, `button?` |
|
|
94
94
|
| `double_click` | Double-click at screen coordinates | `x`, `y`, `windowId?`, `button?` |
|
|
95
|
-
| `scroll` | Scroll at a position (vertical/horizontal) | `x`, `y`, `deltaX?`, `deltaY`, `captureAfter?` |
|
|
96
|
-
| `drag` | Drag from one position to another | `startX`, `startY`, `endX`, `endY`, `duration?`, `button?`, `captureAfter?` |
|
|
97
|
-
| `move` | Move the physical cursor to a position (invasive) | `x`, `y` |
|
|
95
|
+
| `scroll` | Scroll at a position (vertical/horizontal) | `x`, `y`, `deltaX?`, `deltaY`, `windowId?`, `captureAfter?` |
|
|
96
|
+
| `drag` | Drag from one position to another | `startX`, `startY`, `endX`, `endY`, `windowId?`, `duration?`, `button?`, `captureAfter?` |
|
|
97
|
+
| `move` | Move the physical cursor to a position (invasive) | `x`, `y`, `windowId?`, `captureAfter?` |
|
|
98
98
|
| `get_cursor_position` | Get current cursor position | — |
|
|
99
99
|
|
|
100
100
|
### Keyboard
|
|
@@ -102,7 +102,7 @@ UCU-MCP provides 22 tools across five categories:
|
|
|
102
102
|
| Tool | Description | Key Parameters |
|
|
103
103
|
|------|-------------|----------------|
|
|
104
104
|
| `type_text` | Type text into the currently focused element via OS key events (not clipboard) | `text`, `delay?`, `captureAfter?` |
|
|
105
|
-
| `press_key` | Press key or keyboard shortcut in the focused window | `key
|
|
105
|
+
| `press_key` | Press key or keyboard shortcut in the focused window | `key?`, `modifiers?`, `keys?`, `captureAfter?` |
|
|
106
106
|
|
|
107
107
|
### AX Element Interaction
|
|
108
108
|
|
|
@@ -118,10 +118,10 @@ UCU-MCP provides 22 tools across five categories:
|
|
|
118
118
|
| Tool | Description | Key Parameters |
|
|
119
119
|
|------|-------------|----------------|
|
|
120
120
|
| `doctor` | Check platform readiness, permissions, lock-screen state, and client integration hints | — |
|
|
121
|
-
| `wait` | Wait for UI state to settle after launches, animations, or navigation | `ms
|
|
122
|
-
| `wait_for_element` | Poll the AX tree until a matching element appears | `text?`, `role?`, `app?`, `timeoutMs?`, `intervalMs?` |
|
|
121
|
+
| `wait` | Wait for UI state to settle after launches, animations, or navigation | `ms` |
|
|
122
|
+
| `wait_for_element` | Poll the AX tree until a matching element appears | `text?`, `role?`, `app?`, `timeout?`, `timeoutMs?`, `interval?`, `intervalMs?` |
|
|
123
123
|
|
|
124
|
-
Action tools accept `captureAfter`, `captureMaxWidth`, and `captureFormat` so an agent can receive a post-action screenshot in the same
|
|
124
|
+
Action tools accept `captureAfter`, `captureMaxWidth`, and `captureFormat` so an agent can receive a post-action screenshot as a second MCP image content item in the same response instead of spending another round trip on `screenshot`.
|
|
125
125
|
|
|
126
126
|
For fast AX discovery on large windows, use `find_element` with `includeBounds=false` and a small `maxResults`. Keep bounds enabled when the result may be used for coordinate fallback.
|
|
127
127
|
|
|
@@ -153,11 +153,14 @@ The `ocr` tool captures a screenshot and runs optical character recognition, ret
|
|
|
153
153
|
|
|
154
154
|
```json
|
|
155
155
|
{
|
|
156
|
-
"
|
|
156
|
+
"fullText": "Detected text here",
|
|
157
157
|
"elements": [
|
|
158
158
|
{
|
|
159
159
|
"text": "Hello",
|
|
160
|
-
"
|
|
160
|
+
"x": 120,
|
|
161
|
+
"y": 210,
|
|
162
|
+
"width": 80,
|
|
163
|
+
"height": 24,
|
|
161
164
|
"confidence": 0.97
|
|
162
165
|
}
|
|
163
166
|
]
|
|
@@ -245,10 +248,20 @@ UCU-MCP runs as a stdio MCP server. This is the common integration path for Clau
|
|
|
245
248
|
|
|
246
249
|
### Claude Code CLI
|
|
247
250
|
|
|
251
|
+
Verified CLI setup:
|
|
252
|
+
|
|
253
|
+
```bash
|
|
254
|
+
claude mcp add --scope user ucu -- ucu-mcp
|
|
255
|
+
claude mcp list
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
Equivalent config shape:
|
|
259
|
+
|
|
248
260
|
```json
|
|
249
261
|
{
|
|
250
262
|
"mcpServers": {
|
|
251
263
|
"ucu": {
|
|
264
|
+
"type": "stdio",
|
|
252
265
|
"command": "ucu-mcp"
|
|
253
266
|
}
|
|
254
267
|
}
|
|
@@ -263,25 +276,51 @@ Use the same local MCP server shape as Claude Desktop. Grant Accessibility and S
|
|
|
263
276
|
{
|
|
264
277
|
"mcpServers": {
|
|
265
278
|
"ucu": {
|
|
279
|
+
"type": "stdio",
|
|
266
280
|
"command": "ucu-mcp"
|
|
267
281
|
}
|
|
268
282
|
}
|
|
269
283
|
}
|
|
270
284
|
```
|
|
271
285
|
|
|
286
|
+
### Codex CLI
|
|
287
|
+
|
|
288
|
+
Verified CLI setup:
|
|
289
|
+
|
|
290
|
+
```bash
|
|
291
|
+
codex mcp add ucu -- ucu-mcp
|
|
292
|
+
codex mcp list
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
Equivalent `~/.codex/config.toml` shape:
|
|
296
|
+
|
|
297
|
+
```toml
|
|
298
|
+
[mcp_servers.ucu]
|
|
299
|
+
command = "ucu-mcp"
|
|
300
|
+
```
|
|
301
|
+
|
|
272
302
|
### OpenCode
|
|
273
303
|
|
|
304
|
+
OpenCode reads MCP servers from `~/.config/opencode/opencode.json`.
|
|
305
|
+
|
|
274
306
|
```json
|
|
275
307
|
{
|
|
276
308
|
"mcp": {
|
|
277
|
-
"ucu": {
|
|
309
|
+
"ucu-mcp": {
|
|
278
310
|
"type": "local",
|
|
311
|
+
"enabled": true,
|
|
279
312
|
"command": ["ucu-mcp"]
|
|
280
313
|
}
|
|
281
314
|
}
|
|
282
315
|
}
|
|
283
316
|
```
|
|
284
317
|
|
|
318
|
+
Verify with:
|
|
319
|
+
|
|
320
|
+
```bash
|
|
321
|
+
opencode mcp list
|
|
322
|
+
```
|
|
323
|
+
|
|
285
324
|
### Runtime Doctor
|
|
286
325
|
|
|
287
326
|
```bash
|
|
@@ -362,15 +401,32 @@ src/
|
|
|
362
401
|
|
|
363
402
|
## Error Handling
|
|
364
403
|
|
|
404
|
+
Tool execution failures return standard MCP tool results with `isError: true`. The first content item is JSON text so clients can make policy decisions without string matching:
|
|
405
|
+
|
|
406
|
+
```json
|
|
407
|
+
{
|
|
408
|
+
"error": {
|
|
409
|
+
"name": "WindowNotFoundError",
|
|
410
|
+
"code": "WINDOW_NOT_FOUND",
|
|
411
|
+
"retryable": false,
|
|
412
|
+
"message": "Window win-1 not found. It may have been closed. Run list_windows to get fresh IDs.",
|
|
413
|
+
"recovery": "Run list_windows again, then retry with a fresh windowId or omit windowId for screen coordinates."
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
```
|
|
417
|
+
|
|
365
418
|
| Error Code | Description | Retryable |
|
|
366
419
|
|------------|-------------|-----------|
|
|
367
420
|
| `PLATFORM_ERROR` | Platform API call failed | Yes |
|
|
368
421
|
| `PERMISSION_DENIED` | Missing system permission | No |
|
|
369
422
|
| `SAFETY_BLOCKED` | Blocked by safety rule | No |
|
|
370
423
|
| `WINDOW_NOT_FOUND` | Window does not exist | No |
|
|
424
|
+
| `ELEMENT_NOT_FOUND` | Accessibility element is stale or missing | No |
|
|
425
|
+
| `UNSUPPORTED_PARAMETER` | Valid JSON requested an unsupported parameter combination | No |
|
|
371
426
|
| `COORDINATE_OUT_OF_BOUNDS` | Coordinate outside screen | No |
|
|
372
427
|
| `INPUT_FAILED` | Input synthesis failed | Yes |
|
|
373
428
|
| `CAPTURE_FAILED` | Screenshot/OCR capture failed | Yes |
|
|
429
|
+
| `UNKNOWN_ERROR` | Unexpected internal failure | No |
|
|
374
430
|
|
|
375
431
|
## Development
|
|
376
432
|
|
package/dist/src/mcp/server.js
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
3
|
+
import { dirname, join } from "node:path";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
2
5
|
import { createStdioTransport } from "./transport.js";
|
|
3
|
-
import { registerTools } from "./tools.js";
|
|
6
|
+
import { registerTools, startUserActivityMonitor } from "./tools.js";
|
|
4
7
|
const UCU_MCP_INSTRUCTIONS = `
|
|
5
8
|
UCU-MCP is a cross-client computer-use server for Claude Code CLI, Claude Code Desktop, OpenCode, and other MCP clients.
|
|
6
9
|
|
|
@@ -12,14 +15,27 @@ Safety model: actions are blocked while macOS is locked, dangerous shortcuts and
|
|
|
12
15
|
|
|
13
16
|
For Claude Code CLI/Desktop and OpenCode configs, run the ucu-mcp executable over stdio. If tools fail on macOS, run doctor first to check Accessibility and Screen Recording permissions. Windows and Linux adapters are explicit stubs until their native backends are implemented.
|
|
14
17
|
`.trim();
|
|
18
|
+
function getPackageVersion() {
|
|
19
|
+
let dir = dirname(fileURLToPath(import.meta.url));
|
|
20
|
+
for (let i = 0; i < 6; i++) {
|
|
21
|
+
const path = join(dir, "package.json");
|
|
22
|
+
if (existsSync(path)) {
|
|
23
|
+
const parsed = JSON.parse(readFileSync(path, "utf-8"));
|
|
24
|
+
return parsed.version ?? "0.0.0";
|
|
25
|
+
}
|
|
26
|
+
dir = dirname(dir);
|
|
27
|
+
}
|
|
28
|
+
return "0.0.0";
|
|
29
|
+
}
|
|
15
30
|
export async function startServer() {
|
|
16
31
|
const server = new McpServer({
|
|
17
32
|
name: "ucu-mcp",
|
|
18
|
-
version: "0.1.0",
|
|
33
|
+
version: getPackageVersion(),
|
|
19
34
|
}, {
|
|
20
35
|
instructions: UCU_MCP_INSTRUCTIONS,
|
|
21
36
|
});
|
|
22
37
|
registerTools(server);
|
|
38
|
+
startUserActivityMonitor();
|
|
23
39
|
const transport = createStdioTransport();
|
|
24
40
|
await server.connect(transport);
|
|
25
41
|
console.error("ucu-mcp server started on stdio");
|
package/dist/src/mcp/tools.d.ts
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
8
8
|
export declare function startUserActivityMonitor(): void;
|
|
9
|
+
export declare function stopUserActivityMonitor(): void;
|
|
9
10
|
export declare function registerTools(server: McpServer): void;
|
|
10
11
|
export declare class ToolRegistry {
|
|
11
12
|
private static _instance;
|