ucu-mcp 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -5,7 +5,42 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
- ## [0.1.0] - 2025-06-02
8
+ ## [0.2.0] - 2026-06-05
9
+
10
+ ### Changed
11
+
12
+ - Replaced JXA keyboard/mouse input with native Swift CGEvent helper (`native/cgevent/cgevent-helper`), eliminating SIGSEGV crashes on macOS Sequoia+
13
+ - `listWindows` switched from `CGWindowListCopyWindowInfo` to System Events for reliable window enumeration
14
+ - `getWindowState` adapted to use System Events window IDs instead of CGWindow IDs
15
+ - Fixed OCR JXA script — `isValid` guard now correctly handles missing/broken references
16
+
17
+ ### Fixed
18
+
19
+ - `typeInElement` now properly escapes `$` in text to prevent JXA template-literal interpolation errors
20
+ - AX element cache now refetches stale references instead of throwing
21
+ - MCP server version now resolves from `package.json` instead of advertising stale `0.1.0`
22
+ - `screenshot.maxWidth`, `screenshot.windowId`, and action `captureAfter` encode options now reach the execution path
23
+ - `captureAfter` now returns a separate MCP image content item instead of embedding screenshot bytes in JSON text
24
+ - Window-relative coordinate tools now reject stale `windowId` values instead of falling back to raw screen coordinates
25
+ - Real input actions no longer use the shared retry wrapper after a partial failure
26
+ - macOS AX traversal now uses `uiElements()` with `elements()` fallback, fixing TextEdit `AXTextArea` discovery
27
+ - User activity monitoring now starts with the MCP server and initializes the cursor baseline before polling
28
+ - Added client-friendly aliases and defaults: `press_key.modifiers`, `scroll.deltaX=0`, `wait_for_element.timeoutMs/intervalMs`, and `move.captureAfter`
29
+ - README tool tables and OCR/captureAfter response examples now match the live MCP schema
30
+ - macOS platform failures now use structured `UcuError` subclasses for screenshots, window lookup, AX permissions, stale elements, cursor queries, and input synthesis
31
+ - MCP tool failures now return `isError: true` with JSON `error.name`, `error.code`, `error.retryable`, `error.message`, and `error.recovery` instead of forcing clients to parse plain text
32
+ - `wait_for_element` no longer masks Accessibility/platform failures as ordinary timeouts; missing elements still time out, but real lookup failures surface through the structured MCP error response
33
+ - macOS `listWindows` now uses a short defensive-copy cache for repeated window lookups, reducing back-to-back window resolution calls from seconds to near-zero; `focusApp` still invalidates the cache before activating a target app
34
+ - Added optional real client CLI smoke coverage for Claude Code CLI, Codex CLI, and OpenCode MCP visibility
35
+ - README now includes verified `claude mcp add`, `codex mcp add`, and OpenCode `opencode.json` setup paths
36
+
37
+ ### Tests
38
+
39
+ - Unit test count grew from 83 to 161
40
+ - Optional client CLI smoke: 3/3 passing with `npm run test:client-cli`
41
+ - GUI smoke tests 6/6 passing (`UCU_MACOS_GUI_SMOKE=1`)
42
+
43
+ ## [0.1.0] - 2026-06-02
9
44
 
10
45
  ### Added
11
46
 
package/README.md CHANGED
@@ -78,10 +78,10 @@ UCU-MCP provides 22 tools across five categories:
78
78
 
79
79
  | Tool | Description | Key Parameters |
80
80
  |------|-------------|----------------|
81
- | `screenshot` | Capture screen, window, or region as base64 PNG/JPEG | `display?`, `windowId?`, `region?`, `maxWidth?`, `format?` |
81
+ | `screenshot` | Capture screen, window, or region as PNG/JPEG image content | `display?`, `windowId?`, `region?`, `maxWidth?`, `format?` |
82
82
  | `list_windows` | List all on-screen windows with IDs, titles, bounds | `includeMinimized?` |
83
83
  | `list_apps` | List visible macOS apps with pid, frontmost state, and window count | — |
84
- | `focus_app` | Select an app/window target context without raising it | `app` |
84
+ | `focus_app` | Select an app/window target context for later AX tools | `app` |
85
85
  | `get_window_state` | Get accessibility tree of a window, or the prior focus_app target when windowId is omitted | `windowId?`, `depth?`, `includeBounds?` |
86
86
  | `get_screen_size` | Get screen dimensions | `display?` |
87
87
  | `ocr` | Perform OCR on screen or region; returns text with bounding boxes and confidence | `display?`, `region?` |
@@ -92,9 +92,9 @@ UCU-MCP provides 22 tools across five categories:
92
92
  |------|-------------|----------------|
93
93
  | `click` | Click at screen coordinates (non-invasive) | `x`, `y`, `windowId?`, `button?` |
94
94
  | `double_click` | Double-click at screen coordinates | `x`, `y`, `windowId?`, `button?` |
95
- | `scroll` | Scroll at a position (vertical/horizontal) | `x`, `y`, `deltaX?`, `deltaY`, `captureAfter?` |
96
- | `drag` | Drag from one position to another | `startX`, `startY`, `endX`, `endY`, `duration?`, `button?`, `captureAfter?` |
97
- | `move` | Move the physical cursor to a position (invasive) | `x`, `y` |
95
+ | `scroll` | Scroll at a position (vertical/horizontal) | `x`, `y`, `deltaX?`, `deltaY`, `windowId?`, `captureAfter?` |
96
+ | `drag` | Drag from one position to another | `startX`, `startY`, `endX`, `endY`, `windowId?`, `duration?`, `button?`, `captureAfter?` |
97
+ | `move` | Move the physical cursor to a position (invasive) | `x`, `y`, `windowId?`, `captureAfter?` |
98
98
  | `get_cursor_position` | Get current cursor position | — |
99
99
 
100
100
  ### Keyboard
@@ -102,7 +102,7 @@ UCU-MCP provides 22 tools across five categories:
102
102
  | Tool | Description | Key Parameters |
103
103
  |------|-------------|----------------|
104
104
  | `type_text` | Type text into the currently focused element via OS key events (not clipboard) | `text`, `delay?`, `captureAfter?` |
105
- | `press_key` | Press key or keyboard shortcut in the focused window | `key`, `modifiers?`, `captureAfter?` |
105
+ | `press_key` | Press key or keyboard shortcut in the focused window | `key?`, `modifiers?`, `keys?`, `captureAfter?` |
106
106
 
107
107
  ### AX Element Interaction
108
108
 
@@ -118,10 +118,10 @@ UCU-MCP provides 22 tools across five categories:
118
118
  | Tool | Description | Key Parameters |
119
119
  |------|-------------|----------------|
120
120
  | `doctor` | Check platform readiness, permissions, lock-screen state, and client integration hints | — |
121
- | `wait` | Wait for UI state to settle after launches, animations, or navigation | `ms?` |
122
- | `wait_for_element` | Poll the AX tree until a matching element appears | `text?`, `role?`, `app?`, `timeoutMs?`, `intervalMs?` |
121
+ | `wait` | Wait for UI state to settle after launches, animations, or navigation | `ms` |
122
+ | `wait_for_element` | Poll the AX tree until a matching element appears | `text?`, `role?`, `app?`, `timeout?`, `timeoutMs?`, `interval?`, `intervalMs?` |
123
123
 
124
- Action tools accept `captureAfter`, `captureMaxWidth`, and `captureFormat` so an agent can receive a post-action screenshot in the same MCP response instead of spending another round trip on `screenshot`.
124
+ Action tools accept `captureAfter`, `captureMaxWidth`, and `captureFormat` so an agent can receive a post-action screenshot as a second MCP image content item in the same response instead of spending another round trip on `screenshot`.
125
125
 
126
126
  For fast AX discovery on large windows, use `find_element` with `includeBounds=false` and a small `maxResults`. Keep bounds enabled when the result may be used for coordinate fallback.
127
127
 
@@ -153,11 +153,14 @@ The `ocr` tool captures a screenshot and runs optical character recognition, ret
153
153
 
154
154
  ```json
155
155
  {
156
- "text": "Detected text here",
156
+ "fullText": "Detected text here",
157
157
  "elements": [
158
158
  {
159
159
  "text": "Hello",
160
- "bounds": { "x": 120, "y": 210, "width": 80, "height": 24 },
160
+ "x": 120,
161
+ "y": 210,
162
+ "width": 80,
163
+ "height": 24,
161
164
  "confidence": 0.97
162
165
  }
163
166
  ]
@@ -245,10 +248,20 @@ UCU-MCP runs as a stdio MCP server. This is the common integration path for Clau
245
248
 
246
249
  ### Claude Code CLI
247
250
 
251
+ Verified CLI setup:
252
+
253
+ ```bash
254
+ claude mcp add --scope user ucu -- ucu-mcp
255
+ claude mcp list
256
+ ```
257
+
258
+ Equivalent config shape:
259
+
248
260
  ```json
249
261
  {
250
262
  "mcpServers": {
251
263
  "ucu": {
264
+ "type": "stdio",
252
265
  "command": "ucu-mcp"
253
266
  }
254
267
  }
@@ -263,25 +276,51 @@ Use the same local MCP server shape as Claude Desktop. Grant Accessibility and S
263
276
  {
264
277
  "mcpServers": {
265
278
  "ucu": {
279
+ "type": "stdio",
266
280
  "command": "ucu-mcp"
267
281
  }
268
282
  }
269
283
  }
270
284
  ```
271
285
 
286
+ ### Codex CLI
287
+
288
+ Verified CLI setup:
289
+
290
+ ```bash
291
+ codex mcp add ucu -- ucu-mcp
292
+ codex mcp list
293
+ ```
294
+
295
+ Equivalent `~/.codex/config.toml` shape:
296
+
297
+ ```toml
298
+ [mcp_servers.ucu]
299
+ command = "ucu-mcp"
300
+ ```
301
+
272
302
  ### OpenCode
273
303
 
304
+ OpenCode reads MCP servers from `~/.config/opencode/opencode.json`.
305
+
274
306
  ```json
275
307
  {
276
308
  "mcp": {
277
- "ucu": {
309
+ "ucu-mcp": {
278
310
  "type": "local",
311
+ "enabled": true,
279
312
  "command": ["ucu-mcp"]
280
313
  }
281
314
  }
282
315
  }
283
316
  ```
284
317
 
318
+ Verify with:
319
+
320
+ ```bash
321
+ opencode mcp list
322
+ ```
323
+
285
324
  ### Runtime Doctor
286
325
 
287
326
  ```bash
@@ -362,15 +401,32 @@ src/
362
401
 
363
402
  ## Error Handling
364
403
 
404
+ Tool execution failures return standard MCP tool results with `isError: true`. The first content item is JSON text so clients can make policy decisions without string matching:
405
+
406
+ ```json
407
+ {
408
+ "error": {
409
+ "name": "WindowNotFoundError",
410
+ "code": "WINDOW_NOT_FOUND",
411
+ "retryable": false,
412
+ "message": "Window win-1 not found. It may have been closed. Run list_windows to get fresh IDs.",
413
+ "recovery": "Run list_windows again, then retry with a fresh windowId or omit windowId for screen coordinates."
414
+ }
415
+ }
416
+ ```
417
+
365
418
  | Error Code | Description | Retryable |
366
419
  |------------|-------------|-----------|
367
420
  | `PLATFORM_ERROR` | Platform API call failed | Yes |
368
421
  | `PERMISSION_DENIED` | Missing system permission | No |
369
422
  | `SAFETY_BLOCKED` | Blocked by safety rule | No |
370
423
  | `WINDOW_NOT_FOUND` | Window does not exist | No |
424
+ | `ELEMENT_NOT_FOUND` | Accessibility element is stale or missing | No |
425
+ | `UNSUPPORTED_PARAMETER` | Request is valid JSON but uses an unsupported parameter combination | No |
371
426
  | `COORDINATE_OUT_OF_BOUNDS` | Coordinate outside screen | No |
372
427
  | `INPUT_FAILED` | Input synthesis failed | Yes |
373
428
  | `CAPTURE_FAILED` | Screenshot/OCR capture failed | Yes |
429
+ | `UNKNOWN_ERROR` | Unexpected internal failure | No |
374
430
 
375
431
  ## Development
376
432
 
@@ -1,6 +1,9 @@
1
1
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
+ import { existsSync, readFileSync } from "node:fs";
3
+ import { dirname, join } from "node:path";
4
+ import { fileURLToPath } from "node:url";
2
5
  import { createStdioTransport } from "./transport.js";
3
- import { registerTools } from "./tools.js";
6
+ import { registerTools, startUserActivityMonitor } from "./tools.js";
4
7
  const UCU_MCP_INSTRUCTIONS = `
5
8
  UCU-MCP is a cross-client computer-use server for Claude Code CLI, Claude Code Desktop, OpenCode, and other MCP clients.
6
9
 
@@ -12,14 +15,27 @@ Safety model: actions are blocked while macOS is locked, dangerous shortcuts and
12
15
 
13
16
  For Claude Code CLI/Desktop and OpenCode configs, run the ucu-mcp executable over stdio. If tools fail on macOS, run doctor first to check Accessibility and Screen Recording permissions. Windows and Linux adapters are explicit stubs until their native backends are implemented.
14
17
  `.trim();
18
+ function getPackageVersion() {
19
+ let dir = dirname(fileURLToPath(import.meta.url));
20
+ for (let i = 0; i < 6; i++) {
21
+ const path = join(dir, "package.json");
22
+ if (existsSync(path)) {
23
+ const parsed = JSON.parse(readFileSync(path, "utf-8"));
24
+ return parsed.version ?? "0.0.0";
25
+ }
26
+ dir = dirname(dir);
27
+ }
28
+ return "0.0.0";
29
+ }
15
30
  export async function startServer() {
16
31
  const server = new McpServer({
17
32
  name: "ucu-mcp",
18
- version: "0.1.0",
33
+ version: getPackageVersion(),
19
34
  }, {
20
35
  instructions: UCU_MCP_INSTRUCTIONS,
21
36
  });
22
37
  registerTools(server);
38
+ startUserActivityMonitor();
23
39
  const transport = createStdioTransport();
24
40
  await server.connect(transport);
25
41
  console.error("ucu-mcp server started on stdio");
@@ -6,6 +6,7 @@
6
6
  */
7
7
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
8
8
  export declare function startUserActivityMonitor(): void;
9
+ export declare function stopUserActivityMonitor(): void;
9
10
  export declare function registerTools(server: McpServer): void;
10
11
  export declare class ToolRegistry {
11
12
  private static _instance;