ucu-mcp 0.1.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -20
- package/README.md +156 -14
- package/dist/src/mcp/server.js +29 -8
- package/dist/src/mcp/tools.d.ts +7 -1
- package/dist/src/mcp/tools.js +349 -82
- package/dist/src/platform/base.d.ts +26 -1
- package/dist/src/platform/linux.d.ts +4 -2
- package/dist/src/platform/linux.js +51 -0
- package/dist/src/platform/macos.d.ts +10 -2
- package/dist/src/platform/macos.js +513 -229
- package/dist/src/platform/windows.d.ts +4 -2
- package/dist/src/platform/windows.js +33 -0
- package/dist/src/safety/guard.d.ts +8 -1
- package/dist/src/safety/guard.js +43 -4
- package/dist/src/util/errors.d.ts +12 -0
- package/dist/src/util/errors.js +16 -0
- package/dist/src/utils/input.js +88 -18
- package/native/cgevent/cgevent-helper +0 -0
- package/native/cgevent/main.swift +126 -0
- package/native/ocr/main.swift +89 -0
- package/native/ocr/ocr-helper +0 -0
- package/package.json +7 -4
package/CHANGELOG.md
CHANGED
|
@@ -5,32 +5,30 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
-
## [0.
|
|
8
|
+
## [0.3.0] - 2026-06-06
|
|
9
|
+
|
|
9
10
|
|
|
10
11
|
### Added
|
|
11
12
|
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
- System: `system_info`, `process_list`, `process_terminate`
|
|
21
|
-
- Safety: `doctor` command for permission and environment diagnostics
|
|
22
|
-
- **Safety features**:
|
|
23
|
-
- URL blocklist to prevent navigation to sensitive sites
|
|
24
|
-
- Lock screen guard (macOS) — blocks automation when screen is locked
|
|
25
|
-
- Typed text injection scan — validates keyboard input before injection
|
|
26
|
-
- Focus steal suppression — prevents accidental focus changes during automation
|
|
27
|
-
- User interaction monitor — tracks user activity for safety coordination
|
|
28
|
-
- **macOS platform support** with Accessibility API integration
|
|
29
|
-
- TypeScript-first codebase with full type definitions
|
|
30
|
-
- CLI entry point with `doctor` diagnostic command
|
|
13
|
+
- Scenario-based MCP Instructions — tool-usage guidance organized by task pattern (form fill, menu bar click, screen read, app switch, verify action, wait for change, recover stale target, clipboard)
|
|
14
|
+
- findElement multi-strategy: `value` filter (AX value, respects textMode), `index` selector (0-based Nth match), `near` sorter (ascending distance to point)
|
|
15
|
+
- wait_for_element `until` parameter: `appear` (default), `disappear` (poll until gone), `value_change` (poll until first match value differs)
|
|
16
|
+
- Action Receipt v1 — unified receipt structure for all action-class tools (click, double_click, scroll, drag, move, type_text, press_key, click_element, set_value, type_in_element)
|
|
17
|
+
- Receipt fields: actionId (base36-timestamp unique ID), action, status (ok/partial/blocked), target (location context), result (business result), capture (screenshot metadata), warnings, next (suggested next step)
|
|
18
|
+
- Partial receipt when action succeeds but post-action screenshot fails: status="partial", capture.error contains error details, warnings includes "Post-action screenshot capture failed"
|
|
19
|
+
- Target Session v1 — `focus_app` now returns stable target metadata (`targetId`, `appName`, `pid`, `windowId`, `title`, `capturedAt`) for follow-up tool calls
|
|
20
|
+
- `TARGET_STALE` structured errors for active target windows that disappear before `get_window_state`
|
|
31
21
|
|
|
32
22
|
### Changed
|
|
33
23
|
|
|
24
|
+
- MCP instructions rewritten from generic description to scenario-driven workflow recommendations
|
|
25
|
+
- `wait_for_element` description updated to reflect `until` parameter semantics
|
|
26
|
+
- `find_element` schema extended with `value`, `index`, `near` parameters
|
|
27
|
+
- Action tool responses now wrap business results under `result` instead of returning them at the top level
|
|
28
|
+
- captureAfter failures now surface through receipt.capture.error instead of a flat captureError object
|
|
29
|
+
- `get_window_state` can use the prior `focus_app` target when `windowId` is omitted
|
|
30
|
+
- AX tools (`find_element`, `wait_for_element`, `click_element`, `set_value`, `type_in_element`) can use the prior `focus_app` target when `app` is omitted
|
|
31
|
+
|
|
34
32
|
- Rewrote `src/mcp/tools.ts` with comprehensive 22-tool registry:
|
|
35
33
|
- Unified `withSafety` wrapper for all automation actions
|
|
36
34
|
- `captureAfter` helper for post-action screenshots
|
package/README.md
CHANGED
|
@@ -78,10 +78,10 @@ UCU-MCP provides 22 tools across five categories:
|
|
|
78
78
|
|
|
79
79
|
| Tool | Description | Key Parameters |
|
|
80
80
|
|------|-------------|----------------|
|
|
81
|
-
| `screenshot` | Capture screen, window, or region as
|
|
81
|
+
| `screenshot` | Capture screen, window, or region as PNG/JPEG image content | `display?`, `windowId?`, `region?`, `maxWidth?`, `format?` |
|
|
82
82
|
| `list_windows` | List all on-screen windows with IDs, titles, bounds | `includeMinimized?` |
|
|
83
83
|
| `list_apps` | List visible macOS apps with pid, frontmost state, and window count | — |
|
|
84
|
-
| `focus_app` | Select an app/window target context
|
|
84
|
+
| `focus_app` | Select an app/window target context for later AX tools; returns `targetId`, `appName`, `pid`, `windowId`, `title`, and `capturedAt` | `app` |
|
|
85
85
|
| `get_window_state` | Get accessibility tree of a window, or the prior focus_app target when windowId is omitted | `windowId?`, `depth?`, `includeBounds?` |
|
|
86
86
|
| `get_screen_size` | Get screen dimensions | `display?` |
|
|
87
87
|
| `ocr` | Perform OCR on screen or region; returns text with bounding boxes and confidence | `display?`, `region?` |
|
|
@@ -92,9 +92,9 @@ UCU-MCP provides 22 tools across five categories:
|
|
|
92
92
|
|------|-------------|----------------|
|
|
93
93
|
| `click` | Click at screen coordinates (non-invasive) | `x`, `y`, `windowId?`, `button?` |
|
|
94
94
|
| `double_click` | Double-click at screen coordinates | `x`, `y`, `windowId?`, `button?` |
|
|
95
|
-
| `scroll` | Scroll at a position (vertical/horizontal) | `x`, `y`, `deltaX?`, `deltaY`, `captureAfter?` |
|
|
96
|
-
| `drag` | Drag from one position to another | `startX`, `startY`, `endX`, `endY`, `duration?`, `button?`, `captureAfter?` |
|
|
97
|
-
| `move` | Move the physical cursor to a position (invasive) | `x`, `y` |
|
|
95
|
+
| `scroll` | Scroll at a position (vertical/horizontal) | `x`, `y`, `deltaX?`, `deltaY`, `windowId?`, `captureAfter?` |
|
|
96
|
+
| `drag` | Drag from one position to another | `startX`, `startY`, `endX`, `endY`, `windowId?`, `duration?`, `button?`, `captureAfter?` |
|
|
97
|
+
| `move` | Move the physical cursor to a position (invasive) | `x`, `y`, `windowId?`, `captureAfter?` |
|
|
98
98
|
| `get_cursor_position` | Get current cursor position | — |
|
|
99
99
|
|
|
100
100
|
### Keyboard
|
|
@@ -102,14 +102,14 @@ UCU-MCP provides 22 tools across five categories:
|
|
|
102
102
|
| Tool | Description | Key Parameters |
|
|
103
103
|
|------|-------------|----------------|
|
|
104
104
|
| `type_text` | Type text into the currently focused element via OS key events (not clipboard) | `text`, `delay?`, `captureAfter?` |
|
|
105
|
-
| `press_key` | Press key or keyboard shortcut in the focused window | `key
|
|
105
|
+
| `press_key` | Press key or keyboard shortcut in the focused window | `key?`, `modifiers?`, `keys?`, `captureAfter?` |
|
|
106
106
|
|
|
107
107
|
### AX Element Interaction
|
|
108
108
|
|
|
109
109
|
| Tool | Description | Key Parameters |
|
|
110
110
|
|------|-------------|----------------|
|
|
111
|
-
| `find_element` | Find UI element by text, role, or description using AX APIs | `text?`, `role?`, `app?`, `depth?`, `includeBounds?`, `maxResults?` |
|
|
112
|
-
| `click_element` | Click an AX element by its id (from find_element); refetches equivalent elements after UI updates | `elementId`, `app?`, `captureAfter?` |
|
|
111
|
+
| `find_element` | Find UI elements by text, role, description, or value via AX APIs; uses the current focus_app target when app is omitted | `text?`, `role?`, `value?`, `index?`, `near?`, `app?`, `depth?`, `includeBounds?`, `maxResults?` |
|
|
112
|
+
| `click_element` | Click an AX element by its id (from find_element), using the current focus_app target when app is omitted; refetches equivalent elements after UI updates | `elementId`, `app?`, `captureAfter?` |
|
|
113
113
|
| `set_value` | Set an AX element's value directly without focusing it, using the current focus_app target when app is omitted | `elementId`, `value`, `app?`, `captureAfter?` |
|
|
114
114
|
| `type_in_element` | Type text into a specific AX text field element; may focus the element and refetches equivalent elements after UI updates | `elementId`, `text`, `app?`, `clearFirst?`, `captureAfter?` |
|
|
115
115
|
|
|
@@ -118,13 +118,15 @@ UCU-MCP provides 22 tools across five categories:
|
|
|
118
118
|
| Tool | Description | Key Parameters |
|
|
119
119
|
|------|-------------|----------------|
|
|
120
120
|
| `doctor` | Check platform readiness, permissions, lock-screen state, and client integration hints | — |
|
|
121
|
-
| `wait` | Wait for UI state to settle after launches, animations, or navigation | `ms
|
|
122
|
-
| `wait_for_element` | Poll the AX tree until a matching element appears | `text?`, `role?`, `app?`, `timeoutMs?`, `intervalMs?` |
|
|
121
|
+
| `wait` | Wait for UI state to settle after launches, animations, or navigation | `ms` |
|
|
122
|
+
| `wait_for_element` | Poll the AX tree until a matching element appears, disappears, or changes value (selected by `until`; default `appear`) | `text?`, `role?`, `app?`, `until?`, `timeout?`, `timeoutMs?`, `interval?`, `intervalMs?` |
|
|
123
123
|
|
|
124
|
-
Action tools accept `captureAfter`, `captureMaxWidth`, and `captureFormat` so an agent can receive a post-action screenshot in the same
|
|
124
|
+
Action tools accept `captureAfter`, `captureMaxWidth`, and `captureFormat` so an agent can receive a post-action screenshot as a second MCP image content item in the same response instead of spending another round trip on `screenshot`. When `captureAfter` is requested and the action succeeds, the tool returns an `ActionReceipt` (see the Action Receipt section below) with `capture.status: "ok"`. If post-action capture fails, the receipt has `status: "partial"` and `capture.status: "error"` with the error details. If `captureAfter` is omitted, `capture.status` is `"skipped"`.
|
|
125
125
|
|
|
126
126
|
For fast AX discovery on large windows, use `find_element` with `includeBounds=false` and a small `maxResults`. Keep bounds enabled when the result may be used for coordinate fallback.
|
|
127
127
|
|
|
128
|
+
`focus_app` establishes a session target for follow-up observation and AX actions. After focusing an app, `get_window_state` may omit `windowId`, and AX tools may omit `app`. If the focused window closes or is replaced, UCU-MCP returns a structured `TARGET_STALE` error so the agent can refresh with `focus_app` or `list_windows` instead of silently acting on a different target.
|
|
129
|
+
|
|
128
130
|
## OCR Tool Usage
|
|
129
131
|
|
|
130
132
|
The `ocr` tool captures a screenshot and runs optical character recognition, returning each detected text element with its position and confidence score.
|
|
@@ -153,11 +155,14 @@ The `ocr` tool captures a screenshot and runs optical character recognition, ret
|
|
|
153
155
|
|
|
154
156
|
```json
|
|
155
157
|
{
|
|
156
|
-
"
|
|
158
|
+
"fullText": "Detected text here",
|
|
157
159
|
"elements": [
|
|
158
160
|
{
|
|
159
161
|
"text": "Hello",
|
|
160
|
-
"
|
|
162
|
+
"x": 120,
|
|
163
|
+
"y": 210,
|
|
164
|
+
"width": 80,
|
|
165
|
+
"height": 24,
|
|
161
166
|
"confidence": 0.97
|
|
162
167
|
}
|
|
163
168
|
]
|
|
@@ -207,6 +212,90 @@ The AX (Accessibility) element tools let you interact with UI controls by their
|
|
|
207
212
|
}
|
|
208
213
|
```
|
|
209
214
|
|
|
215
|
+
## Action Receipt
|
|
216
|
+
|
|
217
|
+
Action tools (`click`, `double_click`, `scroll`, `drag`, `move`, `type_text`, `press_key`, `click_element`, `set_value`, `type_in_element`) return a unified `ActionReceipt` JSON object that wraps the action result, target information, and optional post-action screenshot metadata.
|
|
218
|
+
|
|
219
|
+
### Receipt structure
|
|
220
|
+
|
|
221
|
+
| Field | Type | Description |
|
|
222
|
+
|-------|------|-------------|
|
|
223
|
+
| `actionId` | `string` | Unique base36-timestamp ID (e.g. `a1x9z2k-1`) |
|
|
224
|
+
| `action` | `string` | Tool name that produced this receipt |
|
|
225
|
+
| `status` | `"ok" \| "partial" \| "blocked"` | Overall action status |
|
|
226
|
+
| `target` | `object` | What was acted upon (coordinates, elementId, app, windowId) |
|
|
227
|
+
| `result` | `object` | Original business result (clicked, x, y, etc.) |
|
|
228
|
+
| `capture` | `object` | Screenshot metadata (requested, status, format, maxWidth, error) |
|
|
229
|
+
| `warnings` | `string[]` | Non-fatal warnings array |
|
|
230
|
+
| `next` | `string` | Suggested next action |
|
|
231
|
+
|
|
232
|
+
### Examples
|
|
233
|
+
|
|
234
|
+
**Success with captureAfter:**
|
|
235
|
+
|
|
236
|
+
```json
|
|
237
|
+
{
|
|
238
|
+
"actionId": "a1x9z2k-1",
|
|
239
|
+
"action": "click",
|
|
240
|
+
"status": "ok",
|
|
241
|
+
"target": { "x": 100, "y": 200 },
|
|
242
|
+
"result": { "clicked": true, "x": 100, "y": 200 },
|
|
243
|
+
"capture": {
|
|
244
|
+
"requested": true,
|
|
245
|
+
"status": "ok",
|
|
246
|
+
"format": "jpeg",
|
|
247
|
+
"maxWidth": 1280
|
|
248
|
+
},
|
|
249
|
+
"warnings": [],
|
|
250
|
+
"next": "find_element or get_window_state"
|
|
251
|
+
}
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
**Success without captureAfter:**
|
|
255
|
+
|
|
256
|
+
```json
|
|
257
|
+
{
|
|
258
|
+
"actionId": "a1x9z2k-2",
|
|
259
|
+
"action": "click_element",
|
|
260
|
+
"status": "ok",
|
|
261
|
+
"target": { "elementId": "AXButton-42", "app": "Safari" },
|
|
262
|
+
"result": { "clicked": true, "elementId": "AXButton-42" },
|
|
263
|
+
"capture": {
|
|
264
|
+
"requested": false,
|
|
265
|
+
"status": "skipped"
|
|
266
|
+
},
|
|
267
|
+
"warnings": [],
|
|
268
|
+
"next": "find_element or get_window_state"
|
|
269
|
+
}
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
**Partial when capture fails:**
|
|
273
|
+
|
|
274
|
+
```json
|
|
275
|
+
{
|
|
276
|
+
"actionId": "a1x9z2k-3",
|
|
277
|
+
"action": "click",
|
|
278
|
+
"status": "partial",
|
|
279
|
+
"target": { "x": 100, "y": 200 },
|
|
280
|
+
"result": { "clicked": true, "x": 100, "y": 200 },
|
|
281
|
+
"capture": {
|
|
282
|
+
"requested": true,
|
|
283
|
+
"status": "error",
|
|
284
|
+
"format": "jpeg",
|
|
285
|
+
"maxWidth": 1280,
|
|
286
|
+
"error": {
|
|
287
|
+
"name": "CaptureError",
|
|
288
|
+
"code": "CAPTURE_FAILED",
|
|
289
|
+
"retryable": true,
|
|
290
|
+
"message": "Screenshot capture failed after action",
|
|
291
|
+
"recovery": "Check Screen Recording permission and retry."
|
|
292
|
+
}
|
|
293
|
+
},
|
|
294
|
+
"warnings": ["Post-action screenshot capture failed"],
|
|
295
|
+
"next": "screenshot"
|
|
296
|
+
}
|
|
297
|
+
```
|
|
298
|
+
|
|
210
299
|
## macOS Permission Setup
|
|
211
300
|
|
|
212
301
|
UCU-MCP on macOS requires two system permissions:
|
|
@@ -245,10 +334,20 @@ UCU-MCP runs as a stdio MCP server. This is the common integration path for Clau
|
|
|
245
334
|
|
|
246
335
|
### Claude Code CLI
|
|
247
336
|
|
|
337
|
+
Verified CLI setup:
|
|
338
|
+
|
|
339
|
+
```bash
|
|
340
|
+
claude mcp add --scope user ucu -- ucu-mcp
|
|
341
|
+
claude mcp list
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
Equivalent config shape:
|
|
345
|
+
|
|
248
346
|
```json
|
|
249
347
|
{
|
|
250
348
|
"mcpServers": {
|
|
251
349
|
"ucu": {
|
|
350
|
+
"type": "stdio",
|
|
252
351
|
"command": "ucu-mcp"
|
|
253
352
|
}
|
|
254
353
|
}
|
|
@@ -263,25 +362,51 @@ Use the same local MCP server shape as Claude Desktop. Grant Accessibility and S
|
|
|
263
362
|
{
|
|
264
363
|
"mcpServers": {
|
|
265
364
|
"ucu": {
|
|
365
|
+
"type": "stdio",
|
|
266
366
|
"command": "ucu-mcp"
|
|
267
367
|
}
|
|
268
368
|
}
|
|
269
369
|
}
|
|
270
370
|
```
|
|
271
371
|
|
|
372
|
+
### Codex CLI
|
|
373
|
+
|
|
374
|
+
Verified CLI setup:
|
|
375
|
+
|
|
376
|
+
```bash
|
|
377
|
+
codex mcp add ucu -- ucu-mcp
|
|
378
|
+
codex mcp list
|
|
379
|
+
```
|
|
380
|
+
|
|
381
|
+
Equivalent `~/.codex/config.toml` shape:
|
|
382
|
+
|
|
383
|
+
```toml
|
|
384
|
+
[mcp_servers.ucu]
|
|
385
|
+
command = "ucu-mcp"
|
|
386
|
+
```
|
|
387
|
+
|
|
272
388
|
### OpenCode
|
|
273
389
|
|
|
390
|
+
OpenCode reads MCP servers from `~/.config/opencode/opencode.json`.
|
|
391
|
+
|
|
274
392
|
```json
|
|
275
393
|
{
|
|
276
394
|
"mcp": {
|
|
277
|
-
"ucu": {
|
|
395
|
+
"ucu-mcp": {
|
|
278
396
|
"type": "local",
|
|
397
|
+
"enabled": true,
|
|
279
398
|
"command": ["ucu-mcp"]
|
|
280
399
|
}
|
|
281
400
|
}
|
|
282
401
|
}
|
|
283
402
|
```
|
|
284
403
|
|
|
404
|
+
Verify with:
|
|
405
|
+
|
|
406
|
+
```bash
|
|
407
|
+
opencode mcp list
|
|
408
|
+
```
|
|
409
|
+
|
|
285
410
|
### Runtime Doctor
|
|
286
411
|
|
|
287
412
|
```bash
|
|
@@ -362,15 +487,32 @@ src/
|
|
|
362
487
|
|
|
363
488
|
## Error Handling
|
|
364
489
|
|
|
490
|
+
Tool execution failures return standard MCP tool results with `isError: true`. The first content item is JSON text so clients can make policy decisions without string matching:
|
|
491
|
+
|
|
492
|
+
```json
|
|
493
|
+
{
|
|
494
|
+
"error": {
|
|
495
|
+
"name": "WindowNotFoundError",
|
|
496
|
+
"code": "WINDOW_NOT_FOUND",
|
|
497
|
+
"retryable": false,
|
|
498
|
+
"message": "Window win-1 not found. It may have been closed. Run list_windows to get fresh IDs.",
|
|
499
|
+
"recovery": "Run list_windows again, then retry with a fresh windowId or omit windowId for screen coordinates."
|
|
500
|
+
}
|
|
501
|
+
}
|
|
502
|
+
```
|
|
503
|
+
|
|
365
504
|
| Error Code | Description | Retryable |
|
|
366
505
|
|------------|-------------|-----------|
|
|
367
506
|
| `PLATFORM_ERROR` | Platform API call failed | Yes |
|
|
368
507
|
| `PERMISSION_DENIED` | Missing system permission | No |
|
|
369
508
|
| `SAFETY_BLOCKED` | Blocked by safety rule | No |
|
|
370
509
|
| `WINDOW_NOT_FOUND` | Window does not exist | No |
|
|
510
|
+
| `ELEMENT_NOT_FOUND` | Accessibility element is stale or missing | No |
|
|
511
|
+
| `UNSUPPORTED_PARAMETER` | Valid JSON requested an unsupported parameter combination | No |
|
|
371
512
|
| `COORDINATE_OUT_OF_BOUNDS` | Coordinate outside screen | No |
|
|
372
513
|
| `INPUT_FAILED` | Input synthesis failed | Yes |
|
|
373
514
|
| `CAPTURE_FAILED` | Screenshot/OCR capture failed | Yes |
|
|
515
|
+
| `UNKNOWN_ERROR` | Unexpected internal failure | No |
|
|
374
516
|
|
|
375
517
|
## Development
|
|
376
518
|
|
package/dist/src/mcp/server.js
CHANGED
|
@@ -1,25 +1,46 @@
|
|
|
1
1
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
3
|
+
import { dirname, join } from "node:path";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
2
5
|
import { createStdioTransport } from "./transport.js";
|
|
3
|
-
import { registerTools } from "./tools.js";
|
|
6
|
+
import { registerTools, startUserActivityMonitor } from "./tools.js";
|
|
4
7
|
const UCU_MCP_INSTRUCTIONS = `
|
|
5
|
-
UCU-MCP is a cross-client computer-use server for Claude Code CLI
|
|
8
|
+
UCU-MCP is a cross-client computer-use server for Claude Code CLI/Desktop, OpenCode, and other MCP clients.
|
|
6
9
|
|
|
7
|
-
|
|
10
|
+
Pick the right tool sequence for the task:
|
|
8
11
|
|
|
9
|
-
|
|
12
|
+
• Fill a form field → find_element (text/role) + type_in_element or set_value. Prefer AX over coordinates.
|
|
13
|
+
• Click a menu bar item → get_screen_size + click with coordinates (menu bar is not in the AX tree).
|
|
14
|
+
• Read what's on screen → screenshot; for text not in AX use ocr; for a structured tree use get_window_state.
|
|
15
|
+
• Switch between apps → list_apps, then focus_app; subsequent tools use the active target context.
|
|
16
|
+
• Verify an action succeeded → captureAfter=true on action tools, or call screenshot afterwards.
|
|
17
|
+
• Wait for UI to change → wait_for_element (until: "appear" default; also "disappear" or "value_change").
|
|
18
|
+
• Recover from TARGET_STALE → call focus_app again for the target app, then retry the action.
|
|
19
|
+
• Read or write the clipboard → clipboard_read / clipboard_write.
|
|
10
20
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
For Claude Code CLI/Desktop and OpenCode configs, run the ucu-mcp executable over stdio. If tools fail on macOS, run doctor first to check Accessibility and Screen Recording permissions. Windows and Linux adapters are explicit stubs until their native backends are implemented.
|
|
21
|
+
General rules: on macOS call list_apps/focus_app first to establish target context, then prefer AX tools (find_element → click_element / type_in_element / set_value). Use coordinates only when AX lookup is unavailable. Actions are blocked while macOS is locked; dangerous shortcuts and sensitive windows are blocked; suspicious injected text is rejected. type_in_element can refetch equivalent AX elements when the UI tree changes. Run doctor to check Accessibility and Screen Recording permissions. Windows and Linux adapters are explicit stubs until their native backends are implemented.
|
|
14
22
|
`.trim();
|
|
23
|
+
function getPackageVersion() {
|
|
24
|
+
let dir = dirname(fileURLToPath(import.meta.url));
|
|
25
|
+
for (let i = 0; i < 6; i++) {
|
|
26
|
+
const path = join(dir, "package.json");
|
|
27
|
+
if (existsSync(path)) {
|
|
28
|
+
const parsed = JSON.parse(readFileSync(path, "utf-8"));
|
|
29
|
+
return parsed.version ?? "0.0.0";
|
|
30
|
+
}
|
|
31
|
+
dir = dirname(dir);
|
|
32
|
+
}
|
|
33
|
+
return "0.0.0";
|
|
34
|
+
}
|
|
15
35
|
export async function startServer() {
|
|
16
36
|
const server = new McpServer({
|
|
17
37
|
name: "ucu-mcp",
|
|
18
|
-
version:
|
|
38
|
+
version: getPackageVersion(),
|
|
19
39
|
}, {
|
|
20
40
|
instructions: UCU_MCP_INSTRUCTIONS,
|
|
21
41
|
});
|
|
22
42
|
registerTools(server);
|
|
43
|
+
startUserActivityMonitor();
|
|
23
44
|
const transport = createStdioTransport();
|
|
24
45
|
await server.connect(transport);
|
|
25
46
|
console.error("ucu-mcp server started on stdio");
|
package/dist/src/mcp/tools.d.ts
CHANGED
|
@@ -1,11 +1,17 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Tool registry for UCU-MCP.
|
|
3
3
|
*
|
|
4
|
-
* Registers
|
|
4
|
+
* Registers 24 MCP tools on the server and dispatches each call through
|
|
5
5
|
* a shared safety/permission/retry pipeline (`withSafety`).
|
|
6
6
|
*/
|
|
7
7
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
8
|
+
import type { AppTarget } from "../platform/base.js";
|
|
9
|
+
/**
|
|
10
|
+
* Get the currently active target context (set by focus_app).
|
|
11
|
+
*/
|
|
12
|
+
export declare function getActiveTarget(): AppTarget | undefined;
|
|
8
13
|
export declare function startUserActivityMonitor(): void;
|
|
14
|
+
export declare function stopUserActivityMonitor(): void;
|
|
9
15
|
export declare function registerTools(server: McpServer): void;
|
|
10
16
|
export declare class ToolRegistry {
|
|
11
17
|
private static _instance;
|