ucu-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +55 -0
- package/README.md +393 -0
- package/dist/bin/ucu-mcp.d.ts +2 -0
- package/dist/bin/ucu-mcp.js +47 -0
- package/dist/src/index.d.ts +7 -0
- package/dist/src/index.js +6 -0
- package/dist/src/mcp/server.d.ts +1 -0
- package/dist/src/mcp/server.js +26 -0
- package/dist/src/mcp/tools.d.ts +17 -0
- package/dist/src/mcp/tools.js +340 -0
- package/dist/src/mcp/transport.d.ts +2 -0
- package/dist/src/mcp/transport.js +4 -0
- package/dist/src/platform/base.d.ts +127 -0
- package/dist/src/platform/base.js +1 -0
- package/dist/src/platform/linux.d.ts +22 -0
- package/dist/src/platform/linux.js +62 -0
- package/dist/src/platform/macos.d.ts +39 -0
- package/dist/src/platform/macos.js +1478 -0
- package/dist/src/platform/windows.d.ts +18 -0
- package/dist/src/platform/windows.js +48 -0
- package/dist/src/safety/guard.d.ts +50 -0
- package/dist/src/safety/guard.js +220 -0
- package/dist/src/safety/permissions.d.ts +17 -0
- package/dist/src/safety/permissions.js +184 -0
- package/dist/src/util/errors.d.ts +64 -0
- package/dist/src/util/errors.js +109 -0
- package/dist/src/util/logger.d.ts +41 -0
- package/dist/src/util/logger.js +92 -0
- package/dist/src/util/retry.d.ts +30 -0
- package/dist/src/util/retry.js +53 -0
- package/dist/src/utils/input.d.ts +23 -0
- package/dist/src/utils/input.js +425 -0
- package/dist/src/utils/screenshot.d.ts +20 -0
- package/dist/src/utils/screenshot.js +157 -0
- package/package.json +50 -0
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.1.0] - 2025-06-02
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- Initial release of UCU-MCP (Universal Computer Use MCP Server)
|
|
13
|
+
- **22 MCP tools** for desktop automation via Model Context Protocol:
|
|
14
|
+
- Screen capture: `screen_capture`, `screen_capture_active_window`
|
|
15
|
+
- Mouse control: `mouse_move`, `mouse_click`, `mouse_double_click`, `mouse_drag`, `mouse_scroll`
|
|
16
|
+
- Keyboard control: `keyboard_type`, `keyboard_hotkey`, `keyboard_key`
|
|
17
|
+
- Clipboard: `clipboard_read`, `clipboard_write`
|
|
18
|
+
- Window management: `window_list`, `window_activate`, `window_close`
|
|
19
|
+
- Application control: `app_launch`, `app_quit`
|
|
20
|
+
- System: `system_info`, `process_list`, `process_terminate`
|
|
21
|
+
- Safety: `doctor` command for permission and environment diagnostics
|
|
22
|
+
- **Safety features**:
|
|
23
|
+
- URL blocklist to prevent navigation to sensitive sites
|
|
24
|
+
- Lock screen guard (macOS) — blocks automation when screen is locked
|
|
25
|
+
- Typed text injection scan — validates keyboard input before injection
|
|
26
|
+
- Focus steal suppression — prevents accidental focus changes during automation
|
|
27
|
+
- User interaction monitor — tracks user activity for safety coordination
|
|
28
|
+
- **macOS platform support** with Accessibility API integration
|
|
29
|
+
- TypeScript-first codebase with full type definitions
|
|
30
|
+
- CLI entry point with `doctor` diagnostic command
|
|
31
|
+
|
|
32
|
+
### Changed
|
|
33
|
+
|
|
34
|
+
- Rewrote `src/mcp/tools.ts` with comprehensive 22-tool registry:
|
|
35
|
+
- Unified `withSafety` wrapper for all automation actions
|
|
36
|
+
- `captureAfter` helper for post-action screenshots
|
|
37
|
+
- `windowId` guard for window-scoped operations
|
|
38
|
+
- Integrated safety report in `doctor` output
|
|
39
|
+
|
|
40
|
+
### Security
|
|
41
|
+
|
|
42
|
+
- Security audit fixes applied:
|
|
43
|
+
- Input validation on all tool parameters
|
|
44
|
+
- Safe handling of file paths and URLs
|
|
45
|
+
- Rate limiting considerations for rapid automation
|
|
46
|
+
|
|
47
|
+
### Architecture
|
|
48
|
+
|
|
49
|
+
- `ARCHITECTURE.md` rewritten to document:
|
|
50
|
+
- FocusStealSuppression implementation
|
|
51
|
+
- UserInteractionMonitor design
|
|
52
|
+
- Safety layer architecture
|
|
53
|
+
- Tool registry patterns
|
|
54
|
+
|
|
55
|
+
[0.1.0]: https://github.com/2876674942/ucu-mcp-backup/releases/tag/v0.1.0
|
package/README.md
ADDED
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
# ucu-mcp
|
|
2
|
+
|
|
3
|
+
Universal Computer Use MCP — desktop automation for any AI agent.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
UCU-MCP (Universal Computer Use MCP) is a Model Context Protocol server that gives AI agents desktop automation capabilities through a cross-platform architecture; macOS is currently the only implemented backend. Its macOS path favors non-invasive observation and input where the OS allows it: coordinate mouse events preserve the physical cursor, `set_value` writes AX values directly, and focused keyboard typing is explicit.
|
|
8
|
+
|
|
9
|
+
## Features
|
|
10
|
+
|
|
11
|
+
- **Universal**: Works with Claude Code, OpenCode, Codex, Gemini CLI, and any MCP client
|
|
12
|
+
- **Cross-platform architecture**: macOS is the active implementation; Windows and Linux adapters fail explicitly until their native backends are completed
|
|
13
|
+
- **Non-invasive where possible**: Coordinate mouse events preserve cursor position; `set_value` avoids focusing AX elements; tools that require current focus say so explicitly
|
|
14
|
+
- **Codex-inspired**: AX element refetch, MCP instructions, lock-screen guard, URL blocklist, and runtime doctor checks
|
|
15
|
+
- **Safe**: Built-in permission checks and dangerous action interception
|
|
16
|
+
- **Extensible**: Modular architecture, easy to add new platforms and tools
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
### Global install (recommended)
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
npm install -g ucu-mcp
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Then run:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
ucu-mcp
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### One-shot with npx (no install required)
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
npx -y ucu-mcp
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Claude Desktop Integration
|
|
39
|
+
|
|
40
|
+
1. Open your Claude Desktop config file:
|
|
41
|
+
|
|
42
|
+
**macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
43
|
+
|
|
44
|
+
**Windows**: `%APPDATA%\Claude\claude_desktop_config.json`
|
|
45
|
+
|
|
46
|
+
2. Add this entry to the `mcpServers` object:
|
|
47
|
+
|
|
48
|
+
```json
|
|
49
|
+
{
|
|
50
|
+
"mcpServers": {
|
|
51
|
+
"ucu-mcp": {
|
|
52
|
+
"command": "npx",
|
|
53
|
+
"args": ["-y", "ucu-mcp"]
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
If you installed globally, you can use the shorter form:
|
|
60
|
+
|
|
61
|
+
```json
|
|
62
|
+
{
|
|
63
|
+
"mcpServers": {
|
|
64
|
+
"ucu-mcp": {
|
|
65
|
+
"command": "ucu-mcp"
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
3. Restart Claude Desktop. The UCU-MCP tools will appear automatically.
|
|
72
|
+
|
|
73
|
+
## Tool List
|
|
74
|
+
|
|
75
|
+
UCU-MCP provides 22 tools across five categories:
|
|
76
|
+
|
|
77
|
+
### Screen & Window
|
|
78
|
+
|
|
79
|
+
| Tool | Description | Key Parameters |
|
|
80
|
+
|------|-------------|----------------|
|
|
81
|
+
| `screenshot` | Capture screen, window, or region as base64 PNG/JPEG | `display?`, `windowId?`, `region?`, `maxWidth?`, `format?` |
|
|
82
|
+
| `list_windows` | List all on-screen windows with IDs, titles, bounds | `includeMinimized?` |
|
|
83
|
+
| `list_apps` | List visible macOS apps with pid, frontmost state, and window count | — |
|
|
84
|
+
| `focus_app` | Select an app/window target context without raising it | `app` |
|
|
85
|
+
| `get_window_state` | Get accessibility tree of a window, or the prior focus_app target when windowId is omitted | `windowId?`, `depth?`, `includeBounds?` |
|
|
86
|
+
| `get_screen_size` | Get screen dimensions | `display?` |
|
|
87
|
+
| `ocr` | Perform OCR on screen or region; returns text with bounding boxes and confidence | `display?`, `region?` |
|
|
88
|
+
|
|
89
|
+
### Mouse & Input
|
|
90
|
+
|
|
91
|
+
| Tool | Description | Key Parameters |
|
|
92
|
+
|------|-------------|----------------|
|
|
93
|
+
| `click` | Click at screen coordinates (non-invasive) | `x`, `y`, `windowId?`, `button?` |
|
|
94
|
+
| `double_click` | Double-click at screen coordinates | `x`, `y`, `windowId?`, `button?` |
|
|
95
|
+
| `scroll` | Scroll at a position (vertical/horizontal) | `x`, `y`, `deltaX?`, `deltaY`, `captureAfter?` |
|
|
96
|
+
| `drag` | Drag from one position to another | `startX`, `startY`, `endX`, `endY`, `duration?`, `button?`, `captureAfter?` |
|
|
97
|
+
| `move` | Move the physical cursor to a position (invasive) | `x`, `y` |
|
|
98
|
+
| `get_cursor_position` | Get current cursor position | — |
|
|
99
|
+
|
|
100
|
+
### Keyboard
|
|
101
|
+
|
|
102
|
+
| Tool | Description | Key Parameters |
|
|
103
|
+
|------|-------------|----------------|
|
|
104
|
+
| `type_text` | Type text into the currently focused element via OS key events (not clipboard) | `text`, `delay?`, `captureAfter?` |
|
|
105
|
+
| `press_key` | Press key or keyboard shortcut in the focused window | `key`, `modifiers?`, `captureAfter?` |
|
|
106
|
+
|
|
107
|
+
### AX Element Interaction
|
|
108
|
+
|
|
109
|
+
| Tool | Description | Key Parameters |
|
|
110
|
+
|------|-------------|----------------|
|
|
111
|
+
| `find_element` | Find UI element by text, role, or description using AX APIs | `text?`, `role?`, `app?`, `depth?`, `includeBounds?`, `maxResults?` |
|
|
112
|
+
| `click_element` | Click an AX element by its id (from find_element); refetches equivalent elements after UI updates | `elementId`, `app?`, `captureAfter?` |
|
|
113
|
+
| `set_value` | Set an AX element's value directly without focusing it, using the current focus_app target when app is omitted | `elementId`, `value`, `app?`, `captureAfter?` |
|
|
114
|
+
| `type_in_element` | Type text into a specific AX text field element; may focus the element and refetches equivalent elements after UI updates | `elementId`, `text`, `app?`, `clearFirst?`, `captureAfter?` |
|
|
115
|
+
|
|
116
|
+
### Runtime & Synchronization
|
|
117
|
+
|
|
118
|
+
| Tool | Description | Key Parameters |
|
|
119
|
+
|------|-------------|----------------|
|
|
120
|
+
| `doctor` | Check platform readiness, permissions, lock-screen state, and client integration hints | — |
|
|
121
|
+
| `wait` | Wait for UI state to settle after launches, animations, or navigation | `ms?` |
|
|
122
|
+
| `wait_for_element` | Poll the AX tree until a matching element appears | `text?`, `role?`, `app?`, `timeoutMs?`, `intervalMs?` |
|
|
123
|
+
|
|
124
|
+
Action tools accept `captureAfter`, `captureMaxWidth`, and `captureFormat` so an agent can receive a post-action screenshot in the same MCP response instead of spending another round trip on `screenshot`.
|
|
125
|
+
|
|
126
|
+
For fast AX discovery on large windows, use `find_element` with `includeBounds=false` and a small `maxResults`. Keep bounds enabled when the result may be used for coordinate fallback.
|
|
127
|
+
|
|
128
|
+
## OCR Tool Usage
|
|
129
|
+
|
|
130
|
+
The `ocr` tool captures a screenshot and runs optical character recognition, returning each detected text element with its position and confidence score.
|
|
131
|
+
|
|
132
|
+
**Example — read all text on screen:**
|
|
133
|
+
|
|
134
|
+
```json
|
|
135
|
+
{
|
|
136
|
+
"tool": "ocr",
|
|
137
|
+
"arguments": {}
|
|
138
|
+
}
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
**Example — read text in a specific region:**
|
|
142
|
+
|
|
143
|
+
```json
|
|
144
|
+
{
|
|
145
|
+
"tool": "ocr",
|
|
146
|
+
"arguments": {
|
|
147
|
+
"region": { "x": 100, "y": 200, "width": 600, "height": 400 }
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
**Response format:**
|
|
153
|
+
|
|
154
|
+
```json
|
|
155
|
+
{
|
|
156
|
+
"text": "Detected text here",
|
|
157
|
+
"elements": [
|
|
158
|
+
{
|
|
159
|
+
"text": "Hello",
|
|
160
|
+
"bounds": { "x": 120, "y": 210, "width": 80, "height": 24 },
|
|
161
|
+
"confidence": 0.97
|
|
162
|
+
}
|
|
163
|
+
]
|
|
164
|
+
}
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## AX Element Interaction Usage
|
|
168
|
+
|
|
169
|
+
The AX (Accessibility) element tools let you interact with UI controls by their semantic identity rather than pixel coordinates — more reliable than screenshot-and-click patterns.
|
|
170
|
+
|
|
171
|
+
**Step 1 — Find an element:**
|
|
172
|
+
|
|
173
|
+
```json
|
|
174
|
+
{
|
|
175
|
+
"tool": "find_element",
|
|
176
|
+
"arguments": {
|
|
177
|
+
"text": "Submit",
|
|
178
|
+
"role": "AXButton",
|
|
179
|
+
"app": "Safari"
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
**Step 2 — Click the element by its id:**
|
|
185
|
+
|
|
186
|
+
```json
|
|
187
|
+
{
|
|
188
|
+
"tool": "click_element",
|
|
189
|
+
"arguments": {
|
|
190
|
+
"elementId": "AXButton-42",
|
|
191
|
+
"app": "Safari"
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
**Step 3 — Type into a text field element:**
|
|
197
|
+
|
|
198
|
+
```json
|
|
199
|
+
{
|
|
200
|
+
"tool": "type_in_element",
|
|
201
|
+
"arguments": {
|
|
202
|
+
"elementId": "AXTextField-7",
|
|
203
|
+
"text": "hello@example.com",
|
|
204
|
+
"app": "Safari",
|
|
205
|
+
"clearFirst": true
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
## macOS Permission Setup
|
|
211
|
+
|
|
212
|
+
UCU-MCP on macOS requires two system permissions:
|
|
213
|
+
|
|
214
|
+
### 1. Accessibility (required for click, type, key, drag, scroll, move)
|
|
215
|
+
|
|
216
|
+
1. Open **System Settings** > **Privacy & Security** > **Accessibility**
|
|
217
|
+
2. Click the **+** button
|
|
218
|
+
3. Add your terminal app (e.g., `/Applications/Utilities/Terminal.app`, or iTerm2, or the app that runs `ucu-mcp`)
|
|
219
|
+
4. Ensure the toggle next to the app is **enabled**
|
|
220
|
+
|
|
221
|
+
### 2. Screen Recording (required for screenshot, ocr, list_windows, get_screen_size)
|
|
222
|
+
|
|
223
|
+
1. Open **System Settings** > **Privacy & Security** > **Screen Recording**
|
|
224
|
+
2. Click the **+** button
|
|
225
|
+
3. Add your terminal app
|
|
226
|
+
4. Ensure the toggle is **enabled**
|
|
227
|
+
|
|
228
|
+
### Verify permissions
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
ucu-mcp doctor
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
This checks both permissions and reports any issues.
|
|
235
|
+
|
|
236
|
+
### Troubleshooting
|
|
237
|
+
|
|
238
|
+
- If you granted permission but tools still fail, **restart the terminal** or the app running ucu-mcp.
|
|
239
|
+
- On macOS Sequoia and later, you may need to re-grant Screen Recording after OS updates.
|
|
240
|
+
- If using Claude Desktop, the "Claude" app itself needs both permissions (not your terminal).
|
|
241
|
+
|
|
242
|
+
## Configuration for MCP Clients
|
|
243
|
+
|
|
244
|
+
UCU-MCP runs as a stdio MCP server. This is the common integration path for Claude Code CLI, Claude Code Desktop, OpenCode, and other local MCP clients.
|
|
245
|
+
|
|
246
|
+
### Claude Code CLI
|
|
247
|
+
|
|
248
|
+
```json
|
|
249
|
+
{
|
|
250
|
+
"mcpServers": {
|
|
251
|
+
"ucu": {
|
|
252
|
+
"command": "ucu-mcp"
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
### Claude Code Desktop
|
|
259
|
+
|
|
260
|
+
Use the same local MCP server shape as Claude Desktop. Grant Accessibility and Screen Recording to the desktop app that launches `ucu-mcp`.
|
|
261
|
+
|
|
262
|
+
```json
|
|
263
|
+
{
|
|
264
|
+
"mcpServers": {
|
|
265
|
+
"ucu": {
|
|
266
|
+
"command": "ucu-mcp"
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
### OpenCode
|
|
273
|
+
|
|
274
|
+
```json
|
|
275
|
+
{
|
|
276
|
+
"mcp": {
|
|
277
|
+
"ucu": {
|
|
278
|
+
"type": "local",
|
|
279
|
+
"command": ["ucu-mcp"]
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
### Runtime Doctor
|
|
286
|
+
|
|
287
|
+
```bash
|
|
288
|
+
ucu-mcp doctor
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
The same readiness report is also available as the MCP `doctor` tool.
|
|
292
|
+
|
|
293
|
+
## Safety
|
|
294
|
+
|
|
295
|
+
### Built-in safety rules
|
|
296
|
+
|
|
297
|
+
1. **Key blocklist**: Dangerous shortcuts are blocked
|
|
298
|
+
- macOS: `Cmd+Q`, `Cmd+W`, `Cmd+L`, `Cmd+Option+Esc`, `Cmd+Ctrl+Power`
|
|
299
|
+
- Windows/Linux: `Alt+F4`, `Ctrl+Alt+Del`, `Ctrl+Alt+Backspace`
|
|
300
|
+
|
|
301
|
+
2. **Window skip list**: Sensitive windows are skipped
|
|
302
|
+
- Password managers: 1Password, Bitwarden, LastPass, KeePass, Dashlane
|
|
303
|
+
- Banking apps: windows containing "bank", "paypal"
|
|
304
|
+
- System tools: Keychain Access
|
|
305
|
+
|
|
306
|
+
3. **Rate limiting**: Minimum 100ms between actions (prevents runaway loops)
|
|
307
|
+
|
|
308
|
+
### Configuration via environment variables
|
|
309
|
+
|
|
310
|
+
```bash
|
|
311
|
+
export UCU_RATE_LIMIT_MS=100 # Minimum action interval in ms
|
|
312
|
+
export UCU_LOG_LEVEL=info # debug, info, warn, error
|
|
313
|
+
export UCU_DRY_RUN=1 # Dry-run mode (no real actions executed)
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
### Custom safety config
|
|
317
|
+
|
|
318
|
+
Create `safety.json`:
|
|
319
|
+
|
|
320
|
+
```json
|
|
321
|
+
{
|
|
322
|
+
"blockedKeys": ["cmd+shift+q"],
|
|
323
|
+
"skippedWindows": ["My Sensitive App"],
|
|
324
|
+
"rateLimitMs": 100
|
|
325
|
+
}
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
Then point to it:
|
|
329
|
+
|
|
330
|
+
```bash
|
|
331
|
+
export UCU_SAFETY_CONFIG=/path/to/safety.json
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
## Architecture
|
|
335
|
+
|
|
336
|
+
```
|
|
337
|
+
src/
|
|
338
|
+
├── mcp/ # MCP protocol layer
|
|
339
|
+
│ ├── server.ts # MCP server
|
|
340
|
+
│ ├── tools.ts # Tool registration and dispatch
|
|
341
|
+
│ └── transport.ts # Transport (stdio)
|
|
342
|
+
│
|
|
343
|
+
├── platform/ # Platform abstraction layer
|
|
344
|
+
│ ├── base.ts # Platform interface
|
|
345
|
+
│ ├── macos.ts # macOS (AX API)
|
|
346
|
+
│ ├── windows.ts # Windows (UIA)
|
|
347
|
+
│ └── linux.ts # Linux (AT-SPI2)
|
|
348
|
+
│
|
|
349
|
+
├── safety/ # Safety subsystem
|
|
350
|
+
│ ├── guard.ts # Safety guard (rule pipeline)
|
|
351
|
+
│ └── permissions.ts # Permission checks
|
|
352
|
+
│
|
|
353
|
+
├── utils/ # Platform utilities
|
|
354
|
+
│ ├── screenshot.ts # Screenshot capture
|
|
355
|
+
│ └── input.ts # Input synthesis
|
|
356
|
+
│
|
|
357
|
+
└── util/ # General utilities
|
|
358
|
+
├── errors.ts # Error types
|
|
359
|
+
├── logger.ts # Structured logging
|
|
360
|
+
└── retry.ts # Retry logic
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
## Error Handling
|
|
364
|
+
|
|
365
|
+
| Error Code | Description | Retryable |
|
|
366
|
+
|------------|-------------|-----------|
|
|
367
|
+
| `PLATFORM_ERROR` | Platform API call failed | Yes |
|
|
368
|
+
| `PERMISSION_DENIED` | Missing system permission | No |
|
|
369
|
+
| `SAFETY_BLOCKED` | Blocked by safety rule | No |
|
|
370
|
+
| `WINDOW_NOT_FOUND` | Window does not exist | No |
|
|
371
|
+
| `COORDINATE_OUT_OF_BOUNDS` | Coordinate outside screen | No |
|
|
372
|
+
| `INPUT_FAILED` | Input synthesis failed | Yes |
|
|
373
|
+
| `CAPTURE_FAILED` | Screenshot/OCR capture failed | Yes |
|
|
374
|
+
|
|
375
|
+
## Development
|
|
376
|
+
|
|
377
|
+
```bash
|
|
378
|
+
git clone https://github.com/kaguya/ucu-mcp.git
|
|
379
|
+
cd ucu-mcp
|
|
380
|
+
npm install
|
|
381
|
+
npm run build
|
|
382
|
+
npm test
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
macOS GUI smoke tests are gated because they open and edit a temporary TextEdit document:
|
|
386
|
+
|
|
387
|
+
```bash
|
|
388
|
+
npm run test:macos-gui
|
|
389
|
+
```
|
|
390
|
+
|
|
391
|
+
## License
|
|
392
|
+
|
|
393
|
+
MIT
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { startServer } from "../src/mcp/server.js";
|
|
3
|
+
import { checkPermissions } from "../src/safety/permissions.js";
|
|
4
|
+
import { MacOSPlatform } from "../src/platform/macos.js";
|
|
5
|
+
/**
 * Run the `doctor` diagnostic: gather permission and lock-screen state,
 * print a JSON readiness report to stdout, and set a non-zero exit code
 * when the environment is not ready.
 *
 * The report is considered OK only when `checkPermissions()` reports
 * `granted` and the screen is not locked. The lock-screen probe is only
 * attempted on macOS (`process.platform === "darwin"`).
 *
 * @returns {Promise<void>} Resolves after the report has been printed.
 */
async function runDoctor() {
    const permissions = await checkPermissions();
    // Await the probe: this is a no-op for a synchronous boolean, but if
    // isScreenLocked returns a Promise, using it un-awaited would make
    // `screenLocked` always truthy (so `ok` would always be false) and it
    // would serialize as `{}` in the report.
    const screenLocked = process.platform === "darwin"
        ? (await new MacOSPlatform().isScreenLocked?.()) ?? false
        : false;
    const report = {
        ok: permissions.granted && !screenLocked,
        platform: process.platform,
        node: process.version,
        permissions,
        screenLocked,
        safety: {
            urlBlocklist: true,
            lockScreenGuard: process.platform === "darwin",
            typedTextInjectionScan: true,
        },
        stdioCommand: "ucu-mcp",
        clients: {
            claudeCodeCli: "Run ucu-mcp as an MCP stdio server.",
            claudeCodeDesktop: "Configure ucu-mcp as a local MCP stdio server and grant permissions to the desktop app.",
            openCode: "Configure ucu-mcp as a local MCP stdio server.",
        },
    };
    console.log(JSON.stringify(report, null, 2));
    // Use exitCode rather than process.exit() so pending stdout output is
    // flushed before the process ends.
    if (!report.ok)
        process.exitCode = 1;
}
|
|
32
|
+
/**
 * CLI entry point. Dispatches on the first command-line argument:
 *   - `doctor`        prints the readiness report and returns;
 *   - `--help` / `-h` prints usage and returns;
 *   - anything else (including no argument) starts the MCP stdio server.
 */
async function main() {
    const [, , subcommand] = process.argv;
    switch (subcommand) {
        case "doctor":
            await runDoctor();
            return;
        case "--help":
        case "-h":
            console.log("Usage: ucu-mcp [doctor]\n\nWithout arguments, starts the MCP stdio server.");
            return;
        default:
            await startServer();
    }
}
|
|
44
|
+
// Kick off the CLI; any rejection from main() is reported on stderr and
// terminates the process with exit status 1.
main().catch((fatal) => {
    console.error("Fatal error starting ucu-mcp:", fatal);
    process.exit(1);
});
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
export { startServer } from "./mcp/server.js";
|
|
2
|
+
export { ToolRegistry } from "./mcp/tools.js";
|
|
3
|
+
export { createStdioTransport } from "./mcp/transport.js";
|
|
4
|
+
export { Platform } from "./platform/base.js";
|
|
5
|
+
export { SafetyGuard } from "./safety/guard.js";
|
|
6
|
+
export { checkPermissions, checkPermission, type PermissionCheckResult, type PermissionType, } from "./safety/permissions.js";
|
|
7
|
+
export { MacOSPlatform } from "./platform/macos.js";
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
export { startServer } from "./mcp/server.js";
|
|
2
|
+
export { ToolRegistry } from "./mcp/tools.js";
|
|
3
|
+
export { createStdioTransport } from "./mcp/transport.js";
|
|
4
|
+
export { SafetyGuard } from "./safety/guard.js";
|
|
5
|
+
export { checkPermissions, checkPermission, } from "./safety/permissions.js";
|
|
6
|
+
export { MacOSPlatform } from "./platform/macos.js";
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Start the ucu-mcp MCP server on a stdio transport.
 *
 * @returns A promise that resolves once the server has connected to the transport.
 */
export declare function startServer(): Promise<void>;
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
|
+
import { createStdioTransport } from "./transport.js";
|
|
3
|
+
import { registerTools } from "./tools.js";
|
|
4
|
+
const UCU_MCP_INSTRUCTIONS = `
|
|
5
|
+
UCU-MCP is a cross-client computer-use server for Claude Code CLI, Claude Code Desktop, OpenCode, and other MCP clients.
|
|
6
|
+
|
|
7
|
+
Use screenshots and window state to observe before acting. On macOS, prefer list_apps/focus_app to establish the target app context, then use AX element tools when an element can be identified: find_element, then click_element, set_value, or type_in_element. Fall back to coordinates only when AX lookup is unavailable or ambiguous.
|
|
8
|
+
|
|
9
|
+
Before repeated UI work, call get_screen_size and list_windows so coordinates and target windows are explicit. Use get_window_state for structured UI trees. Use ocr when visible text is not exposed through Accessibility. For tight observe-act loops, set captureAfter=true on action tools to receive a post-action screenshot in the same tool response.
|
|
10
|
+
|
|
11
|
+
Safety model: actions are blocked while macOS is locked, dangerous shortcuts and sensitive windows are blocked, and suspicious injected text is rejected. For text entry into UI controls, prefer type_in_element because it can refetch equivalent AX elements if the UI tree changes.
|
|
12
|
+
|
|
13
|
+
For Claude Code CLI/Desktop and OpenCode configs, run the ucu-mcp executable over stdio. If tools fail on macOS, run doctor first to check Accessibility and Screen Recording permissions. Windows and Linux adapters are explicit stubs until their native backends are implemented.
|
|
14
|
+
`.trim();
|
|
15
|
+
/**
 * Build the MCP server, register every tool on it, and connect it to a
 * stdio transport.
 *
 * @returns {Promise<void>} Resolves once the server is connected.
 */
export async function startServer() {
    const serverInfo = { name: "ucu-mcp", version: "0.1.0" };
    const serverOptions = { instructions: UCU_MCP_INSTRUCTIONS };
    const server = new McpServer(serverInfo, serverOptions);
    registerTools(server);
    await server.connect(createStdioTransport());
    // Status goes to stderr; with a stdio transport, stdout is expected to
    // carry only protocol messages.
    console.error("ucu-mcp server started on stdio");
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool registry for UCU-MCP.
|
|
3
|
+
*
|
|
4
|
+
* Registers 22 MCP tools on the server and dispatches each call through
|
|
5
|
+
* a shared safety/permission/retry pipeline (`withSafety`).
|
|
6
|
+
*/
|
|
7
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
8
|
+
/**
 * Starts the user activity monitor.
 *
 * NOTE(review): only the signature is visible here; presumably this is the
 * "user interaction monitor" safety feature — confirm against the implementation.
 */
export declare function startUserActivityMonitor(): void;
|
|
9
|
+
/**
 * Registers the UCU-MCP tools on the given MCP server.
 *
 * @param server - The MCP server instance that receives the tool registrations.
 */
export declare function registerTools(server: McpServer): void;
|
|
10
|
+
/**
 * Registry of tool names and their optional handlers.
 *
 * NOTE(review): the private static `_instance` plus the static `instance`
 * accessor suggest a shared singleton — confirm against the implementation.
 */
export declare class ToolRegistry {
    /** Backing storage for the static `instance` accessor. */
    private static _instance;
    /** Tool names held by this registry (presumably in registration order — verify). */
    readonly tools: string[];
    /** Internal handler storage; its shape is not visible in this declaration. */
    private readonly _handlers;
    /** Accessor for a `ToolRegistry` instance. */
    static get instance(): ToolRegistry;
    /**
     * Register a tool name, optionally with an async handler.
     *
     * @param name - Tool name.
     * @param handler - Optional async function receiving the tool arguments.
     */
    register(name: string, handler?: (args: Record<string, unknown>) => Promise<unknown>): void;
    /**
     * Dispatch a call to the named tool.
     *
     * @param name - Tool name to invoke.
     * @param args - Arguments passed to the tool's handler.
     * @returns A promise for the handler's result (typed `any`; narrow before use).
     */
    dispatch(name: string, args: Record<string, unknown>): Promise<any>;
}
|