claude-kvm 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +320 -0
- package/index.js +340 -0
- package/package.json +44 -0
- package/tools/index.js +171 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Rıza Emre ARAS <r.emrearas@proton.me>
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
# Claude KVM
|
|
2
|
+
|
|
3
|
+
Claude KVM is an MCP tool that controls remote desktop environments over VNC. It consists of a thin JS proxy layer (MCP server) and a platform-native Swift VNC daemon running on your macOS system.
|
|
4
|
+
|
|
5
|
+
## Architecture
|
|
6
|
+
|
|
7
|
+
```mermaid
|
|
8
|
+
graph TB
|
|
9
|
+
subgraph MCP["MCP Client (Claude)"]
|
|
10
|
+
AI["Claude"]
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
subgraph Proxy["claude-kvm · MCP Proxy (stdio)"]
|
|
14
|
+
direction TB
|
|
15
|
+
Server["MCP Server<br/><code>index.js</code>"]
|
|
16
|
+
Tools["Tool Definitions<br/><code>tools/index.js</code>"]
|
|
17
|
+
Server --> Tools
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
subgraph Daemon["claude-kvm-daemon · Native VNC Client (stdin/stdout)"]
|
|
21
|
+
direction TB
|
|
22
|
+
CMD["Command Handler<br/><i>PC Dispatch</i>"]
|
|
23
|
+
Scale["Display Scaling<br/><i>Scaled ↔ Native</i>"]
|
|
24
|
+
|
|
25
|
+
subgraph Screen["Screen"]
|
|
26
|
+
Capture["Frame Capture<br/><i>PNG · Crop · Diff</i>"]
|
|
27
|
+
OCR["OCR Detection<br/><i>Apple Vision</i>"]
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
subgraph InputGroup["Input"]
|
|
31
|
+
Mouse["Mouse<br/><i>Click · Drag · Move · Scroll</i>"]
|
|
32
|
+
KB["Keyboard<br/><i>Tap · Combo · Type · Paste</i>"]
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
VNC["VNC Bridge<br/><i>LibVNCClient 0.9.15</i>"]
|
|
36
|
+
|
|
37
|
+
CMD --> Scale
|
|
38
|
+
Scale --> Capture
|
|
39
|
+
Scale --> Mouse
|
|
40
|
+
Scale --> KB
|
|
41
|
+
Capture -.->|"framebuffer"| VNC
|
|
42
|
+
Mouse -->|"pointer events"| VNC
|
|
43
|
+
KB -->|"key events"| VNC
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
subgraph Target["Target Machine"]
|
|
47
|
+
VNC_Server["VNC Server<br/><i>:5900</i>"]
|
|
48
|
+
Desktop["Desktop Environment"]
|
|
49
|
+
VNC_Server --> Desktop
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
AI <-->|"stdio<br/>JSON-RPC"| Server
|
|
53
|
+
Server <-->|"stdin/stdout<br/>PC (NDJSON)"| CMD
|
|
54
|
+
VNC <-->|"RFB Protocol<br/>TCP :5900"| VNC_Server
|
|
55
|
+
|
|
56
|
+
classDef proxy fill:#1a1a2e,stroke:#16213e,color:#e5e5e5
|
|
57
|
+
classDef daemon fill:#0f3460,stroke:#533483,color:#e5e5e5
|
|
58
|
+
classDef target fill:#1a1a2e,stroke:#e94560,color:#e5e5e5
|
|
59
|
+
|
|
60
|
+
class Server,Tools proxy
|
|
61
|
+
class CMD,Scale,VNC,Capture,Mouse,KB daemon
|
|
62
|
+
class VNC_Server,Desktop target
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### Layers
|
|
66
|
+
|
|
67
|
+
| Layer | Language | Role | Communication |
|
|
68
|
+
|----------------|-------------------------|----------------------------------------------------------------------|--------------------------|
|
|
69
|
+
| **MCP Proxy** | JavaScript (Node.js) | Communicates with Claude over MCP protocol, manages daemon lifecycle | stdio JSON-RPC |
|
|
70
|
+
| **VNC Daemon** | Swift/C (Apple Silicon) | VNC connection, screen capture, mouse/keyboard input injection | stdin/stdout PC (NDJSON) |
|
|
71
|
+
|
|
72
|
+
### PC (Procedure Call) Protocol
|
|
73
|
+
|
|
74
|
+
Communication between the proxy and daemon uses the PC protocol over NDJSON:
|
|
75
|
+
|
|
76
|
+
```
|
|
77
|
+
Request: {"method":"<name>","params":{...},"id":<int|string>}
|
|
78
|
+
Response: {"result":{...},"id":<int|string>}
|
|
79
|
+
Error: {"error":{"code":<int>,"message":"..."},"id":<int|string>}
|
|
80
|
+
Notification: {"method":"<name>","params":{...}}
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Coordinate Scaling
|
|
84
|
+
|
|
85
|
+
The VNC server's native resolution is scaled down to fit within `--max-dimension` (default: 1280px). Claude works more consistently with scaled coordinates — the daemon handles the conversion in the background:
|
|
86
|
+
|
|
87
|
+
```
|
|
88
|
+
Native: 4220 x 2568 (VNC server framebuffer)
|
|
89
|
+
Scaled: 1280 x 779 (what Claude sees and targets)
|
|
90
|
+
|
|
91
|
+
mouse_click(640, 400) → VNC receives (2110, 1319)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### Screen Strategy
|
|
95
|
+
|
|
96
|
+
Claude minimizes token cost with a progressive verification approach:
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
diff_check → changeDetected: true/false ~5ms (text only, no image)
|
|
100
|
+
detect_elements → OCR text + bounding boxes ~50ms (text only, no image)
|
|
101
|
+
cursor_crop → crop around cursor ~50ms (small image)
|
|
102
|
+
screenshot → full screen capture ~200ms (full image)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
`detect_elements` uses Apple Vision framework for on-device OCR. Returns text content with bounding box coordinates in scaled space — enables precise click targeting without consuming vision tokens.
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## Installation
|
|
110
|
+
|
|
111
|
+
### Requirements
|
|
112
|
+
|
|
113
|
+
- macOS (Apple Silicon / aarch64)
|
|
114
|
+
- Node.js (LTS)
|
|
115
|
+
|
|
116
|
+
### Daemon
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
brew tap ARAS-Workspace/tap
|
|
120
|
+
brew install claude-kvm-daemon
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
> [!NOTE]
|
|
124
|
+
> `claude-kvm-daemon` is compiled and code-signed via CI (GitHub Actions). The build output is packaged in two formats: a `.tar.gz` archive for Homebrew distribution and a `.dmg` disk image for notarization. The DMG is submitted to Apple servers for notarization within the same workflow — the process can be tracked from CI logs. The notarized DMG is available as a CI Artifact; the archived `.tar.gz` is also published as a release on the repository. Homebrew installation tracks this release.
|
|
125
|
+
>
|
|
126
|
+
> - [Release](https://github.com/ARAS-Workspace/claude-kvm/releases/tag/daemon-v1.0.1) · [Source Code](https://github.com/ARAS-Workspace/claude-kvm/tree/daemon-tool)
|
|
127
|
+
> - [LibVNC Build](https://github.com/ARAS-Workspace/claude-kvm/actions/runs/22122975416) · [LibVNC Branch](https://github.com/ARAS-Workspace/claude-kvm/tree/libvnc-build)
|
|
128
|
+
> - [Homebrew Tap](https://github.com/ARAS-Workspace/homebrew-tap)
|
|
129
|
+
|
|
130
|
+
### MCP Configuration
|
|
131
|
+
|
|
132
|
+
Create a `.mcp.json` file in your project directory:
|
|
133
|
+
|
|
134
|
+
```json
|
|
135
|
+
{
|
|
136
|
+
"mcpServers": {
|
|
137
|
+
"claude-kvm": {
|
|
138
|
+
"command": "npx",
|
|
139
|
+
"args": ["-y", "github:ARAS-Workspace/claude-kvm"],
|
|
140
|
+
"env": {
|
|
141
|
+
"VNC_HOST": "192.168.1.100",
|
|
142
|
+
"VNC_PORT": "5900",
|
|
143
|
+
"VNC_USERNAME": "user",
|
|
144
|
+
"VNC_PASSWORD": "pass",
|
|
145
|
+
"CLAUDE_KVM_DAEMON_PATH": "/opt/homebrew/bin/claude-kvm-daemon",
|
|
146
|
+
"CLAUDE_KVM_DAEMON_PARAMETERS": "-v"
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### Configuration
|
|
154
|
+
|
|
155
|
+
#### MCP Proxy (ENV)
|
|
156
|
+
|
|
157
|
+
| Parameter | Default | Description |
|
|
158
|
+
|--------------------------------|---------------------|----------------------------------------------------|
|
|
159
|
+
| `VNC_HOST` | `127.0.0.1` | VNC server address |
|
|
160
|
+
| `VNC_PORT` | `5900` | VNC port number |
|
|
161
|
+
| `VNC_USERNAME` | | Username (required for ARD) |
|
|
162
|
+
| `VNC_PASSWORD` | | Password |
|
|
163
|
+
| `CLAUDE_KVM_DAEMON_PATH` | `claude-kvm-daemon` | Daemon binary path (not needed if already in PATH) |
|
|
164
|
+
| `CLAUDE_KVM_DAEMON_PARAMETERS` | | Additional CLI arguments for the daemon |
|
|
165
|
+
|
|
166
|
+
#### Daemon Parameters (CLI)
|
|
167
|
+
|
|
168
|
+
Additional arguments passed to the daemon via `CLAUDE_KVM_DAEMON_PARAMETERS`:
|
|
169
|
+
|
|
170
|
+
```
|
|
171
|
+
"CLAUDE_KVM_DAEMON_PARAMETERS": "--max-dimension 800 -v"
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
| Parameter | Default | Description |
|
|
175
|
+
|---------------------|---------|----------------------------------------|
|
|
176
|
+
| `--max-dimension` | `1280` | Maximum display scaling dimension (px) |
|
|
177
|
+
| `--connect-timeout` | | VNC connection timeout (seconds) |
|
|
178
|
+
| `--bits-per-sample` | | Bits per pixel sample |
|
|
179
|
+
| `--no-reconnect` | | Disable automatic reconnection |
|
|
180
|
+
| `-v, --verbose` | | Verbose logging (stderr) |
|
|
181
|
+
|
|
182
|
+
#### Runtime Configuration (PC)
|
|
183
|
+
|
|
184
|
+
All timing and display parameters are configurable at runtime via the `configure` method. Use `get_timing` to inspect current values.
|
|
185
|
+
|
|
186
|
+
Set timing:
|
|
187
|
+
```json
|
|
188
|
+
{"method":"configure","params":{"click_hold_ms":80,"key_hold_ms":50}}
|
|
189
|
+
```
|
|
190
|
+
```json
|
|
191
|
+
{"result":{"detail":"OK — changed: click_hold_ms, key_hold_ms"}}
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
Change display scaling:
|
|
195
|
+
```json
|
|
196
|
+
{"method":"configure","params":{"max_dimension":960}}
|
|
197
|
+
```
|
|
198
|
+
```json
|
|
199
|
+
{"result":{"detail":"OK — changed: max_dimension","scaledWidth":960,"scaledHeight":584}}
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
Reset to defaults:
|
|
203
|
+
```json
|
|
204
|
+
{"method":"configure","params":{"reset":true}}
|
|
205
|
+
```
|
|
206
|
+
```json
|
|
207
|
+
{"result":{"detail":"OK — reset to defaults","timing":{"click_hold_ms":50,"combo_mod_ms":10,"cursor_crop_radius":150,"double_click_gap_ms":50,"drag_min_steps":10,"drag_pixels_per_step":20,"drag_position_ms":30,"drag_press_ms":50,"drag_settle_ms":30,"drag_step_ms":5,"hover_settle_ms":400,"key_hold_ms":30,"max_dimension":1280,"paste_settle_ms":30,"scroll_press_ms":10,"scroll_tick_ms":20,"type_inter_key_ms":20,"type_key_ms":20,"type_shift_ms":10},"scaledWidth":1280,"scaledHeight":779}}
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
Get current values:
|
|
211
|
+
```json
|
|
212
|
+
{"method":"get_timing"}
|
|
213
|
+
```
|
|
214
|
+
```json
|
|
215
|
+
{"result":{"timing":{"click_hold_ms":80,"combo_mod_ms":10,"cursor_crop_radius":150,"double_click_gap_ms":50,"drag_min_steps":10,"drag_pixels_per_step":20,"drag_position_ms":30,"drag_press_ms":50,"drag_settle_ms":30,"drag_step_ms":5,"hover_settle_ms":400,"key_hold_ms":50,"max_dimension":1280,"paste_settle_ms":30,"scroll_press_ms":10,"scroll_tick_ms":20,"type_inter_key_ms":20,"type_key_ms":20,"type_shift_ms":10},"scaledWidth":1280,"scaledHeight":779}}
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
| Parameter | Default | Description |
|
|
219
|
+
|------------------------|---------|----------------------------|
|
|
220
|
+
| `max_dimension` | `1280` | Max screenshot dimension |
|
|
221
|
+
| `cursor_crop_radius` | `150` | Cursor crop radius (px) |
|
|
222
|
+
| `click_hold_ms` | `50` | Click hold duration |
|
|
223
|
+
| `double_click_gap_ms` | `50` | Double-click gap delay |
|
|
224
|
+
| `hover_settle_ms` | `400` | Hover settle wait |
|
|
225
|
+
| `drag_position_ms` | `30` | Pre-drag position wait |
|
|
226
|
+
| `drag_press_ms` | `50` | Drag press hold threshold |
|
|
227
|
+
| `drag_step_ms` | `5` | Between interpolation pts |
|
|
228
|
+
| `drag_settle_ms` | `30` | Settle before release |
|
|
229
|
+
| `drag_pixels_per_step` | `20` | Pixels per interpolation step |
|
|
230
|
+
| `drag_min_steps` | `10` | Min interpolation steps |
|
|
231
|
+
| `scroll_press_ms` | `10` | Scroll press-release gap |
|
|
232
|
+
| `scroll_tick_ms` | `20` | Inter-tick delay |
|
|
233
|
+
| `key_hold_ms` | `30` | Key hold duration |
|
|
234
|
+
| `combo_mod_ms` | `10` | Modifier settle delay |
|
|
235
|
+
| `type_key_ms` | `20` | Key hold during typing |
|
|
236
|
+
| `type_inter_key_ms` | `20` | Inter-character delay |
|
|
237
|
+
| `type_shift_ms` | `10` | Shift key settle |
|
|
238
|
+
| `paste_settle_ms` | `30` | Post-clipboard write wait |
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
## Tools
|
|
243
|
+
|
|
244
|
+
All operations are performed through a single `vnc_command` tool:
|
|
245
|
+
|
|
246
|
+
### Screen
|
|
247
|
+
|
|
248
|
+
| Action | Parameters | Description |
|
|
249
|
+
|----------------|------------|--------------------------------------------|
|
|
250
|
+
| `screenshot` | | Full screen PNG capture |
|
|
251
|
+
| `cursor_crop` | | Crop around cursor with crosshair overlay |
|
|
252
|
+
| `diff_check` | | Detect screen changes against baseline |
|
|
253
|
+
| `set_baseline` | | Save current screen as diff reference |
|
|
254
|
+
|
|
255
|
+
### Mouse
|
|
256
|
+
|
|
257
|
+
| Action | Parameters | Description |
|
|
258
|
+
|----------------------|----------------------------|--------------------------------|
|
|
259
|
+
| `mouse_click` | `x, y, button?` | Click (left\|right\|middle) |
|
|
260
|
+
| `mouse_double_click` | `x, y` | Double click |
|
|
261
|
+
| `mouse_move` | `x, y` | Move cursor |
|
|
262
|
+
| `hover` | `x, y` | Move + settle wait |
|
|
263
|
+
| `nudge` | `dx, dy` | Relative cursor movement |
|
|
264
|
+
| `mouse_drag` | `x, y, toX, toY` | Drag from start to end |
|
|
265
|
+
| `scroll` | `x, y, direction, amount?` | Scroll (up\|down\|left\|right) |
|
|
266
|
+
|
|
267
|
+
### Keyboard
|
|
268
|
+
|
|
269
|
+
| Action | Parameters | Description |
|
|
270
|
+
|-------------|-------------------|--------------------------------------------------------------|
|
|
271
|
+
| `key_tap` | `key` | Single key press (enter\|escape\|tab\|space\|...) |
|
|
272
|
+
| `key_combo` | `key` or `keys` | Modifier combo ("cmd+c" or ["cmd","shift","3"]) |
|
|
273
|
+
| `key_type` | `text` | Type text character by character |
|
|
274
|
+
| `paste` | `text` | Paste text via clipboard |
|
|
275
|
+
|
|
276
|
+
### Detection
|
|
277
|
+
|
|
278
|
+
| Action | Parameters | Description |
|
|
279
|
+
|-------------------|------------|-------------------------------------------------------|
|
|
280
|
+
| `detect_elements` | | OCR text detection with bounding boxes (Apple Vision) |
|
|
281
|
+
|
|
282
|
+
Returns text elements with bounding box coordinates in scaled space:
|
|
283
|
+
|
|
284
|
+
```json
|
|
285
|
+
{"method":"detect_elements"}
|
|
286
|
+
```
|
|
287
|
+
```json
|
|
288
|
+
{"result":{"detail":"13 elements","elements":[{"confidence":1,"h":9,"text":"Finder","w":32,"x":37,"y":6},{"confidence":1,"h":9,"text":"File","w":15,"x":84,"y":6},{"confidence":1,"h":9,"text":"Edit","w":19,"x":112,"y":6},{"confidence":1,"h":9,"text":"View","w":22,"x":143,"y":6},{"confidence":1,"h":11,"text":"Go","w":15,"x":179,"y":6},{"confidence":1,"h":9,"text":"Window","w":35,"x":207,"y":6},{"confidence":1,"h":11,"text":"Help","w":22,"x":255,"y":6},{"confidence":1,"h":11,"text":"8•","w":26,"x":1161,"y":6},{"confidence":1,"h":9,"text":"Fri Feb 20 22:19","w":80,"x":1189,"y":6},{"confidence":1,"h":9,"text":"Assets","w":32,"x":1202,"y":97},{"confidence":1,"h":9,"text":"Passwords.kdbx","w":74,"x":1181,"y":168},{"confidence":1,"h":93,"text":"PHANTOM","w":633,"x":322,"y":477},{"confidence":1,"h":32,"text":"YOUR SERVER, YOUR NETWORK, YOUR PRIVACY","w":629,"x":325,"y":568}],"scaledHeight":717,"scaledWidth":1280}}
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
### Configuration
|
|
292
|
+
|
|
293
|
+
| Action | Parameters | Description |
|
|
294
|
+
|--------------|-----------------|--------------------------------------|
|
|
295
|
+
| `configure` | `{<params>}` | Set timing/display params at runtime |
|
|
296
|
+
| `configure` | `{reset: true}` | Reset all params to defaults |
|
|
297
|
+
| `get_timing` | | Get current timing + display params |
|
|
298
|
+
|
|
299
|
+
### Control
|
|
300
|
+
|
|
301
|
+
| Action | Parameters | Description |
|
|
302
|
+
|------------|------------|-----------------------------------|
|
|
303
|
+
| `wait` | `ms?` | Wait (default 500ms) |
|
|
304
|
+
| `health` | | Connection status + display info |
|
|
305
|
+
| `shutdown` | | Graceful daemon shutdown |
|
|
306
|
+
|
|
307
|
+
---
|
|
308
|
+
|
|
309
|
+
## Authentication
|
|
310
|
+
|
|
311
|
+
Supported VNC authentication methods:
|
|
312
|
+
|
|
313
|
+
- **VNC Auth** — password-based challenge-response (DES)
|
|
314
|
+
- **ARD** — Apple Remote Desktop (Diffie-Hellman + AES-128-ECB)
|
|
315
|
+
|
|
316
|
+
macOS is auto-detected via the ARD auth type 30 credential request. When detected, Meta keys are remapped to Super (Command key compatibility).
|
|
317
|
+
|
|
318
|
+
---
|
|
319
|
+
|
|
320
|
+
Copyright (c) 2026 Rıza Emre ARAS — MIT License
|
package/index.js
ADDED
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
/**
|
|
4
|
+
* █████╗ ██████╗ █████╗ ███████╗
|
|
5
|
+
* ██╔══██╗██╔══██╗██╔══██╗██╔════╝
|
|
6
|
+
* ███████║██████╔╝███████║███████╗
|
|
7
|
+
* ██╔══██║██╔══██╗██╔══██║╚════██║
|
|
8
|
+
* ██║ ██║██║ ██║██║ ██║███████║
|
|
9
|
+
* ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝
|
|
10
|
+
*
|
|
11
|
+
* Copyright (c) 2026 Rıza Emre ARAS <r.emrearas@proton.me>
|
|
12
|
+
*
|
|
13
|
+
* This file is part of Claude KVM.
|
|
14
|
+
* Released under the MIT License — see LICENSE for details.
|
|
15
|
+
*
|
|
16
|
+
* MCP proxy server — spawns a native VNC daemon (claude-kvm-daemon)
|
|
17
|
+
* and exposes a single vnc_command tool to Claude.
|
|
18
|
+
* Communication: PC (Procedure Call) over stdin/stdout NDJSON.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { spawn } from 'node:child_process';
|
|
22
|
+
import { randomUUID } from 'node:crypto';
|
|
23
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
24
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
25
|
+
import { vncCommandTool, actionQueueTool, controlTools } from './tools/index.js';
|
|
26
|
+
|
|
27
|
+
// ── Configuration ───────────────────────────────────────────
|
|
28
|
+
|
|
29
|
+
/**
 * Look up a configuration value in the process environment.
 *
 * The fallback is used when the variable is unset OR set to the empty string,
 * so a blank entry (for example `"VNC_PORT": ""` in an MCP config) selects the
 * default instead of being forwarded to the daemon as an empty argument.
 *
 * @param {string} key - Environment variable name
 * @param {string} fallback - Value used when the variable is unset or empty
 * @returns {string} The configured value, or the fallback
 */
const env = (key, fallback) => {
  const value = process.env[key];
  return value === undefined || value === '' ? fallback : value;
};

// Daemon executable handed to spawn().
const DAEMON_PATH = env('CLAUDE_KVM_DAEMON_PATH', 'claude-kvm-daemon');
// Extra daemon CLI arguments as one shell-like string (may be empty).
const DAEMON_PARAMS = env('CLAUDE_KVM_DAEMON_PARAMETERS', '');
// VNC endpoint; the port stays a string because it is passed on as a CLI argument.
const VNC_HOST = env('VNC_HOST', '127.0.0.1');
const VNC_PORT = env('VNC_PORT', '5900');
// Optional credentials; empty means "do not pass the flag".
const VNC_USERNAME = env('VNC_USERNAME', '');
const VNC_PASSWORD = env('VNC_PASSWORD', '');
|
|
37
|
+
|
|
38
|
+
// ── Logging ─────────────────────────────────────────────────
|
|
39
|
+
|
|
40
|
+
/**
 * Emit one diagnostic line on stderr, prefixed with `[MCP HH:MM:SS.mmm]`.
 *
 * @param {string} msg - Text to log (a newline is appended)
 */
function log(msg) {
  const isoNow = new Date().toISOString();
  // Characters 11..22 of the ISO timestamp are the time of day with milliseconds.
  const ts = isoNow.substring(11, 23);
  const line = `[MCP ${ts}] ${msg}\n`;
  process.stderr.write(line);
}
|
|
44
|
+
|
|
45
|
+
// ── Daemon Process Manager ──────────────────────────────────
|
|
46
|
+
|
|
47
|
+
// Child process handle assigned by spawnDaemon(); reset to null when the daemon exits.
let daemon = null;
|
|
48
|
+
// True once the daemon has sent its `ready` notification; cleared again on exit.
let daemonReady = false;
|
|
49
|
+
// Scaled display size; these defaults are overwritten by the values in the `ready` notification.
const display = { width: 1280, height: 800 };
|
|
50
|
+
// In-flight requests keyed by request id; each entry is { resolve, reject, timer }.
const pendingRequests = new Map();
|
|
51
|
+
// Accumulates daemon stdout until a complete newline-terminated line is available.
let lineBuffer = '';
|
|
52
|
+
|
|
53
|
+
/**
 * Split a shell-like parameter string into individual arguments.
 *
 * Whitespace separates arguments unless it is inside single or double quotes.
 * The quotes of every balanced quoted segment are removed, including segments
 * embedded in a larger token, so `--label="a b"` becomes `--label=a b`
 * (previously only a quote at the very start/end of a token was stripped,
 * leaving a dangling quote behind). Unbalanced quotes act as separators,
 * exactly as before.
 *
 * @param {string} raw - Raw parameter string
 * @returns {string[]} Parsed arguments
 */
function splitDaemonParams(raw) {
  const tokens = raw.match(/(?:[^\s"']+|"[^"]*"|'[^']*')+/g) ?? [];
  return tokens.map((token) =>
    token.replace(/"([^"]*)"|'([^']*)'/g, (_match, doubleQuoted, singleQuoted) => doubleQuoted ?? singleQuoted),
  );
}

/**
 * Build the daemon command line from the environment-derived configuration.
 *
 * @returns {string[]} Arguments for the daemon executable
 */
function buildDaemonArgs() {
  const args = ['--host', VNC_HOST, '--port', VNC_PORT];
  if (VNC_USERNAME) args.push('--username', VNC_USERNAME);
  // NOTE(review): the password travels on the command line, where other local
  // users can typically see it in process listings — consider a safer channel.
  if (VNC_PASSWORD) args.push('--password', VNC_PASSWORD);

  // Extra parameters — passed directly to daemon CLI
  if (DAEMON_PARAMS) {
    args.push(...splitDaemonParams(DAEMON_PARAMS));
  }

  return args;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Return a copy of the daemon arguments that is safe to log: the value that
 * follows `--password` is replaced by `***`.
 *
 * @param {string[]} args - Daemon CLI arguments
 * @returns {string[]} Redacted copy (the input is not modified)
 */
function redactDaemonArgs(args) {
  return args.map((arg, index) => (args[index - 1] === '--password' ? '***' : arg));
}

/**
 * Reject every in-flight request with the given reason and empty the table.
 *
 * @param {string} reason - Error message handed to each waiting caller
 */
function failPendingRequests(reason) {
  for (const [, req] of pendingRequests) {
    clearTimeout(req.timer);
    req.reject(new Error(reason));
  }
  pendingRequests.clear();
}

/**
 * Start the native daemon and wire up its stdio streams.
 *
 * stdout is split into newline-delimited messages for handleDaemonMessage,
 * stderr is forwarded to this process's stderr, and exit or spawn failure
 * resets the daemon state and rejects all pending requests.
 */
function spawnDaemon() {
  const args = buildDaemonArgs();
  // Never write the VNC password to the log.
  log(`Spawning daemon: ${DAEMON_PATH} ${redactDaemonArgs(args).join(' ')}`);

  // Drop any partial line left over from a previous daemon instance.
  lineBuffer = '';

  const child = spawn(DAEMON_PATH, args, {
    stdio: ['pipe', 'pipe', 'pipe'],
  });
  daemon = child;

  // Shared teardown; ignored when this child is no longer the active daemon.
  const markStopped = (reason) => {
    if (daemon !== child) return;
    daemonReady = false;
    daemon = null;
    failPendingRequests(reason);
  };

  child.stdout.on('data', (chunk) => {
    lineBuffer += chunk.toString();
    let idx;
    while ((idx = lineBuffer.indexOf('\n')) !== -1) {
      const line = lineBuffer.slice(0, idx).trim();
      lineBuffer = lineBuffer.slice(idx + 1);
      if (line) handleDaemonMessage(line);
    }
  });

  child.stderr.on('data', (chunk) => {
    process.stderr.write(chunk);
  });

  // Without a listener, a broken pipe on stdin would surface as an uncaught
  // 'error' event and take the whole proxy down.
  child.stdin.on('error', (err) => {
    log(`Daemon stdin error: ${err.message}`);
  });

  child.on('exit', (code) => {
    log(`Daemon exited with code ${code}`);
    markStopped('Daemon exited');
  });

  child.on('error', (err) => {
    log(`Daemon spawn error: ${err.message}`);
    // A child without a pid never started; 'exit' is not guaranteed to follow
    // an 'error', so tear the state down here.
    if (child.pid === undefined) {
      markStopped(`Daemon failed to start: ${err.message}`);
    }
  });
}
|
|
104
|
+
|
|
105
|
+
/**
 * Process one NDJSON line received from the daemon.
 *
 * A message with a `method` is a notification: `ready` marks the daemon usable
 * and records the scaled display size, `vnc_state` is only logged. Any other
 * message with an `id` matching a pending request settles that request.
 * Lines that are not valid JSON, or that are not JSON objects, are logged and
 * ignored.
 *
 * @param {string} line - A single non-empty line of daemon stdout
 */
function handleDaemonMessage(line) {
  let msg;
  try {
    msg = JSON.parse(line);
  } catch {
    log(`Invalid daemon JSON: ${line}`);
    return;
  }

  // `null`, arrays and primitives are valid JSON but not protocol messages;
  // reading `.method` on null would throw inside the stdout listener.
  if (msg === null || typeof msg !== 'object' || Array.isArray(msg)) {
    log(`Invalid daemon JSON: ${line}`);
    return;
  }

  // PC notification — has method, no id
  if (msg.method) {
    const { scaledWidth, scaledHeight, state } = msg.params || {};
    if (msg.method === 'ready') {
      daemonReady = true;
      if (scaledWidth) display.width = scaledWidth;
      if (scaledHeight) display.height = scaledHeight;
      log(`Daemon ready — display ${display.width}×${display.height}`);
    } else if (msg.method === 'vnc_state') {
      log(`VNC state: ${state}`);
    }
    return;
  }

  // PC response — has id
  if (msg.id !== undefined && pendingRequests.has(msg.id)) {
    const req = pendingRequests.get(msg.id);
    pendingRequests.delete(msg.id);
    clearTimeout(req.timer);
    req.resolve(msg);
  }
}
|
|
136
|
+
|
|
137
|
+
/**
 * Send a PC request to the daemon and wait for response.
 *
 * The returned promise resolves with the daemon's reply message (which may
 * itself carry an `error` member) and rejects when the daemon is not ready,
 * when the request cannot be written to the daemon's stdin, or when no reply
 * arrives within the timeout.
 *
 * @param {string} method - PC method name
 * @param {object} [params] - Method parameters
 * @param {number} [timeoutMs=30000] - Timeout in milliseconds
 * @returns {Promise<object>} - Daemon PC response
 */
function sendRequest(method, params, timeoutMs = 30000) {
  return new Promise((resolve, reject) => {
    if (!daemon || !daemonReady) {
      reject(new Error('Daemon not ready. Check CLAUDE_KVM_DAEMON_PATH and VNC credentials.'));
      return;
    }

    const id = randomUUID();

    // Settle as failed at most once; a request that was already answered (or
    // already failed) is no longer in the table and is left alone.
    const fail = (err) => {
      const entry = pendingRequests.get(id);
      if (!entry) return;
      pendingRequests.delete(id);
      clearTimeout(entry.timer);
      reject(err);
    };

    const timer = setTimeout(() => {
      fail(new Error(`Daemon request timed out after ${timeoutMs}ms`));
    }, timeoutMs);

    pendingRequests.set(id, { resolve, reject, timer });

    const request = { method, id };
    if (params && Object.keys(params).length > 0) request.params = params;

    // Report write failures immediately instead of letting the caller wait
    // for the full timeout on a request the daemon never received.
    try {
      daemon.stdin.write(`${JSON.stringify(request)}\n`, (err) => {
        if (err) fail(new Error(`Failed to send request to daemon: ${err.message}`, { cause: err }));
      });
    } catch (err) {
      fail(new Error(`Failed to send request to daemon: ${err.message}`, { cause: err }));
    }
  });
}
|
|
166
|
+
|
|
167
|
+
// ── Wait for daemon ready ───────────────────────────────────
|
|
168
|
+
|
|
169
|
+
/**
 * Wait until the daemon has reported `ready`.
 *
 * Polls the ready flag every 100 ms. Rejects when the timeout elapses, and
 * also as soon as a daemon that was running disappears (its handle is reset
 * to null on exit), because a daemon that has exited can never become ready
 * and waiting out the full timeout only delays startup.
 *
 * @param {number} [timeoutMs=30000] - Maximum time to wait in milliseconds
 * @returns {Promise<void>} Resolves once the daemon is ready
 */
function waitForReady(timeoutMs = 30000) {
  return new Promise((resolve, reject) => {
    if (daemonReady) { resolve(); return; }

    // Only treat a null handle as "exited" after a live handle was observed,
    // so calling this before the daemon is spawned still just waits.
    let daemonSeen = daemon !== null;

    const stop = () => {
      clearInterval(interval);
      clearTimeout(timer);
    };

    const interval = setInterval(() => {
      if (daemonReady) {
        stop();
        resolve();
        return;
      }
      if (daemon !== null) {
        daemonSeen = true;
      } else if (daemonSeen) {
        stop();
        reject(new Error('Daemon exited before becoming ready'));
      }
    }, 100);

    const timer = setTimeout(() => {
      stop();
      reject(new Error('Daemon did not become ready within timeout'));
    }, timeoutMs);
  });
}
|
|
187
|
+
|
|
188
|
+
// ── Tool Execution ──────────────────────────────────────────
|
|
189
|
+
|
|
190
|
+
/**
 * Run a single `vnc_command` action and convert the daemon reply into MCP
 * tool content.
 *
 * The `action` member selects the daemon method; every other member of the
 * input is forwarded as a parameter. Content parts are emitted in a fixed
 * order: detail text, image, OCR elements, timing map, cursor position,
 * display size. A reply with none of these yields a single "OK" text part.
 *
 * @param {object} input - Tool input containing `action` plus its parameters
 * @returns {Promise<object>} MCP tool result (`content`, and `isError` on daemon errors)
 */
async function executeVncCommand(input) {
  const { action, ...params } = input;
  const reply = await sendRequest(action, params);

  // The daemon answered with a PC error object.
  if (reply.error) {
    const failure = { type: 'text', text: `Error: ${reply.error.message}` };
    return { content: [failure], isError: true };
  }

  const payload = reply.result || {};
  const asText = (value) => ({ type: 'text', text: value });
  const parts = [];

  if (payload.detail) parts.push(asText(payload.detail));

  if (payload.image) {
    parts.push({ type: 'image', data: payload.image, mimeType: 'image/png' });
  }

  // OCR elements (detect_elements) and timing map (get_timing, configure with
  // reset) are passed through as JSON text.
  if (payload.elements) parts.push(asText(JSON.stringify(payload.elements)));
  if (payload.timing) parts.push(asText(JSON.stringify(payload.timing)));

  // Cursor position (nudge, cursor_crop) needs both coordinates.
  const hasCursor = payload.x !== undefined && payload.y !== undefined;
  if (hasCursor) parts.push(asText(`cursor: (${payload.x}, ${payload.y})`));

  // Display size is only reported for replies that carry no image.
  const reportsDisplay = payload.scaledWidth !== undefined && !payload.image;
  if (reportsDisplay) {
    parts.push(asText(`display: ${payload.scaledWidth}×${payload.scaledHeight}`));
  }

  return { content: parts.length > 0 ? parts : [asText('OK')] };
}
|
|
242
|
+
|
|
243
|
+
/**
 * Run a list of actions one after another and report each outcome as a line
 * of text (`[n] action: detail`).
 *
 * Execution stops at the first failure. A failure is either a daemon error
 * reply or a thrown error (timeout, daemon exit, daemon not ready); in both
 * cases the lines for the steps that already ran are kept, so the caller
 * knows what was actually performed on the remote desktop. A thrown error
 * additionally marks the result with `isError`.
 *
 * @param {object} input - Tool input with an `actions` array of `{ action, ...params }`
 * @returns {Promise<object>} MCP tool result with one text part
 */
async function executeActionQueue(input) {
  const results = [];
  let aborted = false;

  for (let i = 0; i < input.actions.length; i++) {
    const { action, ...params } = input.actions[i];
    const label = `[${i + 1}] ${action}`;

    let response;
    try {
      response = await sendRequest(action, params);
    } catch (err) {
      // Keep the report of earlier steps instead of losing it to a rejection.
      results.push(`${label}: ERROR — ${err.message}`);
      aborted = true;
      break;
    }

    if (response.error) {
      results.push(`${label}: ERROR — ${response.error.message}`);
      break;
    }

    const detail = response.result?.detail;
    results.push(`${label}: ${detail || 'OK'}`);
  }

  const output = { content: [{ type: 'text', text: results.join('\n') }] };
  if (aborted) output.isError = true;
  return output;
}
|
|
259
|
+
|
|
260
|
+
// ── MCP Server ──────────────────────────────────────────────
|
|
261
|
+
|
|
262
|
+
// Version reported in the startup log and in the MCP handshake; keep in sync
// with the "version" field of package.json.
const PACKAGE_VERSION = '2.0.0';

/**
 * Entry point: start the daemon, register the MCP tools and serve them over
 * stdio until the process is asked to stop.
 *
 * Tool registration waits up to 30 s for the daemon's `ready` notification so
 * the tools can be built with the real display size; if the daemon is not
 * ready in time the default dimensions are used.
 */
async function main() {
  log(`Claude KVM v${PACKAGE_VERSION} — Native VNC proxy`);

  spawnDaemon();

  const mcpServer = new McpServer(
    { name: 'claude-kvm', version: PACKAGE_VERSION },
    { capabilities: { tools: {} } },
  );

  try {
    await waitForReady(30000);
  } catch (err) {
    log(`Warning: ${err.message} — registering with default dimensions`);
  }

  const errorResult = (text) => ({ content: [{ type: 'text', text }], isError: true });

  // Register one tool definition. The description is forwarded when the
  // definition provides one (previously it was dropped, so the client never
  // saw the action list or display size). An array of lines is joined —
  // NOTE(review): confirm the shape of `description` in tools/index.js.
  // Handler failures are reported as MCP error results instead of rejections.
  const registerTool = (definition, handler) => {
    const description = Array.isArray(definition.description)
      ? definition.description.join('\n')
      : definition.description;

    const guarded = async (input) => {
      try {
        return await handler(input);
      } catch (err) {
        return errorResult(`Error: ${err.message}`);
      }
    };

    if (typeof description === 'string' && description.length > 0) {
      mcpServer.tool(definition.name, description, definition.inputSchema, guarded);
    } else {
      mcpServer.tool(definition.name, definition.inputSchema, guarded);
    }
  };

  // Register vnc_command tool
  registerTool(vncCommandTool(display.width, display.height), executeVncCommand);

  // Register action_queue tool
  registerTool(actionQueueTool(display.width, display.height), executeActionQueue);

  // Register control tools
  for (const tool of controlTools()) {
    registerTool(tool, async (input) => {
      if (tool.name === 'task_complete') {
        return { content: [{ type: 'text', text: input.summary }] };
      }
      if (tool.name === 'task_failed') {
        return { content: [{ type: 'text', text: input.reason }], isError: true };
      }
      // A handler must always return a result; never fall through to undefined.
      return errorResult(`Error: unsupported control tool "${tool.name}"`);
    });
  }

  const transport = new StdioServerTransport();
  await mcpServer.connect(transport);
  log('MCP server connected on stdio');

  // Ask the daemon to shut down, then force-exit after a short grace period.
  let shuttingDown = false;
  const shutdown = () => {
    if (shuttingDown) return;
    shuttingDown = true;
    log('Shutting down...');
    if (daemon) {
      try {
        daemon.stdin.write(JSON.stringify({ method: 'shutdown' }) + '\n');
      } catch (err) {
        log(`Could not send shutdown to daemon: ${err.message}`);
      }
      setTimeout(() => { daemon?.kill(); process.exit(0); }, 500);
    } else {
      process.exit(0);
    }
  };

  process.on('SIGINT', shutdown);
  // Hosts commonly stop child servers with SIGTERM; give the daemon the same
  // graceful shutdown in that case.
  process.on('SIGTERM', shutdown);
}

main().catch((err) => {
  log(`Fatal: ${err.message}`);
  process.exit(1);
});
|
package/package.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "claude-kvm",
|
|
3
|
+
"version": "2.0.0",
|
|
4
|
+
"description": "MCP server — control remote desktops via VNC (macOS)",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"author": "Rıza Emre ARAS <r.emrearas@proton.me>",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "https://github.com/ARAS-Workspace/claude-kvm.git"
|
|
10
|
+
},
|
|
11
|
+
"keywords": [
|
|
12
|
+
"mcp",
|
|
13
|
+
"vnc",
|
|
14
|
+
"kvm",
|
|
15
|
+
"claude",
|
|
16
|
+
"remote-desktop",
|
|
17
|
+
"automation",
|
|
18
|
+
"apple-silicon",
|
|
19
|
+
"native"
|
|
20
|
+
],
|
|
21
|
+
"main": "index.js",
|
|
22
|
+
"bin": {
|
|
23
|
+
"claude-kvm": "index.js"
|
|
24
|
+
},
|
|
25
|
+
"files": [
|
|
26
|
+
"index.js",
|
|
27
|
+
"tools/",
|
|
28
|
+
"LICENSE",
|
|
29
|
+
"README.md"
|
|
30
|
+
],
|
|
31
|
+
"type": "module",
|
|
32
|
+
"scripts": {
|
|
33
|
+
"start": "node index.js"
|
|
34
|
+
},
|
|
35
|
+
"dependencies": {
|
|
36
|
+
"@modelcontextprotocol/sdk": "^1.26.0",
|
|
37
|
+
"zod": "^4.3.6"
|
|
38
|
+
},
|
|
39
|
+
"devDependencies": {
|
|
40
|
+
"@anthropic-ai/sdk": "^0.52.0",
|
|
41
|
+
"@types/node": "^25.2.3",
|
|
42
|
+
"dotenv": "^17.3.1"
|
|
43
|
+
}
|
|
44
|
+
}
|
package/tools/index.js
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
/**
|
|
3
|
+
* █████╗ ██████╗ █████╗ ███████╗
|
|
4
|
+
* ██╔══██╗██╔══██╗██╔══██╗██╔════╝
|
|
5
|
+
* ███████║██████╔╝███████║███████╗
|
|
6
|
+
* ██╔══██║██╔══██╗██╔══██║╚════██║
|
|
7
|
+
* ██║ ██║██║ ██║██║ ██║███████║
|
|
8
|
+
* ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝
|
|
9
|
+
*
|
|
10
|
+
* Copyright (c) 2026 Rıza Emre ARAS <r.emrearas@proton.me>
|
|
11
|
+
*
|
|
12
|
+
* This file is part of Claude KVM.
|
|
13
|
+
* Released under the MIT License — see LICENSE for details.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { z } from 'zod';
|
|
17
|
+
|
|
18
|
+
/**
 * Create the `vnc_command` tool definition for a display of the given size.
 *
 * The result bundles the tool name, a multi-line description that embeds the
 * display size, and a zod input schema. Absolute coordinates (x, y, toX, toY)
 * are integers limited to 0..width-1 and 0..height-1 respectively.
 *
 * @param {number} width - Scaled display width
 * @param {number} height - Scaled display height
 * @returns {{name: string, description: string, inputSchema: object}} Tool definition
 */
export function vncCommandTool(width, height) {
  const maxX = width - 1;
  const maxY = height - 1;

  // Optional integer field restricted to the inclusive range [lowest, highest].
  const optionalInt = (lowest, highest, label) =>
    z.number().int().min(lowest).max(highest).optional().describe(label);

  const supportedActions = [
    'screenshot', 'cursor_crop', 'diff_check', 'set_baseline',
    'mouse_click', 'mouse_double_click', 'mouse_move', 'hover', 'nudge',
    'mouse_drag', 'scroll',
    'key_tap', 'key_combo', 'key_type', 'paste',
    'detect_elements',
    'configure', 'get_timing',
    'wait', 'health', 'shutdown',
  ];

  const descriptionLines = [
    `Control a remote desktop via VNC. Display: ${width}×${height}px.`,
    'All coordinates are in this scaled space.',
    '',
    'ACTIONS:',
    ' screenshot → full screen PNG',
    ' cursor_crop → crop around cursor with crosshair',
    ' diff_check → detect screen changes since baseline',
    ' set_baseline → save current screen for diff comparison',
    ' mouse_click {x, y, button?} → click (left|right|middle)',
    ' mouse_double_click {x, y} → double click',
    ' mouse_move {x, y} → move cursor',
    ' hover {x, y} → move cursor + settle wait',
    ' nudge {dx, dy} → relative cursor move',
    ' mouse_drag {x, y, toX, toY} → drag from start to end',
    ' scroll {x, y, direction, amount?} → scroll (up|down|left|right)',
    ' key_tap {key} → single key press (enter|escape|tab|space|...)',
    ' key_combo {key} or {keys:[...]} → modifier combo (e.g. "cmd+c" or ["cmd","shift","3"])',
    ' key_type {text} → type text character by character',
    ' paste {text} → paste text via clipboard',
    ' detect_elements → OCR text detection with bounding boxes',
    ' configure {<params>} → set timing/display params at runtime',
    ' configure {reset: true} → reset all params to defaults',
    ' get_timing → get current timing + display params',
    ' wait {ms?} → pause (default 500ms)',
    ' health → connection status + display info',
    ' shutdown → graceful daemon exit',
    '',
    'Use screenshot → analyze → act → verify pattern.',
  ];

  const inputSchema = {
    action: z.enum(supportedActions).describe('The action to perform'),
    x: optionalInt(0, maxX, 'X coordinate'),
    y: optionalInt(0, maxY, 'Y coordinate'),
    toX: optionalInt(0, maxX, 'Drag target X'),
    toY: optionalInt(0, maxY, 'Drag target Y'),
    dx: optionalInt(-50, 50, 'Relative X offset (nudge)'),
    dy: optionalInt(-50, 50, 'Relative Y offset (nudge)'),
    button: z.enum(['left', 'right', 'middle']).optional().describe('Mouse button'),
    key: z.string().optional().describe('Key name or combo string (e.g. "cmd+c")'),
    keys: z.array(z.string()).optional().describe('Array of key names for combo (e.g. ["cmd","shift","3"])'),
    text: z.string().optional().describe('Text to type or paste'),
    direction: z.enum(['up', 'down', 'left', 'right']).optional().describe('Scroll direction'),
    amount: optionalInt(1, 20, 'Scroll amount (default 3)'),
    ms: optionalInt(50, 10000, 'Wait duration in ms (default 500)'),
    // Parameters accepted by the `configure` action (runtime timing/display tuning)
    reset: z.boolean().optional().describe('Reset all params to defaults'),
    max_dimension: optionalInt(320, 3840, 'Max screenshot dimension'),
    cursor_crop_radius: optionalInt(50, 500, 'Cursor crop radius'),
    click_hold_ms: optionalInt(1, 500, 'Click hold duration'),
    double_click_gap_ms: optionalInt(1, 500, 'Double-click gap'),
    hover_settle_ms: optionalInt(1, 2000, 'Hover settle wait'),
    drag_position_ms: optionalInt(1, 500, 'Pre-drag position wait'),
    drag_press_ms: optionalInt(1, 500, 'Drag press hold'),
    drag_step_ms: optionalInt(1, 100, 'Between interpolation pts'),
    drag_settle_ms: optionalInt(1, 500, 'Settle before release'),
    // The only numeric field here without an integer constraint.
    drag_pixels_per_step: z.number().min(1).max(100).optional().describe('Point density per pixel'),
    drag_min_steps: optionalInt(1, 100, 'Min interpolation steps'),
    scroll_press_ms: optionalInt(1, 200, 'Scroll press-release gap'),
    scroll_tick_ms: optionalInt(1, 200, 'Inter-tick delay'),
    key_hold_ms: optionalInt(1, 500, 'Key hold duration'),
    combo_mod_ms: optionalInt(1, 200, 'Modifier settle delay'),
    type_key_ms: optionalInt(1, 200, 'Key hold during typing'),
    type_inter_key_ms: optionalInt(1, 200, 'Inter-character delay'),
    type_shift_ms: optionalInt(1, 200, 'Shift key settle'),
    paste_settle_ms: optionalInt(1, 500, 'Post-clipboard write wait'),
  };

  return {
    name: 'vnc_command',
    description: descriptionLines.join('\n'),
    inputSchema,
  };
}
|
|
103
|
+
|
|
104
|
+
/**
 * Create the `action_queue` tool definition for a display of the given size.
 *
 * The tool accepts an ordered list of 1 to 20 actions. Each entry is validated
 * against a per-action object schema whose absolute coordinates are integers
 * limited to 0..width-1 and 0..height-1.
 *
 * @param {number} width - Scaled display width
 * @param {number} height - Scaled display height
 * @returns {{name: string, description: string, inputSchema: object}} Tool definition
 */
export function actionQueueTool(width, height) {
  const maxX = width - 1;
  const maxY = height - 1;

  // Optional integer field restricted to the inclusive range [lowest, highest].
  const optionalInt = (lowest, highest, label) =>
    z.number().int().min(lowest).max(highest).optional().describe(label);

  const queueableActions = [
    'mouse_click', 'mouse_double_click', 'mouse_move', 'hover', 'nudge',
    'mouse_drag', 'scroll',
    'key_tap', 'key_combo', 'key_type', 'paste',
    'set_baseline', 'diff_check',
    'wait',
  ];

  // Schema for a single entry of the queue.
  const queueAction = z.object({
    action: z.enum(queueableActions).describe('The action to perform'),
    x: optionalInt(0, maxX, 'X coordinate'),
    y: optionalInt(0, maxY, 'Y coordinate'),
    toX: optionalInt(0, maxX, 'Drag target X'),
    toY: optionalInt(0, maxY, 'Drag target Y'),
    dx: optionalInt(-50, 50, 'Relative X offset (nudge)'),
    dy: optionalInt(-50, 50, 'Relative Y offset (nudge)'),
    button: z.enum(['left', 'right', 'middle']).optional().describe('Mouse button'),
    key: z.string().optional().describe('Key name or combo string (e.g. "ctrl+c")'),
    keys: z.array(z.string()).optional().describe('Array of key names for combo'),
    text: z.string().optional().describe('Text to type or paste'),
    direction: z.enum(['up', 'down', 'left', 'right']).optional().describe('Scroll direction'),
    amount: optionalInt(1, 20, 'Scroll amount (default 3)'),
    ms: optionalInt(50, 10000, 'Wait duration in ms (default 500)'),
  });

  const descriptionLines = [
    'Execute multiple VNC actions in sequence. Returns text results only (no screenshots).',
    'Stops on first error. Use for batching confident action sequences.',
    '',
    'Examples:',
    ' Navigate: [click(640,91), ctrl+a, paste("url"), return]',
    ' Scroll: [click(640,400), pagedown, pagedown, pagedown]',
    ' Type: [click(300,200), key_type("hello"), tab, key_type("world")]',
  ];

  const actions = z.array(queueAction).min(1).max(20).describe('Ordered actions to execute sequentially');

  return {
    name: 'action_queue',
    description: descriptionLines.join('\n'),
    inputSchema: { actions },
  };
}
|
|
150
|
+
|
|
151
|
+
/**
 * Create the definitions of the two task-lifecycle control tools:
 * `task_complete` (takes a `summary` string) and `task_failed` (takes a
 * `reason` string).
 *
 * @returns {Array<{name: string, description: string, inputSchema: object}>}
 *   The tool definitions, `task_complete` first.
 */
export function controlTools() {
  const taskComplete = {
    name: 'task_complete',
    description: 'Mark the task as successfully completed. Provide a brief summary.',
    inputSchema: { summary: z.string().describe('What was accomplished') },
  };

  const taskFailed = {
    name: 'task_failed',
    description: 'Mark the task as failed. Explain why.',
    inputSchema: { reason: z.string().describe('Why the task could not be completed') },
  };

  return [taskComplete, taskFailed];
}
|