claude-kvm-native 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +244 -0
- package/index.js +441 -0
- package/lib/capture.js +204 -0
- package/lib/hid.js +248 -0
- package/lib/ssh.js +162 -0
- package/lib/types.js +138 -0
- package/lib/vnc.js +1332 -0
- package/package.json +51 -0
- package/tools/control.js +55 -0
- package/tools/index.js +48 -0
- package/tools/keyboard.js +56 -0
- package/tools/mouse.js +61 -0
- package/tools/screen.js +67 -0
- package/tools/ssh.js +62 -0
- package/tools/vlm.js +59 -0
- package/utils/keysym.js +158 -0
package/package.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "claude-kvm-native",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "MCP server (Native) — control remote desktops via VNC, SSH and on-device VLM (macOS)",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"author": "Rıza Emre ARAS <r.emrearas@proton.me>",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "https://github.com/remrearas/claude-kvm-native.git"
|
|
10
|
+
},
|
|
11
|
+
"keywords": [
|
|
12
|
+
"mcp",
|
|
13
|
+
"vnc",
|
|
14
|
+
"kvm",
|
|
15
|
+
"claude",
|
|
16
|
+
"remote-desktop",
|
|
17
|
+
"automation",
|
|
18
|
+
"ssh",
|
|
19
|
+
"vlm",
|
|
20
|
+
"apple-silicon",
|
|
21
|
+
"native"
|
|
22
|
+
],
|
|
23
|
+
"main": "index.js",
|
|
24
|
+
"bin": {
|
|
25
|
+
"claude-kvm-native": "index.js"
|
|
26
|
+
},
|
|
27
|
+
"files": [
|
|
28
|
+
"index.js",
|
|
29
|
+
"lib/",
|
|
30
|
+
"tools/",
|
|
31
|
+
"utils/",
|
|
32
|
+
"LICENSE",
|
|
33
|
+
"README.md"
|
|
34
|
+
],
|
|
35
|
+
"type": "module",
|
|
36
|
+
"scripts": {
|
|
37
|
+
"start": "node index.js",
|
|
38
|
+
"test:integration": "node test/integration.js"
|
|
39
|
+
},
|
|
40
|
+
"dependencies": {
|
|
41
|
+
"@modelcontextprotocol/sdk": "^1.26.0",
|
|
42
|
+
"sharp": "^0.33.5",
|
|
43
|
+
"ssh2": "^1.17.0",
|
|
44
|
+
"zod": "^4.3.6"
|
|
45
|
+
},
|
|
46
|
+
"devDependencies": {
|
|
47
|
+
"@anthropic-ai/sdk": "^0.52.0",
|
|
48
|
+
"@types/node": "^25.2.3",
|
|
49
|
+
"@types/ssh2": "^1.15.5"
|
|
50
|
+
}
|
|
51
|
+
}
|
package/tools/control.js
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
/**
|
|
3
|
+
* █████╗ ██████╗ █████╗ ███████╗
|
|
4
|
+
* ██╔══██╗██╔══██╗██╔══██╗██╔════╝
|
|
5
|
+
* ███████║██████╔╝███████║███████╗
|
|
6
|
+
* ██╔══██║██╔══██╗██╔══██║╚════██║
|
|
7
|
+
* ██║ ██║██║ ██║██║ ██║███████║
|
|
8
|
+
* ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝
|
|
9
|
+
*
|
|
10
|
+
* Copyright (c) 2025 Rıza Emre ARAS <r.emrearas@proton.me>
|
|
11
|
+
*
|
|
12
|
+
* This file is part of Claude KVM.
|
|
13
|
+
* Released under the MIT License — see LICENSE for details.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { z } from 'zod';
|
|
17
|
+
|
|
18
|
+
/**
 * Build the flow-control tool definitions: `wait`, `health_check`,
 * `task_complete` and `task_failed` (returned in that order).
 *
 * `health_check` accepts no arguments, so its `inputSchema` is `null`.
 *
 * @returns {Array<{name: string, description: string, inputSchema: Record<string, import('zod').ZodType> | null}>}
 */
export function controlTools() {
  // Pause between actions; the schema bounds the duration to 100–5000 ms.
  const waitTool = {
    name: 'wait',
    description: [
      'Wait before the next action. Use when waiting for a page load, animation, or response.',
      'Do NOT use wait between simple actions (click, type) — only when the remote system needs time to process (page navigation, app launch).',
    ].join('\n'),
    inputSchema: {
      ms: z.number().int().min(100).max(5000).describe('Duration in milliseconds (100–5000)'),
    },
  };

  // Connection diagnostics; takes no input.
  const healthCheckLines = [
    'Check VNC connection status and server info.',
    'Returns connection state, resolution, latency, reconnect count, and memory usage.',
    'Use when you suspect the connection may be unstable or to diagnose issues.',
  ];
  const healthCheckTool = {
    name: 'health_check',
    description: healthCheckLines.join('\n'),
    inputSchema: null,
  };

  // Terminal outcome: success, with a short summary.
  const taskCompleteTool = {
    name: 'task_complete',
    description: 'Mark the task as successfully completed. Provide a brief summary.',
    inputSchema: {
      summary: z.string().describe('What was accomplished'),
    },
  };

  // Terminal outcome: failure, with the reason.
  const taskFailedTool = {
    name: 'task_failed',
    description: 'Mark the task as failed. Explain why.',
    inputSchema: {
      reason: z.string().describe('Why the task could not be completed'),
    },
  };

  return [waitTool, healthCheckTool, taskCompleteTool, taskFailedTool];
}
|
package/tools/index.js
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
/**
|
|
3
|
+
* █████╗ ██████╗ █████╗ ███████╗
|
|
4
|
+
* ██╔══██╗██╔══██╗██╔══██╗██╔════╝
|
|
5
|
+
* ███████║██████╔╝███████║███████╗
|
|
6
|
+
* ██╔══██║██╔══██╗██╔══██║╚════██║
|
|
7
|
+
* ██║ ██║██║ ██║██║ ██║███████║
|
|
8
|
+
* ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝
|
|
9
|
+
*
|
|
10
|
+
* Copyright (c) 2025 Rıza Emre ARAS <r.emrearas@proton.me>
|
|
11
|
+
*
|
|
12
|
+
* This file is part of Claude KVM.
|
|
13
|
+
* Released under the MIT License — see LICENSE for details.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { screenTools } from './screen.js';
|
|
17
|
+
import { mouseTools } from './mouse.js';
|
|
18
|
+
import { keyboardTools } from './keyboard.js';
|
|
19
|
+
import { controlTools } from './control.js';
|
|
20
|
+
import { sshTools } from './ssh.js';
|
|
21
|
+
import { vlmTools } from './vlm.js';
|
|
22
|
+
|
|
23
|
+
/**
 * Collect every KVM tool definition into one flat list.
 *
 * Screen, mouse, keyboard and control tools are always included, in that
 * order. SSH tools and VLM tools are appended afterwards only when the
 * matching option flag is truthy.
 *
 * @param {import('../lib/types.js').ScaledDisplay} display - Forwarded to the mouse and VLM tool builders
 * @param {object} [options]
 * @param {boolean} [options.sshEnabled] - Whether SSH is configured
 * @param {boolean} [options.vlmEnabled] - Whether VLM tool is available
 * @returns {Array<{name: string, description: string, inputSchema: object}>}
 */
export function getToolDefinitions(display, options = {}) {
  const definitions = [];

  // Core instruments, always present.
  definitions.push(
    ...screenTools(),
    ...mouseTools(display),
    ...keyboardTools(),
    ...controlTools(),
  );

  // Optional instruments, gated by configuration.
  const { sshEnabled, vlmEnabled } = options;
  if (sshEnabled) {
    definitions.push(...sshTools());
  }
  if (vlmEnabled) {
    definitions.push(...vlmTools(display));
  }

  return definitions;
}
|
package/tools/keyboard.js
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
/**
|
|
3
|
+
* █████╗ ██████╗ █████╗ ███████╗
|
|
4
|
+
* ██╔══██╗██╔══██╗██╔══██╗██╔════╝
|
|
5
|
+
* ███████║██████╔╝███████║███████╗
|
|
6
|
+
* ██╔══██║██╔══██╗██╔══██║╚════██║
|
|
7
|
+
* ██║ ██║██║ ██║██║ ██║███████║
|
|
8
|
+
* ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝
|
|
9
|
+
*
|
|
10
|
+
* Copyright (c) 2025 Rıza Emre ARAS <r.emrearas@proton.me>
|
|
11
|
+
*
|
|
12
|
+
* This file is part of Claude KVM.
|
|
13
|
+
* Released under the MIT License — see LICENSE for details.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { z } from 'zod';
|
|
17
|
+
|
|
18
|
+
/**
 * Build the single `keyboard` tool definition.
 *
 * The tool multiplexes four actions (`press`, `combo`, `type`, `paste`)
 * through one schema; each optional field is used by the action(s) named in
 * its own description.
 *
 * @returns {Array<{name: string, description: string, inputSchema: Record<string, import('zod').ZodType>}>}
 */
export function keyboardTools() {
  // What each action does and which parameter it reads.
  const actionLines = [
    'Actions:',
    '- press: Press a single key. Param: key.',
    ' Named keys: enter, escape, tab, backspace, delete, space,',
    ' up, down, left, right, home, end, pageup, pagedown, f1–f12.',
    '- combo: Key combination with "+" separator. Param: keys.',
    ' Modifiers: ctrl, alt, shift, meta. Examples: ctrl+c, ctrl+l.',
    '- type: Type text character by character. Param: text.',
    '- paste: Paste text via clipboard. Param: text. Faster and more reliable for long text or special characters.',
  ];

  // Usage guidance shown to the model.
  const tipLines = [
    'Tips:',
    '- Use pagedown/pageup for page scrolling — faster than mouse scroll.',
    '- Use tab to move between form fields instead of clicking each one.',
    '- Use meta+l to focus the address bar in browsers.',
    '- Use meta+w to close windows/tabs, meta+a to select all.',
    '- Chain keyboard actions without screenshots between them — verify with diff_check at the end.',
    '- paste is preferred over type for longer text. Both support Unicode and special characters.',
    '- On macOS, paste falls back to typing character-by-character (VNC clipboard limitation).',
  ];

  // Sections are separated by one blank line.
  const description = ['Keyboard control.', '', ...actionLines, '', ...tipLines].join('\n');

  const inputSchema = {
    action: z.enum(['press', 'combo', 'type', 'paste']),
    key: z.string().describe('Key name. For: press.').optional(),
    keys: z.string().describe('Key combo (e.g. "ctrl+c"). For: combo.').optional(),
    text: z.string().describe('Text to type. For: type, paste.').optional(),
  };

  return [{ name: 'keyboard', description, inputSchema }];
}
|
package/tools/mouse.js
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
/**
|
|
3
|
+
* █████╗ ██████╗ █████╗ ███████╗
|
|
4
|
+
* ██╔══██╗██╔══██╗██╔══██╗██╔════╝
|
|
5
|
+
* ███████║██████╔╝███████║███████╗
|
|
6
|
+
* ██╔══██║██╔══██╗██╔══██║╚════██║
|
|
7
|
+
* ██║ ██║██║ ██║██║ ██║███████║
|
|
8
|
+
* ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝
|
|
9
|
+
*
|
|
10
|
+
* Copyright (c) 2025 Rıza Emre ARAS <r.emrearas@proton.me>
|
|
11
|
+
*
|
|
12
|
+
* This file is part of Claude KVM.
|
|
13
|
+
* Released under the MIT License — see LICENSE for details.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { z } from 'zod';
|
|
17
|
+
|
|
18
|
+
/**
 * Build the single `mouse` tool definition.
 *
 * The display size is baked into both the description (so the model knows
 * the coordinate space) and the schema (`x` is limited to `0..width-1`,
 * `y` to `0..height-1`).
 *
 * The `x`/`y` field descriptions list every action that the action list
 * documents as taking an absolute target: move, hover, click_at and drag.
 *
 * @param {import('../lib/types.js').ScaledDisplay} display - Provides `width` and `height` of the coordinate space
 * @returns {Array<{name: string, description: string, inputSchema: Record<string, import('zod').ZodType>}>}
 */
export function mouseTools(display) {
  const { width, height } = display;

  return [
    {
      name: 'mouse',
      description: [
        `Mouse control. Screen: ${width}x${height}px, origin (0,0) at top-left.`,
        '',
        'Actions:',
        '- move: Move cursor to (x,y).',
        '- hover: Move to (x,y) and wait briefly for tooltips/dropdowns to appear.',
        '- nudge: Offset cursor by (dx,dy) relative to current position.',
        '- click: Left-click at current cursor position.',
        '- click_at: Move to (x,y) and left-click in one step. For large targets.',
        '- right_click: Right-click at current position.',
        '- double_click: Double-click at current position.',
        '- drag: Hold and drag from current position to (x,y).',
        '- scroll: Scroll at current position.',
        '',
        'Tips:',
        '- Prefer click_at over move+click — it is faster (one call instead of two).',
        '- For page scrolling, STRONGLY prefer keyboard pagedown/pageup — much faster and more predictable than mouse scroll.',
        '- Mouse scroll is unreliable for precise amounts, especially on macOS. Use only for small adjustments in scrollable panels.',
        '- No screenshot is needed after every mouse action. Use diff_check to verify changes.',
      ].join('\n'),
      inputSchema: {
        action: z.enum(['move', 'hover', 'nudge', 'click', 'click_at', 'right_click', 'double_click', 'drag', 'scroll']),
        // hover is listed here because the action list above documents it as "Move to (x,y)".
        x: z.number().int().min(0).max(width - 1).describe('X coordinate. For: move, hover, click_at, drag.').optional(),
        y: z.number().int().min(0).max(height - 1).describe('Y coordinate. For: move, hover, click_at, drag.').optional(),
        // Relative offsets are capped at ±20 px per nudge.
        dx: z.number().int().min(-20).max(20).describe('Horizontal offset. For: nudge.').optional(),
        dy: z.number().int().min(-20).max(20).describe('Vertical offset. For: nudge.').optional(),
        direction: z.enum(['up', 'down', 'left', 'right']).describe('For: scroll.').optional(),
        amount: z.number().int().min(1).max(10).describe('Scroll steps. For: scroll.').optional(),
      },
    },
  ];
}
|
package/tools/screen.js
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
/**
|
|
3
|
+
* █████╗ ██████╗ █████╗ ███████╗
|
|
4
|
+
* ██╔══██╗██╔══██╗██╔══██╗██╔════╝
|
|
5
|
+
* ███████║██████╔╝███████║███████╗
|
|
6
|
+
* ██╔══██║██╔══██╗██╔══██║╚════██║
|
|
7
|
+
* ██║ ██║██║ ██║██║ ██║███████║
|
|
8
|
+
* ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝
|
|
9
|
+
*
|
|
10
|
+
* Copyright (c) 2025 Rıza Emre ARAS <r.emrearas@proton.me>
|
|
11
|
+
*
|
|
12
|
+
* This file is part of Claude KVM.
|
|
13
|
+
* Released under the MIT License — see LICENSE for details.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
/**
 * Build the screen-observation tool definitions: `screenshot`,
 * `cursor_crop`, `diff_check` and `set_baseline` (returned in that order).
 *
 * None of these tools takes arguments, so every `inputSchema` is `null`.
 *
 * @returns {Array<{name: string, description: string, inputSchema: null}>}
 */
export function screenTools() {
  // All screen tools share the same shape: a name, newline-joined help text, no input.
  const argless = (name, lines) => ({
    name,
    description: lines.join('\n'),
    inputSchema: null,
  });

  const screenshot = argless('screenshot', [
    'Capture the full screen. Use to observe current state before acting.',
    'IMPORTANT: Do NOT take a screenshot after every action. Only use when you need to:',
    '- See the initial state of the screen',
    '- Verify a complex visual result (page loaded, UI changed significantly)',
    '- Read text or identify UI elements for the next action',
    'For simple verifications, prefer diff_check (text-only, ~5ms) or cursor_crop (small image).',
    'If you need to read text from the screen, take a screenshot and examine the image carefully.',
  ]);

  const cursorCrop = argless('cursor_crop', [
    'Capture a small crop around the current cursor position.',
    'Returns cursor coordinates and a cropped image with a red crosshair marking the cursor.',
    'Use to verify cursor placement on small targets (buttons, links, icons).',
    'Much cheaper than a full screenshot — prefer this for position verification.',
  ]);

  const diffCheck = argless('diff_check', [
    'Lightweight screen change detection. Compares current frame against the baseline.',
    'Returns changeDetected: true/false — no image. Fast (~5ms).',
    'Triggers on any pixel change — maximum sensitivity.',
    'Use after actions to verify if something changed without the cost of a full screenshot.',
    'Updates the baseline after comparison.',
    'Typical workflow: set_baseline → action → diff_check → if changed, screenshot only if needed.',
  ]);

  const setBaseline = argless('set_baseline', [
    'Save current screen as the diff baseline.',
    'Call before an action so diff_check can measure what changed after it.',
  ]);

  return [screenshot, cursorCrop, diffCheck, setBaseline];
}
|
package/tools/ssh.js
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
/**
|
|
3
|
+
* █████╗ ██████╗ █████╗ ███████╗
|
|
4
|
+
* ██╔══██╗██╔══██╗██╔══██╗██╔════╝
|
|
5
|
+
* ███████║██████╔╝███████║███████╗
|
|
6
|
+
* ██╔══██║██╔══██╗██╔══██║╚════██║
|
|
7
|
+
* ██║ ██║██║ ██║██║ ██║███████║
|
|
8
|
+
* ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝
|
|
9
|
+
*
|
|
10
|
+
* Copyright (c) 2025 Rıza Emre ARAS <r.emrearas@proton.me>
|
|
11
|
+
*
|
|
12
|
+
* This file is part of Claude KVM.
|
|
13
|
+
* Released under the MIT License — see LICENSE for details.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { z } from 'zod';
|
|
17
|
+
|
|
18
|
+
/**
 * Build the single `ssh` tool definition.
 *
 * The schema requires a `command` string and accepts an optional `timeout`
 * integer between 1000 and 120000 (milliseconds, per its description).
 *
 * @returns {Array<{name: string, description: string, inputSchema: Record<string, import('zod').ZodType>}>}
 */
export function sshTools() {
  const summary = [
    'Execute a command on the remote machine via SSH.',
    'Returns stdout, stderr, and exit code. Use for any shell operation on the target system.',
  ];

  const capabilities = [
    'Capabilities:',
    '- File system operations: ls, cat, mkdir, cp, mv, rm',
    '- Process management: ps, kill, top',
    '- System info: uname, hostname, df, free',
    '- Package management: apt, brew, etc.',
    '- Network: curl, wget, ping, netstat',
  ];

  const macSpecific = [
    'macOS-specific (when VNC target is macOS):',
    // Template literal so the embedded single and double quotes need no escaping.
    `- AppleScript: osascript -e 'tell application "Finder" to get name of every window'`,
    '- UI validation: osascript to verify UI state after VNC actions',
    '- App control: open -a "Safari", osascript to interact with apps',
    '- System preferences: defaults read, defaults write',
    '- Clipboard: pbcopy, pbpaste (direct system clipboard access)',
    '- Screenshots with metadata: screencapture command',
  ];

  const tips = [
    'Tips:',
    '- On macOS, combine VNC visual actions with SSH osascript validation for reliable automation.',
    '- Use pbpaste via SSH to read clipboard contents after a VNC copy action.',
    '- Use osascript to get precise window positions, button states, and menu items.',
    '- Requires SSH_HOST, SSH_USER, and SSH_PASSWORD or SSH_KEY environment variables.',
  ];

  const permissionDialogs = [
    'macOS permission dialogs:',
    '- First-time osascript access to an app (System Events, Finder, etc.) triggers a macOS permission dialog.',
    '- The SSH command will timeout while waiting for approval. Use VNC screenshot to check for the dialog, click "Allow", then retry the command.',
  ];

  // Sections are separated by one blank line.
  const description = [
    ...summary,
    '',
    ...capabilities,
    '',
    ...macSpecific,
    '',
    ...tips,
    '',
    ...permissionDialogs,
  ].join('\n');

  const inputSchema = {
    command: z.string().describe('The shell command to execute on the remote machine'),
    timeout: z
      .number()
      .int()
      .min(1000)
      .max(120000)
      .optional()
      .describe('Command timeout in milliseconds (default: 30000, max: 120000)'),
  };

  return [{ name: 'ssh', description, inputSchema }];
}
|
package/tools/vlm.js
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
/**
|
|
3
|
+
* █████╗ ██████╗ █████╗ ███████╗
|
|
4
|
+
* ██╔══██╗██╔══██╗██╔══██╗██╔════╝
|
|
5
|
+
* ███████║██████╔╝███████║███████╗
|
|
6
|
+
* ██╔══██║██╔══██╗██╔══██║╚════██║
|
|
7
|
+
* ██║ ██║██║ ██║██║ ██║███████║
|
|
8
|
+
* ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝
|
|
9
|
+
*
|
|
10
|
+
* Copyright (c) 2025 Rıza Emre ARAS <r.emrearas@proton.me>
|
|
11
|
+
*
|
|
12
|
+
* This file is part of Claude KVM.
|
|
13
|
+
* Released under the MIT License — see LICENSE for details.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { z } from 'zod';
|
|
17
|
+
|
|
18
|
+
/**
 * Build the single `vlm_query` tool definition (Vision Language Model).
 * Only registered when CLAUDE_KVM_VLM_TOOL_PATH environment variable is set.
 *
 * The display size is embedded in the description and bounds the crop
 * rectangle: `x` in `0..width-1`, `y` in `0..height-1`, `width` in
 * `1..width`, `height` in `1..height`.
 *
 * @param {import('../lib/types.js').ScaledDisplay} display - Provides `width` and `height` of the screen space
 * @returns {Array<{name: string, description: string, inputSchema: Record<string, import('zod').ZodType>}>}
 */
export function vlmTools(display) {
  // Renamed locally so they are not confused with the `width`/`height` schema fields below.
  const { width: screenWidth, height: screenHeight } = display;

  const intro = [
    'Run an on-device Vision Language Model on a cropped region of the screen.',
    `Coordinates are in screen space (${screenWidth}×${screenHeight}px). Specify a rectangular region with (x, y, width, height).`,
  ];

  const useCases = [
    'Use cases:',
    '- Read text from a specific UI element (OCR)',
    '- Describe what is visible in a region',
    '- Identify icons, colors, or visual states in a small area',
    '- Answer visual questions about a cropped portion of the screen',
  ];

  const tips = [
    'Tips:',
    '- Keep the crop region focused and small for faster/better results.',
    '- The prompt should be a clear question or instruction about the cropped image.',
    '- Runs locally on Apple Silicon — no network latency, but inference takes a few seconds.',
    '- Use verbose=true for timing/debug info on stderr.',
    '- macOS only.',
  ];

  const inputSchema = {
    x: z.number().int().min(0).max(screenWidth - 1).describe('Left edge X coordinate of the crop region'),
    y: z.number().int().min(0).max(screenHeight - 1).describe('Top edge Y coordinate of the crop region'),
    width: z.number().int().min(1).max(screenWidth).describe('Width of the crop region in pixels'),
    height: z.number().int().min(1).max(screenHeight).describe('Height of the crop region in pixels'),
    prompt: z.string().min(1).describe('Question or instruction for the VLM about the cropped image'),
    max_tokens: z.number().int().min(1).max(4096).optional().describe('Maximum tokens in VLM response (default: 1024)'),
    verbose: z.boolean().optional().describe('Enable verbose logging to stderr for timing and debug info'),
  };

  return [
    {
      name: 'vlm_query',
      description: [...intro, '', ...useCases, '', ...tips].join('\n'),
      inputSchema,
    },
  ];
}
|
package/utils/keysym.js
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
/**
|
|
3
|
+
* █████╗ ██████╗ █████╗ ███████╗
|
|
4
|
+
* ██╔══██╗██╔══██╗██╔══██╗██╔════╝
|
|
5
|
+
* ███████║██████╔╝███████║███████╗
|
|
6
|
+
* ██╔══██║██╔══██╗██╔══██║╚════██║
|
|
7
|
+
* ██║ ██║██║ ██║██║ ██║███████║
|
|
8
|
+
* ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝
|
|
9
|
+
*
|
|
10
|
+
* Copyright (c) 2025 Rıza Emre ARAS <r.emrearas@proton.me>
|
|
11
|
+
*
|
|
12
|
+
* This file is part of Claude KVM.
|
|
13
|
+
* Released under the MIT License — see LICENSE for details.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Character / Key Name → X11 Keysym mapping.
|
|
18
|
+
*
|
|
19
|
+
* VNC uses X11 keysyms for keyboard input.
|
|
20
|
+
* ASCII printable chars (0x20-0x7E) map directly to their code point.
|
|
21
|
+
* Special keys use the 0xFF** range.
|
|
22
|
+
*
|
|
23
|
+
* Reference: https://www.x.org/releases/current/doc/xproto/x11protocol.html#keysym_encoding
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
// Lookup table: lowercase key name (or alias) → X11 keysym.
// Several names deliberately share one value (e.g. return/enter/ret,
// meta/cmd/command/super). Keys must stay lowercase.
const SPECIAL_KEYSYMS = {
  // Editing / control characters
  backspace: 0xff08,
  tab: 0xff09,
  return: 0xff0d,
  enter: 0xff0d,
  ret: 0xff0d,
  escape: 0xff1b,
  esc: 0xff1b,
  delete: 0xffff,
  del: 0xffff,

  // Cursor movement
  home: 0xff50,
  left: 0xff51,
  up: 0xff52,
  right: 0xff53,
  down: 0xff54,
  pageup: 0xff55,
  pgup: 0xff55,
  pagedown: 0xff56,
  pgdn: 0xff56,
  end: 0xff57,
  insert: 0xff63,

  // Function keys f1..f12 occupy the contiguous range 0xFFBE..0xFFC9
  ...Object.fromEntries(
    Array.from({ length: 12 }, (_, index) => [`f${index + 1}`, 0xffbe + index]),
  ),

  // Modifiers (the unsuffixed name maps to the left-hand variant's value)
  shift: 0xffe1,
  shift_l: 0xffe1,
  shift_r: 0xffe2,
  ctrl: 0xffe3,
  ctrl_l: 0xffe3,
  ctrl_r: 0xffe4,
  control: 0xffe3,
  alt: 0xffe9,
  alt_l: 0xffe9,
  alt_r: 0xffea,
  option: 0xffe9,
  opt: 0xffe9,
  meta: 0xffe7,
  meta_l: 0xffe7,
  meta_r: 0xffe8,
  cmd: 0xffe7,
  command: 0xffe7,
  super: 0xffe7,
  super_l: 0xffe7,
  super_r: 0xffe8,

  // Miscellaneous
  space: 0x0020,
  spc: 0x0020,
  capslock: 0xffe5,
  numlock: 0xff7f,
  scrolllock: 0xff14,
  printscreen: 0xff61,
  print: 0xff61,
  pause: 0xff13,
  menu: 0xff67,
};
|
|
98
|
+
|
|
99
|
+
// Characters that require Shift on a US keyboard layout
const SHIFT_CHARS = new Set('~!@#$%^&*()_+{}|:"<>?ABCDEFGHIJKLMNOPQRSTUVWXYZ');

/**
 * Convert a printable character to its X11 keysym.
 *
 * Mapping rules, applied to the first code point of `ch`:
 * - ASCII printable (0x20-0x7E): the keysym equals the code point. Uppercase
 *   letters are sent as the lowercase keysym with shift; symbols listed in
 *   SHIFT_CHARS are sent as their own keysym with shift.
 * - Latin-1 printable (0xA0-0xFF): the keysym equals the code point, no shift.
 * - Above 0xFF: Unicode keysym `0x01000000 + code point`. An uppercase
 *   character is sent as its lowercase keysym with shift only when the
 *   lowercase form is a single code point that upper-cases back to the
 *   original (e.g. Ş → shift + ş). Otherwise the character's own keysym is
 *   sent without shift (e.g. İ, whose lowercase form is two code points).
 *
 * @param {string} ch - Single character
 * @returns {import('../lib/types.js').KeysymMapping | null} The mapping, or
 *   `null` for empty/non-string input, control characters (below 0x20,
 *   0x7F-0x9F) and lone surrogates.
 */
export function charToKeysym(ch) {
  const UNICODE_KEYSYM_BASE = 0x01000000;

  if (typeof ch !== 'string' || ch.length === 0) {
    return null;
  }

  // codePointAt (not charCodeAt) so characters outside the BMP are read as
  // one code point instead of being truncated to their high surrogate.
  const code = ch.codePointAt(0);

  // An unpaired surrogate is not a character and has no keysym.
  if (code >= 0xd800 && code <= 0xdfff) {
    return null;
  }

  const first = String.fromCodePoint(code);

  // Standard ASCII printable range (space through ~)
  if (code >= 0x20 && code <= 0x7e) {
    // Uppercase letters: lowercase keysym, shift held down.
    if (code >= 0x41 && code <= 0x5a) {
      return { keysym: code + 0x20, shift: true };
    }
    // Shifted symbols keep their own keysym; only the shift flag differs.
    return { keysym: code, shift: SHIFT_CHARS.has(first) };
  }

  // Latin-1 printable range — keysym equals code point
  if (code >= 0xa0 && code <= 0xff) {
    return { keysym: code, shift: false };
  }

  // Unicode beyond Latin-1 (Turkish ığşİĞŞ, etc.)
  if (code > 0xff) {
    const lower = first.toLowerCase();
    const lowerCode = lower.codePointAt(0);
    const isSingleCodePoint = lower === String.fromCodePoint(lowerCode);

    // The lowercase+shift shortcut is only valid when it round-trips; special
    // casings (İ → "i̇", K (Kelvin) → "k") would otherwise type another character.
    if (lower !== first && isSingleCodePoint && lower.toUpperCase() === first) {
      // A lowercase form inside Latin-1 (Ÿ → ÿ) uses its code point directly;
      // the 0x01000000 offset applies only above 0xFF.
      const lowerKeysym = lowerCode > 0xff ? UNICODE_KEYSYM_BASE + lowerCode : lowerCode;
      return { keysym: lowerKeysym, shift: true };
    }

    return { keysym: UNICODE_KEYSYM_BASE + code, shift: false };
  }

  // Control characters (below 0x20 and 0x7F-0x9F) have no mapping here.
  return null;
}
|
|
148
|
+
|
|
149
|
+
/**
 * Resolve a named key to its X11 keysym.
 *
 * The lookup is case-insensitive and only considers SPECIAL_KEYSYMS' own
 * entries, so names that collide with inherited object properties (such as
 * "constructor" or "__proto__") resolve to `null` rather than leaking a
 * non-numeric value.
 *
 * @param {string} name - Key name (e.g. "enter", "cmd", "f5")
 * @returns {number | null} The keysym, or `null` when the name is unknown or not a string
 */
export function namedKeyToKeysym(name) {
  if (typeof name !== 'string') {
    return null;
  }

  const key = name.toLowerCase();

  // Own-property check: a bare `SPECIAL_KEYSYMS[key]` would also match
  // members inherited from Object.prototype.
  if (!Object.prototype.hasOwnProperty.call(SPECIAL_KEYSYMS, key)) {
    return null;
  }

  return SPECIAL_KEYSYMS[key];
}
|
|
157
|
+
|
|
158
|
+
export { SPECIAL_KEYSYMS };
|