desktop-mcp 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- desktop_mcp-0.1.0/LICENSE +21 -0
- desktop_mcp-0.1.0/PKG-INFO +158 -0
- desktop_mcp-0.1.0/README.md +142 -0
- desktop_mcp-0.1.0/desktop_mcp/__init__.py +0 -0
- desktop_mcp-0.1.0/desktop_mcp/config.py +252 -0
- desktop_mcp-0.1.0/desktop_mcp/groups/__init__.py +0 -0
- desktop_mcp-0.1.0/desktop_mcp/groups/input_tools.py +129 -0
- desktop_mcp-0.1.0/desktop_mcp/groups/observe.py +135 -0
- desktop_mcp-0.1.0/desktop_mcp/groups/record.py +208 -0
- desktop_mcp-0.1.0/desktop_mcp/groups/window.py +88 -0
- desktop_mcp-0.1.0/desktop_mcp/paths.py +19 -0
- desktop_mcp-0.1.0/desktop_mcp/server.py +134 -0
- desktop_mcp-0.1.0/desktop_mcp.egg-info/PKG-INFO +158 -0
- desktop_mcp-0.1.0/desktop_mcp.egg-info/SOURCES.txt +24 -0
- desktop_mcp-0.1.0/desktop_mcp.egg-info/dependency_links.txt +1 -0
- desktop_mcp-0.1.0/desktop_mcp.egg-info/requires.txt +8 -0
- desktop_mcp-0.1.0/desktop_mcp.egg-info/top_level.txt +1 -0
- desktop_mcp-0.1.0/pyproject.toml +30 -0
- desktop_mcp-0.1.0/setup.cfg +4 -0
- desktop_mcp-0.1.0/tests/test_config.py +186 -0
- desktop_mcp-0.1.0/tests/test_input.py +169 -0
- desktop_mcp-0.1.0/tests/test_live_smoke.py +61 -0
- desktop_mcp-0.1.0/tests/test_observe.py +149 -0
- desktop_mcp-0.1.0/tests/test_record.py +230 -0
- desktop_mcp-0.1.0/tests/test_server.py +45 -0
- desktop_mcp-0.1.0/tests/test_window.py +128 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jaimen Bell
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: desktop-mcp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Windows desktop-control MCP server: screenshot, window mgmt, mouse/keyboard input, and screen-recording. Config-gated tool groups (observe/window/input/record) -- input disabled by default. Local-only, single-machine.
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Dist: fastmcp==3.4.2
|
|
9
|
+
Requires-Dist: mss==10.2.0
|
|
10
|
+
Requires-Dist: pyautogui==0.9.54
|
|
11
|
+
Requires-Dist: PyGetWindow==0.0.9
|
|
12
|
+
Requires-Dist: pywin32==312
|
|
13
|
+
Provides-Extra: test
|
|
14
|
+
Requires-Dist: pytest==9.0.3; extra == "test"
|
|
15
|
+
Dynamic: license-file
|
|
16
|
+
|
|
17
|
+
# desktop-mcp
|
|
18
|
+
|
|
19
|
+
Windows desktop-control MCP server: screenshot, window management, mouse/keyboard
|
|
20
|
+
input, and ffmpeg screen-recording -- built to the same standard as
|
|
21
|
+
[mcp-factory](https://github.com/jaimenbell/mcp-factory) and rag-mcp (own
|
|
22
|
+
pyproject, fastmcp server, honest README, real test suite). Config-gated tool
|
|
23
|
+
groups, **input disabled by default**.
|
|
24
|
+
|
|
25
|
+
## Tool groups
|
|
26
|
+
|
|
27
|
+
| Group | Tools | Default state |
|
|
28
|
+
|---|---|---|
|
|
29
|
+
| `observe` | `screenshot`, `list_windows`, `get_active_window`, `window_info` | always on |
|
|
30
|
+
| `window` | `focus_window`, `move_resize_window`, `minimize_window`, `restore_window` | env-gated, off unless `DESKTOP_MCP_ENABLE_WINDOW=1` |
|
|
31
|
+
| `input` | `mouse_move`, `mouse_click`, `mouse_drag`, `mouse_scroll`, `key_press`, `hotkey`, `type_text` | env-gated, **OFF by default** -- requires `DESKTOP_MCP_ENABLE_INPUT=1` |
|
|
32
|
+
| `record` | `record_start`, `record_status`, `record_stop` | env-gated, off unless `DESKTOP_MCP_ENABLE_RECORD=1` |
|
|
33
|
+
|
|
34
|
+
A disabled group returns a structured `policy_refusal` error (never a silent
|
|
35
|
+
no-op, never a crash). Input actions are additionally rate-capped (default 60
|
|
36
|
+
actions/min, tunable via `DESKTOP_MCP_RATE_LIMIT_PER_MIN`) -- exceeding the
|
|
37
|
+
cap returns a structured `rate_limited` error.
|
|
38
|
+
|
|
39
|
+
This is defense-in-depth: harness-level permission prompts are the first
|
|
40
|
+
gate, but the server itself refuses input/window/record actions unless its
|
|
41
|
+
own env explicitly enables them, so a misconfigured or overly-permissive
|
|
42
|
+
harness can't turn on capabilities the operator didn't opt into for this
|
|
43
|
+
process.
|
|
44
|
+
|
|
45
|
+
## Honest-capabilities table
|
|
46
|
+
|
|
47
|
+
Every claim below maps to the file that implements it and the test(s) that
|
|
48
|
+
verify it -- no capability is asserted without a corresponding
|
|
49
|
+
implementation + test.
|
|
50
|
+
|
|
51
|
+
| Claim | Implementation | Verified by |
|
|
52
|
+
|---|---|---|
|
|
53
|
+
| Capture a screenshot (monitor / region / window) as PNG | `desktop_mcp/groups/observe.py::screenshot` | `tests/test_observe.py::TestScreenshot`, live: `tests/test_live_smoke.py::test_live_screenshot_real_png` |
|
|
54
|
+
| Enumerate windows / get active window / look up by title | `desktop_mcp/groups/observe.py` | `tests/test_observe.py::TestListWindows`, `TestGetActiveWindow`, `TestWindowInfo` |
|
|
55
|
+
| Focus / move+resize / minimize / restore a window | `desktop_mcp/groups/window.py` | `tests/test_window.py` |
|
|
56
|
+
| Mouse move/click/drag/scroll, key press/hotkey, type text | `desktop_mcp/groups/input_tools.py` | `tests/test_input.py` (mocked pyautogui only -- see Limitations) |
|
|
57
|
+
| Screen recording via ffmpeg gdigrab, hard duration cap, graceful stop | `desktop_mcp/groups/record.py` | `tests/test_record.py`, live: `tests/test_live_smoke.py::test_live_record_real_mp4` |
|
|
58
|
+
| Input group OFF by default, structured refusal when disabled | `desktop_mcp/config.py::group_enabled`, `gated` | `tests/test_config.py::TestGroupEnabled`, `tests/test_input.py::TestGateDisabledByDefault` |
|
|
59
|
+
| Rate cap on input actions (default 60/min) | `desktop_mcp/config.py::TokenBucket`, `RateLimiterRegistry` | `tests/test_config.py::TestTokenBucket`, `tests/test_input.py::TestRateLimit` |
|
|
60
|
+
| DPI-awareness bootstrap (per-monitor-v2) so mss/pyautogui coords agree on scaled displays | `desktop_mcp/config.py::ensure_dpi_awareness` | `tests/test_config.py::TestDpiAwareness` (idempotency only; visual coord-agreement is not automated -- see Limitations) |
|
|
61
|
+
| Coordinate validation against virtual-desktop bounds before any mouse action | `desktop_mcp/groups/input_tools.py::_validate_point` | `tests/test_input.py` (out-of-bounds cases) |
|
|
62
|
+
| Orphan-guard: a stale recorder from a crashed process gets killed before a new one starts | `desktop_mcp/groups/record.py::_orphan_guard` | `tests/test_record.py::TestRecordStart::test_orphan_guard_kills_stale_recorder` |
|
|
63
|
+
|
|
64
|
+
## Limitations (read before relying on this)
|
|
65
|
+
|
|
66
|
+
- **UIPI (User Interface Privilege Isolation).** A medium-integrity process
|
|
67
|
+
(this server, unless you elevate it) cannot send input to or manipulate
|
|
68
|
+
windows owned by a higher-integrity (elevated/admin) process. Window and
|
|
69
|
+
input tools surface this as a structured `window_action_failed` /
|
|
70
|
+
`input_failed` error naming UIPI, never a silent no-op -- but there is no
|
|
71
|
+
workaround short of running the server elevated, which this project does
|
|
72
|
+
not do or recommend.
|
|
73
|
+
- **DPI scaling.** The server sets per-monitor-v2 DPI awareness at startup so
|
|
74
|
+
`mss` pixel coordinates and `pyautogui` point coordinates should agree on
|
|
75
|
+
scaled displays. This bootstrap is unit-tested for idempotency/no-crash
|
|
76
|
+
only -- actual coordinate agreement on a live multi-DPI multi-monitor setup
|
|
77
|
+
is not covered by automated tests and should be spot-checked if you're
|
|
78
|
+
targeting a non-100%-scaled monitor.
|
|
79
|
+
- **UAC secure desktop.** When Windows switches to the secure desktop (UAC
|
|
80
|
+
elevation prompts, Ctrl+Alt+Del, lock screen), no process running on the
|
|
81
|
+
regular desktop -- including this server -- can see or interact with it.
|
|
82
|
+
Screenshots will show whatever was on the regular desktop before the
|
|
83
|
+
switch; input calls will not reach the secure desktop at all.
|
|
84
|
+
- **Single machine, local only.** No network transport, no remote control.
|
|
85
|
+
The transport is stdio only; the server is spawned by the MCP host on the same machine.
|
|
86
|
+
- **Input group is off by default in this repo's own registration.** See
|
|
87
|
+
`~/.claude.json`'s `desktop-mcp` entry -- `DESKTOP_MCP_ENABLE_INPUT` is not
|
|
88
|
+
set there. Enabling it is a deliberate per-registration operator choice,
|
|
89
|
+
not a code change.
|
|
90
|
+
- **pyautogui failsafe.** `FAILSAFE=True` is intentional: slamming the cursor
|
|
91
|
+
into a screen corner mid-action raises inside pyautogui and aborts the
|
|
92
|
+
call. This can interrupt an in-flight `mouse_drag`. Treated as acceptable
|
|
93
|
+
v1 behavior (see plan's Open questions) -- it's a deliberate human
|
|
94
|
+
kill-switch, not a bug.
|
|
95
|
+
- **No OCR / vision analysis.** Screenshots are raw PNGs; interpreting their
|
|
96
|
+
content is the consumer's job, not this server's.
|
|
97
|
+
- **No clipboard tools.** Credential-adjacent surface, deferred to a v2 with
|
|
98
|
+
its own safety design.
|
|
99
|
+
- **Not registered with the mcp-factory hub.** This ships as a standalone
|
|
100
|
+
repo (own pyproject, own venv-free system-Python312 install), matching the
|
|
101
|
+
rag-mcp model. Hub/registry integration is a v2 candidate.
|
|
102
|
+
|
|
103
|
+
## Env vars
|
|
104
|
+
|
|
105
|
+
| Var | Effect | Default |
|
|
106
|
+
|---|---|---|
|
|
107
|
+
| `DESKTOP_MCP_ENABLE_WINDOW` | enable the `window` tool group | unset (off) |
|
|
108
|
+
| `DESKTOP_MCP_ENABLE_INPUT` | enable the `input` tool group | unset (off) |
|
|
109
|
+
| `DESKTOP_MCP_ENABLE_RECORD` | enable the `record` tool group | unset (off) |
|
|
110
|
+
| `DESKTOP_MCP_RATE_LIMIT_PER_MIN` | input-group rate cap | `60` |
|
|
111
|
+
| `DESKTOP_MCP_SCRATCH_DIR` | where screenshots/recordings/pidfiles are written | `%TEMP%\desktop-mcp-scratch` |
|
|
112
|
+
| `DESKTOP_MCP_LIVE` | `1` to run real-hardware smoke tests (see Testing) | unset (skip) |
|
|
113
|
+
|
|
114
|
+
## Usage examples
|
|
115
|
+
|
|
116
|
+
```jsonc
|
|
117
|
+
// A tool call from the MCP host, illustrative -- not a shell command.
|
|
118
|
+
{"tool": "screenshot", "arguments": {"monitor": 0}}
|
|
119
|
+
// -> {"ok": true, "path": "C:\\Users\\...\\Temp\\desktop-mcp-scratch\\screenshot-....png", "w": 3840, "h": 1080, "monitor": 0}
|
|
120
|
+
|
|
121
|
+
{"tool": "record_start", "arguments": {"fps": 30, "max_duration_s": 30}}
|
|
122
|
+
// -> {"ok": true, "path": "...\\recording-....mp4", "pid": 12345, "fps": 30, "max_duration_s": 30}
|
|
123
|
+
{"tool": "record_stop", "arguments": {}}
|
|
124
|
+
// -> {"ok": true, "path": "...\\recording-....mp4", "bytes": 800560, "duration_s": 3.13}
|
|
125
|
+
|
|
126
|
+
// input group disabled (default):
|
|
127
|
+
{"tool": "mouse_click", "arguments": {"x": 500, "y": 500}}
|
|
128
|
+
// -> {"ok": false, "error": {"type": "policy_refusal", "group": "input", "required_env": "DESKTOP_MCP_ENABLE_INPUT", ...}}
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Testing
|
|
132
|
+
|
|
133
|
+
```
|
|
134
|
+
# unit suite (mocked backends, no real screen/input/recording touched)
|
|
135
|
+
python -m pytest -q
|
|
136
|
+
|
|
137
|
+
# handshake check -- prints every registered tool name
|
|
138
|
+
python scripts/list_tools.py
|
|
139
|
+
|
|
140
|
+
# real-hardware smokes (real screenshot PNG, real ~3s screen recording;
|
|
141
|
+
# never input-injection -- see safety rails above)
|
|
142
|
+
DESKTOP_MCP_LIVE=1 python -m pytest -q -k live_screenshot
|
|
143
|
+
DESKTOP_MCP_LIVE=1 python -m pytest -q -k live_record
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Install
|
|
147
|
+
|
|
148
|
+
```
|
|
149
|
+
pip install -r requirements.txt # or: pip install .
|
|
150
|
+
# deps: fastmcp==3.4.2, mss==10.2.0, pyautogui==0.9.54, PyGetWindow==0.0.9, pywin32==312
|
|
151
|
+
# also requires ffmpeg + ffprobe on PATH for the record group
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
Registered in `~/.claude.json` as `desktop-mcp` (stdio, system Python312,
|
|
155
|
+
`observe`+`window`+`record` groups enabled, `input` group absent from env).
|
|
156
|
+
|
|
157
|
+
<!-- MCP registry ownership marker -->
|
|
158
|
+
mcp-name: io.github.jaimenbell/desktop-mcp
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# desktop-mcp
|
|
2
|
+
|
|
3
|
+
Windows desktop-control MCP server: screenshot, window management, mouse/keyboard
|
|
4
|
+
input, and ffmpeg screen-recording -- built to the same standard as
|
|
5
|
+
[mcp-factory](https://github.com/jaimenbell/mcp-factory) and rag-mcp (own
|
|
6
|
+
pyproject, fastmcp server, honest README, real test suite). Config-gated tool
|
|
7
|
+
groups, **input disabled by default**.
|
|
8
|
+
|
|
9
|
+
## Tool groups
|
|
10
|
+
|
|
11
|
+
| Group | Tools | Default state |
|
|
12
|
+
|---|---|---|
|
|
13
|
+
| `observe` | `screenshot`, `list_windows`, `get_active_window`, `window_info` | always on |
|
|
14
|
+
| `window` | `focus_window`, `move_resize_window`, `minimize_window`, `restore_window` | env-gated, off unless `DESKTOP_MCP_ENABLE_WINDOW=1` |
|
|
15
|
+
| `input` | `mouse_move`, `mouse_click`, `mouse_drag`, `mouse_scroll`, `key_press`, `hotkey`, `type_text` | env-gated, **OFF by default** -- requires `DESKTOP_MCP_ENABLE_INPUT=1` |
|
|
16
|
+
| `record` | `record_start`, `record_status`, `record_stop` | env-gated, off unless `DESKTOP_MCP_ENABLE_RECORD=1` |
|
|
17
|
+
|
|
18
|
+
A disabled group returns a structured `policy_refusal` error (never a silent
|
|
19
|
+
no-op, never a crash). Input actions are additionally rate-capped (default 60
|
|
20
|
+
actions/min, tunable via `DESKTOP_MCP_RATE_LIMIT_PER_MIN`) -- exceeding the
|
|
21
|
+
cap returns a structured `rate_limited` error.
|
|
22
|
+
|
|
23
|
+
This is defense-in-depth: harness-level permission prompts are the first
|
|
24
|
+
gate, but the server itself refuses input/window/record actions unless its
|
|
25
|
+
own env explicitly enables them, so a misconfigured or overly-permissive
|
|
26
|
+
harness can't turn on capabilities the operator didn't opt into for this
|
|
27
|
+
process.
|
|
28
|
+
|
|
29
|
+
## Honest-capabilities table
|
|
30
|
+
|
|
31
|
+
Every claim below maps to the file that implements it and the test(s) that
|
|
32
|
+
verify it -- no capability is asserted without a corresponding
|
|
33
|
+
implementation + test.
|
|
34
|
+
|
|
35
|
+
| Claim | Implementation | Verified by |
|
|
36
|
+
|---|---|---|
|
|
37
|
+
| Capture a screenshot (monitor / region / window) as PNG | `desktop_mcp/groups/observe.py::screenshot` | `tests/test_observe.py::TestScreenshot`, live: `tests/test_live_smoke.py::test_live_screenshot_real_png` |
|
|
38
|
+
| Enumerate windows / get active window / look up by title | `desktop_mcp/groups/observe.py` | `tests/test_observe.py::TestListWindows`, `TestGetActiveWindow`, `TestWindowInfo` |
|
|
39
|
+
| Focus / move+resize / minimize / restore a window | `desktop_mcp/groups/window.py` | `tests/test_window.py` |
|
|
40
|
+
| Mouse move/click/drag/scroll, key press/hotkey, type text | `desktop_mcp/groups/input_tools.py` | `tests/test_input.py` (mocked pyautogui only -- see Limitations) |
|
|
41
|
+
| Screen recording via ffmpeg gdigrab, hard duration cap, graceful stop | `desktop_mcp/groups/record.py` | `tests/test_record.py`, live: `tests/test_live_smoke.py::test_live_record_real_mp4` |
|
|
42
|
+
| Input group OFF by default, structured refusal when disabled | `desktop_mcp/config.py::group_enabled`, `gated` | `tests/test_config.py::TestGroupEnabled`, `tests/test_input.py::TestGateDisabledByDefault` |
|
|
43
|
+
| Rate cap on input actions (default 60/min) | `desktop_mcp/config.py::TokenBucket`, `RateLimiterRegistry` | `tests/test_config.py::TestTokenBucket`, `tests/test_input.py::TestRateLimit` |
|
|
44
|
+
| DPI-awareness bootstrap (per-monitor-v2) so mss/pyautogui coords agree on scaled displays | `desktop_mcp/config.py::ensure_dpi_awareness` | `tests/test_config.py::TestDpiAwareness` (idempotency only; visual coord-agreement is not automated -- see Limitations) |
|
|
45
|
+
| Coordinate validation against virtual-desktop bounds before any mouse action | `desktop_mcp/groups/input_tools.py::_validate_point` | `tests/test_input.py` (out-of-bounds cases) |
|
|
46
|
+
| Orphan-guard: a stale recorder from a crashed process gets killed before a new one starts | `desktop_mcp/groups/record.py::_orphan_guard` | `tests/test_record.py::TestRecordStart::test_orphan_guard_kills_stale_recorder` |
|
|
47
|
+
|
|
48
|
+
## Limitations (read before relying on this)
|
|
49
|
+
|
|
50
|
+
- **UIPI (User Interface Privilege Isolation).** A medium-integrity process
|
|
51
|
+
(this server, unless you elevate it) cannot send input to or manipulate
|
|
52
|
+
windows owned by a higher-integrity (elevated/admin) process. Window and
|
|
53
|
+
input tools surface this as a structured `window_action_failed` /
|
|
54
|
+
`input_failed` error naming UIPI, never a silent no-op -- but there is no
|
|
55
|
+
workaround short of running the server elevated, which this project does
|
|
56
|
+
not do or recommend.
|
|
57
|
+
- **DPI scaling.** The server sets per-monitor-v2 DPI awareness at startup so
|
|
58
|
+
`mss` pixel coordinates and `pyautogui` point coordinates should agree on
|
|
59
|
+
scaled displays. This bootstrap is unit-tested for idempotency/no-crash
|
|
60
|
+
only -- actual coordinate agreement on a live multi-DPI multi-monitor setup
|
|
61
|
+
is not covered by automated tests and should be spot-checked if you're
|
|
62
|
+
targeting a non-100%-scaled monitor.
|
|
63
|
+
- **UAC secure desktop.** When Windows switches to the secure desktop (UAC
|
|
64
|
+
elevation prompts, Ctrl+Alt+Del, lock screen), no process running on the
|
|
65
|
+
regular desktop -- including this server -- can see or interact with it.
|
|
66
|
+
Screenshots will show whatever was on the regular desktop before the
|
|
67
|
+
switch; input calls will not reach the secure desktop at all.
|
|
68
|
+
- **Single machine, local only.** No network transport, no remote control.
|
|
69
|
+
The transport is stdio only; the server is spawned by the MCP host on the same machine.
|
|
70
|
+
- **Input group is off by default in this repo's own registration.** See
|
|
71
|
+
`~/.claude.json`'s `desktop-mcp` entry -- `DESKTOP_MCP_ENABLE_INPUT` is not
|
|
72
|
+
set there. Enabling it is a deliberate per-registration operator choice,
|
|
73
|
+
not a code change.
|
|
74
|
+
- **pyautogui failsafe.** `FAILSAFE=True` is intentional: slamming the cursor
|
|
75
|
+
into a screen corner mid-action raises inside pyautogui and aborts the
|
|
76
|
+
call. This can interrupt an in-flight `mouse_drag`. Treated as acceptable
|
|
77
|
+
v1 behavior (see plan's Open questions) -- it's a deliberate human
|
|
78
|
+
kill-switch, not a bug.
|
|
79
|
+
- **No OCR / vision analysis.** Screenshots are raw PNGs; interpreting their
|
|
80
|
+
content is the consumer's job, not this server's.
|
|
81
|
+
- **No clipboard tools.** Credential-adjacent surface, deferred to a v2 with
|
|
82
|
+
its own safety design.
|
|
83
|
+
- **Not registered with the mcp-factory hub.** This ships as a standalone
|
|
84
|
+
repo (own pyproject, own venv-free system-Python312 install), matching the
|
|
85
|
+
rag-mcp model. Hub/registry integration is a v2 candidate.
|
|
86
|
+
|
|
87
|
+
## Env vars
|
|
88
|
+
|
|
89
|
+
| Var | Effect | Default |
|
|
90
|
+
|---|---|---|
|
|
91
|
+
| `DESKTOP_MCP_ENABLE_WINDOW` | enable the `window` tool group | unset (off) |
|
|
92
|
+
| `DESKTOP_MCP_ENABLE_INPUT` | enable the `input` tool group | unset (off) |
|
|
93
|
+
| `DESKTOP_MCP_ENABLE_RECORD` | enable the `record` tool group | unset (off) |
|
|
94
|
+
| `DESKTOP_MCP_RATE_LIMIT_PER_MIN` | input-group rate cap | `60` |
|
|
95
|
+
| `DESKTOP_MCP_SCRATCH_DIR` | where screenshots/recordings/pidfiles are written | `%TEMP%\desktop-mcp-scratch` |
|
|
96
|
+
| `DESKTOP_MCP_LIVE` | `1` to run real-hardware smoke tests (see Testing) | unset (skip) |
|
|
97
|
+
|
|
98
|
+
## Usage examples
|
|
99
|
+
|
|
100
|
+
```jsonc
|
|
101
|
+
// A tool call from the MCP host, illustrative -- not a shell command.
|
|
102
|
+
{"tool": "screenshot", "arguments": {"monitor": 0}}
|
|
103
|
+
// -> {"ok": true, "path": "C:\\Users\\...\\Temp\\desktop-mcp-scratch\\screenshot-....png", "w": 3840, "h": 1080, "monitor": 0}
|
|
104
|
+
|
|
105
|
+
{"tool": "record_start", "arguments": {"fps": 30, "max_duration_s": 30}}
|
|
106
|
+
// -> {"ok": true, "path": "...\\recording-....mp4", "pid": 12345, "fps": 30, "max_duration_s": 30}
|
|
107
|
+
{"tool": "record_stop", "arguments": {}}
|
|
108
|
+
// -> {"ok": true, "path": "...\\recording-....mp4", "bytes": 800560, "duration_s": 3.13}
|
|
109
|
+
|
|
110
|
+
// input group disabled (default):
|
|
111
|
+
{"tool": "mouse_click", "arguments": {"x": 500, "y": 500}}
|
|
112
|
+
// -> {"ok": false, "error": {"type": "policy_refusal", "group": "input", "required_env": "DESKTOP_MCP_ENABLE_INPUT", ...}}
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Testing
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
# unit suite (mocked backends, no real screen/input/recording touched)
|
|
119
|
+
python -m pytest -q
|
|
120
|
+
|
|
121
|
+
# handshake check -- prints every registered tool name
|
|
122
|
+
python scripts/list_tools.py
|
|
123
|
+
|
|
124
|
+
# real-hardware smokes (real screenshot PNG, real ~3s screen recording;
|
|
125
|
+
# never input-injection -- see safety rails above)
|
|
126
|
+
DESKTOP_MCP_LIVE=1 python -m pytest -q -k live_screenshot
|
|
127
|
+
DESKTOP_MCP_LIVE=1 python -m pytest -q -k live_record
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Install
|
|
131
|
+
|
|
132
|
+
```
|
|
133
|
+
pip install -r requirements.txt # or: pip install .
|
|
134
|
+
# deps: fastmcp==3.4.2, mss==10.2.0, pyautogui==0.9.54, PyGetWindow==0.0.9, pywin32==312
|
|
135
|
+
# also requires ffmpeg + ffprobe on PATH for the record group
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Registered in `~/.claude.json` as `desktop-mcp` (stdio, system Python312,
|
|
139
|
+
`observe`+`window`+`record` groups enabled, `input` group absent from env).
|
|
140
|
+
|
|
141
|
+
<!-- MCP registry ownership marker -->
|
|
142
|
+
mcp-name: io.github.jaimenbell/desktop-mcp
|
|
File without changes
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"""Config/safety layer for desktop-mcp.
|
|
2
|
+
|
|
3
|
+
Tool-group gating + rate limiting + structured refusal errors + DPI-awareness
|
|
4
|
+
bootstrap. This is the server's own defense-in-depth layer: even if a caller
|
|
5
|
+
gets past harness permission prompts, the server itself refuses input/window/
|
|
6
|
+
record actions unless explicitly enabled via env, and caps input-action
|
|
7
|
+
throughput.
|
|
8
|
+
|
|
9
|
+
Groups:
|
|
10
|
+
observe -- screenshot / window-read (always on, no gate)
|
|
11
|
+
window -- focus/move/resize/minimize/restore (env-gated)
|
|
12
|
+
input -- mouse/keyboard (env-gated, OFF by default)
|
|
13
|
+
record -- ffmpeg screen recording (env-gated)
|
|
14
|
+
|
|
15
|
+
Env vars:
|
|
16
|
+
DESKTOP_MCP_ENABLE_INPUT=1 -- enable the input group
|
|
17
|
+
DESKTOP_MCP_ENABLE_WINDOW=1 -- enable the window group
|
|
18
|
+
DESKTOP_MCP_ENABLE_RECORD=1 -- enable the record group
|
|
19
|
+
DESKTOP_MCP_RATE_LIMIT_PER_MIN=<int> -- input-action rate cap (default 60)
|
|
20
|
+
"""
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import os
|
|
24
|
+
import sys
|
|
25
|
+
import time
|
|
26
|
+
from dataclasses import dataclass, field
|
|
27
|
+
from threading import Lock
|
|
28
|
+
|
|
29
|
+
# Input-action cap applied when DESKTOP_MCP_RATE_LIMIT_PER_MIN is unset or unusable.
DEFAULT_RATE_LIMIT_PER_MIN = 60

# Canonical tool-group identifiers.
GROUP_OBSERVE = "observe"
GROUP_WINDOW = "window"
GROUP_INPUT = "input"
GROUP_RECORD = "record"

# Gated group -> environment variable that must be truthy to enable it.
# GROUP_OBSERVE is deliberately absent: it has no gate.
_ENV_GATES = dict(
    (
        (GROUP_WINDOW, "DESKTOP_MCP_ENABLE_WINDOW"),
        (GROUP_INPUT, "DESKTOP_MCP_ENABLE_INPUT"),
        (GROUP_RECORD, "DESKTOP_MCP_ENABLE_RECORD"),
    )
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _env_truthy(name: str) -> bool:
    """Return True when environment variable *name* holds an affirmative value.

    Accepted values are ``1``, ``true``, ``yes`` and ``on``; the comparison
    ignores case and surrounding whitespace. An unset variable is treated
    like an empty string, i.e. false.
    """
    normalized = os.environ.get(name, "").strip().lower()
    return normalized in ("1", "true", "yes", "on")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def group_enabled(group: str) -> bool:
    """Report whether tool group *group* may run in this process.

    The observe group is unconditionally enabled. Any other group is enabled
    only if it has an entry in ``_ENV_GATES`` and that environment variable
    is truthy; a group with no gate entry is reported as disabled.
    """
    if group == GROUP_OBSERVE:
        return True
    gate = _ENV_GATES.get(group)
    return gate is not None and _env_truthy(gate)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def policy_refusal(group: str, tool: str) -> dict:
    """Build the structured error payload returned for a disabled tool group.

    The payload names the refused *tool*, its *group*, and the environment
    variable that would enable the group. For a group without a registered
    gate the variable name is derived as ``DESKTOP_MCP_ENABLE_<GROUP>``.
    """
    required_env = _ENV_GATES.get(group, f"DESKTOP_MCP_ENABLE_{group.upper()}")
    message = (
        f"Tool group '{group}' is disabled. Set {required_env}=1 in the "
        f"server's environment to enable it."
    )
    error = {
        "type": "policy_refusal",
        "message": message,
        "group": group,
        "tool": tool,
        "required_env": required_env,
    }
    return {"ok": False, "error": error}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def rate_limited(group: str, tool: str, limit_per_min: int, retry_after_s: float) -> dict:
    """Build the structured error payload returned when a rate cap is hit.

    ``retry_after_s`` is shown with one decimal in the human-readable message
    and rounded to two decimals in the machine-readable field.
    """
    message = (
        f"Rate limit exceeded for group '{group}': max {limit_per_min} "
        f"actions/min. Retry after ~{retry_after_s:.1f}s."
    )
    error = {
        "type": "rate_limited",
        "message": message,
        "group": group,
        "tool": tool,
        "limit_per_min": limit_per_min,
        "retry_after_s": round(retry_after_s, 2),
    }
    return {"ok": False, "error": error}
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@dataclass
class TokenBucket:
    """Token-bucket rate limiter; the registry keeps one per gated group.

    The bucket holds at most ``limit_per_min`` tokens and is topped up
    continuously at ``limit_per_min / 60`` tokens per second. Token
    consumption is guarded by a single Lock, because MCP tool calls could in
    principle be dispatched concurrently by the host.
    """

    limit_per_min: int
    _tokens: float = field(init=False)
    _last_refill: float = field(init=False)
    _lock: Lock = field(init=False, default_factory=Lock, repr=False)

    def __post_init__(self) -> None:
        # A new bucket starts full, with the refill clock at "now".
        self._last_refill = time.monotonic()
        self._tokens = float(self.limit_per_min)

    def _refill(self) -> None:
        """Credit tokens for the time elapsed since the previous refill.

        The balance is capped at ``limit_per_min``. Callers are expected to
        hold ``self._lock``.
        """
        current = time.monotonic()
        gained = (current - self._last_refill) * (self.limit_per_min / 60.0)
        self._last_refill = current
        self._tokens = min(self.limit_per_min, self._tokens + gained)

    def try_acquire(self) -> tuple[bool, float]:
        """Try to spend one token.

        Returns ``(True, 0.0)`` on success. Otherwise returns
        ``(False, retry_after_s)``, where ``retry_after_s`` is the time until
        one full token is available, or infinity if the refill rate is zero.
        """
        with self._lock:
            self._refill()
            if self._tokens < 1.0:
                refill_rate = self.limit_per_min / 60.0
                if refill_rate > 0:
                    return False, (1.0 - self._tokens) / refill_rate
                return False, float("inf")
            self._tokens -= 1.0
            return True, 0.0
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class RateLimiterRegistry:
    """Holds one TokenBucket per group, keyed by the current configured limit.

    The bucket for a group is recreated whenever the configured
    limit_per_min differs from the one it was built with (e.g. tests that
    monkeypatch the env between calls), so limit changes take effect without
    needing a fresh process.

    Bucket lookup and creation are serialised with a lock. Without it, two
    concurrent callers could each install a fresh, full bucket for the same
    group, and one caller's token debit would be lost.
    """

    def __init__(self) -> None:
        self._buckets: dict[str, TokenBucket] = {}
        self._limits: dict[str, int] = {}
        # Guards the two dicts above; each bucket has its own lock for tokens.
        self._lock = Lock()

    def acquire(self, group: str) -> tuple[bool, float, int]:
        """Try to spend one token from *group*'s bucket.

        Returns ``(allowed, retry_after_s, limit_per_min)``, where the limit
        is the value read from configuration for this call.
        """
        limit = get_rate_limit_per_min()
        with self._lock:
            bucket = self._buckets.get(group)
            if bucket is None or self._limits.get(group) != limit:
                bucket = TokenBucket(limit_per_min=limit)
                self._buckets[group] = bucket
                self._limits[group] = limit
        # Token accounting happens outside the registry lock; the bucket
        # synchronises itself.
        allowed, retry_after = bucket.try_acquire()
        return allowed, retry_after, limit

    def reset(self) -> None:
        """Test helper: clear all buckets so each test starts fresh."""
        with self._lock:
            self._buckets.clear()
            self._limits.clear()
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def get_rate_limit_per_min() -> int:
    """Read the input-action rate cap from ``DESKTOP_MCP_RATE_LIMIT_PER_MIN``.

    Falls back to ``DEFAULT_RATE_LIMIT_PER_MIN`` when the variable is unset,
    empty, not an integer, or not strictly positive.
    """
    raw = os.environ.get("DESKTOP_MCP_RATE_LIMIT_PER_MIN")
    if raw:
        try:
            parsed = int(raw)
        except ValueError:
            # Unparseable input is handled like a non-positive value below.
            parsed = 0
        if parsed > 0:
            return parsed
    return DEFAULT_RATE_LIMIT_PER_MIN
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
# Module-level singleton registry shared by all input-group tools within a
|
|
172
|
+
# process. Tests call `reset()` on this (or construct their own registry) to
|
|
173
|
+
# avoid cross-test bucket state leaking.
|
|
174
|
+
RATE_LIMITER = RateLimiterRegistry()
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
_dpi_bootstrapped = False


def ensure_dpi_awareness() -> None:
    """Set per-monitor-v2 DPI awareness so mss pixel coords line up with
    pyautogui point coords on scaled displays.

    Idempotent: only the first call in a process attempts anything. It is a
    no-op on failure (e.g. non-Windows, or already set by the host process)
    and never raises -- this is a best-effort bootstrap, not a safety
    boundary.

    The preferred API is ``user32.SetProcessDpiAwarenessContext``. If that
    entry point is missing or reports failure, it falls back to
    ``shcore.SetProcessDpiAwareness``.
    """
    global _dpi_bootstrapped
    if _dpi_bootstrapped:
        return
    # Flag first, so a failed attempt is not retried on every later call.
    _dpi_bootstrapped = True
    if sys.platform != "win32":
        return
    try:
        import ctypes

        try:
            # DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2 is the pseudo-handle
            # (DPI_AWARENESS_CONTEXT)-4. Pass it pointer-sized: a bare Python
            # int would be marshalled as a C int. The call returns a BOOL, so
            # stop only when it reports success.
            if ctypes.windll.user32.SetProcessDpiAwarenessContext(ctypes.c_void_p(-4)):
                return
        except (AttributeError, OSError):
            pass
        try:
            # PROCESS_PER_MONITOR_DPI_AWARE = 2
            ctypes.windll.shcore.SetProcessDpiAwareness(2)
        except (AttributeError, OSError):
            pass
    except Exception:  # noqa: BLE001 - best-effort only
        pass
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def check_group(group: str, tool: str) -> dict | None:
    """Policy gate for a single tool call.

    Returns None when *group* is enabled, so the caller proceeds. Otherwise
    returns the structured refusal payload for *tool*.
    """
    if group_enabled(group):
        return None
    return policy_refusal(group, tool)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def check_rate_limit(group: str, tool: str) -> dict | None:
    """Rate-limit gate for a single gated action.

    Tries to take one token for *group* from the shared limiter. Returns None
    when the call is within the cap, so the caller proceeds. Otherwise
    returns the structured ``rate_limited`` payload for *tool*.
    """
    outcome = RATE_LIMITER.acquire(group)
    if outcome[0]:
        return None
    _, retry_after, limit = outcome
    return rate_limited(group, tool, limit, retry_after)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def gated(group: str, *, rate_limited_group: bool = False):
    """Decorator applied directly to group-module functions so the policy
    gate (and, when requested, the rate limiter) is enforced at the source --
    not just in the MCP tool wrapper -- and is unit-testable without spinning
    up the fastmcp server.

    Args:
        group: Tool group the decorated function belongs to.
        rate_limited_group: When true, also apply the group's rate limiter
            to every call that passes the policy gate.

    Returns:
        A decorator. The wrapped function returns the structured refusal or
        rate-limit dict instead of calling the original when a check fails;
        otherwise it returns the original function's result unchanged.

    Order: group gate first (cheaper, and a disabled group shouldn't consume
    rate-limit budget), then rate limit if requested.
    """
    # Local import keeps this block self-contained; it runs only at
    # decoration time, not on each tool call.
    from functools import wraps

    def decorator(fn):
        # wraps() carries over __name__, __doc__, __qualname__, __module__,
        # __annotations__ and __dict__, and sets __wrapped__, so the wrapper
        # still exposes the original function's metadata.
        @wraps(fn)
        def wrapper(*args, **kwargs):
            refusal = check_group(group, fn.__name__)
            if refusal is not None:
                return refusal
            if rate_limited_group:
                limited = check_rate_limit(group, fn.__name__)
                if limited is not None:
                    return limited
            return fn(*args, **kwargs)

        return wrapper

    return decorator
|
|
File without changes
|