desk-mcp 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
desk_mcp-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: desk-mcp
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Desktop automation MCP — screenshot (calibrated), mouse/keyboard via xdotool, window inspection
|
|
5
|
+
Requires-Dist: fastmcp>=2.0
|
|
6
|
+
Requires-Dist: pillow>=10.0
|
|
7
|
+
Requires-Dist: dbus-python>=1.3
|
|
8
|
+
Requires-Dist: typer>=0.12
|
|
9
|
+
Requires-Dist: rich>=13.0
|
|
10
|
+
Requires-Python: >=3.12
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "desk-mcp"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
description = "Desktop automation MCP — screenshot (calibrated), mouse/keyboard via xdotool, window inspection"
|
|
5
|
+
requires-python = ">=3.12"
|
|
6
|
+
dependencies = [
|
|
7
|
+
"fastmcp>=2.0",
|
|
8
|
+
"Pillow>=10.0",
|
|
9
|
+
"dbus-python>=1.3",
|
|
10
|
+
"typer>=0.12",
|
|
11
|
+
"rich>=13.0",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[project.scripts]
|
|
15
|
+
desk-mcp = "desk_mcp.cli:app"
|
|
16
|
+
|
|
17
|
+
[build-system]
|
|
18
|
+
requires = ["uv_build"]
|
|
19
|
+
build-backend = "uv_build"
|
|
20
|
+
|
|
21
|
+
[tool.uv.sources]
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""desk-mcp CLI — admin and diagnostic commands."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
import subprocess
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
from rich.console import Console
|
|
9
|
+
from rich.table import Table
|
|
10
|
+
from rich import print as rprint
|
|
11
|
+
|
|
12
|
+
# Root Typer application. The name matches the installed console script
# (`desk-mcp`, see [project.scripts]) and the help text; it previously said
# "desktop-mcp", which disagreed with both.
app = typer.Typer(name="desk-mcp", help="desk-mcp — Desktop automation MCP")

# Shared Rich console used by the commands below for rules and tables.
console = Console()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@app.command()
def serve():
    """Start the MCP server (stdio transport for Claude Code)."""
    # Imported inside the command so that loading the CLI module alone does not
    # import desk_mcp.server (which builds the FastMCP instance and creates the
    # screenshot directory at import time).
    import desk_mcp.server as _server

    _server.serve()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@app.command()
def status():
    """Show environment and tool availability."""
    console.rule("[bold blue]desk-mcp status[/]")

    table = Table(show_header=True, header_style="bold cyan")
    table.add_column("Component", style="white")
    table.add_column("Status")
    table.add_column("Details")

    # Session type
    session = os.environ.get("XDG_SESSION_TYPE", "unknown")
    display = os.environ.get("DISPLAY", "")
    wayland = os.environ.get("WAYLAND_DISPLAY", "")
    table.add_row(
        "Session",
        f"[green]{session}[/]",
        f"DISPLAY={display} WAYLAND={wayland}",
    )

    # xdotool
    if shutil.which("xdotool"):
        # A diagnostic must never crash or hang: any launch failure or timeout
        # is reported as "error" in the Details column instead of propagating.
        try:
            r = subprocess.run(
                ["xdotool", "getdisplaygeometry"],
                capture_output=True, text=True, timeout=10,
            )
            res = r.stdout.strip() if r.returncode == 0 else "error"
        except (OSError, subprocess.SubprocessError):
            res = "error"
        table.add_row("xdotool", "[green]✓ installed[/]",
                      f"Resolution: {res}")
    else:
        table.add_row("xdotool", "[red]✗ missing[/]",
                      "sudo apt install xdotool")

    # python3-dbus + python3-gi (XDG portal screenshot backend)
    # The probe runs under the system interpreter because that is the one the
    # screenshot backend uses. If /usr/bin/python3 itself is absent, or the
    # probe hangs, the component is reported as missing rather than raising.
    try:
        r = subprocess.run(
            ["/usr/bin/python3", "-c", "import dbus, dbus.mainloop.glib; from gi.repository import GLib; print('ok')"],
            capture_output=True, text=True, timeout=15,
        )
        portal_ok = r.returncode == 0
    except (OSError, subprocess.SubprocessError):
        portal_ok = False
    table.add_row(
        "python3-gi / dbus",
        "[green]✓ available[/]" if portal_ok else "[red]✗ missing[/]",
        "XDG portal screenshot backend" if portal_ok else "sudo apt install python3-gi python3-dbus",
    )

    # ydotool (Wayland-native input)
    yd = shutil.which("ydotool")
    table.add_row(
        "ydotool",
        "[green]✓ installed[/]" if yd else "[dim]not installed[/]",
        "Wayland-native input (optional)" if not yd else "Available",
    )

    # wmctrl
    wm = shutil.which("wmctrl")
    table.add_row(
        "wmctrl",
        "[green]✓ installed[/]" if wm else "[dim]not installed[/]",
        "Window management (optional)",
    )

    # Pillow
    try:
        import PIL
        table.add_row("Pillow", f"[green]✓ {PIL.__version__}[/]",
                      "Image cropping for calibrated shots")
    except ImportError:
        table.add_row("Pillow", "[red]✗ missing[/]", "pip install Pillow")

    console.print(table)

    rprint("\n[bold]Screenshot backend:[/]")
    rprint("  [green]XDG Desktop Portal[/] via /usr/bin/python3 + python3-gi + dbus-python")
    rprint("  Requires: [cyan]sudo apt install python3-gi python3-dbus[/] (standard on Ubuntu)")
    rprint("\n[bold]Input simulation:[/] xdotool (XWayland — works for most apps)")
    rprint("[dim]For Wayland-native windows: coordinates work, window auto-detect may not.[/]")
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@app.command()
def screenshot(
    output: str = typer.Option("/tmp/k_desktop/test.png", help="Output path"),
    window: str = typer.Option(None, help="Window name to capture"),
):
    """Test screenshot from the CLI."""
    from pathlib import Path

    from desk_mcp.server import _crop_image, _get_window_geometry, _take_screenshot

    dest = Path(output)
    dest.parent.mkdir(parents=True, exist_ok=True)

    console.print(f"[cyan]Taking screenshot → {dest}[/]")
    if not _take_screenshot(dest):
        # The backend is the XDG Desktop Portal driven by the system python3,
        # so point the user at the packages that backend actually needs.
        rprint("[red]Screenshot failed. The XDG portal backend needs:[/] "
               "sudo apt install python3-gi python3-dbus")
        raise typer.Exit(1)

    if not window:
        rprint(f"[green]✓ Full screenshot → {dest}[/]")
        return

    geom = _get_window_geometry(window)
    if not geom:
        rprint(f"[yellow]Window '{window}' not found via xdotool — full screenshot saved.[/]")
        rprint(f"[green]✓ Full screenshot → {dest}[/]")
        return

    cropped = dest.with_stem(dest.stem + "_cropped")
    # _crop_image returns the source path (and writes nothing) when the
    # geometry does not overlap the image. Only delete the full screenshot
    # once the cropped file really exists.
    if _crop_image(dest, geom, cropped) != cropped:
        rprint(f"[yellow]Window '{window}' lies outside the captured screen — full screenshot saved.[/]")
        rprint(f"[green]✓ Full screenshot → {dest}[/]")
        return

    dest.unlink()
    rprint(f"[green]✓ Window '{window}' captured → {cropped}[/]")
    rprint(f"  Geometry: x={geom['x']} y={geom['y']} {geom['w']}×{geom['h']}")
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# Direct execution of this module runs the same Typer app that the
# `desk-mcp` console script points at.
if __name__ == "__main__":
    app()
|
|
@@ -0,0 +1,422 @@
|
|
|
1
|
+
"""
|
|
2
|
+
desk-mcp server — Desktop automation tools for Claude Code.
|
|
3
|
+
|
|
4
|
+
Tools:
|
|
5
|
+
screenshot — Take a calibrated screenshot (full screen, window, or region)
|
|
6
|
+
get_windows — List all visible windows with geometry
|
|
7
|
+
get_screen — Screen info (resolution, session type)
|
|
8
|
+
click — Left/right/middle click at coordinates
|
|
9
|
+
double_click — Double-click at coordinates
right_click — Right-click at coordinates (opens context menu)
|
|
10
|
+
move_mouse — Move mouse without clicking
|
|
11
|
+
type_text — Type text at current focus
|
|
12
|
+
key — Press a key combo (e.g. "ctrl+c", "Return", "super")
|
|
13
|
+
scroll — Scroll at coordinates
|
|
14
|
+
|
|
15
|
+
Screenshot backend:
|
|
16
|
+
XDG Desktop Portal via system python3 + dbus-python + GLib event loop.
|
|
17
|
+
Works natively on GNOME Wayland — no dialog, no user interaction required.
|
|
18
|
+
Requires: /usr/bin/python3 with python3-gi and python3-dbus (standard on Ubuntu).
|
|
19
|
+
|
|
20
|
+
Input simulation: xdotool (XWayland — covers X11 and XWayland apps).
|
|
21
|
+
For pure Wayland-native apps (e.g. WaveTerm): mouse position is still correct,
|
|
22
|
+
keyboard works, but window auto-detection via get_windows() may not see them.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
import os
|
|
26
|
+
import shutil
|
|
27
|
+
import subprocess
|
|
28
|
+
import time
|
|
29
|
+
from datetime import datetime
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
from typing import Optional
|
|
32
|
+
|
|
33
|
+
from fastmcp import FastMCP
|
|
34
|
+
|
|
35
|
+
# ── Init ──────────────────────────────────────────────────────────────────────
|
|
36
|
+
|
|
37
|
+
# Server object on which the @mcp.tool() functions in this module register.
mcp = FastMCP(
    name="desk-mcp",
    # Usage guidance handed to FastMCP as the server's `instructions` text.
    instructions=(
        "Desktop automation MCP. Use screenshot() to see the screen — "
        "pass window_name to auto-crop to a specific window, or region dict "
        "{x,y,w,h} for a precise area. Use get_windows() to discover window "
        "coordinates. Use click/type_text/key for input simulation via xdotool."
    ),
)

# Directory that receives screenshots; override with K_DESKTOP_SHOT_DIR.
# It is created at import time so tools can write into it without checking.
SHOT_DIR = Path(os.getenv("K_DESKTOP_SHOT_DIR", "/tmp/k_desktop"))
SHOT_DIR.mkdir(parents=True, exist_ok=True)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# ── Helpers ───────────────────────────────────────────────────────────────────
|
|
52
|
+
|
|
53
|
+
def _run(cmd: list[str], timeout: int = 10) -> subprocess.CompletedProcess:
    """Run *cmd* (an argv list, no shell) and return the completed process.

    stdout and stderr are captured and decoded as text. The call is bounded
    by *timeout* seconds; the return code is not checked here, so callers
    inspect ``returncode`` themselves.
    """
    run_options = {"capture_output": True, "text": True, "timeout": timeout}
    return subprocess.run(cmd, **run_options)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _ts() -> str:
    """Return a 10-character local-time stamp of the form ``HHMMSS_mmm``.

    ``mmm`` is the millisecond part of the current time, i.e. the first three
    digits of the zero-padded microsecond field.
    """
    now = datetime.now()
    millis = now.microsecond // 1000
    return f"{now:%H%M%S}_{millis:03d}"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _crop_image(src: Path, region: dict, dest: Path) -> Path:
    """Crop the image at *src* to *region* and save the result to *dest*.

    *region* is a dict with keys ``x``, ``y``, ``w`` and ``h``. The rectangle
    is clamped to the image bounds before cropping.

    Returns *dest* when a cropped file was written. If the clamped rectangle
    is empty (the region does not overlap the image), nothing is written and
    *src* is returned instead.
    """
    from PIL import Image as PILImage

    with PILImage.open(src) as image:
        # Clamp each edge of the requested rectangle to the image bounds.
        left = max(0, region["x"])
        top = max(0, region["y"])
        right = min(image.width, region["x"] + region["w"])
        bottom = min(image.height, region["y"] + region["h"])

        if right <= left or bottom <= top:
            return src  # Region is outside image bounds — return uncropped

        image.crop((left, top, right, bottom)).save(dest)
    return dest
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _get_window_geometry(window_name: str) -> Optional[dict]:
    """Find a window by name and return its geometry via xdotool.

    The window is searched by title first (``xdotool search --name``) and, if
    that yields nothing, by class (``--class``). When several IDs match, the
    last one printed is used.

    Returns:
        ``{"x", "y", "w", "h"}`` with integer values, or ``None`` when no
        window matches, when xdotool cannot be run (missing binary, timeout)
        or when its output lacks a usable geometry. Callers treat ``None`` as
        "window not found", so an unavailable xdotool degrades the same way
        instead of raising.
    """
    try:
        found = _run(["xdotool", "search", "--name", window_name])
        if found.returncode != 0 or not found.stdout.strip():
            # Try class-based search
            found = _run(["xdotool", "search", "--class", window_name])
            if found.returncode != 0 or not found.stdout.strip():
                return None
        wid = found.stdout.strip().splitlines()[-1]  # Take last match
        shell_out = _run(["xdotool", "getwindowgeometry", "--shell", wid])
    except (OSError, subprocess.SubprocessError):
        return None
    if shell_out.returncode != 0:
        return None

    # --shell prints KEY=VALUE lines; keep the ones with integer values.
    vals = {}
    for line in shell_out.stdout.splitlines():
        key, sep, value = line.partition("=")
        if not sep:
            continue
        try:
            vals[key.strip()] = int(value.strip())
        except ValueError:
            continue  # Ignore a malformed line instead of failing the lookup

    if all(k in vals for k in ("X", "Y", "WIDTH", "HEIGHT")):
        return {"x": vals["X"], "y": vals["Y"],
                "w": vals["WIDTH"], "h": vals["HEIGHT"]}
    return None
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# Source of a small helper program that _take_screenshot() runs with
# /usr/bin/python3 -c. It asks the XDG Desktop Portal for a non-interactive
# screenshot, waits up to 15 seconds for the Response signal, and prints the
# resulting URI to stdout. On failure or timeout it prints nothing.
_PORTAL_SCRIPT = """\
import sys, dbus, dbus.mainloop.glib
from gi.repository import GLib

dbus.mainloop.glib.DBusGMainLoop(set_as_default=True)
bus = dbus.SessionBus()
loop = GLib.MainLoop()
result = {}

def on_response(response, results, **kwargs):
    if response == 0:
        result['uri'] = str(results.get('uri', ''))
    loop.quit()

def on_timeout():
    loop.quit()
    return False

try:
    portal = bus.get_object('org.freedesktop.portal.Desktop', '/org/freedesktop/portal/desktop')
    portal_iface = dbus.Interface(portal, 'org.freedesktop.portal.Screenshot')
    options = dbus.Dictionary({'interactive': dbus.Boolean(False)}, signature='sv')
    request_path = str(portal_iface.Screenshot('', options))
    request_obj = bus.get_object('org.freedesktop.portal.Desktop', request_path)
    request_iface = dbus.Interface(request_obj, 'org.freedesktop.portal.Request')
    request_iface.connect_to_signal('Response', on_response)
    GLib.timeout_add_seconds(15, on_timeout)
    loop.run()
except Exception:
    pass

print(result.get('uri', ''), end='')
"""
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _file_uri_to_path(uri: str) -> Optional[Path]:
    """Convert a ``file://`` URI into a local Path, or None for anything else.

    Percent-escapes are decoded, so ``file:///tmp/a%20b.png`` becomes
    ``/tmp/a b.png``. A URI with another scheme or an empty path yields None.
    """
    from urllib.parse import unquote, urlparse

    parts = urlparse(uri)
    if parts.scheme != "file" or not parts.path:
        return None
    return Path(unquote(parts.path))


def _take_screenshot(dest: Path) -> bool:
    """Take a screenshot via XDG Desktop Portal (system python3 + dbus + GLib).

    Runs ``_PORTAL_SCRIPT`` under ``/usr/bin/python3``, reads the file URI it
    prints, and copies that file to *dest*.

    Returns:
        True when the image was copied to *dest*. False when the helper could
        not be started, timed out, printed no usable ``file://`` URI, or named
        a file that does not exist.
    """
    try:
        proc = subprocess.run(
            ["/usr/bin/python3", "-c", _PORTAL_SCRIPT],
            capture_output=True, text=True, timeout=20,
        )
    except (OSError, subprocess.SubprocessError):
        # Missing interpreter or a hung helper is a failed screenshot, which
        # callers already handle through the boolean result.
        return False

    # The URI is percent-encoded, so it must be decoded before it can be used
    # as a filesystem path (file names with spaces would otherwise be missed).
    src = _file_uri_to_path(proc.stdout.strip())
    if src is None or not src.is_file():
        return False
    shutil.copy2(src, dest)
    return True
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# ── Tools ─────────────────────────────────────────────────────────────────────
|
|
153
|
+
|
|
154
|
+
@mcp.tool()
def screenshot(
    window_name: Optional[str] = None,
    region: Optional[dict] = None,
) -> dict:
    """
    Take a screenshot and return the image.

    Args:
        window_name: Name (or partial name) of the window to capture.
                     Automatically finds the window and crops to its bounds.
                     Works for XWayland apps (Chromium, TickTick, Bitwarden, etc.).
                     For Wayland-native apps, use region instead.
        region: Explicit crop region: {"x": int, "y": int, "w": int, "h": int}.
                Takes priority over window_name if both provided.

    Returns:
        Dict with "path" (absolute path to PNG file) and "geometry" info.
        Use the Read tool on the returned path to view the image.
        A "warning" entry is added when the full screenshot is returned
        instead of the requested window or area.

    Notes:
        Uses XDG Desktop Portal via /usr/bin/python3 + dbus-python + GLib.
        Works natively on GNOME Wayland. No extra tools needed.
    """
    # Reject an incomplete region before capturing anything, so the caller
    # gets a clear message and no stray full_*.png is left behind.
    if region:
        missing = [k for k in ("x", "y", "w", "h") if k not in region]
        if missing:
            raise ValueError(
                f"region must contain x, y, w and h; missing: {', '.join(missing)}"
            )

    ts = _ts()
    full_path = SHOT_DIR / f"full_{ts}.png"
    final_path = SHOT_DIR / f"shot_{ts}.png"

    if not _take_screenshot(full_path):
        raise RuntimeError(
            "Screenshot failed via XDG Desktop Portal.\n"
            "Check the backend: sudo apt install python3-gi python3-dbus"
        )

    # Resolve crop region
    crop = None
    geom_used = None
    if region:
        crop = region
        geom_used = region
    elif window_name:
        geom = _get_window_geometry(window_name)
        if geom:
            pad = 4  # Small margin around the reported window bounds
            crop = {
                "x": max(0, geom["x"] - pad),
                "y": max(0, geom["y"] - pad),
                "w": geom["w"] + pad * 2,
                "h": geom["h"] + pad * 2,
            }
            geom_used = geom
        # If window not found, return full screenshot (don't error)

    warning = None
    if crop and _crop_image(full_path, crop, final_path) == final_path:
        full_path.unlink(missing_ok=True)
    else:
        # _crop_image writes nothing and returns the source when the area does
        # not overlap the image. Keep the full screenshot in that case rather
        # than deleting it and returning a path that was never created.
        if crop:
            warning = "Requested area is outside the screen — full screenshot returned"
        final_path = full_path

    result = {
        "path": str(final_path),
        "note": "Use the Read tool on 'path' to view this image",
    }
    if geom_used:
        result["geometry"] = geom_used
    if window_name and not geom_used:
        warning = f"Window '{window_name}' not found via xdotool — full screenshot returned"
    if warning:
        result["warning"] = warning
    return result
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
@mcp.tool()
def get_windows() -> list[dict]:
    """
    List all visible windows with their IDs, names, and screen geometry.

    Note: Only shows XWayland-accessible windows. Pure Wayland-native apps
    (e.g. WaveTerm running in native Wayland mode) may not appear here.
    Use screenshot() with a known region for those.

    Returns:
        List of dicts: {id, name, x, y, width, height}
    """
    # NOTE(review): the search is not restricted with --onlyvisible, so the
    # result may include windows that are not currently mapped — confirm intent.
    r = _run(["xdotool", "search", "--name", ""])
    if r.returncode != 0:
        return []

    windows = []
    for wid in r.stdout.strip().splitlines():
        name_r = _run(["xdotool", "getwindowname", wid])
        if name_r.returncode != 0:
            continue
        name = name_r.stdout.strip()
        if not name or name == "mutter guard window":
            continue

        # Geometry is queried only for windows that will be reported, which
        # avoids one xdotool process per skipped window.
        geom_r = _run(["xdotool", "getwindowgeometry", "--shell", wid])
        vals = {}
        for line in geom_r.stdout.splitlines():
            if "=" in line:
                k, v = line.split("=", 1)
                vals[k.strip()] = v.strip()
        # Fields missing from the output (e.g. the query failed) default to 0.
        windows.append({
            "id": int(wid),
            "name": name,
            "x": int(vals.get("X", 0)),
            "y": int(vals.get("Y", 0)),
            "width": int(vals.get("WIDTH", 0)),
            "height": int(vals.get("HEIGHT", 0)),
        })
    return windows
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
@mcp.tool()
def get_screen() -> dict:
    """
    Return screen information: resolution, session type, display.

    Returns:
        {width, height, session_type, display, wayland_display}
    """
    # Width and height stay 0 unless xdotool reports exactly two fields.
    width = height = 0
    geometry = _run(["xdotool", "getdisplaygeometry"])
    if geometry.returncode == 0:
        fields = geometry.stdout.split()
        if len(fields) == 2:
            width, height = (int(field) for field in fields)

    info = {"width": width, "height": height}
    info["session_type"] = os.getenv("XDG_SESSION_TYPE", "unknown")
    info["display"] = os.getenv("DISPLAY", "")
    info["wayland_display"] = os.getenv("WAYLAND_DISPLAY", "")
    # Reported in addition to the documented keys: whether a gnome-screenshot
    # binary is on PATH.
    info["gnome_screenshot_available"] = shutil.which("gnome-screenshot") is not None
    return info
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
@mcp.tool()
def click(x: int, y: int, button: str = "left") -> str:
    """
    Click at screen coordinates.

    Args:
        x: X coordinate in pixels
        y: Y coordinate in pixels
        button: "left" (default), "right", or "middle"

    Returns:
        Confirmation string naming the button that was actually clicked.
    """
    btn_map = {"left": "1", "middle": "2", "right": "3"}
    if button.lower() in btn_map:
        btn = btn_map[button.lower()]
        label = button
    else:
        # An unrecognized name still falls back to a left click, but the
        # confirmation now says so instead of echoing the unknown name.
        btn = "1"
        label = f"left (unknown button {button!r})"
    _run(["xdotool", "mousemove", "--sync", str(x), str(y)])
    _run(["xdotool", "click", btn])
    return f"Clicked {label} at ({x}, {y})"
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
@mcp.tool()
def double_click(x: int, y: int) -> str:
    """
    Double-click at screen coordinates.

    Args:
        x: X coordinate in pixels
        y: Y coordinate in pixels
    """
    target = (str(x), str(y))
    # Move first, then send two button-1 clicks 100 ms apart.
    _run(["xdotool", "mousemove", "--sync", *target])
    _run(["xdotool", "click", "--repeat", "2", "--delay", "100", "1"])
    return "Double-clicked at ({}, {})".format(x, y)
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
@mcp.tool()
def right_click(x: int, y: int) -> str:
    """
    Right-click at screen coordinates (opens context menu).

    Args:
        x: X coordinate in pixels
        y: Y coordinate in pixels
    """
    target = (str(x), str(y))
    # xdotool button 3 is the right mouse button (see btn_map in click()).
    _run(["xdotool", "mousemove", "--sync", *target])
    _run(["xdotool", "click", "3"])
    return "Right-clicked at ({}, {})".format(x, y)
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
@mcp.tool()
def move_mouse(x: int, y: int) -> str:
    """
    Move mouse to coordinates without clicking.

    Args:
        x: X coordinate in pixels
        y: Y coordinate in pixels
    """
    # Unlike the click tools, this move is issued without --sync.
    argv = ["xdotool", "mousemove", str(x), str(y)]
    _run(argv)
    return "Mouse moved to ({}, {})".format(x, y)
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
@mcp.tool()
def type_text(text: str, delay_ms: int = 12) -> str:
    """
    Type text at the current keyboard focus.

    Args:
        text: Text to type
        delay_ms: Delay between keystrokes in ms (default 12 — natural speed)

    Returns:
        Confirmation string.

    Note:
        For special characters or passwords, prefer key() with individual keys.
        For apps running natively on Wayland, focus the window first with a click().
    """
    # Typing time grows with len(text) * delay_ms, so the fixed 10 s default of
    # _run() would kill xdotool partway through long inputs. Budget the base
    # 10 s plus twice the nominal typing time (heuristic headroom).
    typing_ms = len(text) * max(delay_ms, 0)
    timeout = 10 + (2 * typing_ms) // 1000
    # "--" stops option parsing so text starting with "-" is typed literally.
    _run(["xdotool", "type", "--delay", str(delay_ms), "--", text],
         timeout=timeout)
    preview = text[:40] + ("..." if len(text) > 40 else "")
    return f"Typed: {repr(preview)}"
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
@mcp.tool()
def key(combo: str) -> str:
    """
    Press a key or key combination.

    Args:
        combo: Key combo string. Examples:
               "Return"       — Enter key
               "ctrl+c"       — Copy
               "ctrl+v"       — Paste
               "ctrl+shift+t" — New tab (in many apps)
               "super"        — Super/Windows key
               "alt+F4"       — Close window
               "ctrl+alt+t"   — Open terminal (GNOME default)
               "Escape"       — Escape
               "Tab"          — Tab
               "BackSpace"    — Backspace
               "ctrl+a"       — Select all

    Returns:
        Confirmation string.
    """
    # The combo string is handed to xdotool unchanged.
    argv = ["xdotool", "key", "--clearmodifiers", combo]
    _run(argv)
    return "Key pressed: {}".format(combo)
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
@mcp.tool()
def scroll(x: int, y: int, direction: str = "down", clicks: int = 3) -> str:
    """
    Scroll at screen coordinates.

    Args:
        x: X coordinate
        y: Y coordinate
        direction: "up", "down", "left", or "right"
        clicks: Number of scroll ticks (default 3)

    Returns:
        Confirmation string describing the scroll that was actually performed.
    """
    # Scrolling is sent as mouse-button clicks: 4/5 vertical, 6/7 horizontal.
    btn_map = {"up": "4", "down": "5", "left": "6", "right": "7"}
    if direction.lower() in btn_map:
        btn = btn_map[direction.lower()]
        label = direction
    else:
        # An unrecognized direction still scrolls down, but the confirmation
        # now says so instead of echoing the unknown value.
        btn = "5"
        label = f"down (unknown direction {direction!r})"
    ticks = max(0, clicks)  # A negative count performs no scrolling
    _run(["xdotool", "mousemove", "--sync", str(x), str(y)])
    for _ in range(ticks):
        _run(["xdotool", "click", btn])
    return f"Scrolled {label} {ticks}x at ({x}, {y})"
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
# ── Entry point ───────────────────────────────────────────────────────────────
|
|
420
|
+
|
|
421
|
+
def serve():
    """Run the module-level FastMCP server using the stdio transport."""
    transport = "stdio"
    mcp.run(transport=transport)
|