@iaforged/context-code 2.3.1 → 2.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/context-bootstrap.js +7 -5
  2. package/dist/src/QueryEngine.js +1 -1
  3. package/dist/src/cli/handlers/auth.js +1 -1
  4. package/dist/src/cli/handlers/modelList.js +1 -1
  5. package/dist/src/cli/structuredIO.js +1 -1
  6. package/dist/src/commands/branch/index.js +1 -1
  7. package/dist/src/commands/login/login.js +1 -1
  8. package/dist/src/commands/profile/index.js +1 -1
  9. package/dist/src/commands/profile/profile.js +1 -1
  10. package/dist/src/commands/provider/index.js +1 -1
  11. package/dist/src/commands/provider/provider.js +1 -1
  12. package/dist/src/components/BaseTextInput.js +1 -1
  13. package/dist/src/components/ConsoleOAuthFlow.js +1 -1
  14. package/dist/src/components/LogoV2/AnimatedClawd.js +1 -1
  15. package/dist/src/components/LogoV2/Clawd.js +1 -1
  16. package/dist/src/components/LogoV2/LogoV2.js +1 -1
  17. package/dist/src/components/LogoV2/Opus1mMergeNotice.js +1 -1
  18. package/dist/src/components/LogoV2/WelcomeV2.js +1 -1
  19. package/dist/src/components/ModelPicker.js +1 -1
  20. package/dist/src/components/PromptInput/PromptInputFooterLeftSide.js +1 -1
  21. package/dist/src/components/SessionTokenFooter.js +1 -0
  22. package/dist/src/components/Spinner.js +1 -1
  23. package/dist/src/components/Stats.js +1 -1
  24. package/dist/src/components/TeleportProgress.js +1 -1
  25. package/dist/src/components/TextInput.js +1 -1
  26. package/dist/src/components/design-system/ThemeProvider.js +1 -1
  27. package/dist/src/components/permissions/AskUserQuestionPermissionRequest/AskUserQuestionPermissionRequest.js +1 -1
  28. package/dist/src/constants/oauth.js +1 -1
  29. package/dist/src/core/providers/providerCore.js +1 -1
  30. package/dist/src/hooks/useTypeahead.js +1 -1
  31. package/dist/src/main.js +1 -1
  32. package/dist/src/query/stopHooks.js +1 -1
  33. package/dist/src/screens/REPL.js +1 -1
  34. package/dist/src/services/PromptSuggestion/promptSuggestion.js +1 -1
  35. package/dist/src/services/analytics/config.js +1 -1
  36. package/dist/src/services/analytics/datadog.js +1 -1
  37. package/dist/src/services/api/openai.js +1 -1
  38. package/dist/src/services/mcp/config.js +1 -1
  39. package/dist/src/services/oauth/auth-code-listener.js +1 -1
  40. package/dist/src/services/oauth/client.js +1 -1
  41. package/dist/src/services/oauth/geminiCli.js +1 -1
  42. package/dist/src/services/tips/tipRegistry.js +1 -1
  43. package/dist/src/services/toolUseSummary/toolUseSummaryGenerator.js +1 -1
  44. package/dist/src/tools/BriefTool/UI.js +1 -1
  45. package/dist/src/utils/auth.js +1 -1
  46. package/dist/src/utils/claudeInChrome/setup.js +1 -1
  47. package/dist/src/utils/computerControlMcp/mcpServer.js +1 -1
  48. package/dist/src/utils/computerControlMcp/server/.gitattributes +18 -0
  49. package/dist/src/utils/computerControlMcp/server/Dockerfile +25 -0
  50. package/dist/src/utils/computerControlMcp/server/LICENSE +21 -0
  51. package/dist/src/utils/computerControlMcp/server/MANIFEST.in +10 -0
  52. package/dist/src/utils/computerControlMcp/server/README.md +193 -0
  53. package/dist/src/utils/computerControlMcp/server/demonstration.gif +0 -0
  54. package/dist/src/utils/computerControlMcp/server/icon.png +0 -0
  55. package/dist/src/utils/computerControlMcp/server/pyproject.toml +52 -0
  56. package/dist/src/utils/computerControlMcp/server/smithery.yaml +13 -0
  57. package/dist/src/utils/computerControlMcp/server/src/README.md +12 -0
  58. package/dist/src/utils/computerControlMcp/server/src/computer_control_mcp/FZYTK.TTF +0 -0
  59. package/dist/src/utils/computerControlMcp/server/src/computer_control_mcp/__init__.py +11 -0
  60. package/dist/src/utils/computerControlMcp/server/src/computer_control_mcp/__main__.py +21 -0
  61. package/dist/src/utils/computerControlMcp/server/src/computer_control_mcp/cli.py +128 -0
  62. package/dist/src/utils/computerControlMcp/server/src/computer_control_mcp/core.py +1008 -0
  63. package/dist/src/utils/computerControlMcp/server/src/computer_control_mcp/gui.py +126 -0
  64. package/dist/src/utils/computerControlMcp/server/src/computer_control_mcp/server.py +15 -0
  65. package/dist/src/utils/computerControlMcp/server/src/computer_control_mcp/test.py +346 -0
  66. package/dist/src/utils/computerControlMcp/server/src/computer_control_mcp/test_image.png +0 -0
  67. package/dist/src/utils/computerControlMcp/server/tests/README.md +22 -0
  68. package/dist/src/utils/computerControlMcp/server/tests/conftest.py +10 -0
  69. package/dist/src/utils/computerControlMcp/server/tests/rapidocr_test.py +21 -0
  70. package/dist/src/utils/computerControlMcp/server/tests/run_cli.py +9 -0
  71. package/dist/src/utils/computerControlMcp/server/tests/run_server.py +15 -0
  72. package/dist/src/utils/computerControlMcp/server/tests/setup.py +16 -0
  73. package/dist/src/utils/computerControlMcp/server/tests/test_computer_control.py +161 -0
  74. package/dist/src/utils/computerControlMcp/server/tests/test_screenshot.py +14 -0
  75. package/dist/src/utils/computerControlMcp/server/tests/test_wgc_env_var.py +42 -0
  76. package/dist/src/utils/computerControlMcp/server/tests/test_wgc_screenshot.py +67 -0
  77. package/dist/src/utils/computerControlMcp/server/uv.lock +4986 -0
  78. package/dist/src/utils/computerControlMcp/setup.js +1 -1
  79. package/dist/src/utils/envUtils.js +1 -1
  80. package/dist/src/utils/git.js +1 -1
  81. package/dist/src/utils/localInstaller.js +1 -1
  82. package/dist/src/utils/logoV2Utils.js +1 -1
  83. package/dist/src/utils/model/configs.js +1 -1
  84. package/dist/src/utils/model/model.js +1 -1
  85. package/dist/src/utils/model/modelAllowlist.js +1 -1
  86. package/dist/src/utils/model/modelOptions.js +1 -1
  87. package/dist/src/utils/model/providerBaseUrls.js +1 -1
  88. package/dist/src/utils/model/providerCatalog.js +1 -1
  89. package/dist/src/utils/model/providerModels.js +1 -1
  90. package/dist/src/utils/model/providerProfiles.js +1 -1
  91. package/dist/src/utils/model/providerProfilesDb.js +1 -1
  92. package/dist/src/utils/model/providers.js +1 -1
  93. package/dist/src/utils/model/validateModel.js +1 -1
  94. package/dist/src/utils/ripgrep.js +1 -1
  95. package/dist/src/utils/sembleMcp/setup.js +1 -1
  96. package/dist/src/utils/theme.js +1 -1
  97. package/dist/src/utils/themes/bootstrap.js +1 -1
  98. package/dist/src/utils/themes/opencodeMapper.js +1 -1
  99. package/dist/webapp/chunk-VAB2VXFI.js +1 -1
  100. package/dist/webapp/main-MTQLKGXD.js +1 -1
  101. package/dist/webapp/ngsw.json +1 -1
  102. package/dist/webapp/polyfills-7R4CRVNH.js +1 -1
  103. package/package.json +1 -1
@@ -0,0 +1,1008 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Computer Control MCP - Core Implementation
4
+ A compact ModelContextProtocol server that provides computer control capabilities
5
+ using PyAutoGUI for mouse/keyboard control.
6
+ """
7
+
8
+ import json
9
+ import shutil
10
+ import sys
11
+ import os
12
+ from typing import Dict, Any, List, Optional, Tuple
13
+ from io import BytesIO
14
+ import re
15
+ import asyncio
16
+ import uuid
17
+ import datetime
18
+ from pathlib import Path
19
+ import tempfile
20
+ from typing import Union
21
+ import threading
22
+
23
+ # --- Auto-install dependencies if needed ---
24
+ import pyautogui
25
+ from mcp.server.fastmcp import FastMCP, Image
26
+ import mss
27
+ from PIL import Image as PILImage
28
+
29
+ try:
30
+ import pywinctl as gw
31
+ except (NotImplementedError, ImportError):
32
+ import pygetwindow as gw
33
+ from fuzzywuzzy import fuzz, process
34
+
35
+ import cv2
36
+ from rapidocr import RapidOCR
37
+
38
+ from pydantic import BaseModel
39
+
40
+ BaseModel.model_config = {"arbitrary_types_allowed": True}
41
+
42
+ engine = RapidOCR()
43
+
44
+
45
+ DEBUG = True # Set to False in production
46
+ RELOAD_ENABLED = True # Set to False to disable auto-reload
47
+
48
+ # Create FastMCP server instance at module level
49
+ mcp = FastMCP("ComputerControlMCP")
50
+
51
+
52
+ # Try to import Windows Graphics Capture API
53
+ try:
54
+ from windows_capture import WindowsCapture, Frame, InternalCaptureControl
55
+ WGC_AVAILABLE = True
56
+ except ImportError:
57
+ WGC_AVAILABLE = False
58
+
59
+
60
+ # Determine mode automatically
61
+ IS_DEVELOPMENT = os.getenv("ENV") == "development"
62
+
63
+
64
def log(message: str) -> None:
    """Write a tagged diagnostic line; never raises.

    When ``IS_DEVELOPMENT`` is true the line is written to stderr with a
    ``[DEV]`` tag, otherwise to stdout with a ``[PROD]`` tag.

    If the target stream cannot encode the text (for example a legacy
    Windows code page), the line is re-encoded with the stream's own
    encoding and unencodable characters are emitted as backslash escapes.
    Any other printing failure falls back to ``repr(message)``; if that
    also fails the message is dropped silently.

    Args:
        message: Text to log.
    """
    if IS_DEVELOPMENT:
        tag, stream = "[DEV]", sys.stderr
    else:
        # NOTE(review): stdout is kept for backward compatibility. If this
        # server is run over a stdio transport, log lines on stdout could
        # interleave with protocol traffic -- confirm before relying on it.
        tag, stream = "[PROD]", sys.stdout

    try:
        print(f"{tag} {message}", file=stream)
    except UnicodeEncodeError:
        # Round-tripping through UTF-8 would leave the text unchanged, so
        # sanitise against the encoding the stream actually uses.
        try:
            encoding = getattr(stream, "encoding", None) or "utf-8"
            raw = f"{tag} {message}".encode(encoding, errors="backslashreplace")
            print(raw.decode(encoding, errors="replace"), file=stream)
        except Exception:
            # Logging is best-effort; never crash the caller.
            pass
    except Exception:
        # Fallback for any other printing error: repr() escapes special
        # characters.
        try:
            print(f"{tag} {message!r}", file=stream)
        except Exception:
            pass
96
+
97
+
98
def get_downloads_dir() -> Path:
    """Return the directory used for saving screenshots.

    Resolution order:
      1. ``COMPUTER_CONTROL_MCP_SCREENSHOT_DIR`` if it names an existing
         directory (a warning is logged when it is set but unusable).
      2. On Windows, the Downloads folder recorded in the per-user
         ``Shell Folders`` registry key.
      3. ``~/Downloads`` (non-Windows systems, or when the registry lookup
         fails).

    Returns:
        Path of the directory; step 3 does not check that it exists.
    """
    custom_dir = os.getenv("COMPUTER_CONTROL_MCP_SCREENSHOT_DIR")
    if custom_dir:
        custom_path = Path(custom_dir)
        if custom_path.is_dir():
            return custom_path
        log(f"Warning: COMPUTER_CONTROL_MCP_SCREENSHOT_DIR path '{custom_dir}' does not exist or is not a directory. Falling back to default.")

    fallback = Path.home() / "Downloads"
    if os.name != "nt":  # macOS, Linux, etc.
        return fallback

    sub_key = r"SOFTWARE\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
    downloads_guid = "{374DE290-123F-4565-9164-39C4925E467B}"
    try:
        import winreg

        with winreg.OpenKey(winreg.HKEY_CURRENT_USER, sub_key) as key:
            downloads_dir = winreg.QueryValueEx(key, downloads_guid)[0]
    except (ImportError, OSError) as exc:
        # A missing key/value raises OSError; do not let that break
        # screenshot saving when a sensible default exists.
        log(f"Warning: could not read Downloads folder from registry ({exc}). Falling back to {fallback}.")
        return fallback
    return Path(downloads_dir)
124
+
125
+
126
+ def _should_use_wgc_by_default(window_title: str) -> bool:
127
+ """Check if WGC should be used for a window based on environment variable patterns.
128
+
129
+ Checks the COMPUTER_CONTROL_MCP_WGC_PATTERNS environment variable, which should
130
+ contain comma-separated patterns. If any pattern matches the window title,
131
+ WGC will be used by default.
132
+
133
+ Args:
134
+ window_title: Title of the window to check
135
+
136
+ Returns:
137
+ True if WGC should be used by default for this window, False otherwise
138
+ """
139
+ # Get patterns from environment variable
140
+ patterns_str = os.getenv("COMPUTER_CONTROL_MCP_WGC_PATTERNS")
141
+ if not patterns_str:
142
+ return False
143
+
144
+ # Split patterns by comma and trim whitespace
145
+ patterns = [pattern.strip().lower() for pattern in patterns_str.split(",") if pattern.strip()]
146
+
147
+ # Convert window title to lowercase for case-insensitive matching
148
+ title_lower = window_title.lower()
149
+
150
+ # Check if any pattern matches
151
+ for pattern in patterns:
152
+ if pattern in title_lower:
153
+ log(f"Window '{window_title}' matches WGC pattern: {pattern}")
154
+ return True
155
+
156
+ return False
157
+
158
+
159
def _mss_screenshot(region=None):
    """Grab the screen (or a rectangle of it) with mss as a PIL image.

    Args:
        region: Optional ``(left, top, width, height)`` tuple. When omitted,
            ``sct.monitors[0]`` is grabbed (all monitors combined).

    Returns:
        PIL Image object in RGB mode.
    """
    with mss.mss() as sct:
        if region is not None:
            left, top, width, height = region
            grab_area = {"left": left, "top": top, "width": width, "height": height}
        else:
            grab_area = sct.monitors[0]  # All monitors combined

        shot = sct.grab(grab_area)
        # mss delivers BGRA bytes; the raw "BGRX" decoder drops the 4th byte.
        return PILImage.frombytes("RGB", shot.size, shot.bgra, "raw", "BGRX")
187
+
188
+
189
def _wgc_screenshot(window_title: str) -> Optional[Tuple[bytes, int, int]]:
    """Capture a window using the Windows Graphics Capture API.

    The capture runs in a daemon thread; the first frame that arrives is
    written to a temporary PNG, read back, and the capture is stopped. The
    temporary file is always removed, including when saving or reading the
    frame fails.

    Args:
        window_title: Title of the window to capture.

    Returns:
        Tuple of ``(png_bytes, width, height)``, or None when WGC is not
        available, the capture errors out, or no frame arrives within
        5 seconds.
    """
    if not WGC_AVAILABLE:
        log("Windows Graphics Capture API not available")
        return None

    # Shared between this thread and the capture callbacks.
    captured_frame = {"data": None, "width": 0, "height": 0, "error": None}
    capture_event = threading.Event()

    try:
        capture = WindowsCapture(
            cursor_capture=False,
            draw_border=False,
            monitor_index=None,
            window_name=window_title,
        )

        @capture.event
        def on_frame_arrived(frame: Frame, capture_control: InternalCaptureControl):
            tmp_path = None
            try:
                # Reserve a temp file name, let the frame save itself there,
                # then read the PNG back.
                with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                    tmp_path = tmp.name

                frame.save_as_image(tmp_path)

                with open(tmp_path, "rb") as f:
                    captured_frame["data"] = f.read()

                # Get dimensions from the saved image
                with PILImage.open(tmp_path) as img:
                    captured_frame["width"] = img.width
                    captured_frame["height"] = img.height
            except Exception as e:
                captured_frame["error"] = str(e)
            finally:
                # Clean up even on failure so temp files do not accumulate.
                if tmp_path is not None:
                    try:
                        os.unlink(tmp_path)
                    except OSError:
                        pass
                capture_control.stop()
                capture_event.set()

        @capture.event
        def on_closed():
            capture_event.set()

        def run_capture():
            try:
                capture.start()
            except Exception as e:
                captured_frame["error"] = str(e)
                capture_event.set()

        thread = threading.Thread(target=run_capture, daemon=True)
        thread.start()

        # Wait for frame (with timeout)
        if not capture_event.wait(timeout=5.0):
            captured_frame["error"] = "Capture timed out"

        if captured_frame["error"]:
            log(f"WGC capture error: {captured_frame['error']}")
            return None

        if captured_frame["data"] is None:
            log("No frame captured with WGC")
            return None

        return captured_frame["data"], captured_frame["width"], captured_frame["height"]

    except Exception as e:
        log(f"WGC capture failed: {e}")
        return None
269
+
270
+
271
def save_image_to_downloads(
    image, prefix: str = "screenshot", directory: Optional[Path] = None
) -> Tuple[str, bytes]:
    """Save an image as a uniquely named PNG and return its path and bytes.

    The file name is ``{prefix}_{YYYYmmdd_HHMMSS}_{8 hex chars}.png``.

    Args:
        image: Object with a ``save`` method (PIL Image) or a ``data``
            attribute holding the encoded bytes (MCP Image).
        prefix: Prefix for the filename (default: 'screenshot').
        directory: Directory to save into; defaults to
            ``get_downloads_dir()``. It is created if missing.

    Returns:
        Tuple of (absolute_path, image_data_bytes).

    Raises:
        TypeError: If ``image`` has neither ``save`` nor ``data``.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    unique_id = str(uuid.uuid4())[:8]
    filename = f"{prefix}_{timestamp}_{unique_id}.png"

    # Obtain the bytes first so an unsupported type fails before any
    # filesystem change is made.
    if hasattr(image, "save"):  # PIL Image
        buffer = BytesIO()
        image.save(buffer, format="PNG")
        img_bytes = buffer.getvalue()
    elif hasattr(image, "data"):  # MCP Image
        img_bytes = image.data
    else:
        raise TypeError("Unsupported image type")

    # Path() also accepts a plain string directory.
    target_dir = Path(directory) if directory else get_downloads_dir()
    target_dir.mkdir(parents=True, exist_ok=True)
    filepath = target_dir / filename

    # Encode once and write those exact bytes, so the file on disk and the
    # returned data cannot differ.
    with open(filepath, "wb") as f:
        f.write(img_bytes)

    log(f"Saved image to {filepath}")
    return str(filepath.absolute()), img_bytes
309
+
310
+
311
+ def _find_matching_window(
312
+ windows: any,
313
+ title_pattern: str = None,
314
+ use_regex: bool = False,
315
+ threshold: int = 10,
316
+ ) -> Optional[Dict[str, Any]]:
317
+ """Helper function to find a matching window based on title pattern.
318
+
319
+ Args:
320
+ windows: List of window dictionaries
321
+ title_pattern: Pattern to match window title
322
+ use_regex: If True, treat the pattern as a regex, otherwise use fuzzy matching
323
+ threshold: Minimum score (0-100) required for a fuzzy match
324
+
325
+ Returns:
326
+ The best matching window or None if no match found
327
+ """
328
+ if not title_pattern:
329
+ log("No title pattern provided, returning None")
330
+ return None
331
+
332
+ # For regex matching
333
+ if use_regex:
334
+ for window in windows:
335
+ if re.search(title_pattern, window["title"], re.IGNORECASE):
336
+ log(f"Regex match found: {window['title']}")
337
+ return window
338
+ return None
339
+
340
+ # For fuzzy matching using fuzzywuzzy
341
+ # Extract all window titles
342
+ window_titles = [window["title"] for window in windows]
343
+
344
+ # Use process.extractOne to find the best match
345
+ best_match_title, score = process.extractOne(
346
+ title_pattern, window_titles, scorer=fuzz.partial_ratio
347
+ )
348
+ log(f"Best fuzzy match: '{best_match_title}' with score {score}")
349
+
350
+ # Only return if the score is above the threshold
351
+ if score >= threshold:
352
+ # Find the window with the matching title
353
+ for window in windows:
354
+ if window["title"] == best_match_title:
355
+ return window
356
+
357
+ return None
358
+
359
+
360
+ # --- MCP Function Handlers ---
361
+
362
+
363
@mcp.tool()
def click_screen(x: int, y: int) -> str:
    """Click at the specified screen coordinates."""
    # Failures are reported in the returned text rather than raised.
    try:
        pyautogui.click(x=x, y=y)
    except Exception as exc:
        return f"Error clicking at coordinates ({x}, {y}): {str(exc)}"
    else:
        return f"Successfully clicked at coordinates ({x}, {y})"
371
+
372
+
373
@mcp.tool()
def get_screen_size() -> Dict[str, Any]:
    """Get the current screen resolution."""
    # On success the dict has width/height/message; on failure it has
    # error/message instead.
    try:
        width, height = pyautogui.size()
    except Exception as exc:
        detail = str(exc)
        return {"error": detail, "message": f"Error getting screen size: {detail}"}

    summary = f"Screen size: {width}x{height}"
    return {"width": width, "height": height, "message": summary}
385
+
386
+
387
@mcp.tool()
def type_text(text: str) -> str:
    """Type the specified text at the current cursor position."""
    # NOTE(review): pyautogui.typewrite presumably only handles characters
    # it can map to key presses -- verify behavior for non-ASCII text.
    try:
        pyautogui.typewrite(text)
    except Exception as exc:
        return f"Error typing text: {str(exc)}"
    else:
        return f"Successfully typed text: {text}"
395
+
396
+
397
@mcp.tool()
def take_screenshot(
    title_pattern: str = None,
    use_regex: bool = False,
    threshold: int = 10,
    scale_percent_for_ocr: int = None,
    save_to_downloads: bool = False,
    use_wgc: bool = False,
) -> Image:
    """
    Get screenshot Image as MCP Image object. If no title pattern is provided, get screenshot of entire screen and all text on the screen.

    Args:
        title_pattern: Pattern to match window title, if None, take screenshot of entire screen
        use_regex: If True, treat the pattern as a regex, otherwise best match with fuzzy matching
        threshold: Minimum score (0-100) required for a fuzzy match
        scale_percent_for_ocr: Percentage to scale the image down before processing, you wont need this most of the time unless your pc is extremely old or slow
        save_to_downloads: If True, save the screenshot to the downloads directory and return the absolute path
        use_wgc: If True, use Windows Graphics Capture API for window capture (recommended for GPU-accelerated windows)

    Returns:
        Returns a single screenshot as MCP Image object. "content type image not supported" means preview isnt supported but Image object is there and returned successfully.
    """
    # NOTE(review): the docstring above is left unchanged in case the tool
    # decorator publishes it as the tool description -- confirm.
    # `scale_percent_for_ocr` is accepted for signature compatibility only;
    # this function does not use it.
    # On any failure a text description (with stack trace) is returned
    # instead of an Image.

    def force_activate(target):
        """Force a window to the foreground on Windows."""
        try:
            import ctypes
            import time

            hwnd = target._hWnd  # pywinctl window handle

            # Restore if minimized
            if target.isMinimized:
                target.restore()
                time.sleep(0.1)

            # Bring to top and set foreground
            ctypes.windll.user32.SetForegroundWindow(hwnd)
            ctypes.windll.user32.BringWindowToTop(hwnd)
            target.activate()  # fallback
            time.sleep(0.3)  # wait for OS to update
        except Exception as e:
            print(f"Warning: Could not force window: {e}", file=sys.stderr)

    def focus(target):
        """Bring `target` to the foreground using the platform's method."""
        if sys.platform == "win32":
            force_activate(target)
        else:
            target.activate()

    def capture_with_mss(target):
        """Focus `target` and grab its on-screen rectangle with mss."""
        focus(target)
        pyautogui.sleep(0.5)  # Give the OS time to focus the window

        screen_width, screen_height = pyautogui.size()
        return _mss_screenshot(
            region=(
                max(target.left, 0),
                max(target.top, 0),
                min(target.width, screen_width),
                min(target.height, screen_height),
            )
        )

    try:
        # Only windows with titles can be matched.
        windows = [
            {"title": candidate.title, "window_obj": candidate}
            for candidate in gw.getAllWindows()
            if candidate.title
        ]
        log(f"Found {len(windows)} windows")

        match = _find_matching_window(windows, title_pattern, use_regex, threshold)
        window = match["window_obj"] if match else None

        if not window:
            log("No matching window found, taking screenshot of entire screen")
            screenshot = _mss_screenshot()
        else:
            try:
                # Re-fetch window handle to ensure it's valid
                window = gw.getWindowsWithTitle(window.title)[0]
                current_active_window = gw.getActiveWindow()
                log(f"Taking screenshot of window: {window.title}")

                # WGC is used when explicitly requested or when the title
                # matches the environment-configured patterns.
                should_use_wgc = use_wgc or _should_use_wgc_by_default(window.title)

                screenshot = None
                if should_use_wgc and WGC_AVAILABLE:
                    log("Attempting WGC capture")
                    wgc_result = _wgc_screenshot(window.title)
                    if wgc_result:
                        image_bytes, width, height = wgc_result
                        screenshot = PILImage.open(BytesIO(image_bytes))
                        log(f"WGC capture successful: {width}x{height}")
                    else:
                        log("WGC capture failed, falling back to MSS")

                if screenshot is None:
                    screenshot = capture_with_mss(window)

                # Restore previously active window
                if current_active_window and current_active_window != window:
                    try:
                        focus(current_active_window)
                        pyautogui.sleep(0.2)
                    except Exception as e:
                        log(f"Error restoring previous window: {str(e)}")
            except Exception as e:
                log(f"Error taking screenshot of window: {str(e)}")
                screenshot = _mss_screenshot()  # fallback to full screen

        # Stage the PNG in a temp directory, optionally copy it to the
        # downloads directory, then remove the temp directory so repeated
        # calls do not accumulate files.
        temp_dir = Path(tempfile.mkdtemp())
        try:
            filepath, png_bytes = save_image_to_downloads(
                screenshot, prefix="screenshot", directory=temp_dir
            )
            if save_to_downloads:
                log("Copying screenshot from temp to downloads")
                shutil.copy(filepath, get_downloads_dir())
        finally:
            shutil.rmtree(temp_dir, ignore_errors=True)

        # Built from bytes because the staged file no longer exists.
        return Image(data=png_bytes, format="png")

    except Exception as e:
        log(f"Error in screenshot or getting UI elements: {str(e)}")
        import traceback

        stack_trace = traceback.format_exc()
        log(f"Stack trace:\n{stack_trace}")
        return f"Error in screenshot or getting UI elements: {str(e)}\nStack trace:\n{stack_trace}"
559
+
560
+
561
def is_low_spec_pc() -> bool:
    """Report whether this machine looks low-spec.

    Returns:
        True when psutil reports fewer than 4 physical cores or less than
        8 GiB of RAM. False when psutil is unavailable or the queries fail.
        An undetermined core count (None) is not treated as low, so the
        RAM check alone decides in that case.
    """
    try:
        import psutil

        physical_cores = psutil.cpu_count(logical=False)
        total_ram = psutil.virtual_memory().total
    except Exception:
        # Fallback if psutil not available or info unavailable
        return False

    cpu_low = physical_cores is not None and physical_cores < 4
    ram_low = total_ram < 8 * 1024**3
    return cpu_low or ram_low
571
+
572
+
573
+ def _safe_format_ocr_results(results: List[Tuple]) -> str:
574
+ """Safely format OCR results for logging, handling Unicode characters.
575
+
576
+ Args:
577
+ results: List of OCR results tuples ([boxes], text, confidence)
578
+
579
+ Returns:
580
+ Safely formatted string representation of the results
581
+ """
582
+ try:
583
+ # Try normal formatting first
584
+ return str(results)
585
+ except UnicodeEncodeError:
586
+ # If that fails, create a safe representation
587
+ safe_items = []
588
+ for item in results:
589
+ # Handle each component of the tuple
590
+ boxes, text, confidence = item
591
+ # Ensure text is safe for printing
592
+ try:
593
+ safe_text = str(text)
594
+ safe_text.encode('utf-8').decode(sys.stdout.encoding or 'utf-8')
595
+ except (UnicodeEncodeError, UnicodeDecodeError):
596
+ # Replace problematic characters
597
+ safe_text = text.encode('utf-8', errors='replace').decode('utf-8')
598
+
599
+ safe_items.append((boxes, safe_text, confidence))
600
+
601
+ return str(safe_items)
602
+ except Exception:
603
+ # Ultimate fallback
604
+ return f"<OCR results with {len(results)} items>"
605
+
606
+
607
@mcp.tool()
def take_screenshot_with_ocr(
    title_pattern: str = None,
    use_regex: bool = False,
    threshold: int = 10,
    scale_percent_for_ocr: int = None,
    save_to_downloads: bool = False,
) -> str:
    """
    Get OCR text from screenshot with absolute coordinates as JSON string of List[Tuple[List[List[int]], str, float]] (returned after adding the window offset from true (0, 0) of screen to the OCR coordinates, so clicking is on-point. Recommended to click in the middle of OCR Box) and using confidence from window with the specified title pattern. If no title pattern is provided, get screenshot of entire screen and all text on the screen. Know that OCR takes around 20 seconds on an mid-spec pc at 1080p resolution.

    Args:
        title_pattern: Pattern to match window title, if None, take screenshot of entire screen
        use_regex: If True, treat the pattern as a regex, otherwise best match with fuzzy matching
        threshold: Minimum score (0-100) required for a fuzzy match
        scale_percent_for_ocr: Percentage to scale the image down before processing, you wont need this most of the time unless your pc is extremely old or slow
        save_to_downloads: If True, save the screenshot to the downloads directory and return the absolute path

    Returns:
        Returns a list of UI elements as List[Tuple[List[List[int]], str, float]] where each tuple is [[4 corners of box], text, confidence], "content type image not supported" means preview isnt supported but Image object is there.
    """
    # NOTE(review): the docstring above is left unchanged in case the tool
    # decorator publishes it as the tool description -- confirm.
    # Box corners are mapped back to screen coordinates (undoing the OCR
    # downscale and adding the capture offset), as the docstring promises.
    # On failure a text description is returned instead of OCR results.
    try:
        # Only windows with titles can be matched.
        windows = [
            {"title": candidate.title, "window_obj": candidate}
            for candidate in gw.getAllWindows()
            if candidate.title
        ]
        log(f"Found {len(windows)} windows")

        match = _find_matching_window(windows, title_pattern, use_regex, threshold)
        window = match["window_obj"] if match else None

        # Screen position of the capture's top-left corner.
        offset_x, offset_y = 0, 0

        if not window:
            log("No matching window found, taking screenshot of entire screen")
            screenshot = _mss_screenshot()
            # The full-screen grab covers mss monitor 0 (all monitors
            # combined); use its origin as the offset.
            with mss.mss() as sct:
                offset_x = sct.monitors[0]["left"]
                offset_y = sct.monitors[0]["top"]
        else:
            current_active_window = gw.getActiveWindow()
            log(f"Taking screenshot of window: {window.title}")
            try:
                # Activate the window and wait for it to be fully in focus
                window.activate()
                pyautogui.sleep(0.5)
                offset_x, offset_y = window.left, window.top
                screenshot = _mss_screenshot(
                    region=(offset_x, offset_y, window.width, window.height)
                )
                # Restore the previously active window
                if current_active_window:
                    try:
                        current_active_window.activate()
                        pyautogui.sleep(0.2)
                    except Exception as e:
                        log(f"Error restoring previous window: {str(e)}")
            except Exception as e:
                log(f"Error taking screenshot of window: {str(e)}")
                return f"Error taking screenshot of window: {str(e)}"

        # Stage the PNG in a temp directory for OpenCV, optionally copy it
        # to downloads, then remove the temp directory.
        temp_dir = Path(tempfile.mkdtemp())
        try:
            filepath, _ = save_image_to_downloads(
                screenshot, prefix="screenshot", directory=temp_dir
            )
            if save_to_downloads:
                log("Copying screenshot from temp to downloads")
                shutil.copy(filepath, get_downloads_dir())
            img = cv2.imread(filepath)
        finally:
            shutil.rmtree(temp_dir, ignore_errors=True)

        if img is None:
            log(f"Error: Failed to read image from {filepath}")
            return f"Error: Failed to read image from {filepath}"

        if scale_percent_for_ocr is None:
            scale_percent_for_ocr = 100  # no downscaling by default

        if scale_percent_for_ocr <= 0:
            log(f"Error: scale_percent_for_ocr must be greater than 0, got {scale_percent_for_ocr}")
            return f"Error: scale_percent_for_ocr must be greater than 0, got {scale_percent_for_ocr}"

        # Target size for OCR, at least 1 pixel in each dimension.
        width = max(1, int(img.shape[1] * scale_percent_for_ocr / 100))
        height = max(1, int(img.shape[0] * scale_percent_for_ocr / 100))

        log(f"Resizing image from {img.shape[1]}x{img.shape[0]} to {width}x{height} (scale: {scale_percent_for_ocr}%)")
        resized_img = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA)

        output = engine(resized_img)
        boxes, txts, scores = output.boxes, output.txts, output.scores
        if boxes is None or txts is None or scores is None:
            # Nothing detected: report that instead of failing on zip().
            log("Found 0 text items in OCR result.")
            return "No text found"

        # Actual resize ratios (int truncation makes them differ slightly
        # from the requested percentage).
        scale_x = img.shape[1] / width
        scale_y = img.shape[0] / height
        needs_mapping = not (
            scale_x == 1 and scale_y == 1 and offset_x == 0 and offset_y == 0
        )

        def to_screen(box):
            """Convert one OCR box to a list of screen-space corner points."""
            points = box.tolist()  # np.array -> list
            if not needs_mapping:
                return points
            return [
                [px * scale_x + offset_x, py * scale_y + offset_y]
                for px, py in points
            ]

        zipped_results = [
            (to_screen(box), text, float(score))
            for box, text, score in zip(boxes, txts, scores)
        ]
        log(f"Found {len(zipped_results)} text items in OCR result.")
        # Use safe formatting for OCR results to prevent Unicode encoding errors
        log(f"First 5 items: {_safe_format_ocr_results(zipped_results[:5])}")
        return (
            ",\n".join([str(item) for item in zipped_results])
            if zipped_results
            else "No text found"
        )

    except Exception as e:
        log(f"Error in screenshot or getting UI elements: {str(e)}")
        import traceback

        stack_trace = traceback.format_exc()
        log(f"Stack trace:\n{stack_trace}")
        return f"Error in screenshot or getting UI elements: {str(e)}\nStack trace:\n{stack_trace}"
750
+
751
+
752
@mcp.tool()
def move_mouse(x: int, y: int) -> str:
    """Move the mouse pointer to the given screen coordinates.

    Args:
        x: Target X coordinate.
        y: Target Y coordinate.

    Returns:
        A success message, or an error message if pyautogui raised.
    """
    try:
        pyautogui.moveTo(x=x, y=y)
    except Exception as exc:
        return f"Error moving mouse to coordinates ({x}, {y}): {str(exc)}"
    else:
        return f"Successfully moved mouse to coordinates ({x}, {y})"
760
+
761
+
762
@mcp.tool()
def mouse_down(button: str = "left") -> str:
    """Press a mouse button and keep it held ('left', 'right', 'middle').

    Args:
        button: Name of the button to hold down.

    Returns:
        A confirmation message, or an error message if pyautogui raised.
    """
    try:
        pyautogui.mouseDown(button=button)
    except Exception as exc:
        return f"Error holding {button} mouse button: {str(exc)}"
    else:
        return f"Held down {button} mouse button"
770
+
771
+
772
@mcp.tool()
def mouse_up(button: str = "left") -> str:
    """Let go of a mouse button ('left', 'right', 'middle').

    Args:
        button: Name of the button to release.

    Returns:
        A confirmation message, or an error message if pyautogui raised.
    """
    try:
        pyautogui.mouseUp(button=button)
    except Exception as exc:
        return f"Error releasing {button} mouse button: {str(exc)}"
    else:
        return f"Released {button} mouse button"
780
+
781
+
782
@mcp.tool()
async def drag_mouse(
    from_x: int, from_y: int, to_x: int, to_y: int, duration: float = 0.5
) -> str:
    """
    Drag the mouse between two screen positions.

    The pointer is moved to the start point first; the drag itself is handed
    to a worker thread with ``asyncio.to_thread`` and awaited.

    Args:
        from_x: Starting X coordinate
        from_y: Starting Y coordinate
        to_x: Ending X coordinate
        to_y: Ending Y coordinate
        duration: Duration of the drag in seconds (default: 0.5)

    Returns:
        Success or error message
    """
    try:
        # Position the pointer at the origin of the drag.
        pyautogui.moveTo(x=from_x, y=from_y)

        log("starting drag")
        await asyncio.to_thread(
            pyautogui.dragTo,
            x=to_x,
            y=to_y,
            duration=duration,
        )
        log("done drag")
    except Exception as exc:
        return f"Error dragging from ({from_x}, {from_y}) to ({to_x}, {to_y}): {str(exc)}"
    else:
        return f"Successfully dragged from ({from_x}, {from_y}) to ({to_x}, {to_y})"
809
+
810
+
811
+ import pyautogui
812
+ from typing import Union, List
813
+
814
+
815
@mcp.tool()
def key_down(key: str) -> str:
    """Press a keyboard key and keep it held until it is released.

    Args:
        key: Name of the key to hold, passed to ``pyautogui.keyDown``.

    Returns:
        A confirmation message, or an error message if pyautogui raised.
    """
    try:
        pyautogui.keyDown(key)
    except Exception as exc:
        return f"Error holding key {key}: {str(exc)}"
    else:
        return f"Held down key: {key}"
823
+
824
+
825
@mcp.tool()
def key_up(key: str) -> str:
    """Let go of a keyboard key.

    Args:
        key: Name of the key to release, passed to ``pyautogui.keyUp``.

    Returns:
        A confirmation message, or an error message if pyautogui raised.
    """
    try:
        pyautogui.keyUp(key)
    except Exception as exc:
        return f"Error releasing key {key}: {str(exc)}"
    else:
        return f"Released key: {key}"
833
+
834
+
835
@mcp.tool()
def press_keys(keys: Union[str, List[Union[str, List[str]]]]) -> str:
    """
    Press keyboard keys.

    The whole input is validated before any key is sent, so a malformed
    item never leaves a partially executed sequence behind.

    Args:
        keys:
            - Single key as string (e.g., "enter")
            - Sequence of keys as list (e.g., ["a", "b", "c"])
            - Key combinations as nested list (e.g., [["ctrl", "c"], ["alt", "tab"]])

    Returns:
        A success message, an "Invalid ..." message when the input has the
        wrong shape (nothing is pressed in that case), or an error message
        if pyautogui raised while pressing.

    Examples:
        press_keys("enter")
        press_keys(["a", "b", "c"])
        press_keys([["ctrl", "c"], ["alt", "tab"]])
    """
    try:
        if isinstance(keys, str):
            # Single key
            pyautogui.press(keys)
            return f"Pressed single key: {keys}"

        if not isinstance(keys, list):
            return "Invalid input: must be str or list"

        # Validate up front: each item is either a key name or a list of key
        # names (a combination). Rejecting here means no key is pressed when
        # any part of the sequence is malformed.
        for item in keys:
            if isinstance(item, str):
                continue
            if isinstance(item, list) and all(isinstance(k, str) for k in item):
                continue
            return f"Invalid key format: {item}"

        for item in keys:
            if isinstance(item, str):
                # Sequential key press
                pyautogui.press(item)
            else:
                # Key combination (e.g., ctrl+c)
                pyautogui.hotkey(*item)
        return f"Successfully pressed keys sequence: {keys}"

    except Exception as e:
        return f"Error pressing keys {keys}: {str(e)}"
874
+
875
+
876
@mcp.tool()
def list_windows() -> List[Dict[str, Any]]:
    """List all open windows on the system.

    Windows with an empty title are left out.

    Returns:
        One dictionary per titled window with its title, geometry
        (left/top/width/height) and state flags; on failure, a single-item
        list of the form ``[{"error": <message>}]``.
    """
    try:
        return [
            {
                "title": win.title,
                "left": win.left,
                "top": win.top,
                "width": win.width,
                "height": win.height,
                "is_active": win.isActive,
                "is_visible": win.visible,
                "is_minimized": win.isMinimized,
                "is_maximized": win.isMaximized,
            }
            for win in gw.getAllWindows()
            if win.title  # Only include windows with titles
        ]
    except Exception as exc:
        log(f"Error listing windows: {str(exc)}")
        return [{"error": str(exc)}]
909
+
910
+
911
@mcp.tool()
def wait_milliseconds(milliseconds: int) -> str:
    """
    Pause for the given number of milliseconds.

    Args:
        milliseconds: Number of milliseconds to wait

    Returns:
        Success message after waiting, or an error message if the sleep
        call raised
    """
    try:
        from time import sleep

        # time.sleep works in seconds.
        sleep(milliseconds / 1000.0)
    except Exception as exc:
        return f"Error waiting for {milliseconds} milliseconds: {str(exc)}"
    else:
        return f"Successfully waited for {milliseconds} milliseconds"
929
+
930
+
931
@mcp.tool()
def activate_window(
    title_pattern: str, use_regex: bool = False, threshold: int = 60
) -> str:
    """
    Bring a window to the foreground, selecting it by its title.

    Args:
        title_pattern: Pattern to match window title
        use_regex: If True, treat the pattern as a regex, otherwise use fuzzy matching
        threshold: Minimum score (0-100) required for a fuzzy match

    Returns:
        Success or error message
    """
    try:
        # _find_matching_window is given dictionaries, so each titled window
        # is wrapped together with its real window object for later use.
        candidates = [
            {"title": win.title, "window_obj": win}
            for win in gw.getAllWindows()
            if win.title
        ]

        match = _find_matching_window(
            candidates, title_pattern, use_regex, threshold
        )
        if not match:
            log(f"No window found matching pattern: {title_pattern}")
            return f"Error: No window found matching pattern: {title_pattern}"

        target = match["window_obj"]
        target.activate()
        return f"Successfully activated window: '{target.title}'"
    except Exception as exc:
        log(f"Error activating window: {str(exc)}")
        return f"Error activating window: {str(exc)}"
980
+
981
+
982
def main():
    """Main entry point for the MCP server.

    Turns on pyautogui's fail-safe, logs which window-capture backend is
    available (and any configured WGC title patterns), then runs the server
    until it stops, is interrupted, or raises.
    """
    pyautogui.FAILSAFE = True

    if not WGC_AVAILABLE:
        log("Windows Graphics Capture API not available. Using standard capture methods.")
    else:
        log("Windows Graphics Capture API is available for enhanced window capture")
        # Report any comma-separated WGC patterns supplied via the environment.
        raw_patterns = os.getenv("COMPUTER_CONTROL_MCP_WGC_PATTERNS")
        if raw_patterns:
            stripped = (part.strip() for part in raw_patterns.split(","))
            patterns = [part for part in stripped if part]
            log(f"WGC patterns configured: {patterns}")

    try:
        log("Computer Control MCP Server Started...")
        mcp.run()
    except KeyboardInterrupt:
        log("Server shutting down...")
    except Exception as exc:
        log(f"Error: {str(exc)}")


if __name__ == "__main__":
    main()