skydeckai-code 0.1.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1029 @@
1
+ import json
2
+ import os
3
+ import platform
4
+ import subprocess
5
+ import tempfile
6
+ from datetime import datetime
7
+ from typing import Any, Dict, List, Optional, Tuple
8
+
9
+ from mcp import types
10
+
11
+ from .state import state
12
+
13
+ # Import the required libraries for improved screenshot functionality
14
+ try:
15
+ import mss
16
+ import mss.tools
17
+ MSS_AVAILABLE = True
18
+ except ImportError:
19
+ MSS_AVAILABLE = False
20
+
21
# PyGetWindow is optional. Besides being absent (ImportError), the package
# raises NotImplementedError at import time on platforms it does not support
# (e.g. Linux), so both cases are treated as "not available" instead of letting
# the import of this module fail.
try:
    import pygetwindow as gw
    PYGETWINDOW_AVAILABLE = True
except (ImportError, NotImplementedError):
    PYGETWINDOW_AVAILABLE = False
26
+
27
+ # Import macOS-specific libraries if available
28
+ try:
29
+ import Quartz
30
+ from Quartz import (
31
+ CGWindowListCopyWindowInfo,
32
+ kCGNullWindowID,
33
+ kCGWindowListOptionOnScreenOnly,
34
+ )
35
+ QUARTZ_AVAILABLE = True
36
+ except ImportError:
37
+ QUARTZ_AVAILABLE = False
38
+
39
# User-facing messages returned when the OS refuses screen capture, keyed by
# platform identifier. Only a "darwin" (macOS) entry is defined here.
PERMISSION_ERROR_MESSAGES = {
    "darwin": "Permission denied to capture the screen. Please grant screen recording permission in System Settings > Privacy & Security > Screen Recording."
}
43
+
44
+
45
def _check_macos_screen_recording_permission() -> Dict[str, Any]:
    """
    Report whether this process may record the screen on macOS.

    The check first calls Quartz.CGPreflightScreenCaptureAccess(); if that
    reports no access, Quartz.CGRequestScreenCaptureAccess() is called to ask
    for it. Both calls are only attempted when the Quartz bindings were
    imported and expose CGPreflightScreenCaptureAccess.

    Returns:
        Dict with keys:
        - has_permission (bool): True when either call reported access
        - error (str or None): Message describing why access is missing
        - details (dict): Raw results of the calls, or the failure reason
    """
    outcome = {"has_permission": False, "error": None, "details": {}}

    # Guard: Quartz bindings missing entirely.
    if not QUARTZ_AVAILABLE:
        outcome["error"] = "Quartz framework not available. Cannot check screen recording permission."
        outcome["details"] = {"error": "Quartz not available"}
        return outcome

    # Guard: bindings present but the preflight API is not exposed.
    if not hasattr(Quartz, 'CGPreflightScreenCaptureAccess'):
        outcome["error"] = "CGPreflightScreenCaptureAccess not available. Your macOS version may be too old (requires macOS 11+)."
        outcome["details"] = {"error": "API not available"}
        return outcome

    try:
        already_granted = Quartz.CGPreflightScreenCaptureAccess()
        outcome["details"]["preflight_result"] = already_granted
        if already_granted:
            outcome["has_permission"] = True
            return outcome

        # Not granted yet: ask for it and record what the request returned.
        granted_now = Quartz.CGRequestScreenCaptureAccess()
        outcome["details"]["request_result"] = granted_now
        if granted_now:
            outcome["has_permission"] = True
        else:
            outcome["error"] = PERMISSION_ERROR_MESSAGES["darwin"]
    except Exception as exc:
        outcome["details"]["exception"] = str(exc)
        outcome["error"] = f"Error checking screen recording permission: {str(exc)}"

    return outcome
104
+
105
+
106
def capture_screenshot_tool() -> Dict[str, Any]:
    """
    Return the tool definition for ``capture_screenshot``.

    The returned dict has three keys: ``name``, a long human-readable
    ``description``, and ``inputSchema`` (a JSON-Schema object). The schema
    requires ``capture_mode`` (an object whose ``type`` is one of ``full``,
    ``active_window`` or ``named_window``, with an optional ``window_name``)
    and accepts optional ``output_path`` and ``debug`` properties.
    """
    return {
        "name": "capture_screenshot",
        "description": "Capture a screenshot of the current screen and save it to a file. "
                       "This tool allows capturing the entire screen, the active window, or a specific named window. "
                       "The screenshot will be saved to the specified output path or to a default location if not provided. "
                       "WHEN TO USE: When you need to visually document what's on screen, capture a specific application "
                       "window, create visual references for troubleshooting, or gather visual information about the user's "
                       "environment. Useful for documenting issues, creating tutorials, or assisting with visual tasks. "
                       "WHEN NOT TO USE: When you need information about windows without capturing them (use get_available_windows "
                       "instead). "
                       "RETURNS: A JSON object containing success status, file path where the screenshot was saved, and a "
                       "message. On failure, includes a detailed error message. If debug mode is enabled, also includes debug "
                       "information about the attempted capture. Windows can be captured in the background without bringing "
                       "them to the front. Works on macOS, Windows, and Linux with platform-specific implementations.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "output_path": {
                    "type": "string",
                    # The first fragment ends with a space so the concatenated
                    # text reads "...will be used. Examples: ...".
                    "description": "Optional path where the screenshot should be saved. If not provided, a default path will be used. "
                                   "Examples: 'screenshots/main_window.png', 'docs/current_state.png'. Both absolute "
                                   "and relative paths are supported, but must be within the allowed workspace."
                },
                "capture_mode": {
                    "type": "object",
                    "description": "Specifies what to capture in the screenshot.",
                    "properties": {
                        "type": {
                            "type": "string",
                            "description": "The type of screenshot to capture. Use 'full' for the entire screen, 'active_window' "
                                           "for the currently active window (foreground window), or 'named_window' for a specific "
                                           "window by name or application name.",
                            "enum": ["full", "active_window", "named_window"]
                        },
                        "window_name": {
                            "type": "string",
                            "description": "Name of the specific application or window to capture. Required when type is 'named_window'. "
                                           "This can be a partial window title or application name, and the search is case-insensitive. "
                                           "Examples: 'Chrome', 'Visual Studio Code', 'Terminal'. Windows can be captured in the "
                                           "background without bringing them to the front."
                        }
                    },
                    "required": ["type"]
                },
                "debug": {
                    "type": "boolean",
                    "description": "Whether to include detailed debug information in the response when the operation fails. When "
                                   "set to true, the response will include additional information about available windows, match "
                                   "attempts, and system-specific details that can help diagnose capture issues. Default is False.",
                }
            },
            "required": ["capture_mode"]
        },
    }
162
+
163
+
164
def _get_default_screenshot_path() -> str:
    """
    Build a timestamped default file path for a new screenshot.

    The file is named ``screenshot_<YYYYmmdd_HHMMSS>.png``. It is placed in a
    ``screenshots`` folder under ``state.allowed_directory`` (created on
    demand) when that attribute is set and non-empty; otherwise it is placed
    in the system temporary directory.
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    workspace = getattr(state, 'allowed_directory', None)
    if workspace:
        target_dir = os.path.join(workspace, "screenshots")
        os.makedirs(target_dir, exist_ok=True)
    else:
        target_dir = tempfile.gettempdir()

    return os.path.join(target_dir, f"screenshot_{stamp}.png")
178
+
179
+
180
+ def _capture_with_mss(output_path: str, region: Optional[Dict[str, int]] = None) -> bool:
181
+ """
182
+ Capture screenshot using MSS library.
183
+
184
+ Args:
185
+ output_path: Path where to save the screenshot
186
+ region: Optional dictionary with top, left, width, height for specific region
187
+
188
+ Returns:
189
+ bool: True if successful, False otherwise
190
+ """
191
+ try:
192
+ with mss.mss() as sct:
193
+ if region:
194
+ # Capture specific region
195
+ monitor = region
196
+ else:
197
+ # Capture entire primary monitor
198
+ monitor = sct.monitors[1] # monitors[0] is all monitors combined, monitors[1] is the primary
199
+
200
+ # Grab the picture
201
+ sct_img = sct.grab(monitor)
202
+
203
+ # Save it to the output path
204
+ mss.tools.to_png(sct_img.rgb, sct_img.size, output=output_path)
205
+
206
+ return os.path.exists(output_path) and os.path.getsize(output_path) > 0
207
+ except Exception as e:
208
+ print(f"MSS screenshot error: {str(e)}")
209
+ return False
210
+
211
+
212
def _find_window_by_name(window_name: str) -> Tuple[Optional[Dict[str, int]], Dict[str, Any]]:
    """
    Find a window by name and return its position and size along with debug info.

    On macOS the lookup is delegated to find_macos_window_by_name(); on other
    platforms PyGetWindow is used with a case-insensitive substring match on
    the window title, taking the first match.

    Nothing is printed: every diagnostic is carried in the returned debug
    dictionary. This module is part of an MCP tool package (it imports
    `mcp.types`); when such a server runs over the stdio transport, stdout
    carries the protocol stream and must not receive debug output.

    Args:
        window_name: Name of the window to find

    Returns:
        Tuple containing:
        - Window region dict with top, left, width, height (or None if not found)
        - Debug info dictionary with search results and details
    """
    if platform.system().lower() in ["darwin", "macos"]:
        window_region, detailed_debug_info = find_macos_window_by_name(window_name)
        if window_region:
            return window_region, {
                "search_term": window_name,
                "found_window": True,
                "match_type": "quartz_window_search",
                "detailed_info": detailed_debug_info
            }

        # Not found: include the apps that currently have windows as context.
        active_apps = _get_active_apps_macos()
        return None, {
            "search_term": window_name,
            "reason": "No matching window title",
            "active_apps": active_apps,
            "quartz_available": QUARTZ_AVAILABLE,
            "detailed_info": detailed_debug_info
        }

    # For non-macOS platforms, use PyGetWindow
    if not PYGETWINDOW_AVAILABLE:
        return None, {"error": "PyGetWindow is not available"}

    try:
        all_windows = gw.getAllWindows()

        # Titles (with sizes) of every titled window, reported on failure.
        window_titles = [
            f"'{w.title}' ({w.width}x{w.height})" for w in all_windows if w.title
        ]

        # Standard window matching (case-insensitive substring)
        needle = window_name.lower()
        matching_windows = [
            w for w in all_windows if w.title and needle in w.title.lower()
        ]

        if not matching_windows:
            return None, {
                "search_term": window_name,
                "reason": "No matching window title",
                "matching_method": "case_insensitive_substring",
                "all_windows": window_titles
            }

        window = matching_windows[0]

        # A non-positive size cannot be grabbed, so report it as a failure.
        if window.width <= 0 or window.height <= 0:
            return None, {
                "search_term": window_name,
                "found_window": window.title,
                "reason": f"Invalid dimensions: {window.width}x{window.height}",
                "all_windows": window_titles
            }

        return {
            "top": window.top,
            "left": window.left,
            "width": window.width,
            "height": window.height
        }, {
            "search_term": window_name,
            "found_window": window.title,
            "match_type": "standard_case_insensitive"
        }
    except Exception as e:
        return None, {
            "search_term": window_name,
            "error": str(e)
        }
308
+
309
+
310
+ def _get_active_apps_macos() -> List[str]:
311
+ """Get a list of currently active applications on macOS."""
312
+ try:
313
+ script = '''
314
+ tell application "System Events"
315
+ set appList to {}
316
+ set allProcesses to application processes
317
+
318
+ repeat with proc in allProcesses
319
+ if windows of proc is not {} then
320
+ set end of appList to name of proc
321
+ end if
322
+ end repeat
323
+
324
+ return appList
325
+ end tell
326
+ '''
327
+
328
+ result = subprocess.run(["osascript", "-e", script], capture_output=True, text=True)
329
+ if result.returncode == 0:
330
+ # Parse the comma-separated list from AppleScript
331
+ apps = result.stdout.strip()
332
+ if apps:
333
+ return [app.strip() for app in apps.split(",")]
334
+ return []
335
+ except Exception as e:
336
+ print(f"Error getting active apps: {str(e)}")
337
+ return []
338
+
339
+ def _format_error_with_available_windows(window_name: str, debug_info: Dict[str, Any], result: Dict[str, Any]) -> None:
340
+ """Format error message with available windows list and store debug info for later use."""
341
+ # Check for debug_info from macOS specific format
342
+ if debug_info and "available_windows" in debug_info:
343
+ available_windows = []
344
+ for window in debug_info["available_windows"]:
345
+ window_desc = f"'{window['owner']}'"
346
+ if window['name']:
347
+ window_desc += f" - '{window['name']}'"
348
+ available_windows.append(window_desc)
349
+
350
+ # Create a formatted list of available windows for the error message
351
+ windows_list = ", ".join(available_windows) if available_windows else "No windows found"
352
+ result["error"] = f"Window '{window_name}' not found. Available windows: {windows_list}"
353
+ result["_debug_info"] = debug_info # Store with underscore prefix for later use
354
+ # Check for debug_info from PyGetWindow format
355
+ elif debug_info and "all_windows" in debug_info:
356
+ window_titles = debug_info["all_windows"]
357
+ windows_list = ", ".join(window_titles) if window_titles else "No windows found"
358
+ result["error"] = f"Window '{window_name}' not found. Available windows: {windows_list}"
359
+ result["_debug_info"] = debug_info # Store with underscore prefix for later use
360
+ else:
361
+ result["error"] = f"Window '{window_name}' not found"
362
+ if debug_info:
363
+ result["_debug_info"] = debug_info # Store with underscore prefix for later use
364
+
365
+
366
+ def _verify_screenshot_success(output_path: str) -> bool:
367
+ """Verify if a screenshot was successfully saved to the output path."""
368
+ return os.path.exists(output_path) and os.path.getsize(output_path) > 0
369
+
370
+
371
def _try_mss_capture(output_path: str, window_region: Optional[Dict[str, int]], result: Dict[str, Any],
                     window_name: Optional[str] = None, debug_info: Optional[Dict[str, Any]] = None) -> bool:
    """
    Try to capture a screenshot using MSS library.

    Args:
        output_path: Path where the screenshot should be saved
        window_region: Region to capture (with top, left, width, height keys) or None for full screen
        result: Dictionary updated in place with the outcome
        window_name: Optional name of the window being captured, for error messages
        debug_info: Optional debug information stored under ``_debug_info`` on failure

    Returns:
        bool: True if capture was successful, False otherwise

    Note:
        - When window_region is None, captures the full primary screen.
        - Updates the result dictionary with success=True on success.
        - On failure, updates result with an error message and, if provided,
          debug_info.
        - When MSS is not installed and a named window was requested, an error
          is recorded only if result does not already carry one, so a more
          specific earlier failure is kept. For full-screen requests result is
          left untouched in that case, because callers then fall back to
          native tools that report their own outcome.
    """
    failure = None
    record_failure = True

    if not MSS_AVAILABLE:
        if window_name and not result.get("error"):
            failure = f"Cannot capture window '{window_name}': MSS library is not available"
        else:
            record_failure = False
    else:
        try:
            if not _capture_with_mss(output_path, window_region):
                if window_name:
                    failure = f"Failed to capture window '{window_name}' using MSS"
                else:
                    failure = "MSS failed to capture full screen"
            elif not _verify_screenshot_success(output_path):
                failure = "Failed to save screenshot (file is empty or not created)"
            else:
                # Debug info will be added by the caller if debug mode is enabled
                result["success"] = True
                return True
        except Exception as e:
            failure = f"MSS error: {str(e)}"

    if record_failure:
        result["error"] = failure
    if debug_info is not None:
        result["_debug_info"] = debug_info
    return False
409
+
410
+
411
def _capture_screenshot_macos(output_path: str, capture_area: str = "full", window_name: Optional[str] = None) -> Dict[str, Any]:
    """
    Capture screenshot on macOS.

    The screen-recording permission is checked first; without it nothing is
    captured. After that exactly one of three modes runs:
    - window_name given: look the window up, capture it by window ID with
      ``screencapture -l`` when an ID is known, otherwise (or if that fails)
      grab its region with MSS.
    - capture_area == "window": run ``screencapture -w``.
    - anything else: full screen via MSS, falling back to ``screencapture -x``.

    Args:
        output_path: File path the image is written to
        capture_area: "window" selects the active-window mode; other values
            mean full screen (ignored when window_name is given)
        window_name: Name of a specific window to capture, if any

    Returns:
        Dict with success status and error message if failed. A "_debug_info"
        key may also be present (permission details or window-search details).
    """
    # NOTE(review): success is judged only by a non-empty file existing at
    # output_path, so a file left there by an earlier run would presumably be
    # reported as success even if this capture failed - confirm.
    result = {"success": False, "error": None}
    internal_debug_info = None  # Store debug info internally but don't add to result yet

    # Check for screen recording permission first
    perm_check = _check_macos_screen_recording_permission()
    if not perm_check["has_permission"]:
        result["error"] = perm_check["error"]
        result["_debug_info"] = perm_check["details"]  # Store with underscore prefix for later use
        return result

    # If window_name is specified, try to capture that specific window
    if window_name:
        # Try to find the window using our macOS-specific function
        window_region, debug_info = _find_window_by_name(window_name)

        # Store debug info internally but don't add to result yet
        internal_debug_info = debug_info

        if window_region:
            # If we have a window ID from Quartz, use it directly without activating the window
            if 'id' in window_region:
                try:
                    # Capture using the window ID without activating the window
                    cmd = ["screencapture", "-l", str(window_region['id']), output_path]
                    process = subprocess.run(cmd, capture_output=True)

                    # Check if file exists and has non-zero size
                    if _verify_screenshot_success(output_path):
                        result["success"] = True
                        # Debug info will be added by the caller if debug mode is enabled
                        result["_debug_info"] = internal_debug_info  # Store for later use but with underscore prefix
                        return result
                    else:
                        result["error"] = f"Native screencapture failed with return code {process.returncode}"
                except Exception as e:
                    result["error"] = f"Screenshot error: {str(e)}"

            # If direct window ID capture failed or no ID available, try using MSS
            # (on failure _try_mss_capture may overwrite the error recorded above)
            if _try_mss_capture(output_path, window_region, result, window_name):
                # If successful, store debug info for later use
                result["_debug_info"] = internal_debug_info  # Store for later use but with underscore prefix
                return result
        else:
            # Window not found - create a more detailed error message with available windows
            _format_error_with_available_windows(window_name, internal_debug_info, result)

        # No fallback to capturing the active window - return the result
        return result
    elif capture_area == "window":
        # Capture active window
        # NOTE(review): the screencapture man page describes -w as "only allow
        # window selection mode", which looks interactive - confirm this really
        # captures the active window without waiting for a user click.
        try:
            cmd = ["screencapture", "-w", output_path]
            process = subprocess.run(cmd, capture_output=True)

            # Check if file exists and has non-zero size
            if _verify_screenshot_success(output_path):
                result["success"] = True
                return result
            else:
                result["error"] = f"Active window capture failed with return code {process.returncode}"
        except Exception as e:
            result["error"] = f"Active window screenshot error: {str(e)}"

        # No fallback to full screen here either
        return result

    # For full screen capture
    if _try_mss_capture(output_path, None, result):
        return result

    # Fall back to native macOS screencapture for full screen only
    # (an error left in result by the MSS attempt is not cleared if this succeeds)
    try:
        cmd = ["screencapture", "-x", output_path]
        process = subprocess.run(cmd, capture_output=True)

        # Check if file exists and has non-zero size
        if _verify_screenshot_success(output_path):
            result["success"] = True
            return result
        else:
            result["error"] = f"Native screencapture failed with return code {process.returncode}"
    except Exception as e:
        result["error"] = f"Screenshot error: {str(e)}"

    return result
503
+
504
+
505
def _capture_screenshot_linux(output_path: str, capture_area: str = "full", window_name: Optional[str] = None) -> Dict[str, Any]:
    """
    Capture screenshot on Linux.

    Exactly one of three modes runs:
    - window_name given: when both MSS and PyGetWindow are available, find
      the window and grab its region with MSS; otherwise locate it with
      ``xdotool`` and shell out to ``gnome-screenshot`` or ``scrot``.
    - capture_area == "window": active window via ``gnome-screenshot -w`` or
      ``scrot -u``.
    - anything else: full screen via MSS, falling back to
      ``gnome-screenshot`` or ``scrot``.

    External tools are detected by running ``which <tool>``.

    Args:
        output_path: File path the image is written to
        capture_area: "window" selects the active-window mode; other values
            mean full screen (ignored when window_name is given)
        window_name: Name of a specific window to capture, if any

    Returns:
        Dict with success status and error message if failed. A "_debug_info"
        key may also be present with window-search details.
    """
    # NOTE(review): success is judged only by a non-empty file existing at
    # output_path, so a stale file from an earlier run would presumably be
    # reported as success - confirm.
    result = {"success": False, "error": None}

    # If window_name is specified, try to capture that specific window
    if window_name:
        # Try to use MSS first if available
        if MSS_AVAILABLE and PYGETWINDOW_AVAILABLE:
            try:
                window_region, debug_info = _find_window_by_name(window_name)
                if window_region:
                    if _try_mss_capture(output_path, window_region, result, window_name):
                        # Store debug info for later use
                        result["_debug_info"] = debug_info
                        return result
                    # A failed MSS grab falls through to the final return below,
                    # carrying the error set by _try_mss_capture.
                else:
                    # Window not found - create a more detailed error message with available windows
                    _format_error_with_available_windows(window_name, debug_info, result)
                    return result
            except Exception as e:
                result["error"] = f"PyGetWindow error: {str(e)}"
                return result

        # Try native Linux methods only if MSS is not available
        if not MSS_AVAILABLE or not PYGETWINDOW_AVAILABLE:
            try:
                # Try to find the window using xdotool
                if subprocess.run(["which", "xdotool"], capture_output=True).returncode == 0:
                    # Search for the window
                    find_cmd = ["xdotool", "search", "--name", window_name]
                    result_cmd = subprocess.run(find_cmd, capture_output=True, text=True)

                    if result_cmd.returncode == 0 and result_cmd.stdout.strip():
                        # Get the first window ID
                        window_id = result_cmd.stdout.strip().split('\n')[0]

                        # Now capture the window
                        # NOTE(review): window_id is appended after a second "-w";
                        # verify gnome-screenshot accepts a window ID this way.
                        # The scrot branch below does not use window_id at all
                        # ("-u" is the same flag used for the active window
                        # elsewhere in this function), so it may capture a
                        # different window than the one found - confirm.
                        if subprocess.run(["which", "gnome-screenshot"], capture_output=True).returncode == 0:
                            cmd = ["gnome-screenshot", "-w", "-f", output_path, "-w", window_id]
                            process = subprocess.run(cmd, capture_output=True)

                            # Check if file exists and has non-zero size
                            if _verify_screenshot_success(output_path):
                                result["success"] = True
                                return result
                            else:
                                result["error"] = f"gnome-screenshot failed with return code {process.returncode}"
                        elif subprocess.run(["which", "scrot"], capture_output=True).returncode == 0:
                            cmd = ["scrot", "-u", output_path]
                            process = subprocess.run(cmd, capture_output=True)

                            # Check if file exists and has non-zero size
                            if _verify_screenshot_success(output_path):
                                result["success"] = True
                                return result
                            else:
                                result["error"] = f"scrot failed with return code {process.returncode}"
                        else:
                            result["error"] = "No screenshot tool found (gnome-screenshot or scrot)"
                    else:
                        result["error"] = f"Window '{window_name}' not found using xdotool"
                        # Store debug info
                        result["_debug_info"] = {"error": "Window not found using xdotool"}
                else:
                    result["error"] = "xdotool not available for window capture"
            except Exception as e:
                result["error"] = f"Screenshot error: {str(e)}"

        # No fallback to full screen - just return the error
        return result
    elif capture_area == "window":
        # Capture active window
        try:
            if subprocess.run(["which", "gnome-screenshot"], capture_output=True).returncode == 0:
                cmd = ["gnome-screenshot", "-w", "-f", output_path]
                process = subprocess.run(cmd, capture_output=True)

                # Check if file exists and has non-zero size
                if _verify_screenshot_success(output_path):
                    result["success"] = True
                    return result
                else:
                    result["error"] = f"Active window capture failed with return code {process.returncode}"
            elif subprocess.run(["which", "scrot"], capture_output=True).returncode == 0:
                cmd = ["scrot", "-u", output_path]
                process = subprocess.run(cmd, capture_output=True)

                # Check if file exists and has non-zero size
                if _verify_screenshot_success(output_path):
                    result["success"] = True
                    return result
                else:
                    result["error"] = f"scrot failed with return code {process.returncode}"
            else:
                result["error"] = "No screenshot tool found (gnome-screenshot or scrot)"
        except Exception as e:
            result["error"] = f"Active window screenshot error: {str(e)}"

        # No fallback to full screen here either
        return result

    # For full screen capture
    if _try_mss_capture(output_path, None, result):
        return result

    # Fall back to native Linux methods for full screen only
    # (an error left in result by the MSS attempt is not cleared if this succeeds)
    try:
        if subprocess.run(["which", "gnome-screenshot"], capture_output=True).returncode == 0:
            cmd = ["gnome-screenshot", "-f", output_path]
            process = subprocess.run(cmd, capture_output=True)

            # Check if file exists and has non-zero size
            if _verify_screenshot_success(output_path):
                result["success"] = True
                return result
            else:
                result["error"] = f"gnome-screenshot failed with return code {process.returncode}"
        elif subprocess.run(["which", "scrot"], capture_output=True).returncode == 0:
            cmd = ["scrot", output_path]
            process = subprocess.run(cmd, capture_output=True)

            # Check if file exists and has non-zero size
            if _verify_screenshot_success(output_path):
                result["success"] = True
                return result
            else:
                result["error"] = f"scrot failed with return code {process.returncode}"
        else:
            result["error"] = "No screenshot tool found (gnome-screenshot or scrot)"
    except Exception as e:
        result["error"] = f"Screenshot error: {str(e)}"

    return result
643
+
644
+
645
# PowerShell prelude shared by the two window-capture scripts below.
# It declares user32 GetWindowRect / GetForegroundWindow through P/Invoke
# (they are not members of System.Runtime.InteropServices.Marshal) and a
# Save-WindowImage helper that copies the window rectangle from the screen.
# The output path is read from the SKYDECK_SCREENSHOT_PATH environment
# variable so caller-supplied text is never spliced into script source.
_WINDOWS_CAPTURE_PRELUDE = r"""
Add-Type -AssemblyName System.Drawing
Add-Type -TypeDefinition @'
using System;
using System.Runtime.InteropServices;
public static class SkyDeckWin32 {
    [StructLayout(LayoutKind.Sequential)]
    public struct RECT { public int Left; public int Top; public int Right; public int Bottom; }
    [DllImport("user32.dll")]
    public static extern bool GetWindowRect(IntPtr hWnd, out RECT rect);
    [DllImport("user32.dll")]
    public static extern IntPtr GetForegroundWindow();
}
'@

function Save-WindowImage($hwnd) {
    $rect = New-Object 'SkyDeckWin32+RECT'
    if (-not [SkyDeckWin32]::GetWindowRect($hwnd, [ref]$rect)) { return $false }
    $width = $rect.Right - $rect.Left
    $height = $rect.Bottom - $rect.Top
    if ($width -le 0 -or $height -le 0) { return $false }
    try {
        $bitmap = New-Object System.Drawing.Bitmap $width, $height
        $graphics = [System.Drawing.Graphics]::FromImage($bitmap)
        $graphics.CopyFromScreen($rect.Left, $rect.Top, 0, 0, $bitmap.Size)
        $bitmap.Save($env:SKYDECK_SCREENSHOT_PATH)
        $graphics.Dispose()
        $bitmap.Dispose()
        return $true
    } catch {
        return $false
    }
}
"""

# Finds the first process whose main window title contains the text in
# SKYDECK_WINDOW_NAME (wildcard characters escaped) and captures it; prints
# "True"/"False", or "WINDOWS_LIST:<titles>" when nothing matches.
_WINDOWS_NAMED_WINDOW_SCRIPT = _WINDOWS_CAPTURE_PRELUDE + r"""
$needle = [System.Management.Automation.WildcardPattern]::Escape($env:SKYDECK_WINDOW_NAME)
$target = Get-Process | Where-Object { $_.MainWindowTitle -like "*$needle*" } | Select-Object -First 1
if ($target) {
    if (Save-WindowImage $target.MainWindowHandle) { Write-Output 'True' } else { Write-Output 'False' }
} else {
    $allWindows = Get-Process | Where-Object { $_.MainWindowTitle } | Select-Object MainWindowTitle, ProcessName | Format-List | Out-String
    Write-Output "WINDOWS_LIST:$allWindows"
}
"""

# Captures the current foreground window; prints "True" or "False".
_WINDOWS_ACTIVE_WINDOW_SCRIPT = _WINDOWS_CAPTURE_PRELUDE + r"""
$hwnd = [SkyDeckWin32]::GetForegroundWindow()
if ($hwnd -ne [IntPtr]::Zero -and (Save-WindowImage $hwnd)) { Write-Output 'True' } else { Write-Output 'False' }
"""

# Captures the primary screen.
_WINDOWS_FULL_SCREEN_SCRIPT = r"""
Add-Type -AssemblyName System.Windows.Forms
Add-Type -AssemblyName System.Drawing
$screen = [System.Windows.Forms.Screen]::PrimaryScreen.Bounds
$bitmap = New-Object System.Drawing.Bitmap $screen.Width, $screen.Height
$graphics = [System.Drawing.Graphics]::FromImage($bitmap)
$graphics.CopyFromScreen($screen.X, $screen.Y, 0, 0, $screen.Size)
$bitmap.Save($env:SKYDECK_SCREENSHOT_PATH)
"""


def _run_windows_capture_script(script: str, output_path: str, window_name: Optional[str] = None):
    """Run a fixed PowerShell script, passing caller data via environment variables."""
    env = dict(os.environ)
    env["SKYDECK_SCREENSHOT_PATH"] = output_path
    if window_name is not None:
        env["SKYDECK_WINDOW_NAME"] = window_name
    return subprocess.run(["powershell", "-Command", script], capture_output=True, text=True, env=env)


def _capture_screenshot_windows(output_path: str, capture_area: str = "full", window_name: Optional[str] = None) -> Dict[str, Any]:
    """
    Capture screenshot on Windows.

    Exactly one of three modes runs:
    - window_name given: when both MSS and PyGetWindow are available, find
      the window and grab its region with MSS; otherwise use a PowerShell
      script that matches the window title and copies its rectangle.
    - capture_area == "window": PowerShell capture of the foreground window.
    - anything else: full screen via MSS, falling back to PowerShell.

    window_name and output_path are handed to PowerShell through environment
    variables rather than interpolated into the script, so quotes or ``$(...)``
    in either value cannot alter the script that runs.

    Args:
        output_path: File path the image is written to
        capture_area: "window" selects the active-window mode; other values
            mean full screen (ignored when window_name is given)
        window_name: Name of a specific window to capture, if any

    Returns:
        Dict with success status and error message if failed. A "_debug_info"
        key may also be present with window-search details.
    """
    result = {"success": False, "error": None}

    # If window_name is specified, try to capture that specific window
    if window_name:
        if MSS_AVAILABLE and PYGETWINDOW_AVAILABLE:
            try:
                window_region, debug_info = _find_window_by_name(window_name)
                if window_region:
                    if _try_mss_capture(output_path, window_region, result, window_name):
                        # Store debug info for later use
                        result["_debug_info"] = debug_info
                else:
                    # Window not found - report it together with the available windows
                    _format_error_with_available_windows(window_name, debug_info, result)
            except Exception as e:
                result["error"] = f"PyGetWindow error: {str(e)}"
            # No native fallback when the MSS route was taken
            return result

        # Native route, used only when MSS or PyGetWindow is missing
        try:
            process = _run_windows_capture_script(_WINDOWS_NAMED_WINDOW_SCRIPT, output_path, window_name)
            output = process.stdout.strip()
            if output.startswith("True"):
                if _verify_screenshot_success(output_path):
                    result["success"] = True
                else:
                    result["error"] = "Failed to save screenshot of window"
            elif "WINDOWS_LIST:" in output:
                windows_list = output.split("WINDOWS_LIST:", 1)[1].strip()
                result["error"] = f"Window '{window_name}' not found. Available windows: {windows_list}"
                # Store windows list as debug info
                result["_debug_info"] = {"available_windows": windows_list}
            else:
                result["error"] = f"Window '{window_name}' not found or could not be captured"
        except Exception as e:
            result["error"] = f"Screenshot error: {str(e)}"

        # No fallback to full screen - just return the error
        return result

    if capture_area == "window":
        # Capture the foreground window
        try:
            process = _run_windows_capture_script(_WINDOWS_ACTIVE_WINDOW_SCRIPT, output_path)
            if process.stdout.strip() == "True" and _verify_screenshot_success(output_path):
                result["success"] = True
            else:
                result["error"] = "Failed to capture active window"
        except Exception as e:
            result["error"] = f"Active window screenshot error: {str(e)}"

        # No fallback to full screen here either
        return result

    # For full screen capture
    if _try_mss_capture(output_path, None, result):
        return result

    # Fall back to native Windows methods for full screen only
    try:
        process = _run_windows_capture_script(_WINDOWS_FULL_SCREEN_SCRIPT, output_path)
        if _verify_screenshot_success(output_path):
            result["success"] = True
        else:
            result["error"] = f"PowerShell screenshot failed with return code {process.returncode}"
    except Exception as e:
        result["error"] = f"Screenshot error: {str(e)}"

    return result
816
+
817
+
818
def find_macos_window_by_name(window_name):
    """Find an on-screen macOS window by application name or window title.

    Matching is case-insensitive and tried in priority order: exact
    application name, exact window title, application name containing the
    search term, window title containing the search term.  Within the first
    non-empty category the window on the highest layer is chosen; ties go to
    the window that appears first in the list returned by Quartz.

    Args:
        window_name: Application name or window title (or a fragment of
            either) to look for.

    Returns:
        A ``(window, debug_info)`` tuple.  ``window`` is a dict with ``id``,
        ``top``, ``left``, ``width`` and ``height`` keys, or ``None`` when
        nothing matched or the lookup failed.  ``debug_info`` is a dict that
        either describes the search (``search_term``, ``available_windows``,
        ``selected_window``) or carries an ``error`` message.
    """
    # An empty term is a substring of every name and would "match" an
    # arbitrary window, so reject it (and non-string input) up front.
    if not isinstance(window_name, str) or not window_name.strip():
        return None, {"error": "No window name provided"}

    try:
        if not QUARTZ_AVAILABLE:
            return None, {"error": "Quartz not available"}

        # Guard against a null result so the loop below cannot fail on it.
        window_list = CGWindowListCopyWindowInfo(kCGWindowListOptionOnScreenOnly, kCGNullWindowID) or []

        # Debug details are collected and returned rather than printed.
        debug_info = {
            "search_term": window_name,
            "available_windows": [],
        }

        candidates = []
        for window in window_list:
            layer = window.get('kCGWindowLayer', 0)

            # Skip windows with layer > 0 (typically system UI elements).
            if layer > 0:
                continue

            summary = {
                "id": window.get('kCGWindowNumber', 0),
                "name": window.get('kCGWindowName', ''),
                "owner": window.get('kCGWindowOwnerName', ''),
                "layer": layer,
            }
            debug_info["available_windows"].append(summary)
            candidates.append({**summary, 'bounds': window.get('kCGWindowBounds', {})})

        # Match categories, listed from highest to lowest priority.
        exact_app_matches = []
        exact_window_matches = []
        app_contains_matches = []
        window_contains_matches = []

        search_term_lower = window_name.lower()

        for candidate in candidates:
            name_lower = (candidate['name'] or '').lower()
            owner_lower = (candidate['owner'] or '').lower()

            # Windows with neither a title nor an owner cannot match anything.
            if not name_lower and not owner_lower:
                continue

            if owner_lower == search_term_lower:
                exact_app_matches.append(candidate)
            elif name_lower == search_term_lower:
                exact_window_matches.append(candidate)
            elif search_term_lower in owner_lower:
                app_contains_matches.append(candidate)
            elif search_term_lower in name_lower:
                window_contains_matches.append(candidate)

        prioritized = [
            (exact_app_matches, "Exact match on application name"),
            (exact_window_matches, "Exact match on window title"),
            (app_contains_matches, "Application name contains search term"),
            (window_contains_matches, "Window title contains search term"),
        ]
        for match_list, reason in prioritized:
            if not match_list:
                continue

            # Only layers <= 0 remain, so the highest layer is the normal
            # window level (0); negative layers are desktop-level surfaces
            # that sit behind regular windows.  max() returns the first
            # maximal item, which preserves the order Quartz listed them in.
            selected_window = max(match_list, key=lambda w: w['layer'])
            debug_info["selected_window"] = {
                "id": selected_window['id'],
                "name": selected_window['name'],
                "owner": selected_window['owner'],
                "layer": selected_window['layer'],
                "selection_reason": reason,
            }

            bounds = selected_window['bounds']
            return {
                'id': selected_window['id'],
                'top': bounds.get('Y', 0),
                'left': bounds.get('X', 0),
                'width': bounds.get('Width', 0),
                'height': bounds.get('Height', 0),
            }, debug_info

        debug_info["error"] = f"No matching window found for '{window_name}'"
        return None, debug_info
    except Exception as e:
        # Best-effort lookup: report any failure to the caller instead of raising.
        return None, {"error": f"Error finding macOS window: {str(e)}"}
923
+
924
+
925
def capture_screenshot(output_path: Optional[str] = None, capture_mode: Optional[Dict[str, str]] = None, debug: bool = False) -> Dict[str, Any]:
    """
    Capture a screenshot and save it to the specified path.

    Args:
        output_path: Path where the screenshot should be saved. If None, a default path will be used.
        capture_mode: Dictionary specifying what to capture:
            - type: 'full' for entire screen, 'active_window' for current window, 'named_window' for specific window
            - window_name: Name of window to capture (required when type is 'named_window')
            Defaults to a full-screen capture when omitted.
        debug: Whether to emit diagnostics and include debug information in the response on failure

    Returns:
        Dictionary with ``success``, ``path`` (None on failure) and ``message``,
        plus ``warning`` when the platform helper reported one and
        ``debug_info`` when ``debug`` is set and the capture failed.
    """
    import sys  # local import: only needed for the stderr diagnostics below

    def _debug(text: str) -> None:
        # Diagnostics go to stderr: this module serves MCP tool calls, and on
        # a stdio transport anything printed to stdout would be mixed into
        # the protocol stream.
        if debug:
            print(text, file=sys.stderr)

    # Set defaults if capture_mode is not provided
    if not capture_mode:
        capture_mode = {"type": "full"}

    # Extract capture type and window name
    capture_type = capture_mode.get("type", "full")
    window_name = capture_mode.get("window_name") if capture_type == "named_window" else None

    # A named-window capture without a name cannot target anything; fail
    # explicitly instead of handing None to the platform helper.
    if capture_type == "named_window" and not window_name:
        return {
            "success": False,
            "path": None,
            "message": "Missing required 'window_name' for capture type 'named_window'.",
        }

    _debug(f"Capture mode: {capture_type}")
    if window_name:
        _debug(f"Window name: {window_name}")

    # Use default path if none provided
    if not output_path:
        output_path = _get_default_screenshot_path()

    # Ensure the output directory exists.  A bare filename has an empty
    # dirname, and os.makedirs("") raises, so only create a real directory.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # The platform helpers take the older "window"/"full" capture_area value.
    capture_area = "window" if capture_type in ("active_window", "named_window") else "full"

    # Capture screenshot based on platform
    system_name = platform.system().lower()
    _debug(f"Detected platform: {system_name}")

    if system_name in ("darwin", "macos"):
        result = _capture_screenshot_macos(output_path, capture_area, window_name)
    elif system_name == "linux":
        result = _capture_screenshot_linux(output_path, capture_area, window_name)
    elif system_name == "windows":
        result = _capture_screenshot_windows(output_path, capture_area, window_name)
    else:
        result = {"success": False, "error": f"Unsupported platform: {system_name}"}

    succeeded = result["success"]

    # Make permission-related failures stand out and, on macOS, say how to fix them.
    if not succeeded and result.get("error") and "permission" in result["error"].lower():
        result["error"] = f"PERMISSION ERROR: {result['error']}"
        if system_name == "darwin":
            result["error"] += " To fix this: Open System Settings > Privacy & Security > Screen Recording, and enable permission for this application."

    # Extract debug info if present
    debug_info = result.pop("_debug_info", None)

    # Format the final result
    response = {
        "success": succeeded,
        "path": output_path if succeeded else None,
        "message": "Screenshot captured successfully" if succeeded else result.get("error", "Failed to capture screenshot"),
    }

    # Add warning if present
    if "warning" in result:
        response["warning"] = result["warning"]

    # Only include debug info if debug mode is enabled AND the operation failed
    if debug and not succeeded and debug_info:
        response["debug_info"] = debug_info

    return response
1008
+
1009
+
1010
async def handle_capture_screenshot(arguments: dict) -> List[types.TextContent]:
    """Handle a screenshot tool call and return the outcome as JSON text.

    Args:
        arguments: Tool arguments. Reads ``output_path`` (optional),
            ``debug`` (optional, defaults to False) and ``capture_mode``
            (required object with a ``type`` field). A legacy ``platform``
            key is accepted but ignored.

    Returns:
        A single-element list holding a ``TextContent`` whose text is the
        JSON-encoded result dictionary.
    """
    import sys  # local import: only needed for the stderr notice below

    output_path = arguments.get("output_path")
    debug = arguments.get("debug", False)

    # Handle legacy platform parameter (ignore it).  The notice goes to
    # stderr because on a stdio transport stdout carries protocol messages.
    if "platform" in arguments:
        print("Note: 'platform' parameter is deprecated and will be auto-detected", file=sys.stderr)

    # Enforce new parameter format requiring capture_mode
    capture_mode = arguments.get("capture_mode")
    if not capture_mode:
        error = "Missing required parameter 'capture_mode'. Please provide a capture_mode object with 'type' field."
    elif not isinstance(capture_mode, dict):
        # capture_screenshot calls .get() on it, so anything but an object
        # would raise there; report it as a normal failure instead.
        error = "Invalid parameter 'capture_mode'. Please provide a capture_mode object with 'type' field."
    else:
        error = None

    if error is not None:
        # Keep the original "error" key and add the path/message keys that
        # capture_screenshot responses carry, so clients see one shape.
        result = {
            "success": False,
            "path": None,
            "message": error,
            "error": error,
        }
    else:
        result = capture_screenshot(output_path, capture_mode, debug)

    return [types.TextContent(type="text", text=json.dumps(result, indent=2))]