hopx-ai 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hopx_ai/desktop.py ADDED
@@ -0,0 +1,1227 @@
1
+ """Desktop automation resource for Bunnyshell Sandboxes."""
2
+
3
+ from typing import Optional, List, Tuple
4
+ import logging
5
+ from ._agent_client import AgentHTTPClient
6
+ from .models import VNCInfo, WindowInfo, RecordingInfo, DisplayInfo
7
+ from .errors import DesktopNotAvailableError
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class Desktop:
    """
    Desktop automation resource.

    Provides methods for controlling GUI applications, VNC access, mouse/keyboard input,
    screenshots, screen recording, and window management. Every public method first
    verifies (once, with the result cached) that the sandbox exposes the desktop
    endpoints, then issues a single HTTP request through the shared agent client.

    Features:
        - VNC server management
        - Mouse control (click, move, drag, scroll)
        - Keyboard control (type, press, combinations)
        - Clipboard operations
        - Screenshot capture
        - Screen recording
        - Window management
        - Display configuration
        - X11 helpers (OCR, element lookup, window capture)

    Note:
        Desktop automation requires specific dependencies in your template.
        If not available, methods will raise DesktopNotAvailableError with
        installation instructions.

    Example:
        >>> sandbox = Sandbox.create(template="desktop")
        >>>
        >>> # Start VNC
        >>> vnc_info = sandbox.desktop.start_vnc()
        >>> print(f"VNC at: {vnc_info.url}")
        >>>
        >>> # Mouse control
        >>> sandbox.desktop.click(100, 100)
        >>> sandbox.desktop.type("Hello World")
        >>>
        >>> # Screenshot
        >>> img_bytes = sandbox.desktop.screenshot()
        >>> with open('screen.png', 'wb') as f:
        ...     f.write(img_bytes)
    """

    # Message attached to every DesktopNotAvailableError raised by this class.
    _UNAVAILABLE_MESSAGE = (
        "Desktop automation is not available in this sandbox. "
        "Your template may be missing required dependencies."
    )

    # Packages reported to the caller as potentially missing when the probe fails.
    _REQUIRED_DEPENDENCIES = (
        "xdotool",
        "xvfb",
        "tigervnc-standalone-server",
        "wmctrl",
        "imagemagick",
    )

    def __init__(self, client: "AgentHTTPClient"):
        """
        Initialize Desktop resource.

        Args:
            client: Shared agent HTTP client
        """
        self._client = client
        # None until the first probe; True/False afterwards.
        self._available: Optional[bool] = None
        # Set once the probe has been attempted, so it is never repeated.
        self._checked = False
        logger.debug("Desktop resource initialized")

    def _unavailable_error(self, request_id: Optional[str] = None) -> Exception:
        """
        Build the error raised whenever desktop automation is unavailable.

        Args:
            request_id: Request ID of the failed probe, when one is known

        Returns:
            A DesktopNotAvailableError carrying the standard message and the
            list of dependencies the template may be missing
        """
        return DesktopNotAvailableError(
            message=self._UNAVAILABLE_MESSAGE,
            missing_dependencies=list(self._REQUIRED_DEPENDENCIES),
            request_id=request_id,
        )

    def _check_availability(self) -> None:
        """
        Check if desktop automation is available.

        The first call probes the VNC status endpoint; the outcome is cached
        and later calls return (or raise) immediately without a request.

        Raises:
            DesktopNotAvailableError: If desktop features not available
        """
        if self._checked:
            if not self._available:
                raise self._unavailable_error()
            return

        # Flag the probe as done up front so a failed probe is cached as well.
        self._checked = True

        try:
            # Try to get VNC status (simple desktop endpoint check)
            self._client.get(
                "/desktop/vnc/status",
                operation="check desktop availability",
                timeout=5,
            )
        except Exception as e:
            # Any failure of the probe is treated as "desktop not available".
            self._available = False
            logger.warning("Desktop automation not available: %s", e)
            raise self._unavailable_error(
                request_id=getattr(e, "request_id", None)
            ) from e

        self._available = True
        logger.debug("Desktop automation available")

    # =============================================================================
    # VNC SERVER
    # =============================================================================

    def start_vnc(self, display: int = 1, password: Optional[str] = None) -> "VNCInfo":
        """
        Start VNC server.

        Args:
            display: Display number (default: 1, creates :1)
            password: VNC password (optional)

        Returns:
            VNC server information with URL and port

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> vnc_info = sandbox.desktop.start_vnc()
            >>> print(f"Connect to: {vnc_info.url}")
            >>> print(f"Display: {vnc_info.display}")
        """
        self._check_availability()

        logger.debug("Starting VNC server on display :%s", display)

        payload = {"display": display}
        # The password is only sent when one was supplied (empty string is skipped).
        if password:
            payload["password"] = password

        response = self._client.post(
            "/desktop/vnc/start",
            json=payload,
            operation="start VNC server",
        )

        data = response.json()
        return VNCInfo(
            running=data.get("running", True),
            display=data.get("display"),
            port=data.get("port"),
            url=data.get("url"),
            password=password,
        )

    def stop_vnc(self) -> None:
        """
        Stop VNC server.

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> sandbox.desktop.stop_vnc()
        """
        self._check_availability()

        logger.debug("Stopping VNC server")

        self._client.post(
            "/desktop/vnc/stop",
            json={},
            operation="stop VNC server",
        )

    def get_vnc_status(self) -> "VNCInfo":
        """
        Get VNC server status.

        Returns:
            VNC server information

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> vnc = sandbox.desktop.get_vnc_status()
            >>> if vnc.running:
            ...     print(f"VNC running at {vnc.url}")
        """
        self._check_availability()

        response = self._client.get(
            "/desktop/vnc/status",
            operation="get VNC status",
        )

        data = response.json()
        return VNCInfo(
            running=data.get("running", False),
            display=data.get("display"),
            port=data.get("port"),
            url=data.get("url"),
        )

    def get_vnc_url(self) -> str:
        """
        Get VNC URL (convenience method).

        This is a convenience method that returns just the URL string
        instead of the full VNCInfo object.

        Returns:
            VNC URL string, or an empty string when the status has no URL

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> url = sandbox.desktop.get_vnc_url()
            >>> print(f"Connect to: {url}")
        """
        vnc_info = self.get_vnc_status()
        return vnc_info.url or ""

    # =============================================================================
    # MOUSE CONTROL
    # =============================================================================

    def click(self, x: int, y: int, button: str = "left", clicks: int = 1) -> None:
        """
        Click at position.

        Args:
            x: X coordinate
            y: Y coordinate
            button: Mouse button ('left', 'right', 'middle')
            clicks: Number of clicks (1 for single, 2 for double)

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> # Single click
            >>> sandbox.desktop.click(100, 100)
            >>>
            >>> # Right click
            >>> sandbox.desktop.click(200, 200, button="right")
            >>>
            >>> # Double click
            >>> sandbox.desktop.click(150, 150, clicks=2)
        """
        self._check_availability()

        logger.debug(
            "Mouse click at (%s, %s), button=%s, clicks=%s", x, y, button, clicks
        )

        self._client.post(
            "/desktop/mouse/click",
            json={
                "x": x,
                "y": y,
                "button": button,
                "clicks": clicks,
            },
            operation="mouse click",
        )

    def move(self, x: int, y: int) -> None:
        """
        Move mouse cursor to position.

        Args:
            x: X coordinate
            y: Y coordinate

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> sandbox.desktop.move(300, 400)
        """
        self._check_availability()

        logger.debug("Mouse move to (%s, %s)", x, y)

        self._client.post(
            "/desktop/mouse/move",
            json={"x": x, "y": y},
            operation="mouse move",
        )

    def drag(self, from_x: int, from_y: int, to_x: int, to_y: int, button: str = "left") -> None:
        """
        Drag from one position to another.

        Args:
            from_x: Starting X coordinate
            from_y: Starting Y coordinate
            to_x: Ending X coordinate
            to_y: Ending Y coordinate
            button: Mouse button to hold ('left', 'right', 'middle')

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> # Drag from (100, 100) to (200, 200)
            >>> sandbox.desktop.drag(100, 100, 200, 200)
        """
        self._check_availability()

        logger.debug(
            "Mouse drag from (%s, %s) to (%s, %s)", from_x, from_y, to_x, to_y
        )

        self._client.post(
            "/desktop/mouse/drag",
            json={
                "from_x": from_x,
                "from_y": from_y,
                "to_x": to_x,
                "to_y": to_y,
                "button": button,
            },
            operation="mouse drag",
        )

    def scroll(self, amount: int, direction: str = "down") -> None:
        """
        Scroll mouse wheel.

        Args:
            amount: Scroll amount (positive integer)
            direction: Scroll direction ('up', 'down', 'left', 'right')

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> # Scroll down 5 clicks
            >>> sandbox.desktop.scroll(5, "down")
            >>>
            >>> # Scroll up 3 clicks
            >>> sandbox.desktop.scroll(3, "up")
        """
        self._check_availability()

        logger.debug("Mouse scroll %s by %s", direction, amount)

        self._client.post(
            "/desktop/mouse/scroll",
            json={
                "amount": amount,
                "direction": direction,
            },
            operation="mouse scroll",
        )

    # =============================================================================
    # KEYBOARD CONTROL
    # =============================================================================

    def type(self, text: str, delay_ms: int = 10) -> None:
        """
        Type text.

        Args:
            text: Text to type
            delay_ms: Delay between keystrokes in milliseconds

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> sandbox.desktop.type("Hello, World!")
            >>>
            >>> # Slower typing
            >>> sandbox.desktop.type("Slow typing", delay_ms=50)
        """
        self._check_availability()

        # Only the length is logged: typed text may be a password or other secret.
        logger.debug("Keyboard type: %d chars (delay=%sms)", len(text), delay_ms)

        self._client.post(
            "/desktop/keyboard/type",
            json={
                "text": text,
                "delay_ms": delay_ms,
            },
            operation="keyboard type",
        )

    def press(self, key: str) -> None:
        """
        Press a key.

        Args:
            key: Key name (e.g., 'Return', 'Escape', 'Tab', 'F1', etc.)

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> sandbox.desktop.press("Return")
            >>> sandbox.desktop.press("Escape")
            >>> sandbox.desktop.press("Tab")
        """
        self._check_availability()

        logger.debug("Keyboard press: %s", key)

        self._client.post(
            "/desktop/keyboard/press",
            json={"key": key},
            operation="keyboard press",
        )

    def combination(self, modifiers: List[str], key: str) -> None:
        """
        Press key combination.

        Args:
            modifiers: Modifier keys (e.g., ['ctrl'], ['ctrl', 'shift'])
            key: Main key

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> # Ctrl+C
            >>> sandbox.desktop.combination(['ctrl'], 'c')
            >>>
            >>> # Ctrl+Shift+T
            >>> sandbox.desktop.combination(['ctrl', 'shift'], 't')
            >>>
            >>> # Alt+F4
            >>> sandbox.desktop.combination(['alt'], 'F4')
        """
        self._check_availability()

        logger.debug("Keyboard combination: %s+%s", "+".join(modifiers), key)

        self._client.post(
            "/desktop/keyboard/combination",
            json={
                "modifiers": modifiers,
                "key": key,
            },
            operation="keyboard combination",
        )

    # =============================================================================
    # CLIPBOARD
    # =============================================================================

    def set_clipboard(self, text: str) -> None:
        """
        Set clipboard content.

        Args:
            text: Text to set in clipboard

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> sandbox.desktop.set_clipboard("Hello from clipboard!")
        """
        self._check_availability()

        # Only the length is logged: clipboard text may contain secrets.
        logger.debug("Set clipboard: %d chars", len(text))

        self._client.post(
            "/desktop/clipboard/set",
            json={"text": text},
            operation="set clipboard",
        )

    def get_clipboard(self) -> str:
        """
        Get clipboard content.

        Returns:
            Current clipboard text (empty string if the response has no "text")

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> text = sandbox.desktop.get_clipboard()
            >>> print(text)
        """
        self._check_availability()

        logger.debug("Get clipboard")

        response = self._client.get(
            "/desktop/clipboard/get",
            operation="get clipboard",
        )

        return response.json().get("text", "")

    def get_clipboard_history(self) -> List[str]:
        """
        Get clipboard history.

        Returns:
            List of recent clipboard contents (empty if the response has no "history")

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> history = sandbox.desktop.get_clipboard_history()
            >>> for item in history:
            ...     print(item)
        """
        self._check_availability()

        logger.debug("Get clipboard history")

        response = self._client.get(
            "/desktop/clipboard/history",
            operation="get clipboard history",
        )

        return response.json().get("history", [])

    # =============================================================================
    # SCREENSHOT
    # =============================================================================

    def screenshot(self) -> bytes:
        """
        Capture full screen screenshot.

        Returns:
            Screenshot image as PNG bytes

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> img_bytes = sandbox.desktop.screenshot()
            >>> with open('screenshot.png', 'wb') as f:
            ...     f.write(img_bytes)
        """
        self._check_availability()

        logger.debug("Capture screenshot")

        response = self._client.get(
            "/desktop/screenshot",
            operation="capture screenshot",
        )

        return response.content

    def screenshot_region(self, x: int, y: int, width: int, height: int) -> bytes:
        """
        Capture screenshot of specific region.

        Args:
            x: Starting X coordinate
            y: Starting Y coordinate
            width: Region width
            height: Region height

        Returns:
            Screenshot image as PNG bytes

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> # Capture 500x300 region starting at (100, 100)
            >>> img_bytes = sandbox.desktop.screenshot_region(100, 100, 500, 300)
            >>> with open('region.png', 'wb') as f:
            ...     f.write(img_bytes)
        """
        self._check_availability()

        logger.debug("Capture screenshot region: (%s, %s) %sx%s", x, y, width, height)

        response = self._client.post(
            "/desktop/screenshot/region",
            json={
                "x": x,
                "y": y,
                "width": width,
                "height": height,
            },
            operation="capture screenshot region",
        )

        return response.content

    # =============================================================================
    # SCREEN RECORDING
    # =============================================================================

    def start_recording(
        self,
        fps: int = 10,
        format: str = "mp4",
        quality: str = "medium"
    ) -> "RecordingInfo":
        """
        Start screen recording.

        Args:
            fps: Frames per second (default: 10)
            format: Video format ('mp4', 'webm')
            quality: Video quality ('low', 'medium', 'high')

        Returns:
            Recording information with recording_id

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> rec = sandbox.desktop.start_recording(fps=30, quality="high")
            >>> print(f"Recording ID: {rec.recording_id}")
            >>> # ... do stuff ...
            >>> sandbox.desktop.stop_recording(rec.recording_id)
        """
        self._check_availability()

        logger.debug(
            "Start recording: fps=%s, format=%s, quality=%s", fps, format, quality
        )

        response = self._client.post(
            "/desktop/recording/start",
            json={
                "fps": fps,
                "format": format,
                "quality": quality,
            },
            operation="start recording",
        )

        data = response.json()
        # fps and format echo the request arguments, not the server response.
        return RecordingInfo(
            recording_id=data.get("recording_id", ""),
            status=data.get("status", "recording"),
            fps=fps,
            format=format,
        )

    def stop_recording(self, recording_id: str) -> "RecordingInfo":
        """
        Stop screen recording.

        Args:
            recording_id: Recording ID from start_recording()

        Returns:
            Recording information with status and file size

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> rec = sandbox.desktop.start_recording()
            >>> # ... do stuff ...
            >>> final_rec = sandbox.desktop.stop_recording(rec.recording_id)
            >>> print(f"Duration: {final_rec.duration}s")
            >>> print(f"Size: {final_rec.file_size} bytes")
        """
        self._check_availability()

        logger.debug("Stop recording: %s", recording_id)

        response = self._client.post(
            "/desktop/recording/stop",
            json={"recording_id": recording_id},
            operation="stop recording",
        )

        data = response.json()
        return RecordingInfo(
            recording_id=recording_id,
            status=data.get("status", "stopped"),
            duration=data.get("duration", 0.0),
            file_size=data.get("file_size", 0),
            format=data.get("format", "mp4"),
        )

    def get_recording_status(self, recording_id: str) -> "RecordingInfo":
        """
        Get recording status.

        Args:
            recording_id: Recording ID

        Returns:
            Recording information

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> status = sandbox.desktop.get_recording_status(rec.recording_id)
            >>> if status.is_ready:
            ...     video = sandbox.desktop.download_recording(rec.recording_id)
        """
        self._check_availability()

        response = self._client.get(
            "/desktop/recording/status",
            params={"id": recording_id},
            operation="get recording status",
        )

        data = response.json()
        return RecordingInfo(
            recording_id=recording_id,
            status=data.get("status", "unknown"),
            duration=data.get("duration", 0.0),
            file_size=data.get("file_size", 0),
        )

    def download_recording(self, recording_id: str) -> bytes:
        """
        Download recorded video.

        Args:
            recording_id: Recording ID

        Returns:
            Video file bytes

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> video_bytes = sandbox.desktop.download_recording(rec.recording_id)
            >>> with open('recording.mp4', 'wb') as f:
            ...     f.write(video_bytes)
        """
        self._check_availability()

        logger.debug("Download recording: %s", recording_id)

        response = self._client.get(
            "/desktop/recording/download",
            params={"id": recording_id},
            operation="download recording",
            timeout=120,  # Longer timeout for video download
        )

        return response.content

    # =============================================================================
    # WINDOW MANAGEMENT
    # =============================================================================

    def get_windows(self) -> List["WindowInfo"]:
        """
        Get list of all windows.

        Returns:
            List of window information

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> windows = sandbox.desktop.get_windows()
            >>> for w in windows:
            ...     print(f"{w.title}: {w.width}x{w.height} at ({w.x}, {w.y})")
        """
        self._check_availability()

        logger.debug("Get windows list")

        response = self._client.get(
            "/desktop/windows",
            operation="get windows",
        )

        data = response.json()
        # Missing geometry fields default to 0; desktop and pid stay None.
        return [
            WindowInfo(
                id=win.get("id", ""),
                title=win.get("title", ""),
                x=win.get("x", 0),
                y=win.get("y", 0),
                width=win.get("width", 0),
                height=win.get("height", 0),
                desktop=win.get("desktop"),
                pid=win.get("pid"),
            )
            for win in data.get("windows", [])
        ]

    def focus_window(self, window_id: str) -> None:
        """
        Focus (activate) window.

        Args:
            window_id: Window ID from get_windows()

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> windows = sandbox.desktop.get_windows()
            >>> if windows:
            ...     sandbox.desktop.focus_window(windows[0].id)
        """
        self._check_availability()

        logger.debug("Focus window: %s", window_id)

        self._client.post(
            "/desktop/windows/focus",
            json={"window_id": window_id},
            operation="focus window",
        )

    def close_window(self, window_id: str) -> None:
        """
        Close window.

        Args:
            window_id: Window ID from get_windows()

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> windows = sandbox.desktop.get_windows()
            >>> for w in windows:
            ...     if "Firefox" in w.title:
            ...         sandbox.desktop.close_window(w.id)
        """
        self._check_availability()

        logger.debug("Close window: %s", window_id)

        self._client.post(
            "/desktop/windows/close",
            json={"window_id": window_id},
            operation="close window",
        )

    def resize_window(self, window_id: str, width: int, height: int) -> None:
        """
        Resize window.

        Args:
            window_id: Window ID from get_windows()
            width: New width
            height: New height

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> windows = sandbox.desktop.get_windows()
            >>> if windows:
            ...     sandbox.desktop.resize_window(windows[0].id, 800, 600)
        """
        self._check_availability()

        logger.debug("Resize window %s: %sx%s", window_id, width, height)

        self._client.post(
            "/desktop/windows/resize",
            json={
                "window_id": window_id,
                "width": width,
                "height": height,
            },
            operation="resize window",
        )

    def minimize_window(self, window_id: str) -> None:
        """
        Minimize window.

        Args:
            window_id: Window ID from get_windows()

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> windows = sandbox.desktop.get_windows()
            >>> if windows:
            ...     sandbox.desktop.minimize_window(windows[0].id)
        """
        self._check_availability()

        logger.debug("Minimize window %s", window_id)

        self._client.post(
            "/desktop/windows/minimize",
            json={"window_id": window_id},
            operation="minimize window",
        )

    # =============================================================================
    # DISPLAY
    # =============================================================================

    def get_display(self) -> "DisplayInfo":
        """
        Get current display resolution.

        Returns:
            Display information (falls back to 1920x1080, depth 24, for any
            field missing from the response)

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> display = sandbox.desktop.get_display()
            >>> print(f"Resolution: {display.resolution}")
            >>> print(f"Size: {display.width}x{display.height}")
        """
        self._check_availability()

        response = self._client.get(
            "/desktop/display",
            operation="get display info",
        )

        data = response.json()
        return DisplayInfo(
            width=data.get("width", 1920),
            height=data.get("height", 1080),
            depth=data.get("depth", 24),
        )

    def get_available_resolutions(self) -> List[Tuple[int, int]]:
        """
        Get available display resolutions.

        Returns:
            List of (width, height) tuples

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> resolutions = sandbox.desktop.get_available_resolutions()
            >>> for w, h in resolutions:
            ...     print(f"{w}x{h}")
        """
        self._check_availability()

        response = self._client.get(
            "/desktop/display/available",
            operation="get available resolutions",
        )

        data = response.json()
        resolutions = []
        for res in data.get("resolutions", []):
            # Entries may be {"width": .., "height": ..} objects or [w, h] pairs;
            # anything else is skipped.
            if isinstance(res, dict):
                resolutions.append((res.get("width", 0), res.get("height", 0)))
            elif isinstance(res, (list, tuple)) and len(res) >= 2:
                resolutions.append((res[0], res[1]))

        return resolutions

    def set_resolution(self, width: int, height: int) -> "DisplayInfo":
        """
        Set display resolution.

        Args:
            width: Screen width
            height: Screen height

        Returns:
            New display information (falls back to the requested size for any
            dimension missing from the response)

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> display = sandbox.desktop.set_resolution(1920, 1080)
            >>> print(f"New resolution: {display.resolution}")
        """
        self._check_availability()

        logger.debug("Set resolution: %sx%s", width, height)

        response = self._client.post(
            "/desktop/display/set",
            json={
                "width": width,
                "height": height,
            },
            operation="set display resolution",
        )

        data = response.json()
        return DisplayInfo(
            width=data.get("width", width),
            height=data.get("height", height),
            depth=data.get("depth", 24),
        )

    # =============================================================================
    # X11 ADVANCED FEATURES
    # =============================================================================

    def ocr(
        self,
        x: int,
        y: int,
        width: int,
        height: int,
        *,
        language: str = "eng",
        timeout: Optional[int] = None
    ) -> str:
        """
        Perform OCR on a screen region.

        Args:
            x: X coordinate of top-left corner
            y: Y coordinate of top-left corner
            width: Width of region
            height: Height of region
            language: OCR language (default: "eng")
            timeout: Request timeout in seconds

        Returns:
            Extracted text from the region (empty string if the response has no "text")

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> text = sandbox.desktop.ocr(100, 100, 400, 200)
            >>> print(f"Extracted: {text}")
        """
        self._check_availability()

        response = self._client.post(
            "/desktop/x11/ocr",
            json={"x": x, "y": y, "width": width, "height": height, "language": language},
            operation="OCR screen region",
            timeout=timeout,
        )

        return response.json().get("text", "")

    def find_element(
        self,
        text: str,
        *,
        timeout: Optional[int] = None
    ) -> Optional[dict]:
        """
        Find UI element by text.

        Args:
            text: Text to search for
            timeout: Request timeout in seconds

        Returns:
            Dict with element coordinates (x, y, width, height) or None

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> element = sandbox.desktop.find_element("Submit")
            >>> if element:
            ...     sandbox.desktop.click(element['x'], element['y'])
        """
        self._check_availability()

        response = self._client.post(
            "/desktop/x11/find_element",
            json={"text": text},
            operation="find UI element",
            timeout=timeout,
        )

        return response.json().get("element")

    def wait_for(
        self,
        text: str,
        *,
        timeout: int = 30
    ) -> dict:
        """
        Wait for UI element to appear.

        Args:
            text: Text to wait for
            timeout: Max wait time in seconds

        Returns:
            Dict with element coordinates when found (empty dict if the
            response has no "element")

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> element = sandbox.desktop.wait_for("Loading complete", timeout=60)
            >>> print(f"Found at: {element['x']}, {element['y']}")
        """
        self._check_availability()

        response = self._client.post(
            "/desktop/x11/wait_for",
            json={"text": text, "timeout": timeout},
            operation="wait for element",
            # The HTTP timeout is 5s longer than the wait sent in the payload.
            timeout=timeout + 5,
        )

        return response.json().get("element", {})

    def drag_drop(
        self,
        from_x: int,
        from_y: int,
        to_x: int,
        to_y: int,
        *,
        timeout: Optional[int] = None
    ) -> None:
        """
        Drag and drop from one point to another.

        Args:
            from_x: Starting X coordinate
            from_y: Starting Y coordinate
            to_x: Ending X coordinate
            to_y: Ending Y coordinate
            timeout: Request timeout in seconds

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> # Drag file to folder
            >>> sandbox.desktop.drag_drop(100, 200, 500, 300)
        """
        self._check_availability()

        self._client.post(
            "/desktop/x11/drag_drop",
            json={"from_x": from_x, "from_y": from_y, "to_x": to_x, "to_y": to_y},
            operation="drag and drop",
            timeout=timeout,
        )

    def get_bounds(
        self,
        text: str,
        *,
        timeout: Optional[int] = None
    ) -> dict:
        """
        Get bounding box of UI element.

        Args:
            text: Text to search for
            timeout: Request timeout in seconds

        Returns:
            Dict with x, y, width, height (the decoded response body as-is)

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> bounds = sandbox.desktop.get_bounds("OK Button")
            >>> print(f"Button at: {bounds['x']}, {bounds['y']}")
            >>> print(f"Size: {bounds['width']}x{bounds['height']}")
        """
        self._check_availability()

        response = self._client.post(
            "/desktop/x11/get_bounds",
            json={"text": text},
            operation="get element bounds",
            timeout=timeout,
        )

        return response.json()

    def capture_window(
        self,
        window_id: Optional[str] = None,
        *,
        timeout: Optional[int] = None
    ) -> bytes:
        """
        Capture screenshot of specific window.

        Args:
            window_id: Window ID (None for active window)
            timeout: Request timeout in seconds

        Returns:
            PNG image bytes

        Raises:
            DesktopNotAvailableError: If desktop not available

        Example:
            >>> # Capture active window
            >>> img = sandbox.desktop.capture_window()
            >>> with open('window.png', 'wb') as f:
            ...     f.write(img)
            >>>
            >>> # Capture specific window
            >>> img = sandbox.desktop.capture_window(window_id="0x1234567")
        """
        self._check_availability()

        # No window_id parameter is sent when the caller did not specify one.
        params = {}
        if window_id:
            params['window_id'] = window_id

        response = self._client.get(
            "/desktop/x11/capture_window",
            params=params,
            operation="capture window",
            timeout=timeout,
        )

        return response.content

    def __repr__(self) -> str:
        """Return a short description showing the cached availability state."""
        if self._available:
            status = "available"
        elif not self._checked:
            # The probe has not run yet, so availability is not known.
            status = "unknown"
        else:
            status = "unavailable"
        return f"<Desktop status={status}>"
1227
+