cua-computer 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,691 +1,8 @@
1
- import asyncio
2
- import json
3
- import time
4
- from typing import Any, Dict, List, Optional, Tuple
5
- from PIL import Image
1
+ from typing import Optional
2
+ from .generic import GenericComputerInterface
6
3
 
7
- import websockets
8
-
9
- from ..logger import Logger, LogLevel
10
- from .base import BaseComputerInterface
11
- from ..utils import decode_base64_image, encode_base64_image, bytes_to_image, draw_box, resize_image
12
- from .models import Key, KeyType, MouseButton, CommandResult
13
-
14
- class LinuxComputerInterface(BaseComputerInterface):
4
+ class LinuxComputerInterface(GenericComputerInterface):
15
5
  """Interface for Linux."""
16
6
 
17
7
  def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None):
18
- super().__init__(ip_address, username, password, api_key, vm_name)
19
- self._ws = None
20
- self._reconnect_task = None
21
- self._closed = False
22
- self._last_ping = 0
23
- self._ping_interval = 5 # Send ping every 5 seconds
24
- self._ping_timeout = 120 # Wait 120 seconds for pong response
25
- self._reconnect_delay = 1 # Start with 1 second delay
26
- self._max_reconnect_delay = 30 # Maximum delay between reconnection attempts
27
- self._log_connection_attempts = True # Flag to control connection attempt logging
28
- self._authenticated = False # Track authentication status
29
- self._command_lock = asyncio.Lock() # Lock to ensure only one command at a time
30
-
31
- # Set logger name for Linux interface
32
- self.logger = Logger("computer.interface.linux", LogLevel.NORMAL)
33
-
34
- @property
35
- def ws_uri(self) -> str:
36
- """Get the WebSocket URI using the current IP address.
37
-
38
- Returns:
39
- WebSocket URI for the Computer API Server
40
- """
41
- protocol = "wss" if self.api_key else "ws"
42
- port = "8443" if self.api_key else "8000"
43
- return f"{protocol}://{self.ip_address}:{port}/ws"
44
-
45
- async def _keep_alive(self):
46
- """Keep the WebSocket connection alive with automatic reconnection."""
47
- retry_count = 0
48
- max_log_attempts = 1 # Only log the first attempt at INFO level
49
- log_interval = 500 # Then log every 500th attempt (significantly increased from 30)
50
- last_warning_time = 0
51
- min_warning_interval = 30 # Minimum seconds between connection lost warnings
52
- min_retry_delay = 0.5 # Minimum delay between connection attempts (500ms)
53
-
54
- while not self._closed:
55
- try:
56
- if self._ws is None or (
57
- self._ws and self._ws.state == websockets.protocol.State.CLOSED
58
- ):
59
- try:
60
- retry_count += 1
61
-
62
- # Add a minimum delay between connection attempts to avoid flooding
63
- if retry_count > 1:
64
- await asyncio.sleep(min_retry_delay)
65
-
66
- # Only log the first attempt at INFO level, then every Nth attempt
67
- if retry_count == 1:
68
- self.logger.info(f"Attempting WebSocket connection to {self.ws_uri}")
69
- elif retry_count % log_interval == 0:
70
- self.logger.info(
71
- f"Still attempting WebSocket connection (attempt {retry_count})..."
72
- )
73
- else:
74
- # All other attempts are logged at DEBUG level
75
- self.logger.debug(
76
- f"Attempting WebSocket connection to {self.ws_uri} (attempt {retry_count})"
77
- )
78
-
79
- self._ws = await asyncio.wait_for(
80
- websockets.connect(
81
- self.ws_uri,
82
- max_size=1024 * 1024 * 10, # 10MB limit
83
- max_queue=32,
84
- ping_interval=self._ping_interval,
85
- ping_timeout=self._ping_timeout,
86
- close_timeout=5,
87
- compression=None, # Disable compression to reduce overhead
88
- ),
89
- timeout=120,
90
- )
91
- self.logger.info("WebSocket connection established")
92
-
93
- # Authentication will be handled by the first command that needs it
94
- # Don't do authentication here to avoid recv conflicts
95
-
96
- self._reconnect_delay = 1 # Reset reconnect delay on successful connection
97
- self._last_ping = time.time()
98
- retry_count = 0 # Reset retry count on successful connection
99
- self._authenticated = False # Reset auth status on new connection
100
-
101
- except (asyncio.TimeoutError, websockets.exceptions.WebSocketException) as e:
102
- next_retry = self._reconnect_delay
103
-
104
- # Only log the first error at WARNING level, then every Nth attempt
105
- if retry_count == 1:
106
- self.logger.warning(
107
- f"Computer API Server not ready yet. Will retry automatically."
108
- )
109
- elif retry_count % log_interval == 0:
110
- self.logger.warning(
111
- f"Still waiting for Computer API Server (attempt {retry_count})..."
112
- )
113
- else:
114
- # All other errors are logged at DEBUG level
115
- self.logger.debug(f"Connection attempt {retry_count} failed: {e}")
116
-
117
- if self._ws:
118
- try:
119
- await self._ws.close()
120
- except:
121
- pass
122
- self._ws = None
123
-
124
- # Regular ping to check connection
125
- if self._ws and self._ws.state == websockets.protocol.State.OPEN:
126
- try:
127
- if time.time() - self._last_ping >= self._ping_interval:
128
- pong_waiter = await self._ws.ping()
129
- await asyncio.wait_for(pong_waiter, timeout=self._ping_timeout)
130
- self._last_ping = time.time()
131
- except Exception as e:
132
- self.logger.debug(f"Ping failed: {e}")
133
- if self._ws:
134
- try:
135
- await self._ws.close()
136
- except:
137
- pass
138
- self._ws = None
139
- continue
140
-
141
- await asyncio.sleep(1)
142
-
143
- except Exception as e:
144
- current_time = time.time()
145
- # Only log connection lost warnings at most once every min_warning_interval seconds
146
- if current_time - last_warning_time >= min_warning_interval:
147
- self.logger.warning(
148
- f"Computer API Server connection lost. Will retry automatically."
149
- )
150
- last_warning_time = current_time
151
- else:
152
- # Log at debug level instead
153
- self.logger.debug(f"Connection lost: {e}")
154
-
155
- if self._ws:
156
- try:
157
- await self._ws.close()
158
- except:
159
- pass
160
- self._ws = None
161
-
162
- async def _ensure_connection(self):
163
- """Ensure WebSocket connection is established."""
164
- if self._reconnect_task is None or self._reconnect_task.done():
165
- self._reconnect_task = asyncio.create_task(self._keep_alive())
166
-
167
- retry_count = 0
168
- max_retries = 5
169
-
170
- while retry_count < max_retries:
171
- try:
172
- if self._ws and self._ws.state == websockets.protocol.State.OPEN:
173
- return
174
- retry_count += 1
175
- await asyncio.sleep(1)
176
- except Exception as e:
177
- # Only log at ERROR level for the last retry attempt
178
- if retry_count == max_retries - 1:
179
- self.logger.error(
180
- f"Persistent connection check error after {retry_count} attempts: {e}"
181
- )
182
- else:
183
- self.logger.debug(f"Connection check error (attempt {retry_count}): {e}")
184
- retry_count += 1
185
- await asyncio.sleep(1)
186
- continue
187
-
188
- raise ConnectionError("Failed to establish WebSocket connection after multiple retries")
189
-
190
- async def _send_command(self, command: str, params: Optional[Dict] = None) -> Dict[str, Any]:
191
- """Send command through WebSocket."""
192
- max_retries = 3
193
- retry_count = 0
194
- last_error = None
195
-
196
- # Acquire lock to ensure only one command is processed at a time
197
- async with self._command_lock:
198
- self.logger.debug(f"Acquired lock for command: {command}")
199
- while retry_count < max_retries:
200
- try:
201
- await self._ensure_connection()
202
- if not self._ws:
203
- raise ConnectionError("WebSocket connection is not established")
204
-
205
- # Handle authentication if needed
206
- if self.api_key and self.vm_name and not self._authenticated:
207
- self.logger.info("Performing authentication handshake...")
208
- auth_message = {
209
- "command": "authenticate",
210
- "params": {
211
- "api_key": self.api_key,
212
- "container_name": self.vm_name
213
- }
214
- }
215
- await self._ws.send(json.dumps(auth_message))
216
-
217
- # Wait for authentication response
218
- auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10)
219
- auth_result = json.loads(auth_response)
220
-
221
- if not auth_result.get("success"):
222
- error_msg = auth_result.get("error", "Authentication failed")
223
- self.logger.error(f"Authentication failed: {error_msg}")
224
- self._authenticated = False
225
- raise ConnectionError(f"Authentication failed: {error_msg}")
226
-
227
- self.logger.info("Authentication successful")
228
- self._authenticated = True
229
-
230
- message = {"command": command, "params": params or {}}
231
- await self._ws.send(json.dumps(message))
232
- response = await asyncio.wait_for(self._ws.recv(), timeout=30)
233
- self.logger.debug(f"Completed command: {command}")
234
- return json.loads(response)
235
- except Exception as e:
236
- last_error = e
237
- retry_count += 1
238
- if retry_count < max_retries:
239
- # Only log at debug level for intermediate retries
240
- self.logger.debug(
241
- f"Command '{command}' failed (attempt {retry_count}/{max_retries}): {e}"
242
- )
243
- await asyncio.sleep(1)
244
- continue
245
- else:
246
- # Only log at error level for the final failure
247
- self.logger.error(
248
- f"Failed to send command '{command}' after {max_retries} retries"
249
- )
250
- self.logger.debug(f"Command failure details: {e}")
251
- raise last_error if last_error else RuntimeError("Failed to send command")
252
-
253
- async def wait_for_ready(self, timeout: int = 60, interval: float = 1.0):
254
- """Wait for WebSocket connection to become available."""
255
- start_time = time.time()
256
- last_error = None
257
- attempt_count = 0
258
- progress_interval = 10 # Log progress every 10 seconds
259
- last_progress_time = start_time
260
-
261
- # Disable detailed logging for connection attempts
262
- self._log_connection_attempts = False
263
-
264
- try:
265
- self.logger.info(
266
- f"Waiting for Computer API Server to be ready (timeout: {timeout}s)..."
267
- )
268
-
269
- # Start the keep-alive task if it's not already running
270
- if self._reconnect_task is None or self._reconnect_task.done():
271
- self._reconnect_task = asyncio.create_task(self._keep_alive())
272
-
273
- # Wait for the connection to be established
274
- while time.time() - start_time < timeout:
275
- try:
276
- attempt_count += 1
277
- current_time = time.time()
278
-
279
- # Log progress periodically without flooding logs
280
- if current_time - last_progress_time >= progress_interval:
281
- elapsed = current_time - start_time
282
- self.logger.info(
283
- f"Still waiting for Computer API Server... (elapsed: {elapsed:.1f}s, attempts: {attempt_count})"
284
- )
285
- last_progress_time = current_time
286
-
287
- # Check if we have a connection
288
- if self._ws and self._ws.state == websockets.protocol.State.OPEN:
289
- # Test the connection with a simple command
290
- try:
291
- await self._send_command("get_screen_size")
292
- elapsed = time.time() - start_time
293
- self.logger.info(
294
- f"Computer API Server is ready (after {elapsed:.1f}s, {attempt_count} attempts)"
295
- )
296
- return # Connection is fully working
297
- except Exception as e:
298
- last_error = e
299
- self.logger.debug(f"Connection test failed: {e}")
300
-
301
- # Wait before trying again
302
- await asyncio.sleep(interval)
303
-
304
- except Exception as e:
305
- last_error = e
306
- self.logger.debug(f"Connection attempt {attempt_count} failed: {e}")
307
- await asyncio.sleep(interval)
308
-
309
- # If we get here, we've timed out
310
- error_msg = f"Could not connect to {self.ip_address} after {timeout} seconds"
311
- if last_error:
312
- error_msg += f": {str(last_error)}"
313
- self.logger.error(error_msg)
314
- raise TimeoutError(error_msg)
315
- finally:
316
- # Reset to default logging behavior
317
- self._log_connection_attempts = False
318
-
319
- def close(self):
320
- """Close WebSocket connection.
321
-
322
- Note: In host computer server mode, we leave the connection open
323
- to allow other clients to connect to the same server. The server
324
- will handle cleaning up idle connections.
325
- """
326
- # Only cancel the reconnect task
327
- if self._reconnect_task:
328
- self._reconnect_task.cancel()
329
-
330
- # Don't set closed flag or close websocket by default
331
- # This allows the server to stay connected for other clients
332
- # self._closed = True
333
- # if self._ws:
334
- # asyncio.create_task(self._ws.close())
335
- # self._ws = None
336
-
337
- def force_close(self):
338
- """Force close the WebSocket connection.
339
-
340
- This method should be called when you want to completely
341
- shut down the connection, not just for regular cleanup.
342
- """
343
- self._closed = True
344
- if self._reconnect_task:
345
- self._reconnect_task.cancel()
346
- if self._ws:
347
- asyncio.create_task(self._ws.close())
348
- self._ws = None
349
-
350
- # Mouse Actions
351
- async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> None:
352
- await self._send_command("mouse_down", {"x": x, "y": y, "button": button})
353
-
354
- async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> None:
355
- await self._send_command("mouse_up", {"x": x, "y": y, "button": button})
356
-
357
- async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
358
- await self._send_command("left_click", {"x": x, "y": y})
359
-
360
- async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
361
- await self._send_command("right_click", {"x": x, "y": y})
362
-
363
- async def double_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
364
- await self._send_command("double_click", {"x": x, "y": y})
365
-
366
- async def move_cursor(self, x: int, y: int) -> None:
367
- await self._send_command("move_cursor", {"x": x, "y": y})
368
-
369
- async def drag_to(self, x: int, y: int, button: "MouseButton" = "left", duration: float = 0.5) -> None:
370
- await self._send_command(
371
- "drag_to", {"x": x, "y": y, "button": button, "duration": duration}
372
- )
373
-
374
- async def drag(self, path: List[Tuple[int, int]], button: "MouseButton" = "left", duration: float = 0.5) -> None:
375
- await self._send_command(
376
- "drag", {"path": path, "button": button, "duration": duration}
377
- )
378
-
379
- # Keyboard Actions
380
- async def key_down(self, key: "KeyType") -> None:
381
- await self._send_command("key_down", {"key": key})
382
-
383
- async def key_up(self, key: "KeyType") -> None:
384
- await self._send_command("key_up", {"key": key})
385
-
386
- async def type_text(self, text: str) -> None:
387
- # Temporary fix for https://github.com/trycua/cua/issues/165
388
- # Check if text contains Unicode characters
389
- if any(ord(char) > 127 for char in text):
390
- # For Unicode text, use clipboard and paste
391
- await self.set_clipboard(text)
392
- await self.hotkey(Key.COMMAND, 'v')
393
- else:
394
- # For ASCII text, use the regular typing method
395
- await self._send_command("type_text", {"text": text})
396
-
397
- async def press(self, key: "KeyType") -> None:
398
- """Press a single key.
399
-
400
- Args:
401
- key: The key to press. Can be any of:
402
- - A Key enum value (recommended), e.g. Key.PAGE_DOWN
403
- - A direct key value string, e.g. 'pagedown'
404
- - A single character string, e.g. 'a'
405
-
406
- Examples:
407
- ```python
408
- # Using enum (recommended)
409
- await interface.press(Key.PAGE_DOWN)
410
- await interface.press(Key.ENTER)
411
-
412
- # Using direct values
413
- await interface.press('pagedown')
414
- await interface.press('enter')
415
-
416
- # Using single characters
417
- await interface.press('a')
418
- ```
419
-
420
- Raises:
421
- ValueError: If the key type is invalid or the key is not recognized
422
- """
423
- if isinstance(key, Key):
424
- actual_key = key.value
425
- elif isinstance(key, str):
426
- # Try to convert to enum if it matches a known key
427
- key_or_enum = Key.from_string(key)
428
- actual_key = key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum
429
- else:
430
- raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
431
-
432
- await self._send_command("press_key", {"key": actual_key})
433
-
434
- async def press_key(self, key: "KeyType") -> None:
435
- """DEPRECATED: Use press() instead.
436
-
437
- This method is kept for backward compatibility but will be removed in a future version.
438
- Please use the press() method instead.
439
- """
440
- await self.press(key)
441
-
442
- async def hotkey(self, *keys: "KeyType") -> None:
443
- """Press multiple keys simultaneously.
444
-
445
- Args:
446
- *keys: Multiple keys to press simultaneously. Each key can be any of:
447
- - A Key enum value (recommended), e.g. Key.COMMAND
448
- - A direct key value string, e.g. 'command'
449
- - A single character string, e.g. 'a'
450
-
451
- Examples:
452
- ```python
453
- # Using enums (recommended)
454
- await interface.hotkey(Key.COMMAND, Key.C) # Copy
455
- await interface.hotkey(Key.COMMAND, Key.V) # Paste
456
-
457
- # Using mixed formats
458
- await interface.hotkey(Key.COMMAND, 'a') # Select all
459
- ```
460
-
461
- Raises:
462
- ValueError: If any key type is invalid or not recognized
463
- """
464
- actual_keys = []
465
- for key in keys:
466
- if isinstance(key, Key):
467
- actual_keys.append(key.value)
468
- elif isinstance(key, str):
469
- # Try to convert to enum if it matches a known key
470
- key_or_enum = Key.from_string(key)
471
- actual_keys.append(key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum)
472
- else:
473
- raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
474
-
475
- await self._send_command("hotkey", {"keys": actual_keys})
476
-
477
- # Scrolling Actions
478
- async def scroll(self, x: int, y: int) -> None:
479
- await self._send_command("scroll", {"x": x, "y": y})
480
-
481
- async def scroll_down(self, clicks: int = 1) -> None:
482
- await self._send_command("scroll_down", {"clicks": clicks})
483
-
484
- async def scroll_up(self, clicks: int = 1) -> None:
485
- await self._send_command("scroll_up", {"clicks": clicks})
486
-
487
- # Screen Actions
488
- async def screenshot(
489
- self,
490
- boxes: Optional[List[Tuple[int, int, int, int]]] = None,
491
- box_color: str = "#FF0000",
492
- box_thickness: int = 2,
493
- scale_factor: float = 1.0,
494
- ) -> bytes:
495
- """Take a screenshot with optional box drawing and scaling.
496
-
497
- Args:
498
- boxes: Optional list of (x, y, width, height) tuples defining boxes to draw in screen coordinates
499
- box_color: Color of the boxes in hex format (default: "#FF0000" red)
500
- box_thickness: Thickness of the box borders in pixels (default: 2)
501
- scale_factor: Factor to scale the final image by (default: 1.0)
502
- Use > 1.0 to enlarge, < 1.0 to shrink (e.g., 0.5 for half size, 2.0 for double)
503
-
504
- Returns:
505
- bytes: The screenshot image data, optionally with boxes drawn on it and scaled
506
- """
507
- result = await self._send_command("screenshot")
508
- if not result.get("image_data"):
509
- raise RuntimeError("Failed to take screenshot")
510
-
511
- screenshot = decode_base64_image(result["image_data"])
512
-
513
- if boxes:
514
- # Get the natural scaling between screen and screenshot
515
- screen_size = await self.get_screen_size()
516
- screenshot_width, screenshot_height = bytes_to_image(screenshot).size
517
- width_scale = screenshot_width / screen_size["width"]
518
- height_scale = screenshot_height / screen_size["height"]
519
-
520
- # Scale box coordinates from screen space to screenshot space
521
- for box in boxes:
522
- scaled_box = (
523
- int(box[0] * width_scale), # x
524
- int(box[1] * height_scale), # y
525
- int(box[2] * width_scale), # width
526
- int(box[3] * height_scale), # height
527
- )
528
- screenshot = draw_box(
529
- screenshot,
530
- x=scaled_box[0],
531
- y=scaled_box[1],
532
- width=scaled_box[2],
533
- height=scaled_box[3],
534
- color=box_color,
535
- thickness=box_thickness,
536
- )
537
-
538
- if scale_factor != 1.0:
539
- screenshot = resize_image(screenshot, scale_factor)
540
-
541
- return screenshot
542
-
543
- async def get_screen_size(self) -> Dict[str, int]:
544
- result = await self._send_command("get_screen_size")
545
- if result["success"] and result["size"]:
546
- return result["size"]
547
- raise RuntimeError("Failed to get screen size")
548
-
549
- async def get_cursor_position(self) -> Dict[str, int]:
550
- result = await self._send_command("get_cursor_position")
551
- if result["success"] and result["position"]:
552
- return result["position"]
553
- raise RuntimeError("Failed to get cursor position")
554
-
555
- # Clipboard Actions
556
- async def copy_to_clipboard(self) -> str:
557
- result = await self._send_command("copy_to_clipboard")
558
- if result["success"] and result["content"]:
559
- return result["content"]
560
- raise RuntimeError("Failed to get clipboard content")
561
-
562
- async def set_clipboard(self, text: str) -> None:
563
- await self._send_command("set_clipboard", {"text": text})
564
-
565
- # File System Actions
566
- async def file_exists(self, path: str) -> bool:
567
- result = await self._send_command("file_exists", {"path": path})
568
- return result.get("exists", False)
569
-
570
- async def directory_exists(self, path: str) -> bool:
571
- result = await self._send_command("directory_exists", {"path": path})
572
- return result.get("exists", False)
573
-
574
- async def list_dir(self, path: str) -> list[str]:
575
- result = await self._send_command("list_dir", {"path": path})
576
- if not result.get("success", False):
577
- raise RuntimeError(result.get("error", "Failed to list directory"))
578
- return result.get("files", [])
579
-
580
- async def read_text(self, path: str) -> str:
581
- result = await self._send_command("read_text", {"path": path})
582
- if not result.get("success", False):
583
- raise RuntimeError(result.get("error", "Failed to read file"))
584
- return result.get("content", "")
585
-
586
- async def write_text(self, path: str, content: str) -> None:
587
- result = await self._send_command("write_text", {"path": path, "content": content})
588
- if not result.get("success", False):
589
- raise RuntimeError(result.get("error", "Failed to write file"))
590
-
591
- async def read_bytes(self, path: str) -> bytes:
592
- result = await self._send_command("read_bytes", {"path": path})
593
- if not result.get("success", False):
594
- raise RuntimeError(result.get("error", "Failed to read file"))
595
- content_b64 = result.get("content_b64", "")
596
- return decode_base64_image(content_b64)
597
-
598
- async def write_bytes(self, path: str, content: bytes) -> None:
599
- result = await self._send_command("write_bytes", {"path": path, "content_b64": encode_base64_image(content)})
600
- if not result.get("success", False):
601
- raise RuntimeError(result.get("error", "Failed to write file"))
602
-
603
- async def delete_file(self, path: str) -> None:
604
- result = await self._send_command("delete_file", {"path": path})
605
- if not result.get("success", False):
606
- raise RuntimeError(result.get("error", "Failed to delete file"))
607
-
608
- async def create_dir(self, path: str) -> None:
609
- result = await self._send_command("create_dir", {"path": path})
610
- if not result.get("success", False):
611
- raise RuntimeError(result.get("error", "Failed to create directory"))
612
-
613
- async def delete_dir(self, path: str) -> None:
614
- result = await self._send_command("delete_dir", {"path": path})
615
- if not result.get("success", False):
616
- raise RuntimeError(result.get("error", "Failed to delete directory"))
617
-
618
- async def run_command(self, command: str) -> CommandResult:
619
- result = await self._send_command("run_command", {"command": command})
620
- if not result.get("success", False):
621
- raise RuntimeError(result.get("error", "Failed to run command"))
622
- return CommandResult(
623
- stdout=result.get("stdout", ""),
624
- stderr=result.get("stderr", ""),
625
- returncode=result.get("return_code", 0)
626
- )
627
-
628
- # Accessibility Actions
629
- async def get_accessibility_tree(self) -> Dict[str, Any]:
630
- """Get the accessibility tree of the current screen."""
631
- result = await self._send_command("get_accessibility_tree")
632
- if not result.get("success", False):
633
- raise RuntimeError(result.get("error", "Failed to get accessibility tree"))
634
- return result
635
-
636
- async def get_active_window_bounds(self) -> Dict[str, int]:
637
- """Get the bounds of the currently active window."""
638
- result = await self._send_command("get_active_window_bounds")
639
- if result["success"] and result["bounds"]:
640
- return result["bounds"]
641
- raise RuntimeError("Failed to get active window bounds")
642
-
643
- async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]:
644
- """Convert screenshot coordinates to screen coordinates.
645
-
646
- Args:
647
- x: X coordinate in screenshot space
648
- y: Y coordinate in screenshot space
649
-
650
- Returns:
651
- tuple[float, float]: (x, y) coordinates in screen space
652
- """
653
- screen_size = await self.get_screen_size()
654
- screenshot = await self.screenshot()
655
- screenshot_img = bytes_to_image(screenshot)
656
- screenshot_width, screenshot_height = screenshot_img.size
657
-
658
- # Calculate scaling factors
659
- width_scale = screen_size["width"] / screenshot_width
660
- height_scale = screen_size["height"] / screenshot_height
661
-
662
- # Convert coordinates
663
- screen_x = x * width_scale
664
- screen_y = y * height_scale
665
-
666
- return screen_x, screen_y
667
-
668
- async def to_screenshot_coordinates(self, x: float, y: float) -> tuple[float, float]:
669
- """Convert screen coordinates to screenshot coordinates.
670
-
671
- Args:
672
- x: X coordinate in screen space
673
- y: Y coordinate in screen space
674
-
675
- Returns:
676
- tuple[float, float]: (x, y) coordinates in screenshot space
677
- """
678
- screen_size = await self.get_screen_size()
679
- screenshot = await self.screenshot()
680
- screenshot_img = bytes_to_image(screenshot)
681
- screenshot_width, screenshot_height = screenshot_img.size
682
-
683
- # Calculate scaling factors
684
- width_scale = screenshot_width / screen_size["width"]
685
- height_scale = screenshot_height / screen_size["height"]
686
-
687
- # Convert coordinates
688
- screenshot_x = x * width_scale
689
- screenshot_y = y * height_scale
690
-
691
- return screenshot_x, screenshot_y
8
+ super().__init__(ip_address, username, password, api_key, vm_name, "computer.interface.linux")