cua-computer 0.2.13__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,695 +1,12 @@
1
- import asyncio
2
- import json
3
- import time
4
- from typing import Any, Dict, List, Optional, Tuple
5
- from PIL import Image
1
+ from .generic import GenericComputerInterface
2
+ from typing import Optional
6
3
 
7
- import websockets
8
-
9
- from ..logger import Logger, LogLevel
10
- from .base import BaseComputerInterface
11
- from ..utils import decode_base64_image, encode_base64_image, bytes_to_image, draw_box, resize_image
12
- from .models import Key, KeyType, MouseButton
13
-
14
-
15
- class MacOSComputerInterface(BaseComputerInterface):
4
+ class MacOSComputerInterface(GenericComputerInterface):
16
5
  """Interface for macOS."""
17
6
 
18
7
  def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None):
19
- super().__init__(ip_address, username, password, api_key, vm_name)
20
- self._ws = None
21
- self._reconnect_task = None
22
- self._closed = False
23
- self._last_ping = 0
24
- self._ping_interval = 5 # Send ping every 5 seconds
25
- self._ping_timeout = 120 # Wait 120 seconds for pong response
26
- self._reconnect_delay = 1 # Start with 1 second delay
27
- self._max_reconnect_delay = 30 # Maximum delay between reconnection attempts
28
- self._log_connection_attempts = True # Flag to control connection attempt logging
29
- self._command_lock = asyncio.Lock() # Lock to ensure only one command at a time
30
-
31
- # Set logger name for macOS interface
32
- self.logger = Logger("computer.interface.macos", LogLevel.NORMAL)
33
-
34
- @property
35
- def ws_uri(self) -> str:
36
- """Get the WebSocket URI using the current IP address.
37
-
38
- Returns:
39
- WebSocket URI for the Computer API Server
40
- """
41
- protocol = "wss" if self.api_key else "ws"
42
- port = "8443" if self.api_key else "8000"
43
- return f"{protocol}://{self.ip_address}:{port}/ws"
44
-
45
- async def _keep_alive(self):
46
- """Keep the WebSocket connection alive with automatic reconnection."""
47
- retry_count = 0
48
- max_log_attempts = 1 # Only log the first attempt at INFO level
49
- log_interval = 500 # Then log every 500th attempt (significantly increased from 30)
50
- last_warning_time = 0
51
- min_warning_interval = 30 # Minimum seconds between connection lost warnings
52
- min_retry_delay = 0.5 # Minimum delay between connection attempts (500ms)
53
-
54
- while not self._closed:
55
- try:
56
- if self._ws is None or (
57
- self._ws and self._ws.state == websockets.protocol.State.CLOSED
58
- ):
59
- try:
60
- retry_count += 1
61
-
62
- # Add a minimum delay between connection attempts to avoid flooding
63
- if retry_count > 1:
64
- await asyncio.sleep(min_retry_delay)
65
-
66
- # Only log the first attempt at INFO level, then every Nth attempt
67
- if retry_count == 1:
68
- self.logger.info(f"Attempting WebSocket connection to {self.ws_uri}")
69
- elif retry_count % log_interval == 0:
70
- self.logger.info(
71
- f"Still attempting WebSocket connection (attempt {retry_count})..."
72
- )
73
- else:
74
- # All other attempts are logged at DEBUG level
75
- self.logger.debug(
76
- f"Attempting WebSocket connection to {self.ws_uri} (attempt {retry_count})"
77
- )
78
-
79
- self._ws = await asyncio.wait_for(
80
- websockets.connect(
81
- self.ws_uri,
82
- max_size=1024 * 1024 * 10, # 10MB limit
83
- max_queue=32,
84
- ping_interval=self._ping_interval,
85
- ping_timeout=self._ping_timeout,
86
- close_timeout=5,
87
- compression=None, # Disable compression to reduce overhead
88
- ),
89
- timeout=120,
90
- )
91
- self.logger.info("WebSocket connection established")
92
-
93
- # If api_key and vm_name are provided, perform authentication handshake
94
- if self.api_key and self.vm_name:
95
- self.logger.info("Performing authentication handshake...")
96
- auth_message = {
97
- "command": "authenticate",
98
- "params": {
99
- "api_key": self.api_key,
100
- "container_name": self.vm_name
101
- }
102
- }
103
- await self._ws.send(json.dumps(auth_message))
104
-
105
- # Wait for authentication response
106
- auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10)
107
- auth_result = json.loads(auth_response)
108
-
109
- if not auth_result.get("success"):
110
- error_msg = auth_result.get("error", "Authentication failed")
111
- self.logger.error(f"Authentication failed: {error_msg}")
112
- await self._ws.close()
113
- self._ws = None
114
- raise ConnectionError(f"Authentication failed: {error_msg}")
115
-
116
- self.logger.info("Authentication successful")
117
-
118
- self._reconnect_delay = 1 # Reset reconnect delay on successful connection
119
- self._last_ping = time.time()
120
- retry_count = 0 # Reset retry count on successful connection
121
- except (asyncio.TimeoutError, websockets.exceptions.WebSocketException) as e:
122
- next_retry = self._reconnect_delay
123
-
124
- # Only log the first error at WARNING level, then every Nth attempt
125
- if retry_count == 1:
126
- self.logger.warning(
127
- f"Computer API Server not ready yet. Will retry automatically."
128
- )
129
- elif retry_count % log_interval == 0:
130
- self.logger.warning(
131
- f"Still waiting for Computer API Server (attempt {retry_count})..."
132
- )
133
- else:
134
- # All other errors are logged at DEBUG level
135
- self.logger.debug(f"Connection attempt {retry_count} failed: {e}")
136
-
137
- if self._ws:
138
- try:
139
- await self._ws.close()
140
- except:
141
- pass
142
- self._ws = None
143
-
144
- # Use exponential backoff for connection retries
145
- await asyncio.sleep(self._reconnect_delay)
146
- self._reconnect_delay = min(
147
- self._reconnect_delay * 2, self._max_reconnect_delay
148
- )
149
- continue
150
-
151
- # Regular ping to check connection
152
- if self._ws and self._ws.state == websockets.protocol.State.OPEN:
153
- try:
154
- if time.time() - self._last_ping >= self._ping_interval:
155
- pong_waiter = await self._ws.ping()
156
- await asyncio.wait_for(pong_waiter, timeout=self._ping_timeout)
157
- self._last_ping = time.time()
158
- except Exception as e:
159
- self.logger.debug(f"Ping failed: {e}")
160
- if self._ws:
161
- try:
162
- await self._ws.close()
163
- except:
164
- pass
165
- self._ws = None
166
- continue
167
-
168
- await asyncio.sleep(1)
169
-
170
- except Exception as e:
171
- current_time = time.time()
172
- # Only log connection lost warnings at most once every min_warning_interval seconds
173
- if current_time - last_warning_time >= min_warning_interval:
174
- self.logger.warning(
175
- f"Computer API Server connection lost. Will retry automatically."
176
- )
177
- last_warning_time = current_time
178
- else:
179
- # Log at debug level instead
180
- self.logger.debug(f"Connection lost: {e}")
181
-
182
- if self._ws:
183
- try:
184
- await self._ws.close()
185
- except:
186
- pass
187
- self._ws = None
188
-
189
- async def _ensure_connection(self):
190
- """Ensure WebSocket connection is established."""
191
- if self._reconnect_task is None or self._reconnect_task.done():
192
- self._reconnect_task = asyncio.create_task(self._keep_alive())
193
-
194
- retry_count = 0
195
- max_retries = 5
196
-
197
- while retry_count < max_retries:
198
- try:
199
- if self._ws and self._ws.state == websockets.protocol.State.OPEN:
200
- return
201
- retry_count += 1
202
- await asyncio.sleep(1)
203
- except Exception as e:
204
- # Only log at ERROR level for the last retry attempt
205
- if retry_count == max_retries - 1:
206
- self.logger.error(
207
- f"Persistent connection check error after {retry_count} attempts: {e}"
208
- )
209
- else:
210
- self.logger.debug(f"Connection check error (attempt {retry_count}): {e}")
211
- retry_count += 1
212
- await asyncio.sleep(1)
213
- continue
214
-
215
- raise ConnectionError("Failed to establish WebSocket connection after multiple retries")
216
-
217
- async def _send_command(self, command: str, params: Optional[Dict] = None) -> Dict[str, Any]:
218
- """Send command through WebSocket."""
219
- max_retries = 3
220
- retry_count = 0
221
- last_error = None
222
-
223
- # Acquire lock to ensure only one command is processed at a time
224
- async with self._command_lock:
225
- self.logger.debug(f"Acquired lock for command: {command}")
226
- while retry_count < max_retries:
227
- try:
228
- await self._ensure_connection()
229
- if not self._ws:
230
- raise ConnectionError("WebSocket connection is not established")
231
-
232
- message = {"command": command, "params": params or {}}
233
- await self._ws.send(json.dumps(message))
234
- response = await asyncio.wait_for(self._ws.recv(), timeout=120)
235
- self.logger.debug(f"Completed command: {command}")
236
- return json.loads(response)
237
- except Exception as e:
238
- last_error = e
239
- retry_count += 1
240
- if retry_count < max_retries:
241
- # Only log at debug level for intermediate retries
242
- self.logger.debug(
243
- f"Command '{command}' failed (attempt {retry_count}/{max_retries}): {e}"
244
- )
245
- await asyncio.sleep(1)
246
- continue
247
- else:
248
- # Only log at error level for the final failure
249
- self.logger.error(
250
- f"Failed to send command '{command}' after {max_retries} retries"
251
- )
252
- self.logger.debug(f"Command failure details: {e}")
253
- raise
254
-
255
- raise last_error if last_error else RuntimeError("Failed to send command")
256
-
257
- async def wait_for_ready(self, timeout: int = 60, interval: float = 1.0):
258
- """Wait for WebSocket connection to become available."""
259
- start_time = time.time()
260
- last_error = None
261
- attempt_count = 0
262
- progress_interval = 10 # Log progress every 10 seconds
263
- last_progress_time = start_time
264
-
265
- # Disable detailed logging for connection attempts
266
- self._log_connection_attempts = False
267
-
268
- try:
269
- self.logger.info(
270
- f"Waiting for Computer API Server to be ready (timeout: {timeout}s)..."
271
- )
272
-
273
- # Start the keep-alive task if it's not already running
274
- if self._reconnect_task is None or self._reconnect_task.done():
275
- self._reconnect_task = asyncio.create_task(self._keep_alive())
276
-
277
- # Wait for the connection to be established
278
- while time.time() - start_time < timeout:
279
- try:
280
- attempt_count += 1
281
- current_time = time.time()
282
-
283
- # Log progress periodically without flooding logs
284
- if current_time - last_progress_time >= progress_interval:
285
- elapsed = current_time - start_time
286
- self.logger.info(
287
- f"Still waiting for Computer API Server... (elapsed: {elapsed:.1f}s, attempts: {attempt_count})"
288
- )
289
- last_progress_time = current_time
290
-
291
- # Check if we have a connection
292
- if self._ws and self._ws.state == websockets.protocol.State.OPEN:
293
- # Test the connection with a simple command
294
- try:
295
- await self._send_command("get_screen_size")
296
- elapsed = time.time() - start_time
297
- self.logger.info(
298
- f"Computer API Server is ready (after {elapsed:.1f}s, {attempt_count} attempts)"
299
- )
300
- return # Connection is fully working
301
- except Exception as e:
302
- last_error = e
303
- self.logger.debug(f"Connection test failed: {e}")
304
-
305
- # Wait before trying again
306
- await asyncio.sleep(interval)
307
-
308
- except Exception as e:
309
- last_error = e
310
- self.logger.debug(f"Connection attempt {attempt_count} failed: {e}")
311
- await asyncio.sleep(interval)
312
-
313
- # If we get here, we've timed out
314
- error_msg = f"Could not connect to {self.ip_address} after {timeout} seconds"
315
- if last_error:
316
- error_msg += f": {str(last_error)}"
317
- self.logger.error(error_msg)
318
- raise TimeoutError(error_msg)
319
- finally:
320
- # Reset to default logging behavior
321
- self._log_connection_attempts = False
322
-
323
- def close(self):
324
- """Close WebSocket connection.
325
-
326
- Note: In host computer server mode, we leave the connection open
327
- to allow other clients to connect to the same server. The server
328
- will handle cleaning up idle connections.
329
- """
330
- # Only cancel the reconnect task
331
- if self._reconnect_task:
332
- self._reconnect_task.cancel()
333
-
334
- # Don't set closed flag or close websocket by default
335
- # This allows the server to stay connected for other clients
336
- # self._closed = True
337
- # if self._ws:
338
- # asyncio.create_task(self._ws.close())
339
- # self._ws = None
340
-
341
- def force_close(self):
342
- """Force close the WebSocket connection.
343
-
344
- This method should be called when you want to completely
345
- shut down the connection, not just for regular cleanup.
346
- """
347
- self._closed = True
348
- if self._reconnect_task:
349
- self._reconnect_task.cancel()
350
- if self._ws:
351
- asyncio.create_task(self._ws.close())
352
- self._ws = None
8
+ super().__init__(ip_address, username, password, api_key, vm_name, "computer.interface.macos")
353
9
 
354
10
  async def diorama_cmd(self, action: str, arguments: Optional[dict] = None) -> dict:
355
11
  """Send a diorama command to the server (macOS only)."""
356
- return await self._send_command("diorama_cmd", {"action": action, "arguments": arguments or {}})
357
-
358
- # Mouse Actions
359
- async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
360
- await self._send_command("mouse_down", {"x": x, "y": y, "button": button})
361
-
362
- async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
363
- await self._send_command("mouse_up", {"x": x, "y": y, "button": button})
364
-
365
- async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
366
- await self._send_command("left_click", {"x": x, "y": y})
367
-
368
- async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
369
- await self._send_command("right_click", {"x": x, "y": y})
370
-
371
- async def double_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
372
- await self._send_command("double_click", {"x": x, "y": y})
373
-
374
- async def move_cursor(self, x: int, y: int) -> None:
375
- await self._send_command("move_cursor", {"x": x, "y": y})
376
-
377
- async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5) -> None:
378
- await self._send_command(
379
- "drag_to", {"x": x, "y": y, "button": button, "duration": duration}
380
- )
381
-
382
- async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> None:
383
- await self._send_command(
384
- "drag", {"path": path, "button": button, "duration": duration}
385
- )
386
-
387
- # Keyboard Actions
388
- async def key_down(self, key: "KeyType") -> None:
389
- await self._send_command("key_down", {"key": key})
390
-
391
- async def key_up(self, key: "KeyType") -> None:
392
- await self._send_command("key_up", {"key": key})
393
-
394
- async def type_text(self, text: str) -> None:
395
- # Temporary fix for https://github.com/trycua/cua/issues/165
396
- # Check if text contains Unicode characters
397
- if any(ord(char) > 127 for char in text):
398
- # For Unicode text, use clipboard and paste
399
- await self.set_clipboard(text)
400
- await self.hotkey(Key.COMMAND, 'v')
401
- else:
402
- # For ASCII text, use the regular typing method
403
- await self._send_command("type_text", {"text": text})
404
-
405
- async def press(self, key: "KeyType") -> None:
406
- """Press a single key.
407
-
408
- Args:
409
- key: The key to press. Can be any of:
410
- - A Key enum value (recommended), e.g. Key.PAGE_DOWN
411
- - A direct key value string, e.g. 'pagedown'
412
- - A single character string, e.g. 'a'
413
-
414
- Examples:
415
- ```python
416
- # Using enum (recommended)
417
- await interface.press(Key.PAGE_DOWN)
418
- await interface.press(Key.ENTER)
419
-
420
- # Using direct values
421
- await interface.press('pagedown')
422
- await interface.press('enter')
423
-
424
- # Using single characters
425
- await interface.press('a')
426
- ```
427
-
428
- Raises:
429
- ValueError: If the key type is invalid or the key is not recognized
430
- """
431
- if isinstance(key, Key):
432
- actual_key = key.value
433
- elif isinstance(key, str):
434
- # Try to convert to enum if it matches a known key
435
- key_or_enum = Key.from_string(key)
436
- actual_key = key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum
437
- else:
438
- raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
439
-
440
- await self._send_command("press_key", {"key": actual_key})
441
-
442
- async def press_key(self, key: "KeyType") -> None:
443
- """DEPRECATED: Use press() instead.
444
-
445
- This method is kept for backward compatibility but will be removed in a future version.
446
- Please use the press() method instead.
447
- """
448
- await self.press(key)
449
-
450
- async def hotkey(self, *keys: "KeyType") -> None:
451
- """Press multiple keys simultaneously.
452
-
453
- Args:
454
- *keys: Multiple keys to press simultaneously. Each key can be any of:
455
- - A Key enum value (recommended), e.g. Key.COMMAND
456
- - A direct key value string, e.g. 'command'
457
- - A single character string, e.g. 'a'
458
-
459
- Examples:
460
- ```python
461
- # Using enums (recommended)
462
- await interface.hotkey(Key.COMMAND, Key.C) # Copy
463
- await interface.hotkey(Key.COMMAND, Key.V) # Paste
464
-
465
- # Using mixed formats
466
- await interface.hotkey(Key.COMMAND, 'a') # Select all
467
- ```
468
-
469
- Raises:
470
- ValueError: If any key type is invalid or not recognized
471
- """
472
- actual_keys = []
473
- for key in keys:
474
- if isinstance(key, Key):
475
- actual_keys.append(key.value)
476
- elif isinstance(key, str):
477
- # Try to convert to enum if it matches a known key
478
- key_or_enum = Key.from_string(key)
479
- actual_keys.append(key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum)
480
- else:
481
- raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
482
-
483
- await self._send_command("hotkey", {"keys": actual_keys})
484
-
485
- # Scrolling Actions
486
- async def scroll(self, x: int, y: int) -> None:
487
- await self._send_command("scroll", {"x": x, "y": y})
488
-
489
- async def scroll_down(self, clicks: int = 1) -> None:
490
- await self._send_command("scroll_down", {"clicks": clicks})
491
-
492
- async def scroll_up(self, clicks: int = 1) -> None:
493
- await self._send_command("scroll_up", {"clicks": clicks})
494
-
495
- # Screen Actions
496
- async def screenshot(
497
- self,
498
- boxes: Optional[List[Tuple[int, int, int, int]]] = None,
499
- box_color: str = "#FF0000",
500
- box_thickness: int = 2,
501
- scale_factor: float = 1.0,
502
- ) -> bytes:
503
- """Take a screenshot with optional box drawing and scaling.
504
-
505
- Args:
506
- boxes: Optional list of (x, y, width, height) tuples defining boxes to draw in screen coordinates
507
- box_color: Color of the boxes in hex format (default: "#FF0000" red)
508
- box_thickness: Thickness of the box borders in pixels (default: 2)
509
- scale_factor: Factor to scale the final image by (default: 1.0)
510
- Use > 1.0 to enlarge, < 1.0 to shrink (e.g., 0.5 for half size, 2.0 for double)
511
-
512
- Returns:
513
- bytes: The screenshot image data, optionally with boxes drawn on it and scaled
514
- """
515
- result = await self._send_command("screenshot")
516
- if not result.get("image_data"):
517
- raise RuntimeError("Failed to take screenshot")
518
-
519
- screenshot = decode_base64_image(result["image_data"])
520
-
521
- if boxes:
522
- # Get the natural scaling between screen and screenshot
523
- screen_size = await self.get_screen_size()
524
- screenshot_width, screenshot_height = bytes_to_image(screenshot).size
525
- width_scale = screenshot_width / screen_size["width"]
526
- height_scale = screenshot_height / screen_size["height"]
527
-
528
- # Scale box coordinates from screen space to screenshot space
529
- for box in boxes:
530
- scaled_box = (
531
- int(box[0] * width_scale), # x
532
- int(box[1] * height_scale), # y
533
- int(box[2] * width_scale), # width
534
- int(box[3] * height_scale), # height
535
- )
536
- screenshot = draw_box(
537
- screenshot,
538
- x=scaled_box[0],
539
- y=scaled_box[1],
540
- width=scaled_box[2],
541
- height=scaled_box[3],
542
- color=box_color,
543
- thickness=box_thickness,
544
- )
545
-
546
- if scale_factor != 1.0:
547
- screenshot = resize_image(screenshot, scale_factor)
548
-
549
- return screenshot
550
-
551
- async def get_screen_size(self) -> Dict[str, int]:
552
- result = await self._send_command("get_screen_size")
553
- if result["success"] and result["size"]:
554
- return result["size"]
555
- raise RuntimeError("Failed to get screen size")
556
-
557
- async def get_cursor_position(self) -> Dict[str, int]:
558
- result = await self._send_command("get_cursor_position")
559
- if result["success"] and result["position"]:
560
- return result["position"]
561
- raise RuntimeError("Failed to get cursor position")
562
-
563
- # Clipboard Actions
564
- async def copy_to_clipboard(self) -> str:
565
- result = await self._send_command("copy_to_clipboard")
566
- if result["success"] and result["content"]:
567
- return result["content"]
568
- raise RuntimeError("Failed to get clipboard content")
569
-
570
- async def set_clipboard(self, text: str) -> None:
571
- await self._send_command("set_clipboard", {"text": text})
572
-
573
- # File System Actions
574
- async def file_exists(self, path: str) -> bool:
575
- result = await self._send_command("file_exists", {"path": path})
576
- return result.get("exists", False)
577
-
578
- async def directory_exists(self, path: str) -> bool:
579
- result = await self._send_command("directory_exists", {"path": path})
580
- return result.get("exists", False)
581
-
582
- async def list_dir(self, path: str) -> list[str]:
583
- result = await self._send_command("list_dir", {"path": path})
584
- if not result.get("success", False):
585
- raise RuntimeError(result.get("error", "Failed to list directory"))
586
- return result.get("files", [])
587
-
588
- async def read_text(self, path: str) -> str:
589
- result = await self._send_command("read_text", {"path": path})
590
- if not result.get("success", False):
591
- raise RuntimeError(result.get("error", "Failed to read file"))
592
- return result.get("content", "")
593
-
594
- async def write_text(self, path: str, content: str) -> None:
595
- result = await self._send_command("write_text", {"path": path, "content": content})
596
- if not result.get("success", False):
597
- raise RuntimeError(result.get("error", "Failed to write file"))
598
-
599
- async def read_bytes(self, path: str) -> bytes:
600
- result = await self._send_command("read_bytes", {"path": path})
601
- if not result.get("success", False):
602
- raise RuntimeError(result.get("error", "Failed to read file"))
603
- content_b64 = result.get("content_b64", "")
604
- return decode_base64_image(content_b64)
605
-
606
- async def write_bytes(self, path: str, content: bytes) -> None:
607
- result = await self._send_command("write_bytes", {"path": path, "content_b64": encode_base64_image(content)})
608
- if not result.get("success", False):
609
- raise RuntimeError(result.get("error", "Failed to write file"))
610
-
611
- async def delete_file(self, path: str) -> None:
612
- result = await self._send_command("delete_file", {"path": path})
613
- if not result.get("success", False):
614
- raise RuntimeError(result.get("error", "Failed to delete file"))
615
-
616
- async def create_dir(self, path: str) -> None:
617
- result = await self._send_command("create_dir", {"path": path})
618
- if not result.get("success", False):
619
- raise RuntimeError(result.get("error", "Failed to create directory"))
620
-
621
- async def delete_dir(self, path: str) -> None:
622
- result = await self._send_command("delete_dir", {"path": path})
623
- if not result.get("success", False):
624
- raise RuntimeError(result.get("error", "Failed to delete directory"))
625
-
626
- async def run_command(self, command: str) -> Tuple[str, str]:
627
- result = await self._send_command("run_command", {"command": command})
628
- if not result.get("success", False):
629
- raise RuntimeError(result.get("error", "Failed to run command"))
630
- return result.get("stdout", ""), result.get("stderr", "")
631
-
632
- # Accessibility Actions
633
- async def get_accessibility_tree(self) -> Dict[str, Any]:
634
- """Get the accessibility tree of the current screen."""
635
- result = await self._send_command("get_accessibility_tree")
636
- if not result.get("success", False):
637
- raise RuntimeError(result.get("error", "Failed to get accessibility tree"))
638
- return result
639
-
640
- async def get_active_window_bounds(self) -> Dict[str, int]:
641
- """Get the bounds of the currently active window."""
642
- result = await self._send_command("get_active_window_bounds")
643
- if result["success"] and result["bounds"]:
644
- return result["bounds"]
645
- raise RuntimeError("Failed to get active window bounds")
646
-
647
- async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]:
648
- """Convert screenshot coordinates to screen coordinates.
649
-
650
- Args:
651
- x: X coordinate in screenshot space
652
- y: Y coordinate in screenshot space
653
-
654
- Returns:
655
- tuple[float, float]: (x, y) coordinates in screen space
656
- """
657
- screen_size = await self.get_screen_size()
658
- screenshot = await self.screenshot()
659
- screenshot_img = bytes_to_image(screenshot)
660
- screenshot_width, screenshot_height = screenshot_img.size
661
-
662
- # Calculate scaling factors
663
- width_scale = screen_size["width"] / screenshot_width
664
- height_scale = screen_size["height"] / screenshot_height
665
-
666
- # Convert coordinates
667
- screen_x = x * width_scale
668
- screen_y = y * height_scale
669
-
670
- return screen_x, screen_y
671
-
672
- async def to_screenshot_coordinates(self, x: float, y: float) -> tuple[float, float]:
673
- """Convert screen coordinates to screenshot coordinates.
674
-
675
- Args:
676
- x: X coordinate in screen space
677
- y: Y coordinate in screen space
678
-
679
- Returns:
680
- tuple[float, float]: (x, y) coordinates in screenshot space
681
- """
682
- screen_size = await self.get_screen_size()
683
- screenshot = await self.screenshot()
684
- screenshot_img = bytes_to_image(screenshot)
685
- screenshot_width, screenshot_height = screenshot_img.size
686
-
687
- # Calculate scaling factors
688
- width_scale = screenshot_width / screen_size["width"]
689
- height_scale = screenshot_height / screen_size["height"]
690
-
691
- # Convert coordinates
692
- screenshot_x = x * width_scale
693
- screenshot_y = y * height_scale
694
-
695
- return screenshot_x, screenshot_y
12
+ return await self._send_command("diorama_cmd", {"action": action, "arguments": arguments or {}})