cua-computer 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
@@ -1,698 +1,12 @@
1
- import asyncio
2
- import json
3
- import time
4
- from typing import Any, Dict, List, Optional, Tuple
5
- from PIL import Image
1
+ from .generic import GenericComputerInterface
2
+ from typing import Optional
6
3
 
7
- import websockets
8
-
9
- from ..logger import Logger, LogLevel
10
- from .base import BaseComputerInterface
11
- from ..utils import decode_base64_image, encode_base64_image, bytes_to_image, draw_box, resize_image
12
- from .models import Key, KeyType, MouseButton, CommandResult
13
-
14
- class MacOSComputerInterface(BaseComputerInterface):
4
+ class MacOSComputerInterface(GenericComputerInterface):
15
5
  """Interface for macOS."""
16
6
 
17
7
  def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None):
18
- super().__init__(ip_address, username, password, api_key, vm_name)
19
- self._ws = None
20
- self._reconnect_task = None
21
- self._closed = False
22
- self._last_ping = 0
23
- self._ping_interval = 5 # Send ping every 5 seconds
24
- self._ping_timeout = 120 # Wait 120 seconds for pong response
25
- self._reconnect_delay = 1 # Start with 1 second delay
26
- self._max_reconnect_delay = 30 # Maximum delay between reconnection attempts
27
- self._log_connection_attempts = True # Flag to control connection attempt logging
28
- self._command_lock = asyncio.Lock() # Lock to ensure only one command at a time
29
-
30
- # Set logger name for macOS interface
31
- self.logger = Logger("computer.interface.macos", LogLevel.NORMAL)
32
-
33
- @property
34
- def ws_uri(self) -> str:
35
- """Get the WebSocket URI using the current IP address.
36
-
37
- Returns:
38
- WebSocket URI for the Computer API Server
39
- """
40
- protocol = "wss" if self.api_key else "ws"
41
- port = "8443" if self.api_key else "8000"
42
- return f"{protocol}://{self.ip_address}:{port}/ws"
43
-
44
- async def _keep_alive(self):
45
- """Keep the WebSocket connection alive with automatic reconnection."""
46
- retry_count = 0
47
- max_log_attempts = 1 # Only log the first attempt at INFO level
48
- log_interval = 500 # Then log every 500th attempt (significantly increased from 30)
49
- last_warning_time = 0
50
- min_warning_interval = 30 # Minimum seconds between connection lost warnings
51
- min_retry_delay = 0.5 # Minimum delay between connection attempts (500ms)
52
-
53
- while not self._closed:
54
- try:
55
- if self._ws is None or (
56
- self._ws and self._ws.state == websockets.protocol.State.CLOSED
57
- ):
58
- try:
59
- retry_count += 1
60
-
61
- # Add a minimum delay between connection attempts to avoid flooding
62
- if retry_count > 1:
63
- await asyncio.sleep(min_retry_delay)
64
-
65
- # Only log the first attempt at INFO level, then every Nth attempt
66
- if retry_count == 1:
67
- self.logger.info(f"Attempting WebSocket connection to {self.ws_uri}")
68
- elif retry_count % log_interval == 0:
69
- self.logger.info(
70
- f"Still attempting WebSocket connection (attempt {retry_count})..."
71
- )
72
- else:
73
- # All other attempts are logged at DEBUG level
74
- self.logger.debug(
75
- f"Attempting WebSocket connection to {self.ws_uri} (attempt {retry_count})"
76
- )
77
-
78
- self._ws = await asyncio.wait_for(
79
- websockets.connect(
80
- self.ws_uri,
81
- max_size=1024 * 1024 * 10, # 10MB limit
82
- max_queue=32,
83
- ping_interval=self._ping_interval,
84
- ping_timeout=self._ping_timeout,
85
- close_timeout=5,
86
- compression=None, # Disable compression to reduce overhead
87
- ),
88
- timeout=120,
89
- )
90
- self.logger.info("WebSocket connection established")
91
-
92
- # If api_key and vm_name are provided, perform authentication handshake
93
- if self.api_key and self.vm_name:
94
- self.logger.info("Performing authentication handshake...")
95
- auth_message = {
96
- "command": "authenticate",
97
- "params": {
98
- "api_key": self.api_key,
99
- "container_name": self.vm_name
100
- }
101
- }
102
- await self._ws.send(json.dumps(auth_message))
103
-
104
- # Wait for authentication response
105
- auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10)
106
- auth_result = json.loads(auth_response)
107
-
108
- if not auth_result.get("success"):
109
- error_msg = auth_result.get("error", "Authentication failed")
110
- self.logger.error(f"Authentication failed: {error_msg}")
111
- await self._ws.close()
112
- self._ws = None
113
- raise ConnectionError(f"Authentication failed: {error_msg}")
114
-
115
- self.logger.info("Authentication successful")
116
-
117
- self._reconnect_delay = 1 # Reset reconnect delay on successful connection
118
- self._last_ping = time.time()
119
- retry_count = 0 # Reset retry count on successful connection
120
- except (asyncio.TimeoutError, websockets.exceptions.WebSocketException) as e:
121
- next_retry = self._reconnect_delay
122
-
123
- # Only log the first error at WARNING level, then every Nth attempt
124
- if retry_count == 1:
125
- self.logger.warning(
126
- f"Computer API Server not ready yet. Will retry automatically."
127
- )
128
- elif retry_count % log_interval == 0:
129
- self.logger.warning(
130
- f"Still waiting for Computer API Server (attempt {retry_count})..."
131
- )
132
- else:
133
- # All other errors are logged at DEBUG level
134
- self.logger.debug(f"Connection attempt {retry_count} failed: {e}")
135
-
136
- if self._ws:
137
- try:
138
- await self._ws.close()
139
- except:
140
- pass
141
- self._ws = None
142
-
143
- # Use exponential backoff for connection retries
144
- await asyncio.sleep(self._reconnect_delay)
145
- self._reconnect_delay = min(
146
- self._reconnect_delay * 2, self._max_reconnect_delay
147
- )
148
- continue
149
-
150
- # Regular ping to check connection
151
- if self._ws and self._ws.state == websockets.protocol.State.OPEN:
152
- try:
153
- if time.time() - self._last_ping >= self._ping_interval:
154
- pong_waiter = await self._ws.ping()
155
- await asyncio.wait_for(pong_waiter, timeout=self._ping_timeout)
156
- self._last_ping = time.time()
157
- except Exception as e:
158
- self.logger.debug(f"Ping failed: {e}")
159
- if self._ws:
160
- try:
161
- await self._ws.close()
162
- except:
163
- pass
164
- self._ws = None
165
- continue
166
-
167
- await asyncio.sleep(1)
168
-
169
- except Exception as e:
170
- current_time = time.time()
171
- # Only log connection lost warnings at most once every min_warning_interval seconds
172
- if current_time - last_warning_time >= min_warning_interval:
173
- self.logger.warning(
174
- f"Computer API Server connection lost. Will retry automatically."
175
- )
176
- last_warning_time = current_time
177
- else:
178
- # Log at debug level instead
179
- self.logger.debug(f"Connection lost: {e}")
180
-
181
- if self._ws:
182
- try:
183
- await self._ws.close()
184
- except:
185
- pass
186
- self._ws = None
187
-
188
- async def _ensure_connection(self):
189
- """Ensure WebSocket connection is established."""
190
- if self._reconnect_task is None or self._reconnect_task.done():
191
- self._reconnect_task = asyncio.create_task(self._keep_alive())
192
-
193
- retry_count = 0
194
- max_retries = 5
195
-
196
- while retry_count < max_retries:
197
- try:
198
- if self._ws and self._ws.state == websockets.protocol.State.OPEN:
199
- return
200
- retry_count += 1
201
- await asyncio.sleep(1)
202
- except Exception as e:
203
- # Only log at ERROR level for the last retry attempt
204
- if retry_count == max_retries - 1:
205
- self.logger.error(
206
- f"Persistent connection check error after {retry_count} attempts: {e}"
207
- )
208
- else:
209
- self.logger.debug(f"Connection check error (attempt {retry_count}): {e}")
210
- retry_count += 1
211
- await asyncio.sleep(1)
212
- continue
213
-
214
- raise ConnectionError("Failed to establish WebSocket connection after multiple retries")
215
-
216
- async def _send_command(self, command: str, params: Optional[Dict] = None) -> Dict[str, Any]:
217
- """Send command through WebSocket."""
218
- max_retries = 3
219
- retry_count = 0
220
- last_error = None
221
-
222
- # Acquire lock to ensure only one command is processed at a time
223
- async with self._command_lock:
224
- self.logger.debug(f"Acquired lock for command: {command}")
225
- while retry_count < max_retries:
226
- try:
227
- await self._ensure_connection()
228
- if not self._ws:
229
- raise ConnectionError("WebSocket connection is not established")
230
-
231
- message = {"command": command, "params": params or {}}
232
- await self._ws.send(json.dumps(message))
233
- response = await asyncio.wait_for(self._ws.recv(), timeout=120)
234
- self.logger.debug(f"Completed command: {command}")
235
- return json.loads(response)
236
- except Exception as e:
237
- last_error = e
238
- retry_count += 1
239
- if retry_count < max_retries:
240
- # Only log at debug level for intermediate retries
241
- self.logger.debug(
242
- f"Command '{command}' failed (attempt {retry_count}/{max_retries}): {e}"
243
- )
244
- await asyncio.sleep(1)
245
- continue
246
- else:
247
- # Only log at error level for the final failure
248
- self.logger.error(
249
- f"Failed to send command '{command}' after {max_retries} retries"
250
- )
251
- self.logger.debug(f"Command failure details: {e}")
252
- raise
253
-
254
- raise last_error if last_error else RuntimeError("Failed to send command")
255
-
256
- async def wait_for_ready(self, timeout: int = 60, interval: float = 1.0):
257
- """Wait for WebSocket connection to become available."""
258
- start_time = time.time()
259
- last_error = None
260
- attempt_count = 0
261
- progress_interval = 10 # Log progress every 10 seconds
262
- last_progress_time = start_time
263
-
264
- # Disable detailed logging for connection attempts
265
- self._log_connection_attempts = False
266
-
267
- try:
268
- self.logger.info(
269
- f"Waiting for Computer API Server to be ready (timeout: {timeout}s)..."
270
- )
271
-
272
- # Start the keep-alive task if it's not already running
273
- if self._reconnect_task is None or self._reconnect_task.done():
274
- self._reconnect_task = asyncio.create_task(self._keep_alive())
275
-
276
- # Wait for the connection to be established
277
- while time.time() - start_time < timeout:
278
- try:
279
- attempt_count += 1
280
- current_time = time.time()
281
-
282
- # Log progress periodically without flooding logs
283
- if current_time - last_progress_time >= progress_interval:
284
- elapsed = current_time - start_time
285
- self.logger.info(
286
- f"Still waiting for Computer API Server... (elapsed: {elapsed:.1f}s, attempts: {attempt_count})"
287
- )
288
- last_progress_time = current_time
289
-
290
- # Check if we have a connection
291
- if self._ws and self._ws.state == websockets.protocol.State.OPEN:
292
- # Test the connection with a simple command
293
- try:
294
- await self._send_command("get_screen_size")
295
- elapsed = time.time() - start_time
296
- self.logger.info(
297
- f"Computer API Server is ready (after {elapsed:.1f}s, {attempt_count} attempts)"
298
- )
299
- return # Connection is fully working
300
- except Exception as e:
301
- last_error = e
302
- self.logger.debug(f"Connection test failed: {e}")
303
-
304
- # Wait before trying again
305
- await asyncio.sleep(interval)
306
-
307
- except Exception as e:
308
- last_error = e
309
- self.logger.debug(f"Connection attempt {attempt_count} failed: {e}")
310
- await asyncio.sleep(interval)
311
-
312
- # If we get here, we've timed out
313
- error_msg = f"Could not connect to {self.ip_address} after {timeout} seconds"
314
- if last_error:
315
- error_msg += f": {str(last_error)}"
316
- self.logger.error(error_msg)
317
- raise TimeoutError(error_msg)
318
- finally:
319
- # Reset to default logging behavior
320
- self._log_connection_attempts = False
321
-
322
- def close(self):
323
- """Close WebSocket connection.
324
-
325
- Note: In host computer server mode, we leave the connection open
326
- to allow other clients to connect to the same server. The server
327
- will handle cleaning up idle connections.
328
- """
329
- # Only cancel the reconnect task
330
- if self._reconnect_task:
331
- self._reconnect_task.cancel()
332
-
333
- # Don't set closed flag or close websocket by default
334
- # This allows the server to stay connected for other clients
335
- # self._closed = True
336
- # if self._ws:
337
- # asyncio.create_task(self._ws.close())
338
- # self._ws = None
339
-
340
- def force_close(self):
341
- """Force close the WebSocket connection.
342
-
343
- This method should be called when you want to completely
344
- shut down the connection, not just for regular cleanup.
345
- """
346
- self._closed = True
347
- if self._reconnect_task:
348
- self._reconnect_task.cancel()
349
- if self._ws:
350
- asyncio.create_task(self._ws.close())
351
- self._ws = None
8
+ super().__init__(ip_address, username, password, api_key, vm_name, "computer.interface.macos")
352
9
 
353
10
  async def diorama_cmd(self, action: str, arguments: Optional[dict] = None) -> dict:
354
11
  """Send a diorama command to the server (macOS only)."""
355
- return await self._send_command("diorama_cmd", {"action": action, "arguments": arguments or {}})
356
-
357
- # Mouse Actions
358
- async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
359
- await self._send_command("mouse_down", {"x": x, "y": y, "button": button})
360
-
361
- async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
362
- await self._send_command("mouse_up", {"x": x, "y": y, "button": button})
363
-
364
- async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
365
- await self._send_command("left_click", {"x": x, "y": y})
366
-
367
- async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
368
- await self._send_command("right_click", {"x": x, "y": y})
369
-
370
- async def double_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
371
- await self._send_command("double_click", {"x": x, "y": y})
372
-
373
- async def move_cursor(self, x: int, y: int) -> None:
374
- await self._send_command("move_cursor", {"x": x, "y": y})
375
-
376
- async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5) -> None:
377
- await self._send_command(
378
- "drag_to", {"x": x, "y": y, "button": button, "duration": duration}
379
- )
380
-
381
- async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> None:
382
- await self._send_command(
383
- "drag", {"path": path, "button": button, "duration": duration}
384
- )
385
-
386
- # Keyboard Actions
387
- async def key_down(self, key: "KeyType") -> None:
388
- await self._send_command("key_down", {"key": key})
389
-
390
- async def key_up(self, key: "KeyType") -> None:
391
- await self._send_command("key_up", {"key": key})
392
-
393
- async def type_text(self, text: str) -> None:
394
- # Temporary fix for https://github.com/trycua/cua/issues/165
395
- # Check if text contains Unicode characters
396
- if any(ord(char) > 127 for char in text):
397
- # For Unicode text, use clipboard and paste
398
- await self.set_clipboard(text)
399
- await self.hotkey(Key.COMMAND, 'v')
400
- else:
401
- # For ASCII text, use the regular typing method
402
- await self._send_command("type_text", {"text": text})
403
-
404
- async def press(self, key: "KeyType") -> None:
405
- """Press a single key.
406
-
407
- Args:
408
- key: The key to press. Can be any of:
409
- - A Key enum value (recommended), e.g. Key.PAGE_DOWN
410
- - A direct key value string, e.g. 'pagedown'
411
- - A single character string, e.g. 'a'
412
-
413
- Examples:
414
- ```python
415
- # Using enum (recommended)
416
- await interface.press(Key.PAGE_DOWN)
417
- await interface.press(Key.ENTER)
418
-
419
- # Using direct values
420
- await interface.press('pagedown')
421
- await interface.press('enter')
422
-
423
- # Using single characters
424
- await interface.press('a')
425
- ```
426
-
427
- Raises:
428
- ValueError: If the key type is invalid or the key is not recognized
429
- """
430
- if isinstance(key, Key):
431
- actual_key = key.value
432
- elif isinstance(key, str):
433
- # Try to convert to enum if it matches a known key
434
- key_or_enum = Key.from_string(key)
435
- actual_key = key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum
436
- else:
437
- raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
438
-
439
- await self._send_command("press_key", {"key": actual_key})
440
-
441
- async def press_key(self, key: "KeyType") -> None:
442
- """DEPRECATED: Use press() instead.
443
-
444
- This method is kept for backward compatibility but will be removed in a future version.
445
- Please use the press() method instead.
446
- """
447
- await self.press(key)
448
-
449
- async def hotkey(self, *keys: "KeyType") -> None:
450
- """Press multiple keys simultaneously.
451
-
452
- Args:
453
- *keys: Multiple keys to press simultaneously. Each key can be any of:
454
- - A Key enum value (recommended), e.g. Key.COMMAND
455
- - A direct key value string, e.g. 'command'
456
- - A single character string, e.g. 'a'
457
-
458
- Examples:
459
- ```python
460
- # Using enums (recommended)
461
- await interface.hotkey(Key.COMMAND, Key.C) # Copy
462
- await interface.hotkey(Key.COMMAND, Key.V) # Paste
463
-
464
- # Using mixed formats
465
- await interface.hotkey(Key.COMMAND, 'a') # Select all
466
- ```
467
-
468
- Raises:
469
- ValueError: If any key type is invalid or not recognized
470
- """
471
- actual_keys = []
472
- for key in keys:
473
- if isinstance(key, Key):
474
- actual_keys.append(key.value)
475
- elif isinstance(key, str):
476
- # Try to convert to enum if it matches a known key
477
- key_or_enum = Key.from_string(key)
478
- actual_keys.append(key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum)
479
- else:
480
- raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
481
-
482
- await self._send_command("hotkey", {"keys": actual_keys})
483
-
484
- # Scrolling Actions
485
- async def scroll(self, x: int, y: int) -> None:
486
- await self._send_command("scroll", {"x": x, "y": y})
487
-
488
- async def scroll_down(self, clicks: int = 1) -> None:
489
- await self._send_command("scroll_down", {"clicks": clicks})
490
-
491
- async def scroll_up(self, clicks: int = 1) -> None:
492
- await self._send_command("scroll_up", {"clicks": clicks})
493
-
494
- # Screen Actions
495
- async def screenshot(
496
- self,
497
- boxes: Optional[List[Tuple[int, int, int, int]]] = None,
498
- box_color: str = "#FF0000",
499
- box_thickness: int = 2,
500
- scale_factor: float = 1.0,
501
- ) -> bytes:
502
- """Take a screenshot with optional box drawing and scaling.
503
-
504
- Args:
505
- boxes: Optional list of (x, y, width, height) tuples defining boxes to draw in screen coordinates
506
- box_color: Color of the boxes in hex format (default: "#FF0000" red)
507
- box_thickness: Thickness of the box borders in pixels (default: 2)
508
- scale_factor: Factor to scale the final image by (default: 1.0)
509
- Use > 1.0 to enlarge, < 1.0 to shrink (e.g., 0.5 for half size, 2.0 for double)
510
-
511
- Returns:
512
- bytes: The screenshot image data, optionally with boxes drawn on it and scaled
513
- """
514
- result = await self._send_command("screenshot")
515
- if not result.get("image_data"):
516
- raise RuntimeError("Failed to take screenshot")
517
-
518
- screenshot = decode_base64_image(result["image_data"])
519
-
520
- if boxes:
521
- # Get the natural scaling between screen and screenshot
522
- screen_size = await self.get_screen_size()
523
- screenshot_width, screenshot_height = bytes_to_image(screenshot).size
524
- width_scale = screenshot_width / screen_size["width"]
525
- height_scale = screenshot_height / screen_size["height"]
526
-
527
- # Scale box coordinates from screen space to screenshot space
528
- for box in boxes:
529
- scaled_box = (
530
- int(box[0] * width_scale), # x
531
- int(box[1] * height_scale), # y
532
- int(box[2] * width_scale), # width
533
- int(box[3] * height_scale), # height
534
- )
535
- screenshot = draw_box(
536
- screenshot,
537
- x=scaled_box[0],
538
- y=scaled_box[1],
539
- width=scaled_box[2],
540
- height=scaled_box[3],
541
- color=box_color,
542
- thickness=box_thickness,
543
- )
544
-
545
- if scale_factor != 1.0:
546
- screenshot = resize_image(screenshot, scale_factor)
547
-
548
- return screenshot
549
-
550
- async def get_screen_size(self) -> Dict[str, int]:
551
- result = await self._send_command("get_screen_size")
552
- if result["success"] and result["size"]:
553
- return result["size"]
554
- raise RuntimeError("Failed to get screen size")
555
-
556
- async def get_cursor_position(self) -> Dict[str, int]:
557
- result = await self._send_command("get_cursor_position")
558
- if result["success"] and result["position"]:
559
- return result["position"]
560
- raise RuntimeError("Failed to get cursor position")
561
-
562
- # Clipboard Actions
563
- async def copy_to_clipboard(self) -> str:
564
- result = await self._send_command("copy_to_clipboard")
565
- if result["success"] and result["content"]:
566
- return result["content"]
567
- raise RuntimeError("Failed to get clipboard content")
568
-
569
- async def set_clipboard(self, text: str) -> None:
570
- await self._send_command("set_clipboard", {"text": text})
571
-
572
- # File System Actions
573
- async def file_exists(self, path: str) -> bool:
574
- result = await self._send_command("file_exists", {"path": path})
575
- return result.get("exists", False)
576
-
577
- async def directory_exists(self, path: str) -> bool:
578
- result = await self._send_command("directory_exists", {"path": path})
579
- return result.get("exists", False)
580
-
581
- async def list_dir(self, path: str) -> list[str]:
582
- result = await self._send_command("list_dir", {"path": path})
583
- if not result.get("success", False):
584
- raise RuntimeError(result.get("error", "Failed to list directory"))
585
- return result.get("files", [])
586
-
587
- async def read_text(self, path: str) -> str:
588
- result = await self._send_command("read_text", {"path": path})
589
- if not result.get("success", False):
590
- raise RuntimeError(result.get("error", "Failed to read file"))
591
- return result.get("content", "")
592
-
593
- async def write_text(self, path: str, content: str) -> None:
594
- result = await self._send_command("write_text", {"path": path, "content": content})
595
- if not result.get("success", False):
596
- raise RuntimeError(result.get("error", "Failed to write file"))
597
-
598
- async def read_bytes(self, path: str) -> bytes:
599
- result = await self._send_command("read_bytes", {"path": path})
600
- if not result.get("success", False):
601
- raise RuntimeError(result.get("error", "Failed to read file"))
602
- content_b64 = result.get("content_b64", "")
603
- return decode_base64_image(content_b64)
604
-
605
- async def write_bytes(self, path: str, content: bytes) -> None:
606
- result = await self._send_command("write_bytes", {"path": path, "content_b64": encode_base64_image(content)})
607
- if not result.get("success", False):
608
- raise RuntimeError(result.get("error", "Failed to write file"))
609
-
610
- async def delete_file(self, path: str) -> None:
611
- result = await self._send_command("delete_file", {"path": path})
612
- if not result.get("success", False):
613
- raise RuntimeError(result.get("error", "Failed to delete file"))
614
-
615
- async def create_dir(self, path: str) -> None:
616
- result = await self._send_command("create_dir", {"path": path})
617
- if not result.get("success", False):
618
- raise RuntimeError(result.get("error", "Failed to create directory"))
619
-
620
- async def delete_dir(self, path: str) -> None:
621
- result = await self._send_command("delete_dir", {"path": path})
622
- if not result.get("success", False):
623
- raise RuntimeError(result.get("error", "Failed to delete directory"))
624
-
625
- async def run_command(self, command: str) -> CommandResult:
626
- result = await self._send_command("run_command", {"command": command})
627
- if not result.get("success", False):
628
- raise RuntimeError(result.get("error", "Failed to run command"))
629
- return CommandResult(
630
- stdout=result.get("stdout", ""),
631
- stderr=result.get("stderr", ""),
632
- returncode=result.get("return_code", 0)
633
- )
634
-
635
- # Accessibility Actions
636
- async def get_accessibility_tree(self) -> Dict[str, Any]:
637
- """Get the accessibility tree of the current screen."""
638
- result = await self._send_command("get_accessibility_tree")
639
- if not result.get("success", False):
640
- raise RuntimeError(result.get("error", "Failed to get accessibility tree"))
641
- return result
642
-
643
- async def get_active_window_bounds(self) -> Dict[str, int]:
644
- """Get the bounds of the currently active window."""
645
- result = await self._send_command("get_active_window_bounds")
646
- if result["success"] and result["bounds"]:
647
- return result["bounds"]
648
- raise RuntimeError("Failed to get active window bounds")
649
-
650
- async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]:
651
- """Convert screenshot coordinates to screen coordinates.
652
-
653
- Args:
654
- x: X coordinate in screenshot space
655
- y: Y coordinate in screenshot space
656
-
657
- Returns:
658
- tuple[float, float]: (x, y) coordinates in screen space
659
- """
660
- screen_size = await self.get_screen_size()
661
- screenshot = await self.screenshot()
662
- screenshot_img = bytes_to_image(screenshot)
663
- screenshot_width, screenshot_height = screenshot_img.size
664
-
665
- # Calculate scaling factors
666
- width_scale = screen_size["width"] / screenshot_width
667
- height_scale = screen_size["height"] / screenshot_height
668
-
669
- # Convert coordinates
670
- screen_x = x * width_scale
671
- screen_y = y * height_scale
672
-
673
- return screen_x, screen_y
674
-
675
- async def to_screenshot_coordinates(self, x: float, y: float) -> tuple[float, float]:
676
- """Convert screen coordinates to screenshot coordinates.
677
-
678
- Args:
679
- x: X coordinate in screen space
680
- y: Y coordinate in screen space
681
-
682
- Returns:
683
- tuple[float, float]: (x, y) coordinates in screenshot space
684
- """
685
- screen_size = await self.get_screen_size()
686
- screenshot = await self.screenshot()
687
- screenshot_img = bytes_to_image(screenshot)
688
- screenshot_width, screenshot_height = screenshot_img.size
689
-
690
- # Calculate scaling factors
691
- width_scale = screenshot_width / screen_size["width"]
692
- height_scale = screenshot_height / screen_size["height"]
693
-
694
- # Convert coordinates
695
- screenshot_x = x * width_scale
696
- screenshot_y = y * height_scale
697
-
698
- return screenshot_x, screenshot_y
12
+ return await self._send_command("diorama_cmd", {"action": action, "arguments": arguments or {}})