cua-computer 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,785 @@
1
+ import asyncio
2
+ import json
3
+ import time
4
+ from typing import Any, Dict, List, Optional, Tuple
5
+ from PIL import Image
6
+
7
+ import websockets
8
+
9
+ from ..logger import Logger, LogLevel
10
+ from .base import BaseComputerInterface
11
+ from ..utils import decode_base64_image, encode_base64_image, bytes_to_image, draw_box, resize_image
12
+ from .models import Key, KeyType, MouseButton, CommandResult
13
+
14
+
15
+ class GenericComputerInterface(BaseComputerInterface):
16
+ """Generic interface with common functionality for all supported platforms (Windows, Linux, macOS)."""
17
+
18
def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None, logger_name: str = "computer.interface.generic"):
    """Set up connection state for the generic computer interface.

    Args:
        ip_address: Address used to build the WebSocket URI.
        username: Forwarded to the base interface (default: "lume").
        password: Forwarded to the base interface (default: "lume").
        api_key: Optional API key; when set, ``ws_uri`` uses wss on port 8443.
        vm_name: Optional VM name; sent as ``container_name`` during the
            authentication handshake when an API key is also set.
        logger_name: Name given to this interface's logger.
    """
    super().__init__(ip_address, username, password, api_key, vm_name)

    # Logger dedicated to this interface
    self.logger = Logger(logger_name, LogLevel.NORMAL)

    # Socket handle, background task and lifecycle flags
    self._ws = None
    self._reconnect_task = None
    self._closed = False
    self._authenticated = False  # Track authentication status
    self._log_connection_attempts = True  # Flag to control connection attempt logging

    # Ping settings (seconds)
    self._last_ping = 0
    self._ping_interval = 5  # Send ping every 5 seconds
    self._ping_timeout = 120  # Wait 120 seconds for pong response

    # Reconnect backoff (seconds)
    self._reconnect_delay = 1  # Start with 1 second delay
    self._max_reconnect_delay = 30  # Maximum delay between reconnection attempts

    # Serialises commands so only one is in flight at a time
    self._command_lock = asyncio.Lock()
34
+
35
@property
def ws_uri(self) -> str:
    """Build the WebSocket URI from the current IP address.

    Returns:
        WebSocket URI for the Computer API Server: ``wss`` on port 8443 when
        an API key is configured, otherwise ``ws`` on port 8000.
    """
    if self.api_key:
        scheme, port = "wss", "8443"
    else:
        scheme, port = "ws", "8000"
    return f"{scheme}://{self.ip_address}:{port}/ws"
45
+
46
+ # Mouse actions
47
async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> None:
    """Send a ``mouse_down`` command.

    Args:
        x: Optional X coordinate, forwarded as-is (may be None).
        y: Optional Y coordinate, forwarded as-is (may be None).
        button: Mouse button name (default: "left").
    """
    payload = {"x": x, "y": y, "button": button}
    await self._send_command("mouse_down", payload)
49
+
50
async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> None:
    """Send a ``mouse_up`` command.

    Args:
        x: Optional X coordinate, forwarded as-is (may be None).
        y: Optional Y coordinate, forwarded as-is (may be None).
        button: Mouse button name (default: "left").
    """
    payload = {"x": x, "y": y, "button": button}
    await self._send_command("mouse_up", payload)
52
+
53
async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
    """Send a ``left_click`` command with the given (optional) coordinates."""
    target = {"x": x, "y": y}
    await self._send_command("left_click", target)
55
+
56
async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
    """Send a ``right_click`` command with the given (optional) coordinates."""
    target = {"x": x, "y": y}
    await self._send_command("right_click", target)
58
+
59
async def double_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
    """Send a ``double_click`` command with the given (optional) coordinates."""
    target = {"x": x, "y": y}
    await self._send_command("double_click", target)
61
+
62
async def move_cursor(self, x: int, y: int) -> None:
    """Send a ``move_cursor`` command for the point (x, y)."""
    destination = {"x": x, "y": y}
    await self._send_command("move_cursor", destination)
64
+
65
async def drag_to(self, x: int, y: int, button: "MouseButton" = "left", duration: float = 0.5) -> None:
    """Send a ``drag_to`` command.

    Args:
        x: Destination X coordinate.
        y: Destination Y coordinate.
        button: Mouse button to use (default: "left").
        duration: Duration value forwarded with the command (default: 0.5).
    """
    payload = {"x": x, "y": y, "button": button, "duration": duration}
    await self._send_command("drag_to", payload)
69
+
70
async def drag(self, path: List[Tuple[int, int]], button: "MouseButton" = "left", duration: float = 0.5) -> None:
    """Send a ``drag`` command along a list of points.

    Args:
        path: Sequence of (x, y) points, forwarded unchanged.
        button: Mouse button to use (default: "left").
        duration: Duration value forwarded with the command (default: 0.5).
    """
    payload = {"path": path, "button": button, "duration": duration}
    await self._send_command("drag", payload)
74
+
75
+ # Keyboard Actions
76
async def key_down(self, key: "KeyType") -> None:
    """Send a ``key_down`` command for ``key`` (forwarded unchanged)."""
    payload = {"key": key}
    await self._send_command("key_down", payload)
78
+
79
async def key_up(self, key: "KeyType") -> None:
    """Send a ``key_up`` command for ``key`` (forwarded unchanged)."""
    payload = {"key": key}
    await self._send_command("key_up", payload)
81
+
82
async def type_text(self, text: str) -> None:
    """Type text, routing non-ASCII input through the clipboard.

    Temporary fix for https://github.com/trycua/cua/issues/165: text that
    contains any character above code point 127 is placed on the clipboard
    and pasted with Command+V; pure-ASCII text (including the empty string)
    is sent with the regular ``type_text`` command.

    Args:
        text: The text to type.
    """
    if text.isascii():
        # ASCII text goes through the regular typing command
        await self._send_command("type_text", {"text": text})
        return

    # Unicode text: copy to clipboard, then paste
    await self.set_clipboard(text)
    await self.hotkey(Key.COMMAND, 'v')
92
+
93
async def press(self, key: "KeyType") -> None:
    """Press a single key.

    Args:
        key: The key to press. Can be any of:
            - A Key enum value (recommended), e.g. Key.PAGE_DOWN
            - A direct key value string, e.g. 'pagedown'
            - A single character string, e.g. 'a'

    Examples:
        ```python
        # Using enum (recommended)
        await interface.press(Key.PAGE_DOWN)
        await interface.press(Key.ENTER)

        # Using direct values
        await interface.press('pagedown')
        await interface.press('enter')

        # Using single characters
        await interface.press('a')
        ```

    Raises:
        ValueError: If ``key`` is neither a Key enum member nor a string.
    """
    if isinstance(key, Key):
        # Enum members carry the wire value directly
        await self._send_command("press_key", {"key": key.value})
        return

    if not isinstance(key, str):
        raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")

    # Strings are passed through Key.from_string; if that yields an enum
    # member its value is used, otherwise whatever it returned is sent.
    candidate = Key.from_string(key)
    if isinstance(candidate, Key):
        resolved = candidate.value
    else:
        resolved = candidate

    await self._send_command("press_key", {"key": resolved})
129
+
130
async def press_key(self, key: "KeyType") -> None:
    """DEPRECATED: Use press() instead.

    Kept only for backward compatibility and slated for removal in a future
    version; it simply delegates to ``press()``.
    """
    return await self.press(key)
137
+
138
async def hotkey(self, *keys: "KeyType") -> None:
    """Press multiple keys simultaneously.

    Args:
        *keys: Multiple keys to press simultaneously. Each key can be any of:
            - A Key enum value (recommended), e.g. Key.COMMAND
            - A direct key value string, e.g. 'command'
            - A single character string, e.g. 'a'

    Examples:
        ```python
        # Using enums (recommended)
        await interface.hotkey(Key.COMMAND, Key.C)  # Copy
        await interface.hotkey(Key.COMMAND, Key.V)  # Paste

        # Using mixed formats
        await interface.hotkey(Key.COMMAND, 'a')  # Select all
        ```

    Raises:
        ValueError: If any key is neither a Key enum member nor a string.
    """
    resolved = []
    for item in keys:
        if isinstance(item, Key):
            resolved.append(item.value)
            continue

        if not isinstance(item, str):
            raise ValueError(f"Invalid key type: {type(item)}. Must be Key enum or string.")

        # Strings go through Key.from_string; use the enum's value when one
        # comes back, otherwise send what it returned.
        candidate = Key.from_string(item)
        resolved.append(candidate.value if isinstance(candidate, Key) else candidate)

    await self._send_command("hotkey", {"keys": resolved})
172
+
173
+ # Scrolling Actions
174
async def scroll(self, x: int, y: int) -> None:
    """Send a ``scroll`` command with the given x and y values."""
    payload = {"x": x, "y": y}
    await self._send_command("scroll", payload)
176
+
177
async def scroll_down(self, clicks: int = 1) -> None:
    """Send a ``scroll_down`` command for ``clicks`` clicks (default: 1)."""
    payload = {"clicks": clicks}
    await self._send_command("scroll_down", payload)
179
+
180
async def scroll_up(self, clicks: int = 1) -> None:
    """Send a ``scroll_up`` command for ``clicks`` clicks (default: 1)."""
    payload = {"clicks": clicks}
    await self._send_command("scroll_up", payload)
182
+
183
+ # Screen actions
184
async def screenshot(
    self,
    boxes: Optional[List[Tuple[int, int, int, int]]] = None,
    box_color: str = "#FF0000",
    box_thickness: int = 2,
    scale_factor: float = 1.0,
) -> bytes:
    """Take a screenshot with optional box drawing and scaling.

    Args:
        boxes: Optional list of (x, y, width, height) tuples defining boxes to draw in screen coordinates
        box_color: Color of the boxes in hex format (default: "#FF0000" red)
        box_thickness: Thickness of the box borders in pixels (default: 2)
        scale_factor: Factor to scale the final image by (default: 1.0)
            Use > 1.0 to enlarge, < 1.0 to shrink (e.g., 0.5 for half size, 2.0 for double)

    Returns:
        bytes: The screenshot image data, optionally with boxes drawn on it and scaled

    Raises:
        RuntimeError: If the reply carries no ``image_data``.
    """
    reply = await self._send_command("screenshot")
    encoded = reply.get("image_data")
    if not encoded:
        raise RuntimeError("Failed to take screenshot")

    image_bytes = decode_base64_image(encoded)

    if boxes:
        # Ratio between the screenshot's pixel size and the reported screen size
        screen_size = await self.get_screen_size()
        shot_width, shot_height = bytes_to_image(image_bytes).size
        x_ratio = shot_width / screen_size["width"]
        y_ratio = shot_height / screen_size["height"]

        # Map each box from screen space to screenshot space, then draw it
        for box in boxes:
            image_bytes = draw_box(
                image_bytes,
                x=int(box[0] * x_ratio),
                y=int(box[1] * y_ratio),
                width=int(box[2] * x_ratio),
                height=int(box[3] * y_ratio),
                color=box_color,
                thickness=box_thickness,
            )

    if scale_factor == 1.0:
        return image_bytes
    return resize_image(image_bytes, scale_factor)
238
+
239
async def get_screen_size(self) -> Dict[str, int]:
    """Ask the server for the screen size.

    Returns:
        The ``size`` entry of the reply (indexed with "width" and "height"
        elsewhere in this class).

    Raises:
        RuntimeError: If the reply is not successful or has no size. The
            server's ``error`` text is used when present.
    """
    result = await self._send_command("get_screen_size")
    # Use .get() so a malformed reply becomes a RuntimeError rather than a KeyError
    size = result.get("size")
    if result.get("success") and size:
        return size
    raise RuntimeError(result.get("error", "Failed to get screen size"))
244
+
245
async def get_cursor_position(self) -> Dict[str, int]:
    """Ask the server for the current cursor position.

    Returns:
        The ``position`` entry of the reply.

    Raises:
        RuntimeError: If the reply is not successful or has no position. The
            server's ``error`` text is used when present.
    """
    result = await self._send_command("get_cursor_position")
    # Use .get() so a malformed reply becomes a RuntimeError rather than a KeyError
    position = result.get("position")
    if result.get("success") and position:
        return position
    raise RuntimeError(result.get("error", "Failed to get cursor position"))
250
+
251
+ # Clipboard Actions
252
async def copy_to_clipboard(self) -> str:
    """Fetch the clipboard content from the server.

    Returns:
        The ``content`` entry of the reply. An empty string is a valid
        result (empty clipboard) and is returned as-is.

    Raises:
        RuntimeError: If the reply is not successful or carries no content.
            The server's ``error`` text is used when present.
    """
    result = await self._send_command("copy_to_clipboard")
    content = result.get("content")
    # Compare against None, not truthiness: "" means an empty clipboard
    if result.get("success") and content is not None:
        return content
    raise RuntimeError(result.get("error", "Failed to get clipboard content"))
257
+
258
async def set_clipboard(self, text: str) -> None:
    """Send a ``set_clipboard`` command carrying ``text``."""
    payload = {"text": text}
    await self._send_command("set_clipboard", payload)
260
+
261
+ # File Operations
262
+ async def _write_bytes_chunked(self, path: str, content: bytes, append: bool = False, chunk_size: int = 1024 * 1024) -> None:
263
+ """Write large files in chunks to avoid memory issues."""
264
+ total_size = len(content)
265
+ current_offset = 0
266
+
267
+ while current_offset < total_size:
268
+ chunk_end = min(current_offset + chunk_size, total_size)
269
+ chunk_data = content[current_offset:chunk_end]
270
+
271
+ # First chunk uses the original append flag, subsequent chunks always append
272
+ chunk_append = append if current_offset == 0 else True
273
+
274
+ result = await self._send_command("write_bytes", {
275
+ "path": path,
276
+ "content_b64": encode_base64_image(chunk_data),
277
+ "append": chunk_append
278
+ })
279
+
280
+ if not result.get("success", False):
281
+ raise RuntimeError(result.get("error", "Failed to write file chunk"))
282
+
283
+ current_offset = chunk_end
284
+
285
async def write_bytes(self, path: str, content: bytes, append: bool = False) -> None:
    """Write bytes to a file on the server.

    Content larger than 5MB is delegated to ``_write_bytes_chunked``;
    anything else is sent in a single ``write_bytes`` command.

    Args:
        path: Destination path.
        content: Bytes to write.
        append: Whether to append to the file instead of overwriting.

    Raises:
        RuntimeError: If the server reports a failure.
    """
    chunk_threshold = 5 * 1024 * 1024  # 5MB threshold

    if len(content) <= chunk_threshold:
        payload = {"path": path, "content_b64": encode_base64_image(content), "append": append}
        result = await self._send_command("write_bytes", payload)
        if not result.get("success", False):
            raise RuntimeError(result.get("error", "Failed to write file"))
        return

    # Large payloads are written chunk by chunk
    await self._write_bytes_chunked(path, content, append)
294
+
295
+ async def _read_bytes_chunked(self, path: str, offset: int, total_length: int, chunk_size: int = 1024 * 1024) -> bytes:
296
+ """Read large files in chunks to avoid memory issues."""
297
+ chunks = []
298
+ current_offset = offset
299
+ remaining = total_length
300
+
301
+ while remaining > 0:
302
+ read_size = min(chunk_size, remaining)
303
+ result = await self._send_command("read_bytes", {
304
+ "path": path,
305
+ "offset": current_offset,
306
+ "length": read_size
307
+ })
308
+
309
+ if not result.get("success", False):
310
+ raise RuntimeError(result.get("error", "Failed to read file chunk"))
311
+
312
+ content_b64 = result.get("content_b64", "")
313
+ chunk_data = decode_base64_image(content_b64)
314
+ chunks.append(chunk_data)
315
+
316
+ current_offset += read_size
317
+ remaining -= read_size
318
+
319
+ return b''.join(chunks)
320
+
321
async def read_bytes(self, path: str, offset: int = 0, length: Optional[int] = None) -> bytes:
    """Read bytes from a file on the server.

    When ``length`` is omitted the file size is looked up first, and files
    larger than 5MB are read through ``_read_bytes_chunked``. Otherwise a
    single ``read_bytes`` command is sent.

    Args:
        path: File path on the server.
        offset: Position of the first byte to read (default: 0).
        length: Number of bytes to read, or None for no explicit length.

    Returns:
        The decoded file content.

    Raises:
        RuntimeError: If the server reports a failure.
    """
    if length is None:
        # No explicit length: check the file size to decide on chunking
        file_size = await self.get_file_size(path)
        if file_size > 5 * 1024 * 1024:  # 5MB threshold
            span = file_size if offset <= 0 else file_size - offset
            return await self._read_bytes_chunked(path, offset, span)

    request = {"path": path, "offset": offset, "length": length}
    result = await self._send_command("read_bytes", request)
    if not result.get("success", False):
        raise RuntimeError(result.get("error", "Failed to read file"))
    return decode_base64_image(result.get("content_b64", ""))
339
+
340
async def read_text(self, path: str, encoding: str = 'utf-8') -> str:
    """Read a file and decode it as text.

    Args:
        path: Path to the file to read
        encoding: Text encoding to use (default: 'utf-8')

    Returns:
        str: The decoded text content of the file
    """
    raw = await self.read_bytes(path)
    return raw.decode(encoding)
352
+
353
async def write_text(self, path: str, content: str, encoding: str = 'utf-8', append: bool = False) -> None:
    """Encode text and write it to a file.

    Args:
        path: Path to the file to write
        content: Text content to write
        encoding: Text encoding to use (default: 'utf-8')
        append: Whether to append to the file instead of overwriting
    """
    encoded = content.encode(encoding)
    await self.write_bytes(path, encoded, append)
364
+
365
async def get_file_size(self, path: str) -> int:
    """Ask the server for the size of a file.

    Args:
        path: File path on the server.

    Returns:
        The ``size`` entry of the reply, or 0 when the reply has none.

    Raises:
        RuntimeError: If the server reports a failure.
    """
    reply = await self._send_command("get_file_size", {"path": path})
    if reply.get("success", False):
        return reply.get("size", 0)
    raise RuntimeError(reply.get("error", "Failed to get file size"))
370
+
371
async def file_exists(self, path: str) -> bool:
    """Return the ``exists`` entry of a ``file_exists`` reply (False when absent)."""
    reply = await self._send_command("file_exists", {"path": path})
    exists = reply.get("exists", False)
    return exists
374
+
375
async def directory_exists(self, path: str) -> bool:
    """Return the ``exists`` entry of a ``directory_exists`` reply (False when absent)."""
    reply = await self._send_command("directory_exists", {"path": path})
    exists = reply.get("exists", False)
    return exists
378
+
379
async def create_dir(self, path: str) -> None:
    """Send a ``create_dir`` command for ``path``.

    Raises:
        RuntimeError: If the server reports a failure.
    """
    reply = await self._send_command("create_dir", {"path": path})
    if reply.get("success", False):
        return
    raise RuntimeError(reply.get("error", "Failed to create directory"))
383
+
384
async def delete_file(self, path: str) -> None:
    """Send a ``delete_file`` command for ``path``.

    Raises:
        RuntimeError: If the server reports a failure.
    """
    reply = await self._send_command("delete_file", {"path": path})
    if reply.get("success", False):
        return
    raise RuntimeError(reply.get("error", "Failed to delete file"))
388
+
389
async def delete_dir(self, path: str) -> None:
    """Send a ``delete_dir`` command for ``path``.

    Raises:
        RuntimeError: If the server reports a failure.
    """
    reply = await self._send_command("delete_dir", {"path": path})
    if reply.get("success", False):
        return
    raise RuntimeError(reply.get("error", "Failed to delete directory"))
393
+
394
async def list_dir(self, path: str) -> list[str]:
    """List a directory on the server.

    Returns:
        The ``files`` entry of the reply, or an empty list when absent.

    Raises:
        RuntimeError: If the server reports a failure.
    """
    reply = await self._send_command("list_dir", {"path": path})
    if reply.get("success", False):
        return reply.get("files", [])
    raise RuntimeError(reply.get("error", "Failed to list directory"))
399
+
400
+ # Command execution
401
async def run_command(self, command: str) -> CommandResult:
    """Run a command on the server and wrap its output.

    Args:
        command: Command string forwarded in the ``run_command`` request.

    Returns:
        CommandResult built from the reply's ``stdout``, ``stderr`` and
        ``return_code`` entries (defaults: "", "" and 0).

    Raises:
        RuntimeError: If the server reports a failure.
    """
    reply = await self._send_command("run_command", {"command": command})
    if not reply.get("success", False):
        raise RuntimeError(reply.get("error", "Failed to run command"))

    stdout = reply.get("stdout", "")
    stderr = reply.get("stderr", "")
    returncode = reply.get("return_code", 0)
    return CommandResult(stdout=stdout, stderr=stderr, returncode=returncode)
410
+
411
+ # Accessibility Actions
412
async def get_accessibility_tree(self) -> Dict[str, Any]:
    """Get the accessibility tree of the current screen.

    Returns:
        The full reply dictionary of the ``get_accessibility_tree`` command.

    Raises:
        RuntimeError: If the server reports a failure.
    """
    reply = await self._send_command("get_accessibility_tree")
    if reply.get("success", False):
        return reply
    raise RuntimeError(reply.get("error", "Failed to get accessibility tree"))
418
+
419
async def get_active_window_bounds(self) -> Dict[str, int]:
    """Get the bounds of the currently active window.

    Returns:
        The ``bounds`` entry of the reply.

    Raises:
        RuntimeError: If the reply is not successful or has no bounds. The
            server's ``error`` text is used when present.
    """
    result = await self._send_command("get_active_window_bounds")
    # Use .get() so a malformed reply becomes a RuntimeError rather than a KeyError
    bounds = result.get("bounds")
    if result.get("success") and bounds:
        return bounds
    raise RuntimeError(result.get("error", "Failed to get active window bounds"))
425
+
426
async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]:
    """Convert screenshot coordinates to screen coordinates.

    Fetches the screen size and takes a fresh screenshot on every call to
    work out the ratio between the two.

    Args:
        x: X coordinate in screenshot space
        y: Y coordinate in screenshot space

    Returns:
        tuple[float, float]: (x, y) coordinates in screen space
    """
    screen_size = await self.get_screen_size()
    shot_width, shot_height = bytes_to_image(await self.screenshot()).size

    # Screen pixels per screenshot pixel along each axis
    x_ratio = screen_size["width"] / shot_width
    y_ratio = screen_size["height"] / shot_height

    return x * x_ratio, y * y_ratio
450
+
451
async def to_screenshot_coordinates(self, x: float, y: float) -> tuple[float, float]:
    """Convert screen coordinates to screenshot coordinates.

    Fetches the screen size and takes a fresh screenshot on every call to
    work out the ratio between the two.

    Args:
        x: X coordinate in screen space
        y: Y coordinate in screen space

    Returns:
        tuple[float, float]: (x, y) coordinates in screenshot space
    """
    screen_size = await self.get_screen_size()
    shot_width, shot_height = bytes_to_image(await self.screenshot()).size

    # Screenshot pixels per screen pixel along each axis
    x_ratio = shot_width / screen_size["width"]
    y_ratio = shot_height / screen_size["height"]

    return x * x_ratio, y * y_ratio
475
+
476
+ # Websocket Methods
477
+ async def _keep_alive(self):
478
+ """Keep the WebSocket connection alive with automatic reconnection."""
479
+ retry_count = 0
480
+ max_log_attempts = 1 # Only log the first attempt at INFO level
481
+ log_interval = 500 # Then log every 500th attempt (significantly increased from 30)
482
+ last_warning_time = 0
483
+ min_warning_interval = 30 # Minimum seconds between connection lost warnings
484
+ min_retry_delay = 0.5 # Minimum delay between connection attempts (500ms)
485
+
486
+ while not self._closed:
487
+ try:
488
+ if self._ws is None or (
489
+ self._ws and self._ws.state == websockets.protocol.State.CLOSED
490
+ ):
491
+ try:
492
+ retry_count += 1
493
+
494
+ # Add a minimum delay between connection attempts to avoid flooding
495
+ if retry_count > 1:
496
+ await asyncio.sleep(min_retry_delay)
497
+
498
+ # Only log the first attempt at INFO level, then every Nth attempt
499
+ if retry_count == 1:
500
+ self.logger.info(f"Attempting WebSocket connection to {self.ws_uri}")
501
+ elif retry_count % log_interval == 0:
502
+ self.logger.info(
503
+ f"Still attempting WebSocket connection (attempt {retry_count})..."
504
+ )
505
+ else:
506
+ # All other attempts are logged at DEBUG level
507
+ self.logger.debug(
508
+ f"Attempting WebSocket connection to {self.ws_uri} (attempt {retry_count})"
509
+ )
510
+
511
+ self._ws = await asyncio.wait_for(
512
+ websockets.connect(
513
+ self.ws_uri,
514
+ max_size=1024 * 1024 * 10, # 10MB limit
515
+ max_queue=32,
516
+ ping_interval=self._ping_interval,
517
+ ping_timeout=self._ping_timeout,
518
+ close_timeout=5,
519
+ compression=None, # Disable compression to reduce overhead
520
+ ),
521
+ timeout=120,
522
+ )
523
+ self.logger.info("WebSocket connection established")
524
+
525
+ # If api_key and vm_name are provided, perform authentication handshake
526
+ if self.api_key and self.vm_name:
527
+ self.logger.info("Performing authentication handshake...")
528
+ auth_message = {
529
+ "command": "authenticate",
530
+ "params": {
531
+ "api_key": self.api_key,
532
+ "container_name": self.vm_name
533
+ }
534
+ }
535
+ await self._ws.send(json.dumps(auth_message))
536
+
537
+ # Wait for authentication response
538
+ auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10)
539
+ auth_result = json.loads(auth_response)
540
+
541
+ if not auth_result.get("success"):
542
+ error_msg = auth_result.get("error", "Authentication failed")
543
+ self.logger.error(f"Authentication failed: {error_msg}")
544
+ await self._ws.close()
545
+ self._ws = None
546
+ raise ConnectionError(f"Authentication failed: {error_msg}")
547
+
548
+ self.logger.info("Authentication successful")
549
+
550
+ self._reconnect_delay = 1 # Reset reconnect delay on successful connection
551
+ self._last_ping = time.time()
552
+ retry_count = 0 # Reset retry count on successful connection
553
+ except (asyncio.TimeoutError, websockets.exceptions.WebSocketException) as e:
554
+ next_retry = self._reconnect_delay
555
+
556
+ # Only log the first error at WARNING level, then every Nth attempt
557
+ if retry_count == 1:
558
+ self.logger.warning(
559
+ f"Computer API Server not ready yet. Will retry automatically."
560
+ )
561
+ elif retry_count % log_interval == 0:
562
+ self.logger.warning(
563
+ f"Still waiting for Computer API Server (attempt {retry_count})..."
564
+ )
565
+ else:
566
+ # All other errors are logged at DEBUG level
567
+ self.logger.debug(f"Connection attempt {retry_count} failed: {e}")
568
+
569
+ if self._ws:
570
+ try:
571
+ await self._ws.close()
572
+ except:
573
+ pass
574
+ self._ws = None
575
+
576
+ # Use exponential backoff for connection retries
577
+ await asyncio.sleep(self._reconnect_delay)
578
+ self._reconnect_delay = min(
579
+ self._reconnect_delay * 2, self._max_reconnect_delay
580
+ )
581
+ continue
582
+
583
+ # Regular ping to check connection
584
+ if self._ws and self._ws.state == websockets.protocol.State.OPEN:
585
+ try:
586
+ if time.time() - self._last_ping >= self._ping_interval:
587
+ pong_waiter = await self._ws.ping()
588
+ await asyncio.wait_for(pong_waiter, timeout=self._ping_timeout)
589
+ self._last_ping = time.time()
590
+ except Exception as e:
591
+ self.logger.debug(f"Ping failed: {e}")
592
+ if self._ws:
593
+ try:
594
+ await self._ws.close()
595
+ except:
596
+ pass
597
+ self._ws = None
598
+ continue
599
+
600
+ await asyncio.sleep(1)
601
+
602
+ except Exception as e:
603
+ current_time = time.time()
604
+ # Only log connection lost warnings at most once every min_warning_interval seconds
605
+ if current_time - last_warning_time >= min_warning_interval:
606
+ self.logger.warning(
607
+ f"Computer API Server connection lost. Will retry automatically."
608
+ )
609
+ last_warning_time = current_time
610
+ else:
611
+ # Log at debug level instead
612
+ self.logger.debug(f"Connection lost: {e}")
613
+
614
+ if self._ws:
615
+ try:
616
+ await self._ws.close()
617
+ except:
618
+ pass
619
+ self._ws = None
620
+
621
async def _ensure_connection(self):
    """Ensure WebSocket connection is established.

    Starts the keep-alive task when none is running, then checks up to five
    times, one second apart, for an open socket.

    Raises:
        ConnectionError: If no open socket is seen within the retries.
    """
    task = self._reconnect_task
    if task is None or task.done():
        self._reconnect_task = asyncio.create_task(self._keep_alive())

    max_retries = 5
    attempts = 0

    while attempts < max_retries:
        try:
            if self._ws and self._ws.state == websockets.protocol.State.OPEN:
                return
            attempts += 1
            await asyncio.sleep(1)
        except Exception as e:
            # ERROR level only on the last retry attempt; DEBUG otherwise
            is_last_attempt = attempts == max_retries - 1
            if is_last_attempt:
                self.logger.error(
                    f"Persistent connection check error after {attempts} attempts: {e}"
                )
            else:
                self.logger.debug(f"Connection check error (attempt {attempts}): {e}")
            attempts += 1
            await asyncio.sleep(1)

    raise ConnectionError("Failed to establish WebSocket connection after multiple retries")
648
+
649
+ async def _send_command(self, command: str, params: Optional[Dict] = None) -> Dict[str, Any]:
650
+ """Send command through WebSocket."""
651
+ max_retries = 3
652
+ retry_count = 0
653
+ last_error = None
654
+
655
+ # Acquire lock to ensure only one command is processed at a time
656
+ async with self._command_lock:
657
+ self.logger.debug(f"Acquired lock for command: {command}")
658
+ while retry_count < max_retries:
659
+ try:
660
+ await self._ensure_connection()
661
+ if not self._ws:
662
+ raise ConnectionError("WebSocket connection is not established")
663
+
664
+ message = {"command": command, "params": params or {}}
665
+ await self._ws.send(json.dumps(message))
666
+ response = await asyncio.wait_for(self._ws.recv(), timeout=120)
667
+ self.logger.debug(f"Completed command: {command}")
668
+ return json.loads(response)
669
+ except Exception as e:
670
+ last_error = e
671
+ retry_count += 1
672
+ if retry_count < max_retries:
673
+ # Only log at debug level for intermediate retries
674
+ self.logger.debug(
675
+ f"Command '{command}' failed (attempt {retry_count}/{max_retries}): {e}"
676
+ )
677
+ await asyncio.sleep(1)
678
+ continue
679
+ else:
680
+ # Only log at error level for the final failure
681
+ self.logger.error(
682
+ f"Failed to send command '{command}' after {max_retries} retries"
683
+ )
684
+ self.logger.debug(f"Command failure details: {e}")
685
+ raise
686
+
687
+ raise last_error if last_error else RuntimeError("Failed to send command")
688
+
689
async def wait_for_ready(self, timeout: int = 60, interval: float = 1.0):
    """Wait for WebSocket connection to become available.

    Starts the keep-alive task if it is not running, then polls until the
    socket is open and a ``get_screen_size`` command succeeds.

    Args:
        timeout: Maximum number of seconds to wait (default: 60).
        interval: Seconds to sleep between checks (default: 1.0).

    Raises:
        TimeoutError: If no working connection is available within
            ``timeout`` seconds. The message includes the last error seen.
    """
    start_time = time.time()
    last_error = None
    attempt_count = 0
    progress_interval = 10  # Log progress every 10 seconds
    last_progress_time = start_time

    # Disable detailed logging for connection attempts
    self._log_connection_attempts = False

    try:
        self.logger.info(
            f"Waiting for Computer API Server to be ready (timeout: {timeout}s)..."
        )

        # Start the keep-alive task if it's not already running
        if self._reconnect_task is None or self._reconnect_task.done():
            self._reconnect_task = asyncio.create_task(self._keep_alive())

        # Wait for the connection to be established
        while time.time() - start_time < timeout:
            try:
                attempt_count += 1
                current_time = time.time()

                # Log progress periodically without flooding logs
                if current_time - last_progress_time >= progress_interval:
                    elapsed = current_time - start_time
                    self.logger.info(
                        f"Still waiting for Computer API Server... (elapsed: {elapsed:.1f}s, attempts: {attempt_count})"
                    )
                    last_progress_time = current_time

                # Check if we have a connection
                if self._ws and self._ws.state == websockets.protocol.State.OPEN:
                    # Test the connection with a simple command
                    try:
                        await self._send_command("get_screen_size")
                        elapsed = time.time() - start_time
                        self.logger.info(
                            f"Computer API Server is ready (after {elapsed:.1f}s, {attempt_count} attempts)"
                        )
                        return  # Connection is fully working
                    except Exception as e:
                        last_error = e
                        self.logger.debug(f"Connection test failed: {e}")

                # Wait before trying again
                await asyncio.sleep(interval)

            except Exception as e:
                last_error = e
                self.logger.debug(f"Connection attempt {attempt_count} failed: {e}")
                await asyncio.sleep(interval)

        # If we get here, we've timed out
        error_msg = f"Could not connect to {self.ip_address} after {timeout} seconds"
        if last_error:
            error_msg += f": {str(last_error)}"
        self.logger.error(error_msg)
        raise TimeoutError(error_msg)
    finally:
        # Reset to default logging behavior: __init__ sets this flag to
        # True, so restore True rather than leaving it disabled.
        self._log_connection_attempts = True
754
+
755
def close(self):
    """Close WebSocket connection.

    Note: In host computer server mode, we leave the connection open
    to allow other clients to connect to the same server. The server
    will handle cleaning up idle connections.
    """
    # Only the reconnect task is cancelled here. The closed flag is not set
    # and the websocket is not closed, which allows the server to stay
    # connected for other clients; use force_close() for a full shutdown.
    task = self._reconnect_task
    if task:
        task.cancel()
772
+
773
def force_close(self):
    """Force close the WebSocket connection.

    This method should be called when you want to completely
    shut down the connection, not just for regular cleanup.
    It sets the closed flag, cancels the reconnect task and schedules the
    socket's ``close()`` as a task before dropping the reference.
    """
    self._closed = True

    task = self._reconnect_task
    if task:
        task.cancel()

    socket = self._ws
    if socket:
        # Scheduled as a task because this method is synchronous
        asyncio.create_task(socket.close())
        self._ws = None
785
+