cua-computer 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- computer/interface/base.py +13 -2
- computer/interface/generic.py +785 -0
- computer/interface/linux.py +4 -687
- computer/interface/macos.py +5 -691
- computer/interface/windows.py +4 -686
- computer/providers/winsandbox/provider.py +22 -5
- {cua_computer-0.3.0.dist-info → cua_computer-0.3.2.dist-info}/METADATA +1 -1
- {cua_computer-0.3.0.dist-info → cua_computer-0.3.2.dist-info}/RECORD +10 -9
- {cua_computer-0.3.0.dist-info → cua_computer-0.3.2.dist-info}/WHEEL +0 -0
- {cua_computer-0.3.0.dist-info → cua_computer-0.3.2.dist-info}/entry_points.txt +0 -0
computer/interface/linux.py
CHANGED
@@ -1,691 +1,8 @@
|
|
1
|
-
import
|
2
|
-
import
|
3
|
-
import time
|
4
|
-
from typing import Any, Dict, List, Optional, Tuple
|
5
|
-
from PIL import Image
|
1
|
+
from typing import Optional
|
2
|
+
from .generic import GenericComputerInterface
|
6
3
|
|
7
|
-
|
8
|
-
|
9
|
-
from ..logger import Logger, LogLevel
|
10
|
-
from .base import BaseComputerInterface
|
11
|
-
from ..utils import decode_base64_image, encode_base64_image, bytes_to_image, draw_box, resize_image
|
12
|
-
from .models import Key, KeyType, MouseButton, CommandResult
|
13
|
-
|
14
|
-
class LinuxComputerInterface(BaseComputerInterface):
|
4
|
+
class LinuxComputerInterface(GenericComputerInterface):
|
15
5
|
"""Interface for Linux."""
|
16
6
|
|
17
7
|
def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None):
|
18
|
-
super().__init__(ip_address, username, password, api_key, vm_name)
|
19
|
-
self._ws = None
|
20
|
-
self._reconnect_task = None
|
21
|
-
self._closed = False
|
22
|
-
self._last_ping = 0
|
23
|
-
self._ping_interval = 5 # Send ping every 5 seconds
|
24
|
-
self._ping_timeout = 120 # Wait 120 seconds for pong response
|
25
|
-
self._reconnect_delay = 1 # Start with 1 second delay
|
26
|
-
self._max_reconnect_delay = 30 # Maximum delay between reconnection attempts
|
27
|
-
self._log_connection_attempts = True # Flag to control connection attempt logging
|
28
|
-
self._authenticated = False # Track authentication status
|
29
|
-
self._command_lock = asyncio.Lock() # Lock to ensure only one command at a time
|
30
|
-
|
31
|
-
# Set logger name for Linux interface
|
32
|
-
self.logger = Logger("computer.interface.linux", LogLevel.NORMAL)
|
33
|
-
|
34
|
-
@property
|
35
|
-
def ws_uri(self) -> str:
|
36
|
-
"""Get the WebSocket URI using the current IP address.
|
37
|
-
|
38
|
-
Returns:
|
39
|
-
WebSocket URI for the Computer API Server
|
40
|
-
"""
|
41
|
-
protocol = "wss" if self.api_key else "ws"
|
42
|
-
port = "8443" if self.api_key else "8000"
|
43
|
-
return f"{protocol}://{self.ip_address}:{port}/ws"
|
44
|
-
|
45
|
-
async def _keep_alive(self):
|
46
|
-
"""Keep the WebSocket connection alive with automatic reconnection."""
|
47
|
-
retry_count = 0
|
48
|
-
max_log_attempts = 1 # Only log the first attempt at INFO level
|
49
|
-
log_interval = 500 # Then log every 500th attempt (significantly increased from 30)
|
50
|
-
last_warning_time = 0
|
51
|
-
min_warning_interval = 30 # Minimum seconds between connection lost warnings
|
52
|
-
min_retry_delay = 0.5 # Minimum delay between connection attempts (500ms)
|
53
|
-
|
54
|
-
while not self._closed:
|
55
|
-
try:
|
56
|
-
if self._ws is None or (
|
57
|
-
self._ws and self._ws.state == websockets.protocol.State.CLOSED
|
58
|
-
):
|
59
|
-
try:
|
60
|
-
retry_count += 1
|
61
|
-
|
62
|
-
# Add a minimum delay between connection attempts to avoid flooding
|
63
|
-
if retry_count > 1:
|
64
|
-
await asyncio.sleep(min_retry_delay)
|
65
|
-
|
66
|
-
# Only log the first attempt at INFO level, then every Nth attempt
|
67
|
-
if retry_count == 1:
|
68
|
-
self.logger.info(f"Attempting WebSocket connection to {self.ws_uri}")
|
69
|
-
elif retry_count % log_interval == 0:
|
70
|
-
self.logger.info(
|
71
|
-
f"Still attempting WebSocket connection (attempt {retry_count})..."
|
72
|
-
)
|
73
|
-
else:
|
74
|
-
# All other attempts are logged at DEBUG level
|
75
|
-
self.logger.debug(
|
76
|
-
f"Attempting WebSocket connection to {self.ws_uri} (attempt {retry_count})"
|
77
|
-
)
|
78
|
-
|
79
|
-
self._ws = await asyncio.wait_for(
|
80
|
-
websockets.connect(
|
81
|
-
self.ws_uri,
|
82
|
-
max_size=1024 * 1024 * 10, # 10MB limit
|
83
|
-
max_queue=32,
|
84
|
-
ping_interval=self._ping_interval,
|
85
|
-
ping_timeout=self._ping_timeout,
|
86
|
-
close_timeout=5,
|
87
|
-
compression=None, # Disable compression to reduce overhead
|
88
|
-
),
|
89
|
-
timeout=120,
|
90
|
-
)
|
91
|
-
self.logger.info("WebSocket connection established")
|
92
|
-
|
93
|
-
# Authentication will be handled by the first command that needs it
|
94
|
-
# Don't do authentication here to avoid recv conflicts
|
95
|
-
|
96
|
-
self._reconnect_delay = 1 # Reset reconnect delay on successful connection
|
97
|
-
self._last_ping = time.time()
|
98
|
-
retry_count = 0 # Reset retry count on successful connection
|
99
|
-
self._authenticated = False # Reset auth status on new connection
|
100
|
-
|
101
|
-
except (asyncio.TimeoutError, websockets.exceptions.WebSocketException) as e:
|
102
|
-
next_retry = self._reconnect_delay
|
103
|
-
|
104
|
-
# Only log the first error at WARNING level, then every Nth attempt
|
105
|
-
if retry_count == 1:
|
106
|
-
self.logger.warning(
|
107
|
-
f"Computer API Server not ready yet. Will retry automatically."
|
108
|
-
)
|
109
|
-
elif retry_count % log_interval == 0:
|
110
|
-
self.logger.warning(
|
111
|
-
f"Still waiting for Computer API Server (attempt {retry_count})..."
|
112
|
-
)
|
113
|
-
else:
|
114
|
-
# All other errors are logged at DEBUG level
|
115
|
-
self.logger.debug(f"Connection attempt {retry_count} failed: {e}")
|
116
|
-
|
117
|
-
if self._ws:
|
118
|
-
try:
|
119
|
-
await self._ws.close()
|
120
|
-
except:
|
121
|
-
pass
|
122
|
-
self._ws = None
|
123
|
-
|
124
|
-
# Regular ping to check connection
|
125
|
-
if self._ws and self._ws.state == websockets.protocol.State.OPEN:
|
126
|
-
try:
|
127
|
-
if time.time() - self._last_ping >= self._ping_interval:
|
128
|
-
pong_waiter = await self._ws.ping()
|
129
|
-
await asyncio.wait_for(pong_waiter, timeout=self._ping_timeout)
|
130
|
-
self._last_ping = time.time()
|
131
|
-
except Exception as e:
|
132
|
-
self.logger.debug(f"Ping failed: {e}")
|
133
|
-
if self._ws:
|
134
|
-
try:
|
135
|
-
await self._ws.close()
|
136
|
-
except:
|
137
|
-
pass
|
138
|
-
self._ws = None
|
139
|
-
continue
|
140
|
-
|
141
|
-
await asyncio.sleep(1)
|
142
|
-
|
143
|
-
except Exception as e:
|
144
|
-
current_time = time.time()
|
145
|
-
# Only log connection lost warnings at most once every min_warning_interval seconds
|
146
|
-
if current_time - last_warning_time >= min_warning_interval:
|
147
|
-
self.logger.warning(
|
148
|
-
f"Computer API Server connection lost. Will retry automatically."
|
149
|
-
)
|
150
|
-
last_warning_time = current_time
|
151
|
-
else:
|
152
|
-
# Log at debug level instead
|
153
|
-
self.logger.debug(f"Connection lost: {e}")
|
154
|
-
|
155
|
-
if self._ws:
|
156
|
-
try:
|
157
|
-
await self._ws.close()
|
158
|
-
except:
|
159
|
-
pass
|
160
|
-
self._ws = None
|
161
|
-
|
162
|
-
async def _ensure_connection(self):
|
163
|
-
"""Ensure WebSocket connection is established."""
|
164
|
-
if self._reconnect_task is None or self._reconnect_task.done():
|
165
|
-
self._reconnect_task = asyncio.create_task(self._keep_alive())
|
166
|
-
|
167
|
-
retry_count = 0
|
168
|
-
max_retries = 5
|
169
|
-
|
170
|
-
while retry_count < max_retries:
|
171
|
-
try:
|
172
|
-
if self._ws and self._ws.state == websockets.protocol.State.OPEN:
|
173
|
-
return
|
174
|
-
retry_count += 1
|
175
|
-
await asyncio.sleep(1)
|
176
|
-
except Exception as e:
|
177
|
-
# Only log at ERROR level for the last retry attempt
|
178
|
-
if retry_count == max_retries - 1:
|
179
|
-
self.logger.error(
|
180
|
-
f"Persistent connection check error after {retry_count} attempts: {e}"
|
181
|
-
)
|
182
|
-
else:
|
183
|
-
self.logger.debug(f"Connection check error (attempt {retry_count}): {e}")
|
184
|
-
retry_count += 1
|
185
|
-
await asyncio.sleep(1)
|
186
|
-
continue
|
187
|
-
|
188
|
-
raise ConnectionError("Failed to establish WebSocket connection after multiple retries")
|
189
|
-
|
190
|
-
async def _send_command(self, command: str, params: Optional[Dict] = None) -> Dict[str, Any]:
|
191
|
-
"""Send command through WebSocket."""
|
192
|
-
max_retries = 3
|
193
|
-
retry_count = 0
|
194
|
-
last_error = None
|
195
|
-
|
196
|
-
# Acquire lock to ensure only one command is processed at a time
|
197
|
-
async with self._command_lock:
|
198
|
-
self.logger.debug(f"Acquired lock for command: {command}")
|
199
|
-
while retry_count < max_retries:
|
200
|
-
try:
|
201
|
-
await self._ensure_connection()
|
202
|
-
if not self._ws:
|
203
|
-
raise ConnectionError("WebSocket connection is not established")
|
204
|
-
|
205
|
-
# Handle authentication if needed
|
206
|
-
if self.api_key and self.vm_name and not self._authenticated:
|
207
|
-
self.logger.info("Performing authentication handshake...")
|
208
|
-
auth_message = {
|
209
|
-
"command": "authenticate",
|
210
|
-
"params": {
|
211
|
-
"api_key": self.api_key,
|
212
|
-
"container_name": self.vm_name
|
213
|
-
}
|
214
|
-
}
|
215
|
-
await self._ws.send(json.dumps(auth_message))
|
216
|
-
|
217
|
-
# Wait for authentication response
|
218
|
-
auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10)
|
219
|
-
auth_result = json.loads(auth_response)
|
220
|
-
|
221
|
-
if not auth_result.get("success"):
|
222
|
-
error_msg = auth_result.get("error", "Authentication failed")
|
223
|
-
self.logger.error(f"Authentication failed: {error_msg}")
|
224
|
-
self._authenticated = False
|
225
|
-
raise ConnectionError(f"Authentication failed: {error_msg}")
|
226
|
-
|
227
|
-
self.logger.info("Authentication successful")
|
228
|
-
self._authenticated = True
|
229
|
-
|
230
|
-
message = {"command": command, "params": params or {}}
|
231
|
-
await self._ws.send(json.dumps(message))
|
232
|
-
response = await asyncio.wait_for(self._ws.recv(), timeout=30)
|
233
|
-
self.logger.debug(f"Completed command: {command}")
|
234
|
-
return json.loads(response)
|
235
|
-
except Exception as e:
|
236
|
-
last_error = e
|
237
|
-
retry_count += 1
|
238
|
-
if retry_count < max_retries:
|
239
|
-
# Only log at debug level for intermediate retries
|
240
|
-
self.logger.debug(
|
241
|
-
f"Command '{command}' failed (attempt {retry_count}/{max_retries}): {e}"
|
242
|
-
)
|
243
|
-
await asyncio.sleep(1)
|
244
|
-
continue
|
245
|
-
else:
|
246
|
-
# Only log at error level for the final failure
|
247
|
-
self.logger.error(
|
248
|
-
f"Failed to send command '{command}' after {max_retries} retries"
|
249
|
-
)
|
250
|
-
self.logger.debug(f"Command failure details: {e}")
|
251
|
-
raise last_error if last_error else RuntimeError("Failed to send command")
|
252
|
-
|
253
|
-
async def wait_for_ready(self, timeout: int = 60, interval: float = 1.0):
|
254
|
-
"""Wait for WebSocket connection to become available."""
|
255
|
-
start_time = time.time()
|
256
|
-
last_error = None
|
257
|
-
attempt_count = 0
|
258
|
-
progress_interval = 10 # Log progress every 10 seconds
|
259
|
-
last_progress_time = start_time
|
260
|
-
|
261
|
-
# Disable detailed logging for connection attempts
|
262
|
-
self._log_connection_attempts = False
|
263
|
-
|
264
|
-
try:
|
265
|
-
self.logger.info(
|
266
|
-
f"Waiting for Computer API Server to be ready (timeout: {timeout}s)..."
|
267
|
-
)
|
268
|
-
|
269
|
-
# Start the keep-alive task if it's not already running
|
270
|
-
if self._reconnect_task is None or self._reconnect_task.done():
|
271
|
-
self._reconnect_task = asyncio.create_task(self._keep_alive())
|
272
|
-
|
273
|
-
# Wait for the connection to be established
|
274
|
-
while time.time() - start_time < timeout:
|
275
|
-
try:
|
276
|
-
attempt_count += 1
|
277
|
-
current_time = time.time()
|
278
|
-
|
279
|
-
# Log progress periodically without flooding logs
|
280
|
-
if current_time - last_progress_time >= progress_interval:
|
281
|
-
elapsed = current_time - start_time
|
282
|
-
self.logger.info(
|
283
|
-
f"Still waiting for Computer API Server... (elapsed: {elapsed:.1f}s, attempts: {attempt_count})"
|
284
|
-
)
|
285
|
-
last_progress_time = current_time
|
286
|
-
|
287
|
-
# Check if we have a connection
|
288
|
-
if self._ws and self._ws.state == websockets.protocol.State.OPEN:
|
289
|
-
# Test the connection with a simple command
|
290
|
-
try:
|
291
|
-
await self._send_command("get_screen_size")
|
292
|
-
elapsed = time.time() - start_time
|
293
|
-
self.logger.info(
|
294
|
-
f"Computer API Server is ready (after {elapsed:.1f}s, {attempt_count} attempts)"
|
295
|
-
)
|
296
|
-
return # Connection is fully working
|
297
|
-
except Exception as e:
|
298
|
-
last_error = e
|
299
|
-
self.logger.debug(f"Connection test failed: {e}")
|
300
|
-
|
301
|
-
# Wait before trying again
|
302
|
-
await asyncio.sleep(interval)
|
303
|
-
|
304
|
-
except Exception as e:
|
305
|
-
last_error = e
|
306
|
-
self.logger.debug(f"Connection attempt {attempt_count} failed: {e}")
|
307
|
-
await asyncio.sleep(interval)
|
308
|
-
|
309
|
-
# If we get here, we've timed out
|
310
|
-
error_msg = f"Could not connect to {self.ip_address} after {timeout} seconds"
|
311
|
-
if last_error:
|
312
|
-
error_msg += f": {str(last_error)}"
|
313
|
-
self.logger.error(error_msg)
|
314
|
-
raise TimeoutError(error_msg)
|
315
|
-
finally:
|
316
|
-
# Reset to default logging behavior
|
317
|
-
self._log_connection_attempts = False
|
318
|
-
|
319
|
-
def close(self):
|
320
|
-
"""Close WebSocket connection.
|
321
|
-
|
322
|
-
Note: In host computer server mode, we leave the connection open
|
323
|
-
to allow other clients to connect to the same server. The server
|
324
|
-
will handle cleaning up idle connections.
|
325
|
-
"""
|
326
|
-
# Only cancel the reconnect task
|
327
|
-
if self._reconnect_task:
|
328
|
-
self._reconnect_task.cancel()
|
329
|
-
|
330
|
-
# Don't set closed flag or close websocket by default
|
331
|
-
# This allows the server to stay connected for other clients
|
332
|
-
# self._closed = True
|
333
|
-
# if self._ws:
|
334
|
-
# asyncio.create_task(self._ws.close())
|
335
|
-
# self._ws = None
|
336
|
-
|
337
|
-
def force_close(self):
|
338
|
-
"""Force close the WebSocket connection.
|
339
|
-
|
340
|
-
This method should be called when you want to completely
|
341
|
-
shut down the connection, not just for regular cleanup.
|
342
|
-
"""
|
343
|
-
self._closed = True
|
344
|
-
if self._reconnect_task:
|
345
|
-
self._reconnect_task.cancel()
|
346
|
-
if self._ws:
|
347
|
-
asyncio.create_task(self._ws.close())
|
348
|
-
self._ws = None
|
349
|
-
|
350
|
-
# Mouse Actions
|
351
|
-
async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> None:
|
352
|
-
await self._send_command("mouse_down", {"x": x, "y": y, "button": button})
|
353
|
-
|
354
|
-
async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> None:
|
355
|
-
await self._send_command("mouse_up", {"x": x, "y": y, "button": button})
|
356
|
-
|
357
|
-
async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
358
|
-
await self._send_command("left_click", {"x": x, "y": y})
|
359
|
-
|
360
|
-
async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
361
|
-
await self._send_command("right_click", {"x": x, "y": y})
|
362
|
-
|
363
|
-
async def double_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
364
|
-
await self._send_command("double_click", {"x": x, "y": y})
|
365
|
-
|
366
|
-
async def move_cursor(self, x: int, y: int) -> None:
|
367
|
-
await self._send_command("move_cursor", {"x": x, "y": y})
|
368
|
-
|
369
|
-
async def drag_to(self, x: int, y: int, button: "MouseButton" = "left", duration: float = 0.5) -> None:
|
370
|
-
await self._send_command(
|
371
|
-
"drag_to", {"x": x, "y": y, "button": button, "duration": duration}
|
372
|
-
)
|
373
|
-
|
374
|
-
async def drag(self, path: List[Tuple[int, int]], button: "MouseButton" = "left", duration: float = 0.5) -> None:
|
375
|
-
await self._send_command(
|
376
|
-
"drag", {"path": path, "button": button, "duration": duration}
|
377
|
-
)
|
378
|
-
|
379
|
-
# Keyboard Actions
|
380
|
-
async def key_down(self, key: "KeyType") -> None:
|
381
|
-
await self._send_command("key_down", {"key": key})
|
382
|
-
|
383
|
-
async def key_up(self, key: "KeyType") -> None:
|
384
|
-
await self._send_command("key_up", {"key": key})
|
385
|
-
|
386
|
-
async def type_text(self, text: str) -> None:
|
387
|
-
# Temporary fix for https://github.com/trycua/cua/issues/165
|
388
|
-
# Check if text contains Unicode characters
|
389
|
-
if any(ord(char) > 127 for char in text):
|
390
|
-
# For Unicode text, use clipboard and paste
|
391
|
-
await self.set_clipboard(text)
|
392
|
-
await self.hotkey(Key.COMMAND, 'v')
|
393
|
-
else:
|
394
|
-
# For ASCII text, use the regular typing method
|
395
|
-
await self._send_command("type_text", {"text": text})
|
396
|
-
|
397
|
-
async def press(self, key: "KeyType") -> None:
|
398
|
-
"""Press a single key.
|
399
|
-
|
400
|
-
Args:
|
401
|
-
key: The key to press. Can be any of:
|
402
|
-
- A Key enum value (recommended), e.g. Key.PAGE_DOWN
|
403
|
-
- A direct key value string, e.g. 'pagedown'
|
404
|
-
- A single character string, e.g. 'a'
|
405
|
-
|
406
|
-
Examples:
|
407
|
-
```python
|
408
|
-
# Using enum (recommended)
|
409
|
-
await interface.press(Key.PAGE_DOWN)
|
410
|
-
await interface.press(Key.ENTER)
|
411
|
-
|
412
|
-
# Using direct values
|
413
|
-
await interface.press('pagedown')
|
414
|
-
await interface.press('enter')
|
415
|
-
|
416
|
-
# Using single characters
|
417
|
-
await interface.press('a')
|
418
|
-
```
|
419
|
-
|
420
|
-
Raises:
|
421
|
-
ValueError: If the key type is invalid or the key is not recognized
|
422
|
-
"""
|
423
|
-
if isinstance(key, Key):
|
424
|
-
actual_key = key.value
|
425
|
-
elif isinstance(key, str):
|
426
|
-
# Try to convert to enum if it matches a known key
|
427
|
-
key_or_enum = Key.from_string(key)
|
428
|
-
actual_key = key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum
|
429
|
-
else:
|
430
|
-
raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
|
431
|
-
|
432
|
-
await self._send_command("press_key", {"key": actual_key})
|
433
|
-
|
434
|
-
async def press_key(self, key: "KeyType") -> None:
|
435
|
-
"""DEPRECATED: Use press() instead.
|
436
|
-
|
437
|
-
This method is kept for backward compatibility but will be removed in a future version.
|
438
|
-
Please use the press() method instead.
|
439
|
-
"""
|
440
|
-
await self.press(key)
|
441
|
-
|
442
|
-
async def hotkey(self, *keys: "KeyType") -> None:
|
443
|
-
"""Press multiple keys simultaneously.
|
444
|
-
|
445
|
-
Args:
|
446
|
-
*keys: Multiple keys to press simultaneously. Each key can be any of:
|
447
|
-
- A Key enum value (recommended), e.g. Key.COMMAND
|
448
|
-
- A direct key value string, e.g. 'command'
|
449
|
-
- A single character string, e.g. 'a'
|
450
|
-
|
451
|
-
Examples:
|
452
|
-
```python
|
453
|
-
# Using enums (recommended)
|
454
|
-
await interface.hotkey(Key.COMMAND, Key.C) # Copy
|
455
|
-
await interface.hotkey(Key.COMMAND, Key.V) # Paste
|
456
|
-
|
457
|
-
# Using mixed formats
|
458
|
-
await interface.hotkey(Key.COMMAND, 'a') # Select all
|
459
|
-
```
|
460
|
-
|
461
|
-
Raises:
|
462
|
-
ValueError: If any key type is invalid or not recognized
|
463
|
-
"""
|
464
|
-
actual_keys = []
|
465
|
-
for key in keys:
|
466
|
-
if isinstance(key, Key):
|
467
|
-
actual_keys.append(key.value)
|
468
|
-
elif isinstance(key, str):
|
469
|
-
# Try to convert to enum if it matches a known key
|
470
|
-
key_or_enum = Key.from_string(key)
|
471
|
-
actual_keys.append(key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum)
|
472
|
-
else:
|
473
|
-
raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
|
474
|
-
|
475
|
-
await self._send_command("hotkey", {"keys": actual_keys})
|
476
|
-
|
477
|
-
# Scrolling Actions
|
478
|
-
async def scroll(self, x: int, y: int) -> None:
|
479
|
-
await self._send_command("scroll", {"x": x, "y": y})
|
480
|
-
|
481
|
-
async def scroll_down(self, clicks: int = 1) -> None:
|
482
|
-
await self._send_command("scroll_down", {"clicks": clicks})
|
483
|
-
|
484
|
-
async def scroll_up(self, clicks: int = 1) -> None:
|
485
|
-
await self._send_command("scroll_up", {"clicks": clicks})
|
486
|
-
|
487
|
-
# Screen Actions
|
488
|
-
async def screenshot(
|
489
|
-
self,
|
490
|
-
boxes: Optional[List[Tuple[int, int, int, int]]] = None,
|
491
|
-
box_color: str = "#FF0000",
|
492
|
-
box_thickness: int = 2,
|
493
|
-
scale_factor: float = 1.0,
|
494
|
-
) -> bytes:
|
495
|
-
"""Take a screenshot with optional box drawing and scaling.
|
496
|
-
|
497
|
-
Args:
|
498
|
-
boxes: Optional list of (x, y, width, height) tuples defining boxes to draw in screen coordinates
|
499
|
-
box_color: Color of the boxes in hex format (default: "#FF0000" red)
|
500
|
-
box_thickness: Thickness of the box borders in pixels (default: 2)
|
501
|
-
scale_factor: Factor to scale the final image by (default: 1.0)
|
502
|
-
Use > 1.0 to enlarge, < 1.0 to shrink (e.g., 0.5 for half size, 2.0 for double)
|
503
|
-
|
504
|
-
Returns:
|
505
|
-
bytes: The screenshot image data, optionally with boxes drawn on it and scaled
|
506
|
-
"""
|
507
|
-
result = await self._send_command("screenshot")
|
508
|
-
if not result.get("image_data"):
|
509
|
-
raise RuntimeError("Failed to take screenshot")
|
510
|
-
|
511
|
-
screenshot = decode_base64_image(result["image_data"])
|
512
|
-
|
513
|
-
if boxes:
|
514
|
-
# Get the natural scaling between screen and screenshot
|
515
|
-
screen_size = await self.get_screen_size()
|
516
|
-
screenshot_width, screenshot_height = bytes_to_image(screenshot).size
|
517
|
-
width_scale = screenshot_width / screen_size["width"]
|
518
|
-
height_scale = screenshot_height / screen_size["height"]
|
519
|
-
|
520
|
-
# Scale box coordinates from screen space to screenshot space
|
521
|
-
for box in boxes:
|
522
|
-
scaled_box = (
|
523
|
-
int(box[0] * width_scale), # x
|
524
|
-
int(box[1] * height_scale), # y
|
525
|
-
int(box[2] * width_scale), # width
|
526
|
-
int(box[3] * height_scale), # height
|
527
|
-
)
|
528
|
-
screenshot = draw_box(
|
529
|
-
screenshot,
|
530
|
-
x=scaled_box[0],
|
531
|
-
y=scaled_box[1],
|
532
|
-
width=scaled_box[2],
|
533
|
-
height=scaled_box[3],
|
534
|
-
color=box_color,
|
535
|
-
thickness=box_thickness,
|
536
|
-
)
|
537
|
-
|
538
|
-
if scale_factor != 1.0:
|
539
|
-
screenshot = resize_image(screenshot, scale_factor)
|
540
|
-
|
541
|
-
return screenshot
|
542
|
-
|
543
|
-
async def get_screen_size(self) -> Dict[str, int]:
|
544
|
-
result = await self._send_command("get_screen_size")
|
545
|
-
if result["success"] and result["size"]:
|
546
|
-
return result["size"]
|
547
|
-
raise RuntimeError("Failed to get screen size")
|
548
|
-
|
549
|
-
async def get_cursor_position(self) -> Dict[str, int]:
|
550
|
-
result = await self._send_command("get_cursor_position")
|
551
|
-
if result["success"] and result["position"]:
|
552
|
-
return result["position"]
|
553
|
-
raise RuntimeError("Failed to get cursor position")
|
554
|
-
|
555
|
-
# Clipboard Actions
|
556
|
-
async def copy_to_clipboard(self) -> str:
|
557
|
-
result = await self._send_command("copy_to_clipboard")
|
558
|
-
if result["success"] and result["content"]:
|
559
|
-
return result["content"]
|
560
|
-
raise RuntimeError("Failed to get clipboard content")
|
561
|
-
|
562
|
-
async def set_clipboard(self, text: str) -> None:
|
563
|
-
await self._send_command("set_clipboard", {"text": text})
|
564
|
-
|
565
|
-
# File System Actions
|
566
|
-
async def file_exists(self, path: str) -> bool:
|
567
|
-
result = await self._send_command("file_exists", {"path": path})
|
568
|
-
return result.get("exists", False)
|
569
|
-
|
570
|
-
async def directory_exists(self, path: str) -> bool:
|
571
|
-
result = await self._send_command("directory_exists", {"path": path})
|
572
|
-
return result.get("exists", False)
|
573
|
-
|
574
|
-
async def list_dir(self, path: str) -> list[str]:
|
575
|
-
result = await self._send_command("list_dir", {"path": path})
|
576
|
-
if not result.get("success", False):
|
577
|
-
raise RuntimeError(result.get("error", "Failed to list directory"))
|
578
|
-
return result.get("files", [])
|
579
|
-
|
580
|
-
async def read_text(self, path: str) -> str:
|
581
|
-
result = await self._send_command("read_text", {"path": path})
|
582
|
-
if not result.get("success", False):
|
583
|
-
raise RuntimeError(result.get("error", "Failed to read file"))
|
584
|
-
return result.get("content", "")
|
585
|
-
|
586
|
-
async def write_text(self, path: str, content: str) -> None:
|
587
|
-
result = await self._send_command("write_text", {"path": path, "content": content})
|
588
|
-
if not result.get("success", False):
|
589
|
-
raise RuntimeError(result.get("error", "Failed to write file"))
|
590
|
-
|
591
|
-
async def read_bytes(self, path: str) -> bytes:
|
592
|
-
result = await self._send_command("read_bytes", {"path": path})
|
593
|
-
if not result.get("success", False):
|
594
|
-
raise RuntimeError(result.get("error", "Failed to read file"))
|
595
|
-
content_b64 = result.get("content_b64", "")
|
596
|
-
return decode_base64_image(content_b64)
|
597
|
-
|
598
|
-
async def write_bytes(self, path: str, content: bytes) -> None:
|
599
|
-
result = await self._send_command("write_bytes", {"path": path, "content_b64": encode_base64_image(content)})
|
600
|
-
if not result.get("success", False):
|
601
|
-
raise RuntimeError(result.get("error", "Failed to write file"))
|
602
|
-
|
603
|
-
async def delete_file(self, path: str) -> None:
|
604
|
-
result = await self._send_command("delete_file", {"path": path})
|
605
|
-
if not result.get("success", False):
|
606
|
-
raise RuntimeError(result.get("error", "Failed to delete file"))
|
607
|
-
|
608
|
-
async def create_dir(self, path: str) -> None:
|
609
|
-
result = await self._send_command("create_dir", {"path": path})
|
610
|
-
if not result.get("success", False):
|
611
|
-
raise RuntimeError(result.get("error", "Failed to create directory"))
|
612
|
-
|
613
|
-
async def delete_dir(self, path: str) -> None:
|
614
|
-
result = await self._send_command("delete_dir", {"path": path})
|
615
|
-
if not result.get("success", False):
|
616
|
-
raise RuntimeError(result.get("error", "Failed to delete directory"))
|
617
|
-
|
618
|
-
async def run_command(self, command: str) -> CommandResult:
|
619
|
-
result = await self._send_command("run_command", {"command": command})
|
620
|
-
if not result.get("success", False):
|
621
|
-
raise RuntimeError(result.get("error", "Failed to run command"))
|
622
|
-
return CommandResult(
|
623
|
-
stdout=result.get("stdout", ""),
|
624
|
-
stderr=result.get("stderr", ""),
|
625
|
-
returncode=result.get("return_code", 0)
|
626
|
-
)
|
627
|
-
|
628
|
-
# Accessibility Actions
|
629
|
-
async def get_accessibility_tree(self) -> Dict[str, Any]:
|
630
|
-
"""Get the accessibility tree of the current screen."""
|
631
|
-
result = await self._send_command("get_accessibility_tree")
|
632
|
-
if not result.get("success", False):
|
633
|
-
raise RuntimeError(result.get("error", "Failed to get accessibility tree"))
|
634
|
-
return result
|
635
|
-
|
636
|
-
async def get_active_window_bounds(self) -> Dict[str, int]:
|
637
|
-
"""Get the bounds of the currently active window."""
|
638
|
-
result = await self._send_command("get_active_window_bounds")
|
639
|
-
if result["success"] and result["bounds"]:
|
640
|
-
return result["bounds"]
|
641
|
-
raise RuntimeError("Failed to get active window bounds")
|
642
|
-
|
643
|
-
async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]:
|
644
|
-
"""Convert screenshot coordinates to screen coordinates.
|
645
|
-
|
646
|
-
Args:
|
647
|
-
x: X coordinate in screenshot space
|
648
|
-
y: Y coordinate in screenshot space
|
649
|
-
|
650
|
-
Returns:
|
651
|
-
tuple[float, float]: (x, y) coordinates in screen space
|
652
|
-
"""
|
653
|
-
screen_size = await self.get_screen_size()
|
654
|
-
screenshot = await self.screenshot()
|
655
|
-
screenshot_img = bytes_to_image(screenshot)
|
656
|
-
screenshot_width, screenshot_height = screenshot_img.size
|
657
|
-
|
658
|
-
# Calculate scaling factors
|
659
|
-
width_scale = screen_size["width"] / screenshot_width
|
660
|
-
height_scale = screen_size["height"] / screenshot_height
|
661
|
-
|
662
|
-
# Convert coordinates
|
663
|
-
screen_x = x * width_scale
|
664
|
-
screen_y = y * height_scale
|
665
|
-
|
666
|
-
return screen_x, screen_y
|
667
|
-
|
668
|
-
async def to_screenshot_coordinates(self, x: float, y: float) -> tuple[float, float]:
|
669
|
-
"""Convert screen coordinates to screenshot coordinates.
|
670
|
-
|
671
|
-
Args:
|
672
|
-
x: X coordinate in screen space
|
673
|
-
y: Y coordinate in screen space
|
674
|
-
|
675
|
-
Returns:
|
676
|
-
tuple[float, float]: (x, y) coordinates in screenshot space
|
677
|
-
"""
|
678
|
-
screen_size = await self.get_screen_size()
|
679
|
-
screenshot = await self.screenshot()
|
680
|
-
screenshot_img = bytes_to_image(screenshot)
|
681
|
-
screenshot_width, screenshot_height = screenshot_img.size
|
682
|
-
|
683
|
-
# Calculate scaling factors
|
684
|
-
width_scale = screenshot_width / screen_size["width"]
|
685
|
-
height_scale = screenshot_height / screen_size["height"]
|
686
|
-
|
687
|
-
# Convert coordinates
|
688
|
-
screenshot_x = x * width_scale
|
689
|
-
screenshot_y = y * height_scale
|
690
|
-
|
691
|
-
return screenshot_x, screenshot_y
|
8
|
+
super().__init__(ip_address, username, password, api_key, vm_name, "computer.interface.linux")
|