cua-computer 0.3.1__tar.gz → 0.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cua_computer-0.3.1 → cua_computer-0.3.3}/PKG-INFO +1 -1
- cua_computer-0.3.3/computer/interface/base.py +461 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/interface/generic.py +50 -19
- {cua_computer-0.3.1 → cua_computer-0.3.3}/pyproject.toml +3 -3
- cua_computer-0.3.1/computer/interface/base.py +0 -304
- {cua_computer-0.3.1 → cua_computer-0.3.3}/README.md +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/__init__.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/computer.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/diorama_computer.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/helpers.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/interface/__init__.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/interface/factory.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/interface/linux.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/interface/macos.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/interface/models.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/interface/windows.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/logger.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/models.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/providers/__init__.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/providers/base.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/providers/cloud/__init__.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/providers/cloud/provider.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/providers/factory.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/providers/lume/__init__.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/providers/lume/provider.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/providers/lume_api.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/providers/lumier/__init__.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/providers/lumier/provider.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/providers/winsandbox/__init__.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/providers/winsandbox/provider.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/providers/winsandbox/setup_script.ps1 +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/telemetry.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/ui/__init__.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/ui/__main__.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/ui/gradio/__init__.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/ui/gradio/app.py +0 -0
- {cua_computer-0.3.1 → cua_computer-0.3.3}/computer/utils.py +0 -0
@@ -0,0 +1,461 @@
|
|
1
|
+
"""Base interface for computer control."""
|
2
|
+
|
3
|
+
from abc import ABC, abstractmethod
|
4
|
+
from typing import Optional, Dict, Any, Tuple, List
|
5
|
+
from ..logger import Logger, LogLevel
|
6
|
+
from .models import MouseButton, CommandResult
|
7
|
+
|
8
|
+
class BaseComputerInterface(ABC):
|
9
|
+
"""Base class for computer control interfaces."""
|
10
|
+
|
11
|
+
def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None):
|
12
|
+
"""Initialize interface.
|
13
|
+
|
14
|
+
Args:
|
15
|
+
ip_address: IP address of the computer to control
|
16
|
+
username: Username for authentication
|
17
|
+
password: Password for authentication
|
18
|
+
api_key: Optional API key for cloud authentication
|
19
|
+
vm_name: Optional VM name for cloud authentication
|
20
|
+
"""
|
21
|
+
self.ip_address = ip_address
|
22
|
+
self.username = username
|
23
|
+
self.password = password
|
24
|
+
self.api_key = api_key
|
25
|
+
self.vm_name = vm_name
|
26
|
+
self.logger = Logger("cua.interface", LogLevel.NORMAL)
|
27
|
+
|
28
|
+
# Optional default delay time between commands (in seconds)
|
29
|
+
self.delay: float = 0.0
|
30
|
+
|
31
|
+
@abstractmethod
|
32
|
+
async def wait_for_ready(self, timeout: int = 60) -> None:
|
33
|
+
"""Wait for interface to be ready.
|
34
|
+
|
35
|
+
Args:
|
36
|
+
timeout: Maximum time to wait in seconds
|
37
|
+
|
38
|
+
Raises:
|
39
|
+
TimeoutError: If interface is not ready within timeout
|
40
|
+
"""
|
41
|
+
pass
|
42
|
+
|
43
|
+
@abstractmethod
|
44
|
+
def close(self) -> None:
|
45
|
+
"""Close the interface connection."""
|
46
|
+
pass
|
47
|
+
|
48
|
+
def force_close(self) -> None:
|
49
|
+
"""Force close the interface connection.
|
50
|
+
|
51
|
+
By default, this just calls close(), but subclasses can override
|
52
|
+
to provide more forceful cleanup.
|
53
|
+
"""
|
54
|
+
self.close()
|
55
|
+
|
56
|
+
# Mouse Actions
|
57
|
+
@abstractmethod
|
58
|
+
async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left", delay: Optional[float] = None) -> None:
|
59
|
+
"""Press and hold a mouse button.
|
60
|
+
|
61
|
+
Args:
|
62
|
+
x: X coordinate to press at. If None, uses current cursor position.
|
63
|
+
y: Y coordinate to press at. If None, uses current cursor position.
|
64
|
+
button: Mouse button to press ('left', 'middle', 'right').
|
65
|
+
delay: Optional delay in seconds after the action
|
66
|
+
"""
|
67
|
+
pass
|
68
|
+
|
69
|
+
@abstractmethod
|
70
|
+
async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left", delay: Optional[float] = None) -> None:
|
71
|
+
"""Release a mouse button.
|
72
|
+
|
73
|
+
Args:
|
74
|
+
x: X coordinate to release at. If None, uses current cursor position.
|
75
|
+
y: Y coordinate to release at. If None, uses current cursor position.
|
76
|
+
button: Mouse button to release ('left', 'middle', 'right').
|
77
|
+
delay: Optional delay in seconds after the action
|
78
|
+
"""
|
79
|
+
pass
|
80
|
+
|
81
|
+
@abstractmethod
|
82
|
+
async def left_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
|
83
|
+
"""Perform a left mouse button click.
|
84
|
+
|
85
|
+
Args:
|
86
|
+
x: X coordinate to click at. If None, uses current cursor position.
|
87
|
+
y: Y coordinate to click at. If None, uses current cursor position.
|
88
|
+
delay: Optional delay in seconds after the action
|
89
|
+
"""
|
90
|
+
pass
|
91
|
+
|
92
|
+
@abstractmethod
|
93
|
+
async def right_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
|
94
|
+
"""Perform a right mouse button click.
|
95
|
+
|
96
|
+
Args:
|
97
|
+
x: X coordinate to click at. If None, uses current cursor position.
|
98
|
+
y: Y coordinate to click at. If None, uses current cursor position.
|
99
|
+
delay: Optional delay in seconds after the action
|
100
|
+
"""
|
101
|
+
pass
|
102
|
+
|
103
|
+
@abstractmethod
|
104
|
+
async def double_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
|
105
|
+
"""Perform a double left mouse button click.
|
106
|
+
|
107
|
+
Args:
|
108
|
+
x: X coordinate to double-click at. If None, uses current cursor position.
|
109
|
+
y: Y coordinate to double-click at. If None, uses current cursor position.
|
110
|
+
delay: Optional delay in seconds after the action
|
111
|
+
"""
|
112
|
+
pass
|
113
|
+
|
114
|
+
@abstractmethod
|
115
|
+
async def move_cursor(self, x: int, y: int, delay: Optional[float] = None) -> None:
|
116
|
+
"""Move the cursor to the specified screen coordinates.
|
117
|
+
|
118
|
+
Args:
|
119
|
+
x: X coordinate to move cursor to.
|
120
|
+
y: Y coordinate to move cursor to.
|
121
|
+
delay: Optional delay in seconds after the action
|
122
|
+
"""
|
123
|
+
pass
|
124
|
+
|
125
|
+
@abstractmethod
|
126
|
+
async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
|
127
|
+
"""Drag from current position to specified coordinates.
|
128
|
+
|
129
|
+
Args:
|
130
|
+
x: The x coordinate to drag to
|
131
|
+
y: The y coordinate to drag to
|
132
|
+
button: The mouse button to use ('left', 'middle', 'right')
|
133
|
+
duration: How long the drag should take in seconds
|
134
|
+
delay: Optional delay in seconds after the action
|
135
|
+
"""
|
136
|
+
pass
|
137
|
+
|
138
|
+
@abstractmethod
|
139
|
+
async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
|
140
|
+
"""Drag the cursor along a path of coordinates.
|
141
|
+
|
142
|
+
Args:
|
143
|
+
path: List of (x, y) coordinate tuples defining the drag path
|
144
|
+
button: The mouse button to use ('left', 'middle', 'right')
|
145
|
+
duration: Total time in seconds that the drag operation should take
|
146
|
+
delay: Optional delay in seconds after the action
|
147
|
+
"""
|
148
|
+
pass
|
149
|
+
|
150
|
+
# Keyboard Actions
|
151
|
+
@abstractmethod
|
152
|
+
async def key_down(self, key: str, delay: Optional[float] = None) -> None:
|
153
|
+
"""Press and hold a key.
|
154
|
+
|
155
|
+
Args:
|
156
|
+
key: The key to press and hold (e.g., 'a', 'shift', 'ctrl').
|
157
|
+
delay: Optional delay in seconds after the action.
|
158
|
+
"""
|
159
|
+
pass
|
160
|
+
|
161
|
+
@abstractmethod
|
162
|
+
async def key_up(self, key: str, delay: Optional[float] = None) -> None:
|
163
|
+
"""Release a previously pressed key.
|
164
|
+
|
165
|
+
Args:
|
166
|
+
key: The key to release (e.g., 'a', 'shift', 'ctrl').
|
167
|
+
delay: Optional delay in seconds after the action.
|
168
|
+
"""
|
169
|
+
pass
|
170
|
+
|
171
|
+
@abstractmethod
|
172
|
+
async def type_text(self, text: str, delay: Optional[float] = None) -> None:
|
173
|
+
"""Type the specified text string.
|
174
|
+
|
175
|
+
Args:
|
176
|
+
text: The text string to type.
|
177
|
+
delay: Optional delay in seconds after the action.
|
178
|
+
"""
|
179
|
+
pass
|
180
|
+
|
181
|
+
@abstractmethod
|
182
|
+
async def press_key(self, key: str, delay: Optional[float] = None) -> None:
|
183
|
+
"""Press and release a single key.
|
184
|
+
|
185
|
+
Args:
|
186
|
+
key: The key to press (e.g., 'a', 'enter', 'escape').
|
187
|
+
delay: Optional delay in seconds after the action.
|
188
|
+
"""
|
189
|
+
pass
|
190
|
+
|
191
|
+
@abstractmethod
|
192
|
+
async def hotkey(self, *keys: str, delay: Optional[float] = None) -> None:
|
193
|
+
"""Press multiple keys simultaneously (keyboard shortcut).
|
194
|
+
|
195
|
+
Args:
|
196
|
+
*keys: Variable number of keys to press together (e.g., 'ctrl', 'c').
|
197
|
+
delay: Optional delay in seconds after the action.
|
198
|
+
"""
|
199
|
+
pass
|
200
|
+
|
201
|
+
# Scrolling Actions
|
202
|
+
@abstractmethod
|
203
|
+
async def scroll(self, x: int, y: int, delay: Optional[float] = None) -> None:
|
204
|
+
"""Scroll the mouse wheel by specified amounts.
|
205
|
+
|
206
|
+
Args:
|
207
|
+
x: Horizontal scroll amount (positive = right, negative = left).
|
208
|
+
y: Vertical scroll amount (positive = up, negative = down).
|
209
|
+
delay: Optional delay in seconds after the action.
|
210
|
+
"""
|
211
|
+
pass
|
212
|
+
|
213
|
+
@abstractmethod
|
214
|
+
async def scroll_down(self, clicks: int = 1, delay: Optional[float] = None) -> None:
|
215
|
+
"""Scroll down by the specified number of clicks.
|
216
|
+
|
217
|
+
Args:
|
218
|
+
clicks: Number of scroll clicks to perform downward.
|
219
|
+
delay: Optional delay in seconds after the action.
|
220
|
+
"""
|
221
|
+
pass
|
222
|
+
|
223
|
+
@abstractmethod
|
224
|
+
async def scroll_up(self, clicks: int = 1, delay: Optional[float] = None) -> None:
|
225
|
+
"""Scroll up by the specified number of clicks.
|
226
|
+
|
227
|
+
Args:
|
228
|
+
clicks: Number of scroll clicks to perform upward.
|
229
|
+
delay: Optional delay in seconds after the action.
|
230
|
+
"""
|
231
|
+
pass
|
232
|
+
|
233
|
+
# Screen Actions
|
234
|
+
@abstractmethod
|
235
|
+
async def screenshot(self) -> bytes:
|
236
|
+
"""Take a screenshot.
|
237
|
+
|
238
|
+
Returns:
|
239
|
+
Raw bytes of the screenshot image
|
240
|
+
"""
|
241
|
+
pass
|
242
|
+
|
243
|
+
@abstractmethod
|
244
|
+
async def get_screen_size(self) -> Dict[str, int]:
|
245
|
+
"""Get the screen dimensions.
|
246
|
+
|
247
|
+
Returns:
|
248
|
+
Dict with 'width' and 'height' keys
|
249
|
+
"""
|
250
|
+
pass
|
251
|
+
|
252
|
+
@abstractmethod
|
253
|
+
async def get_cursor_position(self) -> Dict[str, int]:
|
254
|
+
"""Get the current cursor position on screen.
|
255
|
+
|
256
|
+
Returns:
|
257
|
+
Dict with 'x' and 'y' keys containing cursor coordinates.
|
258
|
+
"""
|
259
|
+
pass
|
260
|
+
|
261
|
+
# Clipboard Actions
|
262
|
+
@abstractmethod
|
263
|
+
async def copy_to_clipboard(self) -> str:
|
264
|
+
"""Get the current clipboard content.
|
265
|
+
|
266
|
+
Returns:
|
267
|
+
The text content currently stored in the clipboard.
|
268
|
+
"""
|
269
|
+
pass
|
270
|
+
|
271
|
+
@abstractmethod
|
272
|
+
async def set_clipboard(self, text: str) -> None:
|
273
|
+
"""Set the clipboard content to the specified text.
|
274
|
+
|
275
|
+
Args:
|
276
|
+
text: The text to store in the clipboard.
|
277
|
+
"""
|
278
|
+
pass
|
279
|
+
|
280
|
+
# File System Actions
|
281
|
+
@abstractmethod
|
282
|
+
async def file_exists(self, path: str) -> bool:
|
283
|
+
"""Check if a file exists at the specified path.
|
284
|
+
|
285
|
+
Args:
|
286
|
+
path: The file path to check.
|
287
|
+
|
288
|
+
Returns:
|
289
|
+
True if the file exists, False otherwise.
|
290
|
+
"""
|
291
|
+
pass
|
292
|
+
|
293
|
+
@abstractmethod
|
294
|
+
async def directory_exists(self, path: str) -> bool:
|
295
|
+
"""Check if a directory exists at the specified path.
|
296
|
+
|
297
|
+
Args:
|
298
|
+
path: The directory path to check.
|
299
|
+
|
300
|
+
Returns:
|
301
|
+
True if the directory exists, False otherwise.
|
302
|
+
"""
|
303
|
+
pass
|
304
|
+
|
305
|
+
@abstractmethod
|
306
|
+
async def list_dir(self, path: str) -> List[str]:
|
307
|
+
"""List the contents of a directory.
|
308
|
+
|
309
|
+
Args:
|
310
|
+
path: The directory path to list.
|
311
|
+
|
312
|
+
Returns:
|
313
|
+
List of file and directory names in the specified directory.
|
314
|
+
"""
|
315
|
+
pass
|
316
|
+
|
317
|
+
@abstractmethod
|
318
|
+
async def read_text(self, path: str) -> str:
|
319
|
+
"""Read the text contents of a file.
|
320
|
+
|
321
|
+
Args:
|
322
|
+
path: The file path to read from.
|
323
|
+
|
324
|
+
Returns:
|
325
|
+
The text content of the file.
|
326
|
+
"""
|
327
|
+
pass
|
328
|
+
|
329
|
+
@abstractmethod
|
330
|
+
async def write_text(self, path: str, content: str) -> None:
|
331
|
+
"""Write text content to a file.
|
332
|
+
|
333
|
+
Args:
|
334
|
+
path: The file path to write to.
|
335
|
+
content: The text content to write.
|
336
|
+
"""
|
337
|
+
pass
|
338
|
+
|
339
|
+
@abstractmethod
|
340
|
+
async def read_bytes(self, path: str, offset: int = 0, length: Optional[int] = None) -> bytes:
|
341
|
+
"""Read file binary contents with optional seeking support.
|
342
|
+
|
343
|
+
Args:
|
344
|
+
path: Path to the file
|
345
|
+
offset: Byte offset to start reading from (default: 0)
|
346
|
+
length: Number of bytes to read (default: None for entire file)
|
347
|
+
"""
|
348
|
+
pass
|
349
|
+
|
350
|
+
@abstractmethod
|
351
|
+
async def write_bytes(self, path: str, content: bytes) -> None:
|
352
|
+
"""Write binary content to a file.
|
353
|
+
|
354
|
+
Args:
|
355
|
+
path: The file path to write to.
|
356
|
+
content: The binary content to write.
|
357
|
+
"""
|
358
|
+
pass
|
359
|
+
|
360
|
+
@abstractmethod
|
361
|
+
async def delete_file(self, path: str) -> None:
|
362
|
+
"""Delete a file at the specified path.
|
363
|
+
|
364
|
+
Args:
|
365
|
+
path: The file path to delete.
|
366
|
+
"""
|
367
|
+
pass
|
368
|
+
|
369
|
+
@abstractmethod
|
370
|
+
async def create_dir(self, path: str) -> None:
|
371
|
+
"""Create a directory at the specified path.
|
372
|
+
|
373
|
+
Args:
|
374
|
+
path: The directory path to create.
|
375
|
+
"""
|
376
|
+
pass
|
377
|
+
|
378
|
+
@abstractmethod
|
379
|
+
async def delete_dir(self, path: str) -> None:
|
380
|
+
"""Delete a directory at the specified path.
|
381
|
+
|
382
|
+
Args:
|
383
|
+
path: The directory path to delete.
|
384
|
+
"""
|
385
|
+
pass
|
386
|
+
|
387
|
+
@abstractmethod
|
388
|
+
async def get_file_size(self, path: str) -> int:
|
389
|
+
"""Get the size of a file in bytes.
|
390
|
+
|
391
|
+
Args:
|
392
|
+
path: The file path to get the size of.
|
393
|
+
|
394
|
+
Returns:
|
395
|
+
The size of the file in bytes.
|
396
|
+
"""
|
397
|
+
pass
|
398
|
+
|
399
|
+
@abstractmethod
|
400
|
+
async def run_command(self, command: str) -> CommandResult:
|
401
|
+
"""Run shell command and return structured result.
|
402
|
+
|
403
|
+
Executes a shell command using subprocess.run with shell=True and check=False.
|
404
|
+
The command is run in the target environment and captures both stdout and stderr.
|
405
|
+
|
406
|
+
Args:
|
407
|
+
command (str): The shell command to execute
|
408
|
+
|
409
|
+
Returns:
|
410
|
+
CommandResult: A structured result containing:
|
411
|
+
- stdout (str): Standard output from the command
|
412
|
+
- stderr (str): Standard error from the command
|
413
|
+
- returncode (int): Exit code from the command (0 indicates success)
|
414
|
+
|
415
|
+
Raises:
|
416
|
+
RuntimeError: If the command execution fails at the system level
|
417
|
+
|
418
|
+
Example:
|
419
|
+
result = await interface.run_command("ls -la")
|
420
|
+
if result.returncode == 0:
|
421
|
+
print(f"Output: {result.stdout}")
|
422
|
+
else:
|
423
|
+
print(f"Error: {result.stderr}, Exit code: {result.returncode}")
|
424
|
+
"""
|
425
|
+
pass
|
426
|
+
|
427
|
+
# Accessibility Actions
|
428
|
+
@abstractmethod
|
429
|
+
async def get_accessibility_tree(self) -> Dict:
|
430
|
+
"""Get the accessibility tree of the current screen.
|
431
|
+
|
432
|
+
Returns:
|
433
|
+
Dict containing the hierarchical accessibility information of screen elements.
|
434
|
+
"""
|
435
|
+
pass
|
436
|
+
|
437
|
+
@abstractmethod
|
438
|
+
async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]:
|
439
|
+
"""Convert screenshot coordinates to screen coordinates.
|
440
|
+
|
441
|
+
Args:
|
442
|
+
x: X coordinate in screenshot space
|
443
|
+
y: Y coordinate in screenshot space
|
444
|
+
|
445
|
+
Returns:
|
446
|
+
tuple[float, float]: (x, y) coordinates in screen space
|
447
|
+
"""
|
448
|
+
pass
|
449
|
+
|
450
|
+
@abstractmethod
|
451
|
+
async def to_screenshot_coordinates(self, x: float, y: float) -> tuple[float, float]:
|
452
|
+
"""Convert screen coordinates to screenshot coordinates.
|
453
|
+
|
454
|
+
Args:
|
455
|
+
x: X coordinate in screen space
|
456
|
+
y: Y coordinate in screen space
|
457
|
+
|
458
|
+
Returns:
|
459
|
+
tuple[float, float]: (x, y) coordinates in screenshot space
|
460
|
+
"""
|
461
|
+
pass
|
@@ -32,6 +32,21 @@ class GenericComputerInterface(BaseComputerInterface):
|
|
32
32
|
# Set logger name for the interface
|
33
33
|
self.logger = Logger(logger_name, LogLevel.NORMAL)
|
34
34
|
|
35
|
+
# Optional default delay time between commands (in seconds)
|
36
|
+
self.delay = 0.0
|
37
|
+
|
38
|
+
async def _handle_delay(self, delay: Optional[float] = None):
|
39
|
+
"""Handle delay between commands using async sleep.
|
40
|
+
|
41
|
+
Args:
|
42
|
+
delay: Optional delay in seconds. If None, uses self.delay.
|
43
|
+
"""
|
44
|
+
if delay is not None:
|
45
|
+
if isinstance(delay, float) and delay > 0:
|
46
|
+
await asyncio.sleep(delay)
|
47
|
+
elif isinstance(self.delay, float) and self.delay > 0:
|
48
|
+
await asyncio.sleep(self.delay)
|
49
|
+
|
35
50
|
@property
|
36
51
|
def ws_uri(self) -> str:
|
37
52
|
"""Get the WebSocket URI using the current IP address.
|
@@ -44,42 +59,52 @@ class GenericComputerInterface(BaseComputerInterface):
|
|
44
59
|
return f"{protocol}://{self.ip_address}:{port}/ws"
|
45
60
|
|
46
61
|
# Mouse actions
|
47
|
-
async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> None:
|
62
|
+
async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left", delay: Optional[float] = None) -> None:
|
48
63
|
await self._send_command("mouse_down", {"x": x, "y": y, "button": button})
|
64
|
+
await self._handle_delay(delay)
|
49
65
|
|
50
|
-
async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> None:
|
66
|
+
async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left", delay: Optional[float] = None) -> None:
|
51
67
|
await self._send_command("mouse_up", {"x": x, "y": y, "button": button})
|
68
|
+
await self._handle_delay(delay)
|
52
69
|
|
53
|
-
async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
70
|
+
async def left_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
|
54
71
|
await self._send_command("left_click", {"x": x, "y": y})
|
72
|
+
await self._handle_delay(delay)
|
55
73
|
|
56
|
-
async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
74
|
+
async def right_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
|
57
75
|
await self._send_command("right_click", {"x": x, "y": y})
|
76
|
+
await self._handle_delay(delay)
|
58
77
|
|
59
|
-
async def double_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
78
|
+
async def double_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
|
60
79
|
await self._send_command("double_click", {"x": x, "y": y})
|
80
|
+
await self._handle_delay(delay)
|
61
81
|
|
62
|
-
async def move_cursor(self, x: int, y: int) -> None:
|
82
|
+
async def move_cursor(self, x: int, y: int, delay: Optional[float] = None) -> None:
|
63
83
|
await self._send_command("move_cursor", {"x": x, "y": y})
|
84
|
+
await self._handle_delay(delay)
|
64
85
|
|
65
|
-
async def drag_to(self, x: int, y: int, button: "MouseButton" = "left", duration: float = 0.5) -> None:
|
86
|
+
async def drag_to(self, x: int, y: int, button: "MouseButton" = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
|
66
87
|
await self._send_command(
|
67
88
|
"drag_to", {"x": x, "y": y, "button": button, "duration": duration}
|
68
89
|
)
|
90
|
+
await self._handle_delay(delay)
|
69
91
|
|
70
|
-
async def drag(self, path: List[Tuple[int, int]], button: "MouseButton" = "left", duration: float = 0.5) -> None:
|
92
|
+
async def drag(self, path: List[Tuple[int, int]], button: "MouseButton" = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
|
71
93
|
await self._send_command(
|
72
94
|
"drag", {"path": path, "button": button, "duration": duration}
|
73
95
|
)
|
96
|
+
await self._handle_delay(delay)
|
74
97
|
|
75
98
|
# Keyboard Actions
|
76
|
-
async def key_down(self, key: "KeyType") -> None:
|
99
|
+
async def key_down(self, key: "KeyType", delay: Optional[float] = None) -> None:
|
77
100
|
await self._send_command("key_down", {"key": key})
|
101
|
+
await self._handle_delay(delay)
|
78
102
|
|
79
|
-
async def key_up(self, key: "KeyType") -> None:
|
103
|
+
async def key_up(self, key: "KeyType", delay: Optional[float] = None) -> None:
|
80
104
|
await self._send_command("key_up", {"key": key})
|
105
|
+
await self._handle_delay(delay)
|
81
106
|
|
82
|
-
async def type_text(self, text: str) -> None:
|
107
|
+
async def type_text(self, text: str, delay: Optional[float] = None) -> None:
|
83
108
|
# Temporary fix for https://github.com/trycua/cua/issues/165
|
84
109
|
# Check if text contains Unicode characters
|
85
110
|
if any(ord(char) > 127 for char in text):
|
@@ -89,8 +114,9 @@ class GenericComputerInterface(BaseComputerInterface):
|
|
89
114
|
else:
|
90
115
|
# For ASCII text, use the regular typing method
|
91
116
|
await self._send_command("type_text", {"text": text})
|
117
|
+
await self._handle_delay(delay)
|
92
118
|
|
93
|
-
async def press(self, key: "KeyType") -> None:
|
119
|
+
async def press(self, key: "KeyType", delay: Optional[float] = None) -> None:
|
94
120
|
"""Press a single key.
|
95
121
|
|
96
122
|
Args:
|
@@ -126,16 +152,17 @@ class GenericComputerInterface(BaseComputerInterface):
|
|
126
152
|
raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
|
127
153
|
|
128
154
|
await self._send_command("press_key", {"key": actual_key})
|
155
|
+
await self._handle_delay(delay)
|
129
156
|
|
130
|
-
async def press_key(self, key: "KeyType") -> None:
|
157
|
+
async def press_key(self, key: "KeyType", delay: Optional[float] = None) -> None:
|
131
158
|
"""DEPRECATED: Use press() instead.
|
132
159
|
|
133
160
|
This method is kept for backward compatibility but will be removed in a future version.
|
134
161
|
Please use the press() method instead.
|
135
162
|
"""
|
136
|
-
await self.press(key)
|
163
|
+
await self.press(key, delay)
|
137
164
|
|
138
|
-
async def hotkey(self, *keys: "KeyType") -> None:
|
165
|
+
async def hotkey(self, *keys: "KeyType", delay: Optional[float] = None) -> None:
|
139
166
|
"""Press multiple keys simultaneously.
|
140
167
|
|
141
168
|
Args:
|
@@ -169,16 +196,20 @@ class GenericComputerInterface(BaseComputerInterface):
|
|
169
196
|
raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
|
170
197
|
|
171
198
|
await self._send_command("hotkey", {"keys": actual_keys})
|
199
|
+
await self._handle_delay(delay)
|
172
200
|
|
173
201
|
# Scrolling Actions
|
174
|
-
async def scroll(self, x: int, y: int) -> None:
|
202
|
+
async def scroll(self, x: int, y: int, delay: Optional[float] = None) -> None:
|
175
203
|
await self._send_command("scroll", {"x": x, "y": y})
|
204
|
+
await self._handle_delay(delay)
|
176
205
|
|
177
|
-
async def scroll_down(self, clicks: int = 1) -> None:
|
206
|
+
async def scroll_down(self, clicks: int = 1, delay: Optional[float] = None) -> None:
|
178
207
|
await self._send_command("scroll_down", {"clicks": clicks})
|
179
|
-
|
180
|
-
|
208
|
+
await self._handle_delay(delay)
|
209
|
+
|
210
|
+
async def scroll_up(self, clicks: int = 1, delay: Optional[float] = None) -> None:
|
181
211
|
await self._send_command("scroll_up", {"clicks": clicks})
|
212
|
+
await self._handle_delay(delay)
|
182
213
|
|
183
214
|
# Screen actions
|
184
215
|
async def screenshot(
|
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
|
|
6
6
|
|
7
7
|
[project]
|
8
8
|
name = "cua-computer"
|
9
|
-
version = "0.3.1"
|
9
|
+
version = "0.3.3"
|
10
10
|
description = "Computer-Use Interface (CUI) framework powering Cua"
|
11
11
|
readme = "README.md"
|
12
12
|
authors = [
|
@@ -57,7 +57,7 @@ target-version = [
|
|
57
57
|
|
58
58
|
[tool.ruff]
|
59
59
|
line-length = 100
|
60
|
-
target-version = "0.3.1"
|
60
|
+
target-version = "0.3.3"
|
61
61
|
select = [
|
62
62
|
"E",
|
63
63
|
"F",
|
@@ -71,7 +71,7 @@ docstring-code-format = true
|
|
71
71
|
|
72
72
|
[tool.mypy]
|
73
73
|
strict = true
|
74
|
-
python_version = "0.3.1"
|
74
|
+
python_version = "0.3.3"
|
75
75
|
ignore_missing_imports = true
|
76
76
|
disallow_untyped_defs = true
|
77
77
|
check_untyped_defs = true
|
@@ -1,304 +0,0 @@
|
|
1
|
-
"""Base interface for computer control."""
|
2
|
-
|
3
|
-
from abc import ABC, abstractmethod
|
4
|
-
from typing import Optional, Dict, Any, Tuple, List
|
5
|
-
from ..logger import Logger, LogLevel
|
6
|
-
from .models import MouseButton, CommandResult
|
7
|
-
|
8
|
-
class BaseComputerInterface(ABC):
|
9
|
-
"""Base class for computer control interfaces."""
|
10
|
-
|
11
|
-
def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None):
|
12
|
-
"""Initialize interface.
|
13
|
-
|
14
|
-
Args:
|
15
|
-
ip_address: IP address of the computer to control
|
16
|
-
username: Username for authentication
|
17
|
-
password: Password for authentication
|
18
|
-
api_key: Optional API key for cloud authentication
|
19
|
-
vm_name: Optional VM name for cloud authentication
|
20
|
-
"""
|
21
|
-
self.ip_address = ip_address
|
22
|
-
self.username = username
|
23
|
-
self.password = password
|
24
|
-
self.api_key = api_key
|
25
|
-
self.vm_name = vm_name
|
26
|
-
self.logger = Logger("cua.interface", LogLevel.NORMAL)
|
27
|
-
|
28
|
-
@abstractmethod
|
29
|
-
async def wait_for_ready(self, timeout: int = 60) -> None:
|
30
|
-
"""Wait for interface to be ready.
|
31
|
-
|
32
|
-
Args:
|
33
|
-
timeout: Maximum time to wait in seconds
|
34
|
-
|
35
|
-
Raises:
|
36
|
-
TimeoutError: If interface is not ready within timeout
|
37
|
-
"""
|
38
|
-
pass
|
39
|
-
|
40
|
-
@abstractmethod
|
41
|
-
def close(self) -> None:
|
42
|
-
"""Close the interface connection."""
|
43
|
-
pass
|
44
|
-
|
45
|
-
def force_close(self) -> None:
|
46
|
-
"""Force close the interface connection.
|
47
|
-
|
48
|
-
By default, this just calls close(), but subclasses can override
|
49
|
-
to provide more forceful cleanup.
|
50
|
-
"""
|
51
|
-
self.close()
|
52
|
-
|
53
|
-
# Mouse Actions
|
54
|
-
@abstractmethod
|
55
|
-
async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
|
56
|
-
"""Press and hold a mouse button."""
|
57
|
-
pass
|
58
|
-
|
59
|
-
@abstractmethod
|
60
|
-
async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
|
61
|
-
"""Release a mouse button."""
|
62
|
-
pass
|
63
|
-
|
64
|
-
@abstractmethod
|
65
|
-
async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
66
|
-
"""Perform a left click."""
|
67
|
-
pass
|
68
|
-
|
69
|
-
@abstractmethod
|
70
|
-
async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
71
|
-
"""Perform a right click."""
|
72
|
-
pass
|
73
|
-
|
74
|
-
@abstractmethod
|
75
|
-
async def double_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
76
|
-
"""Perform a double click."""
|
77
|
-
pass
|
78
|
-
|
79
|
-
@abstractmethod
|
80
|
-
async def move_cursor(self, x: int, y: int) -> None:
|
81
|
-
"""Move the cursor to specified position."""
|
82
|
-
pass
|
83
|
-
|
84
|
-
@abstractmethod
|
85
|
-
async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5) -> None:
|
86
|
-
"""Drag from current position to specified coordinates.
|
87
|
-
|
88
|
-
Args:
|
89
|
-
x: The x coordinate to drag to
|
90
|
-
y: The y coordinate to drag to
|
91
|
-
button: The mouse button to use ('left', 'middle', 'right')
|
92
|
-
duration: How long the drag should take in seconds
|
93
|
-
"""
|
94
|
-
pass
|
95
|
-
|
96
|
-
@abstractmethod
|
97
|
-
async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> None:
|
98
|
-
"""Drag the cursor along a path of coordinates.
|
99
|
-
|
100
|
-
Args:
|
101
|
-
path: List of (x, y) coordinate tuples defining the drag path
|
102
|
-
button: The mouse button to use ('left', 'middle', 'right')
|
103
|
-
duration: Total time in seconds that the drag operation should take
|
104
|
-
"""
|
105
|
-
pass
|
106
|
-
|
107
|
-
# Keyboard Actions
|
108
|
-
@abstractmethod
|
109
|
-
async def key_down(self, key: str) -> None:
|
110
|
-
"""Press and hold a key."""
|
111
|
-
pass
|
112
|
-
|
113
|
-
@abstractmethod
|
114
|
-
async def key_up(self, key: str) -> None:
|
115
|
-
"""Release a key."""
|
116
|
-
pass
|
117
|
-
|
118
|
-
@abstractmethod
|
119
|
-
async def type_text(self, text: str) -> None:
|
120
|
-
"""Type the specified text."""
|
121
|
-
pass
|
122
|
-
|
123
|
-
@abstractmethod
|
124
|
-
async def press_key(self, key: str) -> None:
|
125
|
-
"""Press a single key."""
|
126
|
-
pass
|
127
|
-
|
128
|
-
@abstractmethod
|
129
|
-
async def hotkey(self, *keys: str) -> None:
|
130
|
-
"""Press multiple keys simultaneously."""
|
131
|
-
pass
|
132
|
-
|
133
|
-
# Scrolling Actions
|
134
|
-
@abstractmethod
|
135
|
-
async def scroll(self, x: int, y: int) -> None:
|
136
|
-
"""Scroll the mouse wheel."""
|
137
|
-
pass
|
138
|
-
|
139
|
-
@abstractmethod
|
140
|
-
async def scroll_down(self, clicks: int = 1) -> None:
|
141
|
-
"""Scroll down."""
|
142
|
-
pass
|
143
|
-
|
144
|
-
@abstractmethod
|
145
|
-
async def scroll_up(self, clicks: int = 1) -> None:
|
146
|
-
"""Scroll up."""
|
147
|
-
pass
|
148
|
-
|
149
|
-
# Screen Actions
|
150
|
-
@abstractmethod
|
151
|
-
async def screenshot(self) -> bytes:
|
152
|
-
"""Take a screenshot.
|
153
|
-
|
154
|
-
Returns:
|
155
|
-
Raw bytes of the screenshot image
|
156
|
-
"""
|
157
|
-
pass
|
158
|
-
|
159
|
-
@abstractmethod
|
160
|
-
async def get_screen_size(self) -> Dict[str, int]:
|
161
|
-
"""Get the screen dimensions.
|
162
|
-
|
163
|
-
Returns:
|
164
|
-
Dict with 'width' and 'height' keys
|
165
|
-
"""
|
166
|
-
pass
|
167
|
-
|
168
|
-
@abstractmethod
|
169
|
-
async def get_cursor_position(self) -> Dict[str, int]:
|
170
|
-
"""Get current cursor position."""
|
171
|
-
pass
|
172
|
-
|
173
|
-
# Clipboard Actions
|
174
|
-
@abstractmethod
|
175
|
-
async def copy_to_clipboard(self) -> str:
|
176
|
-
"""Get clipboard content."""
|
177
|
-
pass
|
178
|
-
|
179
|
-
@abstractmethod
|
180
|
-
async def set_clipboard(self, text: str) -> None:
|
181
|
-
"""Set clipboard content."""
|
182
|
-
pass
|
183
|
-
|
184
|
-
# File System Actions
|
185
|
-
@abstractmethod
|
186
|
-
async def file_exists(self, path: str) -> bool:
|
187
|
-
"""Check if file exists."""
|
188
|
-
pass
|
189
|
-
|
190
|
-
@abstractmethod
|
191
|
-
async def directory_exists(self, path: str) -> bool:
|
192
|
-
"""Check if directory exists."""
|
193
|
-
pass
|
194
|
-
|
195
|
-
@abstractmethod
|
196
|
-
async def list_dir(self, path: str) -> List[str]:
|
197
|
-
"""List directory contents."""
|
198
|
-
pass
|
199
|
-
|
200
|
-
@abstractmethod
|
201
|
-
async def read_text(self, path: str) -> str:
|
202
|
-
"""Read file text contents."""
|
203
|
-
pass
|
204
|
-
|
205
|
-
@abstractmethod
|
206
|
-
async def write_text(self, path: str, content: str) -> None:
|
207
|
-
"""Write file text contents."""
|
208
|
-
pass
|
209
|
-
|
210
|
-
@abstractmethod
|
211
|
-
async def read_bytes(self, path: str, offset: int = 0, length: Optional[int] = None) -> bytes:
|
212
|
-
"""Read file binary contents with optional seeking support.
|
213
|
-
|
214
|
-
Args:
|
215
|
-
path: Path to the file
|
216
|
-
offset: Byte offset to start reading from (default: 0)
|
217
|
-
length: Number of bytes to read (default: None for entire file)
|
218
|
-
"""
|
219
|
-
pass
|
220
|
-
|
221
|
-
@abstractmethod
|
222
|
-
async def write_bytes(self, path: str, content: bytes) -> None:
|
223
|
-
"""Write file binary contents."""
|
224
|
-
pass
|
225
|
-
|
226
|
-
@abstractmethod
|
227
|
-
async def delete_file(self, path: str) -> None:
|
228
|
-
"""Delete file."""
|
229
|
-
pass
|
230
|
-
|
231
|
-
@abstractmethod
|
232
|
-
async def create_dir(self, path: str) -> None:
|
233
|
-
"""Create directory."""
|
234
|
-
pass
|
235
|
-
|
236
|
-
@abstractmethod
|
237
|
-
async def delete_dir(self, path: str) -> None:
|
238
|
-
"""Delete directory."""
|
239
|
-
pass
|
240
|
-
|
241
|
-
@abstractmethod
|
242
|
-
async def get_file_size(self, path: str) -> int:
|
243
|
-
"""Get the size of a file in bytes."""
|
244
|
-
pass
|
245
|
-
|
246
|
-
@abstractmethod
|
247
|
-
async def run_command(self, command: str) -> CommandResult:
|
248
|
-
"""Run shell command and return structured result.
|
249
|
-
|
250
|
-
Executes a shell command using subprocess.run with shell=True and check=False.
|
251
|
-
The command is run in the target environment and captures both stdout and stderr.
|
252
|
-
|
253
|
-
Args:
|
254
|
-
command (str): The shell command to execute
|
255
|
-
|
256
|
-
Returns:
|
257
|
-
CommandResult: A structured result containing:
|
258
|
-
- stdout (str): Standard output from the command
|
259
|
-
- stderr (str): Standard error from the command
|
260
|
-
- returncode (int): Exit code from the command (0 indicates success)
|
261
|
-
|
262
|
-
Raises:
|
263
|
-
RuntimeError: If the command execution fails at the system level
|
264
|
-
|
265
|
-
Example:
|
266
|
-
result = await interface.run_command("ls -la")
|
267
|
-
if result.returncode == 0:
|
268
|
-
print(f"Output: {result.stdout}")
|
269
|
-
else:
|
270
|
-
print(f"Error: {result.stderr}, Exit code: {result.returncode}")
|
271
|
-
"""
|
272
|
-
pass
|
273
|
-
|
274
|
-
# Accessibility Actions
|
275
|
-
@abstractmethod
|
276
|
-
async def get_accessibility_tree(self) -> Dict:
|
277
|
-
"""Get the accessibility tree of the current screen."""
|
278
|
-
pass
|
279
|
-
|
280
|
-
@abstractmethod
|
281
|
-
async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]:
|
282
|
-
"""Convert screenshot coordinates to screen coordinates.
|
283
|
-
|
284
|
-
Args:
|
285
|
-
x: X coordinate in screenshot space
|
286
|
-
y: Y coordinate in screenshot space
|
287
|
-
|
288
|
-
Returns:
|
289
|
-
tuple[float, float]: (x, y) coordinates in screen space
|
290
|
-
"""
|
291
|
-
pass
|
292
|
-
|
293
|
-
@abstractmethod
|
294
|
-
async def to_screenshot_coordinates(self, x: float, y: float) -> tuple[float, float]:
|
295
|
-
"""Convert screen coordinates to screenshot coordinates.
|
296
|
-
|
297
|
-
Args:
|
298
|
-
x: X coordinate in screen space
|
299
|
-
y: Y coordinate in screen space
|
300
|
-
|
301
|
-
Returns:
|
302
|
-
tuple[float, float]: (x, y) coordinates in screenshot space
|
303
|
-
"""
|
304
|
-
pass
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|