cua-computer 0.3.2__tar.gz → 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {cua_computer-0.3.2 → cua_computer-0.3.3}/PKG-INFO +1 -1
  2. cua_computer-0.3.3/computer/interface/base.py +461 -0
  3. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/interface/generic.py +50 -19
  4. {cua_computer-0.3.2 → cua_computer-0.3.3}/pyproject.toml +3 -3
  5. cua_computer-0.3.2/computer/interface/base.py +0 -304
  6. {cua_computer-0.3.2 → cua_computer-0.3.3}/README.md +0 -0
  7. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/__init__.py +0 -0
  8. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/computer.py +0 -0
  9. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/diorama_computer.py +0 -0
  10. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/helpers.py +0 -0
  11. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/interface/__init__.py +0 -0
  12. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/interface/factory.py +0 -0
  13. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/interface/linux.py +0 -0
  14. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/interface/macos.py +0 -0
  15. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/interface/models.py +0 -0
  16. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/interface/windows.py +0 -0
  17. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/logger.py +0 -0
  18. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/models.py +0 -0
  19. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/providers/__init__.py +0 -0
  20. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/providers/base.py +0 -0
  21. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/providers/cloud/__init__.py +0 -0
  22. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/providers/cloud/provider.py +0 -0
  23. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/providers/factory.py +0 -0
  24. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/providers/lume/__init__.py +0 -0
  25. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/providers/lume/provider.py +0 -0
  26. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/providers/lume_api.py +0 -0
  27. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/providers/lumier/__init__.py +0 -0
  28. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/providers/lumier/provider.py +0 -0
  29. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/providers/winsandbox/__init__.py +0 -0
  30. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/providers/winsandbox/provider.py +0 -0
  31. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/providers/winsandbox/setup_script.ps1 +0 -0
  32. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/telemetry.py +0 -0
  33. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/ui/__init__.py +0 -0
  34. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/ui/__main__.py +0 -0
  35. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/ui/gradio/__init__.py +0 -0
  36. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/ui/gradio/app.py +0 -0
  37. {cua_computer-0.3.2 → cua_computer-0.3.3}/computer/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-computer
3
- Version: 0.3.2
3
+ Version: 0.3.3
4
4
  Summary: Computer-Use Interface (CUI) framework powering Cua
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: >=3.11
@@ -0,0 +1,461 @@
1
+ """Base interface for computer control."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Optional, Dict, Any, Tuple, List
5
+ from ..logger import Logger, LogLevel
6
+ from .models import MouseButton, CommandResult
7
+
8
+ class BaseComputerInterface(ABC):
9
+ """Base class for computer control interfaces."""
10
+
11
+ def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None):
12
+ """Initialize interface.
13
+
14
+ Args:
15
+ ip_address: IP address of the computer to control
16
+ username: Username for authentication
17
+ password: Password for authentication
18
+ api_key: Optional API key for cloud authentication
19
+ vm_name: Optional VM name for cloud authentication
20
+ """
21
+ self.ip_address = ip_address
22
+ self.username = username
23
+ self.password = password
24
+ self.api_key = api_key
25
+ self.vm_name = vm_name
26
+ self.logger = Logger("cua.interface", LogLevel.NORMAL)
27
+
28
+ # Optional default delay time between commands (in seconds)
29
+ self.delay: float = 0.0
30
+
31
+ @abstractmethod
32
+ async def wait_for_ready(self, timeout: int = 60) -> None:
33
+ """Wait for interface to be ready.
34
+
35
+ Args:
36
+ timeout: Maximum time to wait in seconds
37
+
38
+ Raises:
39
+ TimeoutError: If interface is not ready within timeout
40
+ """
41
+ pass
42
+
43
+ @abstractmethod
44
+ def close(self) -> None:
45
+ """Close the interface connection."""
46
+ pass
47
+
48
+ def force_close(self) -> None:
49
+ """Force close the interface connection.
50
+
51
+ By default, this just calls close(), but subclasses can override
52
+ to provide more forceful cleanup.
53
+ """
54
+ self.close()
55
+
56
+ # Mouse Actions
57
+ @abstractmethod
58
+ async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left", delay: Optional[float] = None) -> None:
59
+ """Press and hold a mouse button.
60
+
61
+ Args:
62
+ x: X coordinate to press at. If None, uses current cursor position.
63
+ y: Y coordinate to press at. If None, uses current cursor position.
64
+ button: Mouse button to press ('left', 'middle', 'right').
65
+ delay: Optional delay in seconds after the action
66
+ """
67
+ pass
68
+
69
+ @abstractmethod
70
+ async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left", delay: Optional[float] = None) -> None:
71
+ """Release a mouse button.
72
+
73
+ Args:
74
+ x: X coordinate to release at. If None, uses current cursor position.
75
+ y: Y coordinate to release at. If None, uses current cursor position.
76
+ button: Mouse button to release ('left', 'middle', 'right').
77
+ delay: Optional delay in seconds after the action
78
+ """
79
+ pass
80
+
81
+ @abstractmethod
82
+ async def left_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
83
+ """Perform a left mouse button click.
84
+
85
+ Args:
86
+ x: X coordinate to click at. If None, uses current cursor position.
87
+ y: Y coordinate to click at. If None, uses current cursor position.
88
+ delay: Optional delay in seconds after the action
89
+ """
90
+ pass
91
+
92
+ @abstractmethod
93
+ async def right_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
94
+ """Perform a right mouse button click.
95
+
96
+ Args:
97
+ x: X coordinate to click at. If None, uses current cursor position.
98
+ y: Y coordinate to click at. If None, uses current cursor position.
99
+ delay: Optional delay in seconds after the action
100
+ """
101
+ pass
102
+
103
+ @abstractmethod
104
+ async def double_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
105
+ """Perform a double left mouse button click.
106
+
107
+ Args:
108
+ x: X coordinate to double-click at. If None, uses current cursor position.
109
+ y: Y coordinate to double-click at. If None, uses current cursor position.
110
+ delay: Optional delay in seconds after the action
111
+ """
112
+ pass
113
+
114
+ @abstractmethod
115
+ async def move_cursor(self, x: int, y: int, delay: Optional[float] = None) -> None:
116
+ """Move the cursor to the specified screen coordinates.
117
+
118
+ Args:
119
+ x: X coordinate to move cursor to.
120
+ y: Y coordinate to move cursor to.
121
+ delay: Optional delay in seconds after the action
122
+ """
123
+ pass
124
+
125
+ @abstractmethod
126
+ async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
127
+ """Drag from current position to specified coordinates.
128
+
129
+ Args:
130
+ x: The x coordinate to drag to
131
+ y: The y coordinate to drag to
132
+ button: The mouse button to use ('left', 'middle', 'right')
133
+ duration: How long the drag should take in seconds
134
+ delay: Optional delay in seconds after the action
135
+ """
136
+ pass
137
+
138
+ @abstractmethod
139
+ async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
140
+ """Drag the cursor along a path of coordinates.
141
+
142
+ Args:
143
+ path: List of (x, y) coordinate tuples defining the drag path
144
+ button: The mouse button to use ('left', 'middle', 'right')
145
+ duration: Total time in seconds that the drag operation should take
146
+ delay: Optional delay in seconds after the action
147
+ """
148
+ pass
149
+
150
+ # Keyboard Actions
151
+ @abstractmethod
152
+ async def key_down(self, key: str, delay: Optional[float] = None) -> None:
153
+ """Press and hold a key.
154
+
155
+ Args:
156
+ key: The key to press and hold (e.g., 'a', 'shift', 'ctrl').
157
+ delay: Optional delay in seconds after the action.
158
+ """
159
+ pass
160
+
161
+ @abstractmethod
162
+ async def key_up(self, key: str, delay: Optional[float] = None) -> None:
163
+ """Release a previously pressed key.
164
+
165
+ Args:
166
+ key: The key to release (e.g., 'a', 'shift', 'ctrl').
167
+ delay: Optional delay in seconds after the action.
168
+ """
169
+ pass
170
+
171
+ @abstractmethod
172
+ async def type_text(self, text: str, delay: Optional[float] = None) -> None:
173
+ """Type the specified text string.
174
+
175
+ Args:
176
+ text: The text string to type.
177
+ delay: Optional delay in seconds after the action.
178
+ """
179
+ pass
180
+
181
+ @abstractmethod
182
+ async def press_key(self, key: str, delay: Optional[float] = None) -> None:
183
+ """Press and release a single key.
184
+
185
+ Args:
186
+ key: The key to press (e.g., 'a', 'enter', 'escape').
187
+ delay: Optional delay in seconds after the action.
188
+ """
189
+ pass
190
+
191
+ @abstractmethod
192
+ async def hotkey(self, *keys: str, delay: Optional[float] = None) -> None:
193
+ """Press multiple keys simultaneously (keyboard shortcut).
194
+
195
+ Args:
196
+ *keys: Variable number of keys to press together (e.g., 'ctrl', 'c').
197
+ delay: Optional delay in seconds after the action.
198
+ """
199
+ pass
200
+
201
+ # Scrolling Actions
202
+ @abstractmethod
203
+ async def scroll(self, x: int, y: int, delay: Optional[float] = None) -> None:
204
+ """Scroll the mouse wheel by specified amounts.
205
+
206
+ Args:
207
+ x: Horizontal scroll amount (positive = right, negative = left).
208
+ y: Vertical scroll amount (positive = up, negative = down).
209
+ delay: Optional delay in seconds after the action.
210
+ """
211
+ pass
212
+
213
+ @abstractmethod
214
+ async def scroll_down(self, clicks: int = 1, delay: Optional[float] = None) -> None:
215
+ """Scroll down by the specified number of clicks.
216
+
217
+ Args:
218
+ clicks: Number of scroll clicks to perform downward.
219
+ delay: Optional delay in seconds after the action.
220
+ """
221
+ pass
222
+
223
+ @abstractmethod
224
+ async def scroll_up(self, clicks: int = 1, delay: Optional[float] = None) -> None:
225
+ """Scroll up by the specified number of clicks.
226
+
227
+ Args:
228
+ clicks: Number of scroll clicks to perform upward.
229
+ delay: Optional delay in seconds after the action.
230
+ """
231
+ pass
232
+
233
+ # Screen Actions
234
+ @abstractmethod
235
+ async def screenshot(self) -> bytes:
236
+ """Take a screenshot.
237
+
238
+ Returns:
239
+ Raw bytes of the screenshot image
240
+ """
241
+ pass
242
+
243
+ @abstractmethod
244
+ async def get_screen_size(self) -> Dict[str, int]:
245
+ """Get the screen dimensions.
246
+
247
+ Returns:
248
+ Dict with 'width' and 'height' keys
249
+ """
250
+ pass
251
+
252
+ @abstractmethod
253
+ async def get_cursor_position(self) -> Dict[str, int]:
254
+ """Get the current cursor position on screen.
255
+
256
+ Returns:
257
+ Dict with 'x' and 'y' keys containing cursor coordinates.
258
+ """
259
+ pass
260
+
261
+ # Clipboard Actions
262
+ @abstractmethod
263
+ async def copy_to_clipboard(self) -> str:
264
+ """Get the current clipboard content.
265
+
266
+ Returns:
267
+ The text content currently stored in the clipboard.
268
+ """
269
+ pass
270
+
271
+ @abstractmethod
272
+ async def set_clipboard(self, text: str) -> None:
273
+ """Set the clipboard content to the specified text.
274
+
275
+ Args:
276
+ text: The text to store in the clipboard.
277
+ """
278
+ pass
279
+
280
+ # File System Actions
281
+ @abstractmethod
282
+ async def file_exists(self, path: str) -> bool:
283
+ """Check if a file exists at the specified path.
284
+
285
+ Args:
286
+ path: The file path to check.
287
+
288
+ Returns:
289
+ True if the file exists, False otherwise.
290
+ """
291
+ pass
292
+
293
+ @abstractmethod
294
+ async def directory_exists(self, path: str) -> bool:
295
+ """Check if a directory exists at the specified path.
296
+
297
+ Args:
298
+ path: The directory path to check.
299
+
300
+ Returns:
301
+ True if the directory exists, False otherwise.
302
+ """
303
+ pass
304
+
305
+ @abstractmethod
306
+ async def list_dir(self, path: str) -> List[str]:
307
+ """List the contents of a directory.
308
+
309
+ Args:
310
+ path: The directory path to list.
311
+
312
+ Returns:
313
+ List of file and directory names in the specified directory.
314
+ """
315
+ pass
316
+
317
+ @abstractmethod
318
+ async def read_text(self, path: str) -> str:
319
+ """Read the text contents of a file.
320
+
321
+ Args:
322
+ path: The file path to read from.
323
+
324
+ Returns:
325
+ The text content of the file.
326
+ """
327
+ pass
328
+
329
+ @abstractmethod
330
+ async def write_text(self, path: str, content: str) -> None:
331
+ """Write text content to a file.
332
+
333
+ Args:
334
+ path: The file path to write to.
335
+ content: The text content to write.
336
+ """
337
+ pass
338
+
339
+ @abstractmethod
340
+ async def read_bytes(self, path: str, offset: int = 0, length: Optional[int] = None) -> bytes:
341
+ """Read file binary contents with optional seeking support.
342
+
343
+ Args:
344
+ path: Path to the file
345
+ offset: Byte offset to start reading from (default: 0)
346
+ length: Number of bytes to read (default: None for entire file)
347
+ """
348
+ pass
349
+
350
+ @abstractmethod
351
+ async def write_bytes(self, path: str, content: bytes) -> None:
352
+ """Write binary content to a file.
353
+
354
+ Args:
355
+ path: The file path to write to.
356
+ content: The binary content to write.
357
+ """
358
+ pass
359
+
360
+ @abstractmethod
361
+ async def delete_file(self, path: str) -> None:
362
+ """Delete a file at the specified path.
363
+
364
+ Args:
365
+ path: The file path to delete.
366
+ """
367
+ pass
368
+
369
+ @abstractmethod
370
+ async def create_dir(self, path: str) -> None:
371
+ """Create a directory at the specified path.
372
+
373
+ Args:
374
+ path: The directory path to create.
375
+ """
376
+ pass
377
+
378
+ @abstractmethod
379
+ async def delete_dir(self, path: str) -> None:
380
+ """Delete a directory at the specified path.
381
+
382
+ Args:
383
+ path: The directory path to delete.
384
+ """
385
+ pass
386
+
387
+ @abstractmethod
388
+ async def get_file_size(self, path: str) -> int:
389
+ """Get the size of a file in bytes.
390
+
391
+ Args:
392
+ path: The file path to get the size of.
393
+
394
+ Returns:
395
+ The size of the file in bytes.
396
+ """
397
+ pass
398
+
399
+ @abstractmethod
400
+ async def run_command(self, command: str) -> CommandResult:
401
+ """Run shell command and return structured result.
402
+
403
+ Executes a shell command using subprocess.run with shell=True and check=False.
404
+ The command is run in the target environment and captures both stdout and stderr.
405
+
406
+ Args:
407
+ command (str): The shell command to execute
408
+
409
+ Returns:
410
+ CommandResult: A structured result containing:
411
+ - stdout (str): Standard output from the command
412
+ - stderr (str): Standard error from the command
413
+ - returncode (int): Exit code from the command (0 indicates success)
414
+
415
+ Raises:
416
+ RuntimeError: If the command execution fails at the system level
417
+
418
+ Example:
419
+ result = await interface.run_command("ls -la")
420
+ if result.returncode == 0:
421
+ print(f"Output: {result.stdout}")
422
+ else:
423
+ print(f"Error: {result.stderr}, Exit code: {result.returncode}")
424
+ """
425
+ pass
426
+
427
+ # Accessibility Actions
428
+ @abstractmethod
429
+ async def get_accessibility_tree(self) -> Dict:
430
+ """Get the accessibility tree of the current screen.
431
+
432
+ Returns:
433
+ Dict containing the hierarchical accessibility information of screen elements.
434
+ """
435
+ pass
436
+
437
+ @abstractmethod
438
+ async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]:
439
+ """Convert screenshot coordinates to screen coordinates.
440
+
441
+ Args:
442
+ x: X coordinate in screenshot space
443
+ y: Y coordinate in screenshot space
444
+
445
+ Returns:
446
+ tuple[float, float]: (x, y) coordinates in screen space
447
+ """
448
+ pass
449
+
450
+ @abstractmethod
451
+ async def to_screenshot_coordinates(self, x: float, y: float) -> tuple[float, float]:
452
+ """Convert screen coordinates to screenshot coordinates.
453
+
454
+ Args:
455
+ x: X coordinate in screen space
456
+ y: Y coordinate in screen space
457
+
458
+ Returns:
459
+ tuple[float, float]: (x, y) coordinates in screenshot space
460
+ """
461
+ pass
@@ -32,6 +32,21 @@ class GenericComputerInterface(BaseComputerInterface):
32
32
  # Set logger name for the interface
33
33
  self.logger = Logger(logger_name, LogLevel.NORMAL)
34
34
 
35
+ # Optional default delay time between commands (in seconds)
36
+ self.delay = 0.0
37
+
38
+ async def _handle_delay(self, delay: Optional[float] = None):
39
+ """Handle delay between commands using async sleep.
40
+
41
+ Args:
42
+ delay: Optional delay in seconds. If None, uses self.delay.
43
+ """
44
+ if delay is not None:
45
+ if isinstance(delay, float) and delay > 0:
46
+ await asyncio.sleep(delay)
47
+ elif isinstance(self.delay, float) and self.delay > 0:
48
+ await asyncio.sleep(self.delay)
49
+
35
50
  @property
36
51
  def ws_uri(self) -> str:
37
52
  """Get the WebSocket URI using the current IP address.
@@ -44,42 +59,52 @@ class GenericComputerInterface(BaseComputerInterface):
44
59
  return f"{protocol}://{self.ip_address}:{port}/ws"
45
60
 
46
61
  # Mouse actions
47
- async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> None:
62
+ async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left", delay: Optional[float] = None) -> None:
48
63
  await self._send_command("mouse_down", {"x": x, "y": y, "button": button})
64
+ await self._handle_delay(delay)
49
65
 
50
- async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> None:
66
+ async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left", delay: Optional[float] = None) -> None:
51
67
  await self._send_command("mouse_up", {"x": x, "y": y, "button": button})
68
+ await self._handle_delay(delay)
52
69
 
53
- async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
70
+ async def left_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
54
71
  await self._send_command("left_click", {"x": x, "y": y})
72
+ await self._handle_delay(delay)
55
73
 
56
- async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
74
+ async def right_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
57
75
  await self._send_command("right_click", {"x": x, "y": y})
76
+ await self._handle_delay(delay)
58
77
 
59
- async def double_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
78
+ async def double_click(self, x: Optional[int] = None, y: Optional[int] = None, delay: Optional[float] = None) -> None:
60
79
  await self._send_command("double_click", {"x": x, "y": y})
80
+ await self._handle_delay(delay)
61
81
 
62
- async def move_cursor(self, x: int, y: int) -> None:
82
+ async def move_cursor(self, x: int, y: int, delay: Optional[float] = None) -> None:
63
83
  await self._send_command("move_cursor", {"x": x, "y": y})
84
+ await self._handle_delay(delay)
64
85
 
65
- async def drag_to(self, x: int, y: int, button: "MouseButton" = "left", duration: float = 0.5) -> None:
86
+ async def drag_to(self, x: int, y: int, button: "MouseButton" = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
66
87
  await self._send_command(
67
88
  "drag_to", {"x": x, "y": y, "button": button, "duration": duration}
68
89
  )
90
+ await self._handle_delay(delay)
69
91
 
70
- async def drag(self, path: List[Tuple[int, int]], button: "MouseButton" = "left", duration: float = 0.5) -> None:
92
+ async def drag(self, path: List[Tuple[int, int]], button: "MouseButton" = "left", duration: float = 0.5, delay: Optional[float] = None) -> None:
71
93
  await self._send_command(
72
94
  "drag", {"path": path, "button": button, "duration": duration}
73
95
  )
96
+ await self._handle_delay(delay)
74
97
 
75
98
  # Keyboard Actions
76
- async def key_down(self, key: "KeyType") -> None:
99
+ async def key_down(self, key: "KeyType", delay: Optional[float] = None) -> None:
77
100
  await self._send_command("key_down", {"key": key})
101
+ await self._handle_delay(delay)
78
102
 
79
- async def key_up(self, key: "KeyType") -> None:
103
+ async def key_up(self, key: "KeyType", delay: Optional[float] = None) -> None:
80
104
  await self._send_command("key_up", {"key": key})
105
+ await self._handle_delay(delay)
81
106
 
82
- async def type_text(self, text: str) -> None:
107
+ async def type_text(self, text: str, delay: Optional[float] = None) -> None:
83
108
  # Temporary fix for https://github.com/trycua/cua/issues/165
84
109
  # Check if text contains Unicode characters
85
110
  if any(ord(char) > 127 for char in text):
@@ -89,8 +114,9 @@ class GenericComputerInterface(BaseComputerInterface):
89
114
  else:
90
115
  # For ASCII text, use the regular typing method
91
116
  await self._send_command("type_text", {"text": text})
117
+ await self._handle_delay(delay)
92
118
 
93
- async def press(self, key: "KeyType") -> None:
119
+ async def press(self, key: "KeyType", delay: Optional[float] = None) -> None:
94
120
  """Press a single key.
95
121
 
96
122
  Args:
@@ -126,16 +152,17 @@ class GenericComputerInterface(BaseComputerInterface):
126
152
  raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
127
153
 
128
154
  await self._send_command("press_key", {"key": actual_key})
155
+ await self._handle_delay(delay)
129
156
 
130
- async def press_key(self, key: "KeyType") -> None:
157
+ async def press_key(self, key: "KeyType", delay: Optional[float] = None) -> None:
131
158
  """DEPRECATED: Use press() instead.
132
159
 
133
160
  This method is kept for backward compatibility but will be removed in a future version.
134
161
  Please use the press() method instead.
135
162
  """
136
- await self.press(key)
163
+ await self.press(key, delay)
137
164
 
138
- async def hotkey(self, *keys: "KeyType") -> None:
165
+ async def hotkey(self, *keys: "KeyType", delay: Optional[float] = None) -> None:
139
166
  """Press multiple keys simultaneously.
140
167
 
141
168
  Args:
@@ -169,16 +196,20 @@ class GenericComputerInterface(BaseComputerInterface):
169
196
  raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
170
197
 
171
198
  await self._send_command("hotkey", {"keys": actual_keys})
199
+ await self._handle_delay(delay)
172
200
 
173
201
  # Scrolling Actions
174
- async def scroll(self, x: int, y: int) -> None:
202
+ async def scroll(self, x: int, y: int, delay: Optional[float] = None) -> None:
175
203
  await self._send_command("scroll", {"x": x, "y": y})
204
+ await self._handle_delay(delay)
176
205
 
177
- async def scroll_down(self, clicks: int = 1) -> None:
206
+ async def scroll_down(self, clicks: int = 1, delay: Optional[float] = None) -> None:
178
207
  await self._send_command("scroll_down", {"clicks": clicks})
179
-
180
- async def scroll_up(self, clicks: int = 1) -> None:
208
+ await self._handle_delay(delay)
209
+
210
+ async def scroll_up(self, clicks: int = 1, delay: Optional[float] = None) -> None:
181
211
  await self._send_command("scroll_up", {"clicks": clicks})
212
+ await self._handle_delay(delay)
182
213
 
183
214
  # Screen actions
184
215
  async def screenshot(
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
6
6
 
7
7
  [project]
8
8
  name = "cua-computer"
9
- version = "0.3.2"
9
+ version = "0.3.3"
10
10
  description = "Computer-Use Interface (CUI) framework powering Cua"
11
11
  readme = "README.md"
12
12
  authors = [
@@ -57,7 +57,7 @@ target-version = [
57
57
 
58
58
  [tool.ruff]
59
59
  line-length = 100
60
- target-version = "0.3.2"
60
+ target-version = "0.3.3"
61
61
  select = [
62
62
  "E",
63
63
  "F",
@@ -71,7 +71,7 @@ docstring-code-format = true
71
71
 
72
72
  [tool.mypy]
73
73
  strict = true
74
- python_version = "0.3.2"
74
+ python_version = "0.3.3"
75
75
  ignore_missing_imports = true
76
76
  disallow_untyped_defs = true
77
77
  check_untyped_defs = true
@@ -1,304 +0,0 @@
1
- """Base interface for computer control."""
2
-
3
- from abc import ABC, abstractmethod
4
- from typing import Optional, Dict, Any, Tuple, List
5
- from ..logger import Logger, LogLevel
6
- from .models import MouseButton, CommandResult
7
-
8
- class BaseComputerInterface(ABC):
9
- """Base class for computer control interfaces."""
10
-
11
- def __init__(self, ip_address: str, username: str = "lume", password: str = "lume", api_key: Optional[str] = None, vm_name: Optional[str] = None):
12
- """Initialize interface.
13
-
14
- Args:
15
- ip_address: IP address of the computer to control
16
- username: Username for authentication
17
- password: Password for authentication
18
- api_key: Optional API key for cloud authentication
19
- vm_name: Optional VM name for cloud authentication
20
- """
21
- self.ip_address = ip_address
22
- self.username = username
23
- self.password = password
24
- self.api_key = api_key
25
- self.vm_name = vm_name
26
- self.logger = Logger("cua.interface", LogLevel.NORMAL)
27
-
28
- @abstractmethod
29
- async def wait_for_ready(self, timeout: int = 60) -> None:
30
- """Wait for interface to be ready.
31
-
32
- Args:
33
- timeout: Maximum time to wait in seconds
34
-
35
- Raises:
36
- TimeoutError: If interface is not ready within timeout
37
- """
38
- pass
39
-
40
- @abstractmethod
41
- def close(self) -> None:
42
- """Close the interface connection."""
43
- pass
44
-
45
- def force_close(self) -> None:
46
- """Force close the interface connection.
47
-
48
- By default, this just calls close(), but subclasses can override
49
- to provide more forceful cleanup.
50
- """
51
- self.close()
52
-
53
- # Mouse Actions
54
- @abstractmethod
55
- async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
56
- """Press and hold a mouse button."""
57
- pass
58
-
59
- @abstractmethod
60
- async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
61
- """Release a mouse button."""
62
- pass
63
-
64
- @abstractmethod
65
- async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
66
- """Perform a left click."""
67
- pass
68
-
69
- @abstractmethod
70
- async def right_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
71
- """Perform a right click."""
72
- pass
73
-
74
- @abstractmethod
75
- async def double_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
76
- """Perform a double click."""
77
- pass
78
-
79
- @abstractmethod
80
- async def move_cursor(self, x: int, y: int) -> None:
81
- """Move the cursor to specified position."""
82
- pass
83
-
84
- @abstractmethod
85
- async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5) -> None:
86
- """Drag from current position to specified coordinates.
87
-
88
- Args:
89
- x: The x coordinate to drag to
90
- y: The y coordinate to drag to
91
- button: The mouse button to use ('left', 'middle', 'right')
92
- duration: How long the drag should take in seconds
93
- """
94
- pass
95
-
96
- @abstractmethod
97
- async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> None:
98
- """Drag the cursor along a path of coordinates.
99
-
100
- Args:
101
- path: List of (x, y) coordinate tuples defining the drag path
102
- button: The mouse button to use ('left', 'middle', 'right')
103
- duration: Total time in seconds that the drag operation should take
104
- """
105
- pass
106
-
107
- # Keyboard Actions
108
- @abstractmethod
109
- async def key_down(self, key: str) -> None:
110
- """Press and hold a key."""
111
- pass
112
-
113
- @abstractmethod
114
- async def key_up(self, key: str) -> None:
115
- """Release a key."""
116
- pass
117
-
118
- @abstractmethod
119
- async def type_text(self, text: str) -> None:
120
- """Type the specified text."""
121
- pass
122
-
123
- @abstractmethod
124
- async def press_key(self, key: str) -> None:
125
- """Press a single key."""
126
- pass
127
-
128
- @abstractmethod
129
- async def hotkey(self, *keys: str) -> None:
130
- """Press multiple keys simultaneously."""
131
- pass
132
-
133
- # Scrolling Actions
134
- @abstractmethod
135
- async def scroll(self, x: int, y: int) -> None:
136
- """Scroll the mouse wheel."""
137
- pass
138
-
139
- @abstractmethod
140
- async def scroll_down(self, clicks: int = 1) -> None:
141
- """Scroll down."""
142
- pass
143
-
144
- @abstractmethod
145
- async def scroll_up(self, clicks: int = 1) -> None:
146
- """Scroll up."""
147
- pass
148
-
149
- # Screen Actions
150
- @abstractmethod
151
- async def screenshot(self) -> bytes:
152
- """Take a screenshot.
153
-
154
- Returns:
155
- Raw bytes of the screenshot image
156
- """
157
- pass
158
-
159
- @abstractmethod
160
- async def get_screen_size(self) -> Dict[str, int]:
161
- """Get the screen dimensions.
162
-
163
- Returns:
164
- Dict with 'width' and 'height' keys
165
- """
166
- pass
167
-
168
- @abstractmethod
169
- async def get_cursor_position(self) -> Dict[str, int]:
170
- """Get current cursor position."""
171
- pass
172
-
173
- # Clipboard Actions
174
- @abstractmethod
175
- async def copy_to_clipboard(self) -> str:
176
- """Get clipboard content."""
177
- pass
178
-
179
- @abstractmethod
180
- async def set_clipboard(self, text: str) -> None:
181
- """Set clipboard content."""
182
- pass
183
-
184
- # File System Actions
185
- @abstractmethod
186
- async def file_exists(self, path: str) -> bool:
187
- """Check if file exists."""
188
- pass
189
-
190
- @abstractmethod
191
- async def directory_exists(self, path: str) -> bool:
192
- """Check if directory exists."""
193
- pass
194
-
195
- @abstractmethod
196
- async def list_dir(self, path: str) -> List[str]:
197
- """List directory contents."""
198
- pass
199
-
200
- @abstractmethod
201
- async def read_text(self, path: str) -> str:
202
- """Read file text contents."""
203
- pass
204
-
205
- @abstractmethod
206
- async def write_text(self, path: str, content: str) -> None:
207
- """Write file text contents."""
208
- pass
209
-
210
- @abstractmethod
211
- async def read_bytes(self, path: str, offset: int = 0, length: Optional[int] = None) -> bytes:
212
- """Read file binary contents with optional seeking support.
213
-
214
- Args:
215
- path: Path to the file
216
- offset: Byte offset to start reading from (default: 0)
217
- length: Number of bytes to read (default: None for entire file)
218
- """
219
- pass
220
-
221
- @abstractmethod
222
- async def write_bytes(self, path: str, content: bytes) -> None:
223
- """Write file binary contents."""
224
- pass
225
-
226
- @abstractmethod
227
- async def delete_file(self, path: str) -> None:
228
- """Delete file."""
229
- pass
230
-
231
- @abstractmethod
232
- async def create_dir(self, path: str) -> None:
233
- """Create directory."""
234
- pass
235
-
236
- @abstractmethod
237
- async def delete_dir(self, path: str) -> None:
238
- """Delete directory."""
239
- pass
240
-
241
- @abstractmethod
242
- async def get_file_size(self, path: str) -> int:
243
- """Get the size of a file in bytes."""
244
- pass
245
-
246
- @abstractmethod
247
- async def run_command(self, command: str) -> CommandResult:
248
- """Run shell command and return structured result.
249
-
250
- Executes a shell command using subprocess.run with shell=True and check=False.
251
- The command is run in the target environment and captures both stdout and stderr.
252
-
253
- Args:
254
- command (str): The shell command to execute
255
-
256
- Returns:
257
- CommandResult: A structured result containing:
258
- - stdout (str): Standard output from the command
259
- - stderr (str): Standard error from the command
260
- - returncode (int): Exit code from the command (0 indicates success)
261
-
262
- Raises:
263
- RuntimeError: If the command execution fails at the system level
264
-
265
- Example:
266
- result = await interface.run_command("ls -la")
267
- if result.returncode == 0:
268
- print(f"Output: {result.stdout}")
269
- else:
270
- print(f"Error: {result.stderr}, Exit code: {result.returncode}")
271
- """
272
- pass
273
-
274
- # Accessibility Actions
275
- @abstractmethod
276
- async def get_accessibility_tree(self) -> Dict:
277
- """Get the accessibility tree of the current screen."""
278
- pass
279
-
280
- @abstractmethod
281
- async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]:
282
- """Convert screenshot coordinates to screen coordinates.
283
-
284
- Args:
285
- x: X coordinate in screenshot space
286
- y: Y coordinate in screenshot space
287
-
288
- Returns:
289
- tuple[float, float]: (x, y) coordinates in screen space
290
- """
291
- pass
292
-
293
- @abstractmethod
294
- async def to_screenshot_coordinates(self, x: float, y: float) -> tuple[float, float]:
295
- """Convert screen coordinates to screenshot coordinates.
296
-
297
- Args:
298
- x: X coordinate in screen space
299
- y: Y coordinate in screen space
300
-
301
- Returns:
302
- tuple[float, float]: (x, y) coordinates in screenshot space
303
- """
304
- pass
File without changes