cua-computer 0.4.4__tar.gz → 0.4.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {cua_computer-0.4.4 → cua_computer-0.4.6}/PKG-INFO +10 -81
  2. cua_computer-0.4.6/README.md +73 -0
  3. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/computer.py +2 -2
  4. cua_computer-0.4.6/computer/diorama_computer.py +243 -0
  5. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/providers/winsandbox/provider.py +27 -6
  6. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/providers/winsandbox/setup_script.ps1 +33 -8
  7. {cua_computer-0.4.4 → cua_computer-0.4.6}/pyproject.toml +3 -3
  8. cua_computer-0.4.4/README.md +0 -144
  9. cua_computer-0.4.4/computer/diorama_computer.py +0 -104
  10. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/__init__.py +0 -0
  11. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/helpers.py +0 -0
  12. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/interface/__init__.py +0 -0
  13. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/interface/base.py +0 -0
  14. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/interface/factory.py +0 -0
  15. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/interface/generic.py +0 -0
  16. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/interface/linux.py +0 -0
  17. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/interface/macos.py +0 -0
  18. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/interface/models.py +0 -0
  19. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/interface/windows.py +0 -0
  20. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/logger.py +0 -0
  21. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/models.py +0 -0
  22. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/providers/__init__.py +0 -0
  23. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/providers/base.py +0 -0
  24. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/providers/cloud/__init__.py +0 -0
  25. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/providers/cloud/provider.py +0 -0
  26. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/providers/docker/__init__.py +0 -0
  27. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/providers/docker/provider.py +0 -0
  28. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/providers/factory.py +0 -0
  29. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/providers/lume/__init__.py +0 -0
  30. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/providers/lume/provider.py +0 -0
  31. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/providers/lume_api.py +0 -0
  32. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/providers/lumier/__init__.py +0 -0
  33. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/providers/lumier/provider.py +0 -0
  34. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/providers/winsandbox/__init__.py +0 -0
  35. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/ui/__init__.py +0 -0
  36. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/ui/__main__.py +0 -0
  37. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/ui/gradio/__init__.py +0 -0
  38. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/ui/gradio/app.py +0 -0
  39. {cua_computer-0.4.4 → cua_computer-0.4.6}/computer/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-computer
3
- Version: 0.4.4
3
+ Version: 0.4.6
4
4
  Summary: Computer-Use Interface (CUI) framework powering Cua
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: >=3.11
@@ -26,8 +26,8 @@ Description-Content-Type: text/markdown
26
26
  <h1>
27
27
  <div class="image-wrapper" style="display: inline-block;">
28
28
  <picture>
29
- <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../img/logo_white.png" style="display: block; margin: auto;">
30
- <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../img/logo_black.png" style="display: block; margin: auto;">
29
+ <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_white.png" style="display: block; margin: auto;">
30
+ <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_black.png" style="display: block; margin: auto;">
31
31
  <img alt="Shows my svg">
32
32
  </picture>
33
33
  </div>
@@ -44,7 +44,7 @@ Description-Content-Type: text/markdown
44
44
  ### Get started with Computer
45
45
 
46
46
  <div align="center">
47
- <img src="../../img/computer.png"/>
47
+ <img src="https://raw.githubusercontent.com/trycua/cua/main/img/computer.png"/>
48
48
  </div>
49
49
 
50
50
  ```python
@@ -87,82 +87,11 @@ The `cua-computer` PyPi package pulls automatically the latest executable versio
87
87
 
88
88
  Refer to this notebook for a step-by-step guide on how to use the Computer-Use Interface (CUI):
89
89
 
90
- - [Computer-Use Interface (CUI)](../../notebooks/computer_nb.ipynb)
90
+ - [Computer-Use Interface (CUI)](https://github.com/trycua/cua/blob/main/notebooks/computer_nb.ipynb)
91
91
 
92
- ## Using the Gradio Computer UI
93
-
94
- The computer module includes a Gradio UI for creating and sharing demonstration data. We make it easy for people to build community datasets for better computer use models with an upload to Huggingface feature.
95
-
96
- ```bash
97
- # Install with UI support
98
- pip install "cua-computer[ui]"
99
- ```
100
-
101
- > **Note:** For precise control of the computer, we recommend using VNC or Screen Sharing instead of the Computer Gradio UI.
102
-
103
- ### Building and Sharing Demonstrations with Huggingface
104
-
105
- Follow these steps to contribute your own demonstrations:
106
-
107
- #### 1. Set up Huggingface Access
108
-
109
- Set your HF_TOKEN in a .env file or in your environment variables:
110
-
111
- ```bash
112
- # In .env file
113
- HF_TOKEN=your_huggingface_token
114
- ```
115
-
116
- #### 2. Launch the Computer UI
117
-
118
- ```python
119
- # launch_ui.py
120
- from computer.ui.gradio.app import create_gradio_ui
121
- from dotenv import load_dotenv
122
- load_dotenv('.env')
123
-
124
- app = create_gradio_ui()
125
- app.launch(share=False)
126
- ```
127
-
128
- For examples, see [Computer UI Examples](../../examples/computer_ui_examples.py)
129
-
130
- #### 3. Record Your Tasks
131
-
132
- <details open>
133
- <summary>View demonstration video</summary>
134
- <video src="https://github.com/user-attachments/assets/de3c3477-62fe-413c-998d-4063e48de176" controls width="600"></video>
135
- </details>
136
-
137
- Record yourself performing various computer tasks using the UI.
138
-
139
- #### 4. Save Your Demonstrations
140
-
141
- <details open>
142
- <summary>View demonstration video</summary>
143
- <video src="https://github.com/user-attachments/assets/5ad1df37-026a-457f-8b49-922ae805faef" controls width="600"></video>
144
- </details>
145
-
146
- Save each task by picking a descriptive name and adding relevant tags (e.g., "office", "web-browsing", "coding").
147
-
148
- #### 5. Record Additional Demonstrations
149
-
150
- Repeat steps 3 and 4 until you have a good amount of demonstrations covering different tasks and scenarios.
151
-
152
- #### 6. Upload to Huggingface
153
-
154
- <details open>
155
- <summary>View demonstration video</summary>
156
- <video src="https://github.com/user-attachments/assets/c586d460-3877-4b5f-a736-3248886d2134" controls width="600"></video>
157
- </details>
158
-
159
- Upload your dataset to Huggingface by:
160
- - Naming it as `{your_username}/{dataset_name}`
161
- - Choosing public or private visibility
162
- - Optionally selecting specific tags to upload only tasks with certain tags
163
-
164
- #### Examples and Resources
165
-
166
- - Example Dataset: [ddupont/test-dataset](https://huggingface.co/datasets/ddupont/test-dataset)
167
- - Find Community Datasets: 🔍 [Browse CUA Datasets on Huggingface](https://huggingface.co/datasets?other=cua)
92
+ ## Docs
168
93
 
94
+ - [Computers](https://trycua.com/docs/computer-sdk/computers)
95
+ - [Commands](https://trycua.com/docs/computer-sdk/commands)
96
+ - [Computer UI](https://trycua.com/docs/computer-sdk/computer-ui)
97
+ - [Sandboxed Python](https://trycua.com/docs/computer-sdk/sandboxed-python)
@@ -0,0 +1,73 @@
1
+ <div align="center">
2
+ <h1>
3
+ <div class="image-wrapper" style="display: inline-block;">
4
+ <picture>
5
+ <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_white.png" style="display: block; margin: auto;">
6
+ <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_black.png" style="display: block; margin: auto;">
7
+ <img alt="Shows my svg">
8
+ </picture>
9
+ </div>
10
+
11
+ [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#)
12
+ [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
13
+ [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
14
+ [![PyPI](https://img.shields.io/pypi/v/cua-computer?color=333333)](https://pypi.org/project/cua-computer/)
15
+ </h1>
16
+ </div>
17
+
18
+ **cua-computer** is a Computer-Use Interface (CUI) framework powering Cua for interacting with local macOS and Linux sandboxes. It is PyAutoGUI-compatible and pluggable with any AI agent system (Cua, Langchain, CrewAI, AutoGen). Computer relies on [Lume](https://github.com/trycua/lume) for creating and managing sandbox environments.
19
+
20
+ ### Get started with Computer
21
+
22
+ <div align="center">
23
+ <img src="https://raw.githubusercontent.com/trycua/cua/main/img/computer.png"/>
24
+ </div>
25
+
26
+ ```python
27
+ from computer import Computer
28
+
29
+ computer = Computer(os_type="macos", display="1024x768", memory="8GB", cpu="4")
30
+ try:
31
+ await computer.run()
32
+
33
+ screenshot = await computer.interface.screenshot()
34
+ with open("screenshot.png", "wb") as f:
35
+ f.write(screenshot)
36
+
37
+ await computer.interface.move_cursor(100, 100)
38
+ await computer.interface.left_click()
39
+ await computer.interface.right_click(300, 300)
40
+ await computer.interface.double_click(400, 400)
41
+
42
+ await computer.interface.type("Hello, World!")
43
+ await computer.interface.press_key("enter")
44
+
45
+ await computer.interface.set_clipboard("Test clipboard")
46
+ content = await computer.interface.copy_to_clipboard()
47
+ print(f"Clipboard content: {content}")
48
+ finally:
49
+ await computer.stop()
50
+ ```
51
+
52
+ ## Install
53
+
54
+ To install the Computer-Use Interface (CUI):
55
+
56
+ ```bash
57
+ pip install "cua-computer[all]"
58
+ ```
59
+
60
+ The `cua-computer` PyPI package automatically pulls the latest executable version of Lume through [pylume](https://github.com/trycua/pylume).
61
+
62
+ ## Run
63
+
64
+ Refer to this notebook for a step-by-step guide on how to use the Computer-Use Interface (CUI):
65
+
66
+ - [Computer-Use Interface (CUI)](https://github.com/trycua/cua/blob/main/notebooks/computer_nb.ipynb)
67
+
68
+ ## Docs
69
+
70
+ - [Computers](https://trycua.com/docs/computer-sdk/computers)
71
+ - [Commands](https://trycua.com/docs/computer-sdk/commands)
72
+ - [Computer UI](https://trycua.com/docs/computer-sdk/computer-ui)
73
+ - [Sandboxed Python](https://trycua.com/docs/computer-sdk/sandboxed-python)
@@ -154,8 +154,8 @@ class Computer:
154
154
  self.interface_logger = Logger("computer.interface", verbosity)
155
155
 
156
156
  if not use_host_computer_server:
157
- if ":" not in image or len(image.split(":")) != 2:
158
- raise ValueError("Image must be in the format <image_name>:<tag>")
157
+ if ":" not in image:
158
+ image = f"{image}:latest"
159
159
 
160
160
  if not name:
161
161
  # Normalize the name to be used for the VM
@@ -0,0 +1,243 @@
1
+ import asyncio
2
+ from .interface.models import KeyType, Key
3
+
4
+ class DioramaComputer:
5
+ """
6
+ A Computer-compatible proxy for Diorama that sends commands over the ComputerInterface.
7
+ """
8
+ def __init__(self, computer, apps):
9
+ """
10
+ Initialize the DioramaComputer with a computer instance and list of apps.
11
+
12
+ Args:
13
+ computer: The computer instance to proxy commands through
14
+ apps: List of applications available in the diorama environment
15
+ """
16
+ self.computer = computer
17
+ self.apps = apps
18
+ self.interface = DioramaComputerInterface(computer, apps)
19
+ self._initialized = False
20
+
21
+ async def __aenter__(self):
22
+ """
23
+ Async context manager entry point.
24
+
25
+ Returns:
26
+ self: The DioramaComputer instance
27
+ """
28
+ self._initialized = True
29
+ return self
30
+
31
+ async def run(self):
32
+ """
33
+ Initialize and run the DioramaComputer if not already initialized.
34
+
35
+ Returns:
36
+ self: The DioramaComputer instance
37
+ """
38
+ if not self._initialized:
39
+ await self.__aenter__()
40
+ return self
41
+
42
+ class DioramaComputerInterface:
43
+ """
44
+ Diorama Interface proxy that sends diorama_cmds via the Computer's interface.
45
+ """
46
+ def __init__(self, computer, apps):
47
+ """
48
+ Initialize the DioramaComputerInterface.
49
+
50
+ Args:
51
+ computer: The computer instance to send commands through
52
+ apps: List of applications available in the diorama environment
53
+ """
54
+ self.computer = computer
55
+ self.apps = apps
56
+ self._scene_size = None
57
+
58
+ async def _send_cmd(self, action, arguments=None):
59
+ """
60
+ Send a command to the diorama interface through the computer.
61
+
62
+ Args:
63
+ action (str): The action/command to execute
64
+ arguments (dict, optional): Additional arguments for the command
65
+
66
+ Returns:
67
+ The result from the diorama command execution
68
+
69
+ Raises:
70
+ RuntimeError: If the computer interface is not initialized or command fails
71
+ """
72
+ arguments = arguments or {}
73
+ arguments = {"app_list": self.apps, **arguments}
74
+ # Use the computer's interface (must be initialized)
75
+ iface = getattr(self.computer, "_interface", None)
76
+ if iface is None:
77
+ raise RuntimeError("Computer interface not initialized. Call run() first.")
78
+ result = await iface.diorama_cmd(action, arguments)
79
+ if not result.get("success"):
80
+ raise RuntimeError(f"Diorama command failed: {result.get('error')}\n{result.get('trace')}")
81
+ return result.get("result")
82
+
83
+ async def screenshot(self, as_bytes=True):
84
+ """
85
+ Take a screenshot of the diorama scene.
86
+
87
+ Args:
88
+ as_bytes (bool): If True, return image as bytes; if False, return PIL Image object
89
+
90
+ Returns:
91
+ bytes or PIL.Image: Screenshot data in the requested format
92
+ """
93
+ from PIL import Image
94
+ import base64
95
+ result = await self._send_cmd("screenshot")
96
+ # assume result is a b64 string of an image
97
+ img_bytes = base64.b64decode(result)
98
+ import io
99
+ img = Image.open(io.BytesIO(img_bytes))
100
+ self._scene_size = img.size
101
+ return img_bytes if as_bytes else img
102
+
103
+ async def get_screen_size(self):
104
+ """
105
+ Get the dimensions of the diorama scene.
106
+
107
+ Returns:
108
+ dict: Dictionary containing 'width' and 'height' keys with pixel dimensions
109
+ """
110
+ if not self._scene_size:
111
+ await self.screenshot(as_bytes=False)
112
+ return {"width": self._scene_size[0], "height": self._scene_size[1]}
113
+
114
+ async def move_cursor(self, x, y):
115
+ """
116
+ Move the cursor to the specified coordinates.
117
+
118
+ Args:
119
+ x (int): X coordinate to move cursor to
120
+ y (int): Y coordinate to move cursor to
121
+ """
122
+ await self._send_cmd("move_cursor", {"x": x, "y": y})
123
+
124
+ async def left_click(self, x=None, y=None):
125
+ """
126
+ Perform a left mouse click at the specified coordinates or current cursor position.
127
+
128
+ Args:
129
+ x (int, optional): X coordinate to click at. If None, clicks at current cursor position
130
+ y (int, optional): Y coordinate to click at. If None, clicks at current cursor position
131
+ """
132
+ await self._send_cmd("left_click", {"x": x, "y": y})
133
+
134
+ async def right_click(self, x=None, y=None):
135
+ """
136
+ Perform a right mouse click at the specified coordinates or current cursor position.
137
+
138
+ Args:
139
+ x (int, optional): X coordinate to click at. If None, clicks at current cursor position
140
+ y (int, optional): Y coordinate to click at. If None, clicks at current cursor position
141
+ """
142
+ await self._send_cmd("right_click", {"x": x, "y": y})
143
+
144
+ async def double_click(self, x=None, y=None):
145
+ """
146
+ Perform a double mouse click at the specified coordinates or current cursor position.
147
+
148
+ Args:
149
+ x (int, optional): X coordinate to double-click at. If None, clicks at current cursor position
150
+ y (int, optional): Y coordinate to double-click at. If None, clicks at current cursor position
151
+ """
152
+ await self._send_cmd("double_click", {"x": x, "y": y})
153
+
154
+ async def scroll_up(self, clicks=1):
155
+ """
156
+ Scroll up by the specified number of clicks.
157
+
158
+ Args:
159
+ clicks (int): Number of scroll clicks to perform upward. Defaults to 1
160
+ """
161
+ await self._send_cmd("scroll_up", {"clicks": clicks})
162
+
163
+ async def scroll_down(self, clicks=1):
164
+ """
165
+ Scroll down by the specified number of clicks.
166
+
167
+ Args:
168
+ clicks (int): Number of scroll clicks to perform downward. Defaults to 1
169
+ """
170
+ await self._send_cmd("scroll_down", {"clicks": clicks})
171
+
172
+ async def drag_to(self, x, y, duration=0.5):
173
+ """
174
+ Drag from the current cursor position to the specified coordinates.
175
+
176
+ Args:
177
+ x (int): X coordinate to drag to
178
+ y (int): Y coordinate to drag to
179
+ duration (float): Duration of the drag operation in seconds. Defaults to 0.5
180
+ """
181
+ await self._send_cmd("drag_to", {"x": x, "y": y, "duration": duration})
182
+
183
+ async def get_cursor_position(self):
184
+ """
185
+ Get the current cursor position.
186
+
187
+ Returns:
188
+ dict: Dictionary containing the current cursor coordinates
189
+ """
190
+ return await self._send_cmd("get_cursor_position")
191
+
192
+ async def type_text(self, text):
193
+ """
194
+ Type the specified text at the current cursor position.
195
+
196
+ Args:
197
+ text (str): The text to type
198
+ """
199
+ await self._send_cmd("type_text", {"text": text})
200
+
201
+ async def press_key(self, key):
202
+ """
203
+ Press a single key.
204
+
205
+ Args:
206
+ key: The key to press
207
+ """
208
+ await self._send_cmd("press_key", {"key": key})
209
+
210
+ async def hotkey(self, *keys):
211
+ """
212
+ Press multiple keys simultaneously as a hotkey combination.
213
+
214
+ Args:
215
+ *keys: Variable number of keys to press together. Can be Key enum instances or strings
216
+
217
+ Raises:
218
+ ValueError: If any key is not a Key enum or string type
219
+ """
220
+ actual_keys = []
221
+ for key in keys:
222
+ if isinstance(key, Key):
223
+ actual_keys.append(key.value)
224
+ elif isinstance(key, str):
225
+ # Try to convert to enum if it matches a known key
226
+ key_or_enum = Key.from_string(key)
227
+ actual_keys.append(key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum)
228
+ else:
229
+ raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
230
+ await self._send_cmd("hotkey", {"keys": actual_keys})
231
+
232
+ async def to_screen_coordinates(self, x, y):
233
+ """
234
+ Convert coordinates to screen coordinates.
235
+
236
+ Args:
237
+ x (int): X coordinate to convert
238
+ y (int): Y coordinate to convert
239
+
240
+ Returns:
241
+ dict: Dictionary containing the converted screen coordinates
242
+ """
243
+ return await self._send_cmd("to_screen_coordinates", {"x": x, "y": y})
@@ -5,6 +5,7 @@ import asyncio
5
5
  import logging
6
6
  import time
7
7
  from typing import Dict, Any, Optional, List
8
+ from pathlib import Path
8
9
 
9
10
  from ..base import BaseVMProvider, VMProviderType
10
11
 
@@ -242,8 +243,15 @@ class WinSandboxProvider(BaseVMProvider):
242
243
 
243
244
  networking = run_opts.get("networking", self.networking)
244
245
 
245
- # Create folder mappers if shared directories are specified
246
+ # Create folder mappers; always map a persistent venv directory on host for caching packages
246
247
  folder_mappers = []
248
+ # Ensure the host-side persistent cache directory exists (Path.home() / ".cua" / "wsb_cache")
249
+ host_wsb_env = Path.home() / ".cua" / "wsb_cache"
250
+ try:
251
+ host_wsb_env.mkdir(parents=True, exist_ok=True)
252
+ except Exception:
253
+ # If cannot create, continue without persistent mapping
254
+ host_wsb_env = None
247
255
  shared_directories = run_opts.get("shared_directories", [])
248
256
  for shared_dir in shared_directories:
249
257
  if isinstance(shared_dir, dict):
@@ -255,6 +263,15 @@ class WinSandboxProvider(BaseVMProvider):
255
263
 
256
264
  if host_path and os.path.exists(host_path):
257
265
  folder_mappers.append(winsandbox.FolderMapper(host_path))
266
+
267
+ # Add mapping for the persistent venv directory (read/write) so it appears in Sandbox Desktop
268
+ if host_wsb_env is not None and host_wsb_env.exists():
269
+ try:
270
+ folder_mappers.append(
271
+ winsandbox.FolderMapper(str(host_wsb_env), read_only=False)
272
+ )
273
+ except Exception as e:
274
+ self.logger.warning(f"Failed to map host winsandbox_venv: {e}")
258
275
 
259
276
  self.logger.info(f"Creating Windows Sandbox: {name}")
260
277
  self.logger.info(f"Memory: {memory_mb}MB, Networking: {networking}")
@@ -290,8 +307,10 @@ class WinSandboxProvider(BaseVMProvider):
290
307
 
291
308
  self.logger.info(f"Windows Sandbox {name} created successfully")
292
309
 
310
+ venv_exists = (host_wsb_env / "venv" / "Lib" / "site-packages" / "computer_server").exists() if host_wsb_env else False
311
+
293
312
  # Setup the computer server in the sandbox
294
- await self._setup_computer_server(sandbox, name)
313
+ await self._setup_computer_server(sandbox, name, wait_for_venv=(not venv_exists))
295
314
 
296
315
  return {
297
316
  "success": True,
@@ -423,7 +442,7 @@ class WinSandboxProvider(BaseVMProvider):
423
442
  if total_attempts % 10 == 0:
424
443
  self.logger.info(f"Still waiting for Windows Sandbox {name} IP after {total_attempts} attempts...")
425
444
 
426
- async def _setup_computer_server(self, sandbox, name: str, visible: bool = False):
445
+ async def _setup_computer_server(self, sandbox, name: str, visible: bool = False, wait_for_venv: bool = True):
427
446
  """Setup the computer server in the Windows Sandbox using RPyC.
428
447
 
429
448
  Args:
@@ -471,10 +490,12 @@ class WinSandboxProvider(BaseVMProvider):
471
490
  creationflags=creation_flags,
472
491
  shell=False
473
492
  )
474
-
475
- # # Sleep for 30 seconds
476
- # await asyncio.sleep(30)
477
493
 
494
+ if wait_for_venv:
495
+ print("Waiting for venv to be created for the first time setup of Windows Sandbox...")
496
+ print("This may take a minute...")
497
+ await asyncio.sleep(120)
498
+
478
499
  ip = await self.get_ip(name)
479
500
  self.logger.info(f"Sandbox IP: {ip}")
480
501
  self.logger.info(f"Setup script started in background in sandbox {name} with PID: {process.pid}")
@@ -79,23 +79,48 @@ try {
79
79
  $pythonVersion = & $pythonExe --version 2>&1
80
80
  Write-Host "Python version: $pythonVersion"
81
81
 
82
- # Step 2: Install cua-computer-server directly
83
- Write-Host "Step 2: Installing cua-computer-server..."
82
+ # Step 2: Create a dedicated virtual environment in mapped Desktop folder (persistent)
83
+ Write-Host "Step 2: Creating virtual environment (if needed)..."
84
+ $cachePath = "C:\Users\WDAGUtilityAccount\Desktop\wsb_cache"
85
+ $venvPath = "C:\Users\WDAGUtilityAccount\Desktop\wsb_cache\venv"
86
+ if (!(Test-Path $venvPath)) {
87
+ Write-Host "Creating venv at: $venvPath"
88
+ & $pythonExe -m venv $venvPath
89
+ } else {
90
+ Write-Host "Venv already exists at: $venvPath"
91
+ }
92
+ # Hide the folder to keep Desktop clean
93
+ try {
94
+ $item = Get-Item $cachePath -ErrorAction SilentlyContinue
95
+ if ($item) {
96
+ if (-not ($item.Attributes -band [IO.FileAttributes]::Hidden)) {
97
+ $item.Attributes = $item.Attributes -bor [IO.FileAttributes]::Hidden
98
+ }
99
+ }
100
+ } catch { }
101
+ $venvPython = Join-Path $venvPath "Scripts\python.exe"
102
+ if (!(Test-Path $venvPython)) {
103
+ throw "Virtual environment Python not found at $venvPython"
104
+ }
105
+ Write-Host "Using venv Python: $venvPython"
106
+
107
+ # Step 3: Install cua-computer-server into the venv
108
+ Write-Host "Step 3: Installing cua-computer-server..."
84
109
 
85
110
  Write-Host "Upgrading pip..."
86
- & $pythonExe -m pip install --upgrade pip --quiet
111
+ & $venvPython -m pip install --upgrade pip --quiet
87
112
 
88
113
  Write-Host "Installing cua-computer-server..."
89
- & $pythonExe -m pip install cua-computer-server --quiet
114
+ & $venvPython -m pip install cua-computer-server
90
115
 
91
116
  Write-Host "cua-computer-server installation completed."
92
117
 
93
- # Step 3: Start computer server in background
94
- Write-Host "Step 3: Starting computer server in background..."
95
- Write-Host "Starting computer server with: $pythonExe"
118
+ # Step 4: Start computer server in background using the venv Python
119
+ Write-Host "Step 4: Starting computer server in background..."
120
+ Write-Host "Starting computer server with: $venvPython"
96
121
 
97
122
  # Start the computer server in the background
98
- $serverProcess = Start-Process -FilePath $pythonExe -ArgumentList "-m", "computer_server.main" -WindowStyle Hidden -PassThru
123
+ $serverProcess = Start-Process -FilePath $venvPython -ArgumentList "-m", "computer_server.main" -WindowStyle Hidden -PassThru
99
124
  Write-Host "Computer server started in background with PID: $($serverProcess.Id)"
100
125
 
101
126
  # Give it a moment to start
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
6
6
 
7
7
  [project]
8
8
  name = "cua-computer"
9
- version = "0.4.4"
9
+ version = "0.4.6"
10
10
  description = "Computer-Use Interface (CUI) framework powering Cua"
11
11
  readme = "README.md"
12
12
  authors = [
@@ -57,7 +57,7 @@ target-version = [
57
57
 
58
58
  [tool.ruff]
59
59
  line-length = 100
60
- target-version = "0.4.4"
60
+ target-version = "0.4.6"
61
61
  select = [
62
62
  "E",
63
63
  "F",
@@ -71,7 +71,7 @@ docstring-code-format = true
71
71
 
72
72
  [tool.mypy]
73
73
  strict = true
74
- python_version = "0.4.4"
74
+ python_version = "0.4.6"
75
75
  ignore_missing_imports = true
76
76
  disallow_untyped_defs = true
77
77
  check_untyped_defs = true
@@ -1,144 +0,0 @@
1
- <div align="center">
2
- <h1>
3
- <div class="image-wrapper" style="display: inline-block;">
4
- <picture>
5
- <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../img/logo_white.png" style="display: block; margin: auto;">
6
- <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../img/logo_black.png" style="display: block; margin: auto;">
7
- <img alt="Shows my svg">
8
- </picture>
9
- </div>
10
-
11
- [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#)
12
- [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
13
- [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
14
- [![PyPI](https://img.shields.io/pypi/v/cua-computer?color=333333)](https://pypi.org/project/cua-computer/)
15
- </h1>
16
- </div>
17
-
18
- **cua-computer** is a Computer-Use Interface (CUI) framework powering Cua for interacting with local macOS and Linux sandboxes, PyAutoGUI-compatible, and pluggable with any AI agent systems (Cua, Langchain, CrewAI, AutoGen). Computer relies on [Lume](https://github.com/trycua/lume) for creating and managing sandbox environments.
19
-
20
- ### Get started with Computer
21
-
22
- <div align="center">
23
- <img src="../../img/computer.png"/>
24
- </div>
25
-
26
- ```python
27
- from computer import Computer
28
-
29
- computer = Computer(os_type="macos", display="1024x768", memory="8GB", cpu="4")
30
- try:
31
- await computer.run()
32
-
33
- screenshot = await computer.interface.screenshot()
34
- with open("screenshot.png", "wb") as f:
35
- f.write(screenshot)
36
-
37
- await computer.interface.move_cursor(100, 100)
38
- await computer.interface.left_click()
39
- await computer.interface.right_click(300, 300)
40
- await computer.interface.double_click(400, 400)
41
-
42
- await computer.interface.type("Hello, World!")
43
- await computer.interface.press_key("enter")
44
-
45
- await computer.interface.set_clipboard("Test clipboard")
46
- content = await computer.interface.copy_to_clipboard()
47
- print(f"Clipboard content: {content}")
48
- finally:
49
- await computer.stop()
50
- ```
51
-
52
- ## Install
53
-
54
- To install the Computer-Use Interface (CUI):
55
-
56
- ```bash
57
- pip install "cua-computer[all]"
58
- ```
59
-
60
- The `cua-computer` PyPI package automatically pulls the latest executable version of Lume through [pylume](https://github.com/trycua/pylume).
61
-
62
- ## Run
63
-
64
- Refer to this notebook for a step-by-step guide on how to use the Computer-Use Interface (CUI):
65
-
66
- - [Computer-Use Interface (CUI)](../../notebooks/computer_nb.ipynb)
67
-
68
- ## Using the Gradio Computer UI
69
-
70
- The computer module includes a Gradio UI for creating and sharing demonstration data. We make it easy for people to build community datasets for better computer use models with an upload to Huggingface feature.
71
-
72
- ```bash
73
- # Install with UI support
74
- pip install "cua-computer[ui]"
75
- ```
76
-
77
- > **Note:** For precise control of the computer, we recommend using VNC or Screen Sharing instead of the Computer Gradio UI.
78
-
79
- ### Building and Sharing Demonstrations with Huggingface
80
-
81
- Follow these steps to contribute your own demonstrations:
82
-
83
- #### 1. Set up Huggingface Access
84
-
85
- Set your HF_TOKEN in a .env file or in your environment variables:
86
-
87
- ```bash
88
- # In .env file
89
- HF_TOKEN=your_huggingface_token
90
- ```
91
-
92
- #### 2. Launch the Computer UI
93
-
94
- ```python
95
- # launch_ui.py
96
- from computer.ui.gradio.app import create_gradio_ui
97
- from dotenv import load_dotenv
98
- load_dotenv('.env')
99
-
100
- app = create_gradio_ui()
101
- app.launch(share=False)
102
- ```
103
-
104
- For examples, see [Computer UI Examples](../../examples/computer_ui_examples.py)
105
-
106
- #### 3. Record Your Tasks
107
-
108
- <details open>
109
- <summary>View demonstration video</summary>
110
- <video src="https://github.com/user-attachments/assets/de3c3477-62fe-413c-998d-4063e48de176" controls width="600"></video>
111
- </details>
112
-
113
- Record yourself performing various computer tasks using the UI.
114
-
115
- #### 4. Save Your Demonstrations
116
-
117
- <details open>
118
- <summary>View demonstration video</summary>
119
- <video src="https://github.com/user-attachments/assets/5ad1df37-026a-457f-8b49-922ae805faef" controls width="600"></video>
120
- </details>
121
-
122
- Save each task by picking a descriptive name and adding relevant tags (e.g., "office", "web-browsing", "coding").
123
-
124
- #### 5. Record Additional Demonstrations
125
-
126
- Repeat steps 3 and 4 until you have a good amount of demonstrations covering different tasks and scenarios.
127
-
128
- #### 6. Upload to Huggingface
129
-
130
- <details open>
131
- <summary>View demonstration video</summary>
132
- <video src="https://github.com/user-attachments/assets/c586d460-3877-4b5f-a736-3248886d2134" controls width="600"></video>
133
- </details>
134
-
135
- Upload your dataset to Huggingface by:
136
- - Naming it as `{your_username}/{dataset_name}`
137
- - Choosing public or private visibility
138
- - Optionally selecting specific tags to upload only tasks with certain tags
139
-
140
- #### Examples and Resources
141
-
142
- - Example Dataset: [ddupont/test-dataset](https://huggingface.co/datasets/ddupont/test-dataset)
143
- - Find Community Datasets: 🔍 [Browse CUA Datasets on Huggingface](https://huggingface.co/datasets?other=cua)
144
-
@@ -1,104 +0,0 @@
1
- import asyncio
2
- from .interface.models import KeyType, Key
3
-
4
class DioramaComputer:
    """
    A Computer-compatible proxy for Diorama that sends commands over the ComputerInterface.

    Wraps an existing computer object together with a list of apps and exposes
    an ``interface`` attribute (a ``DioramaComputerInterface``) built from them.
    Usable directly via ``run()`` or as an async context manager.
    """

    def __init__(self, computer, apps):
        """Store the wrapped computer and app list and build the interface proxy.

        Args:
            computer: The computer object the interface proxy sends commands through.
            apps: The app list handed to the interface proxy.
        """
        self.computer = computer
        self.apps = apps
        self.interface = DioramaComputerInterface(computer, apps)
        self._initialized = False

    async def __aenter__(self):
        """Mark the proxy as initialized and return it."""
        self._initialized = True
        return self

    async def __aexit__(self, exc_type, exc, tb):
        """Leave an ``async with`` block without suppressing exceptions.

        ``async with`` requires both ``__aenter__`` and ``__aexit__``; without
        this method the statement fails before the body runs. ``__aenter__``
        only flips a flag, so there is nothing to release here.
        """
        return False

    async def run(self):
        """Initialize the proxy if that has not happened yet and return it."""
        if not self._initialized:
            await self.__aenter__()
        return self
22
-
23
class DioramaComputerInterface:
    """
    Diorama Interface proxy that sends diorama_cmds via the Computer's interface.

    Every public method maps to one diorama command name and forwards its
    arguments through ``_send_cmd``.
    """

    def __init__(self, computer, apps):
        """Remember the computer to send commands through and the app list.

        Args:
            computer: Object whose ``_interface`` attribute provides ``diorama_cmd``.
            apps: Value sent as ``app_list`` with every command.
        """
        self.computer = computer
        self.apps = apps
        # Size of the most recent screenshot; None until one has been taken.
        self._scene_size = None

    async def _send_cmd(self, action, arguments=None):
        """Send one diorama command and return its ``result`` payload.

        Args:
            action: Command name passed to ``diorama_cmd``.
            arguments: Optional dict of command arguments. ``app_list`` is
                added automatically; a caller-supplied ``app_list`` key wins
                because the caller's dict is merged last.

        Returns:
            The ``"result"`` entry of the reply (``None`` if absent).

        Raises:
            RuntimeError: If the computer has no ``_interface`` yet, or the
                reply's ``"success"`` entry is missing or falsy.
        """
        arguments = arguments or {}
        arguments = {"app_list": self.apps, **arguments}
        # Use the computer's interface (must be initialized)
        iface = getattr(self.computer, "_interface", None)
        if iface is None:
            raise RuntimeError("Computer interface not initialized. Call run() first.")
        result = await iface.diorama_cmd(action, arguments)
        if not result.get("success"):
            raise RuntimeError(f"Diorama command failed: {result.get('error')}\n{result.get('trace')}")
        return result.get("result")

    async def screenshot(self, as_bytes=True):
        """Take a screenshot and cache its size.

        Args:
            as_bytes: When true return the decoded image bytes, otherwise a
                ``PIL.Image`` opened from those bytes.

        Returns:
            The raw image bytes or the ``PIL.Image``, depending on ``as_bytes``.
        """
        import base64
        import io

        from PIL import Image

        result = await self._send_cmd("screenshot")
        # assume result is a b64 string of an image
        img_bytes = base64.b64decode(result)
        img = Image.open(io.BytesIO(img_bytes))
        self._scene_size = img.size
        return img_bytes if as_bytes else img

    async def get_screen_size(self):
        """Return ``{"width": ..., "height": ...}`` from the cached screenshot size.

        Takes a screenshot first when no size has been cached yet.
        """
        if not self._scene_size:
            await self.screenshot(as_bytes=False)
        return {"width": self._scene_size[0], "height": self._scene_size[1]}

    async def move_cursor(self, x, y):
        """Send ``move_cursor`` with the given coordinates."""
        await self._send_cmd("move_cursor", {"x": x, "y": y})

    async def left_click(self, x=None, y=None):
        """Send ``left_click``; ``x``/``y`` are forwarded as given (``None`` included)."""
        await self._send_cmd("left_click", {"x": x, "y": y})

    async def right_click(self, x=None, y=None):
        """Send ``right_click``; ``x``/``y`` are forwarded as given (``None`` included)."""
        await self._send_cmd("right_click", {"x": x, "y": y})

    async def double_click(self, x=None, y=None):
        """Send ``double_click``; ``x``/``y`` are forwarded as given (``None`` included)."""
        await self._send_cmd("double_click", {"x": x, "y": y})

    async def scroll_up(self, clicks=1):
        """Send ``scroll_up`` with the number of clicks."""
        await self._send_cmd("scroll_up", {"clicks": clicks})

    async def scroll_down(self, clicks=1):
        """Send ``scroll_down`` with the number of clicks."""
        await self._send_cmd("scroll_down", {"clicks": clicks})

    async def drag_to(self, x, y, duration=0.5):
        """Send ``drag_to`` with the target coordinates and duration."""
        await self._send_cmd("drag_to", {"x": x, "y": y, "duration": duration})

    async def get_cursor_position(self):
        """Send ``get_cursor_position`` and return the command's result."""
        return await self._send_cmd("get_cursor_position")

    async def type_text(self, text):
        """Send ``type_text`` with the given text."""
        await self._send_cmd("type_text", {"text": text})

    async def press_key(self, key):
        """Send ``press_key`` for a single key.

        A ``Key`` enum member is sent as its ``.value``, the same way
        ``hotkey`` sends enum members. Any other value (for example a plain
        string) is forwarded unchanged.
        """
        if isinstance(key, Key):
            key = key.value
        await self._send_cmd("press_key", {"key": key})

    async def hotkey(self, *keys):
        """Send ``hotkey`` with every key normalized to a plain value.

        Args:
            *keys: ``Key`` enum members or strings.

        Raises:
            ValueError: If a key is neither a ``Key`` nor a ``str``.
        """
        actual_keys = []
        for key in keys:
            if isinstance(key, Key):
                actual_keys.append(key.value)
            elif isinstance(key, str):
                # Try to convert to enum if it matches a known key
                key_or_enum = Key.from_string(key)
                actual_keys.append(key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum)
            else:
                raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
        await self._send_cmd("hotkey", {"keys": actual_keys})

    async def to_screen_coordinates(self, x, y):
        """Send ``to_screen_coordinates`` and return the command's result."""
        return await self._send_cmd("to_screen_coordinates", {"x": x, "y": y})