cua-computer 0.1.22__tar.gz → 0.1.24__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cua_computer-0.1.22 → cua_computer-0.1.24}/PKG-INFO +83 -2
- {cua_computer-0.1.22 → cua_computer-0.1.24}/README.md +79 -1
- {cua_computer-0.1.22 → cua_computer-0.1.24}/computer/computer.py +7 -13
- {cua_computer-0.1.22 → cua_computer-0.1.24}/computer/interface/macos.py +37 -7
- {cua_computer-0.1.22 → cua_computer-0.1.24}/computer/interface/models.py +26 -2
- cua_computer-0.1.24/computer/ui/__init__.py +1 -0
- cua_computer-0.1.24/computer/ui/gradio/__init__.py +6 -0
- cua_computer-0.1.24/computer/ui/gradio/app.py +1510 -0
- {cua_computer-0.1.22 → cua_computer-0.1.24}/pyproject.toml +9 -3
- {cua_computer-0.1.22 → cua_computer-0.1.24}/computer/__init__.py +0 -0
- {cua_computer-0.1.22 → cua_computer-0.1.24}/computer/interface/__init__.py +0 -0
- {cua_computer-0.1.22 → cua_computer-0.1.24}/computer/interface/base.py +0 -0
- {cua_computer-0.1.22 → cua_computer-0.1.24}/computer/interface/factory.py +0 -0
- {cua_computer-0.1.22 → cua_computer-0.1.24}/computer/interface/linux.py +0 -0
- {cua_computer-0.1.22 → cua_computer-0.1.24}/computer/logger.py +0 -0
- {cua_computer-0.1.22 → cua_computer-0.1.24}/computer/models.py +0 -0
- {cua_computer-0.1.22 → cua_computer-0.1.24}/computer/telemetry.py +0 -0
- {cua_computer-0.1.22 → cua_computer-0.1.24}/computer/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cua-computer
|
3
|
-
Version: 0.1.22
|
3
|
+
Version: 0.1.24
|
4
4
|
Summary: Computer-Use Interface (CUI) framework powering Cua
|
5
5
|
Author-Email: TryCua <gh@trycua.com>
|
6
6
|
Requires-Python: >=3.10
|
@@ -11,6 +11,9 @@ Requires-Dist: websockets>=12.0
|
|
11
11
|
Requires-Dist: aiohttp>=3.9.0
|
12
12
|
Requires-Dist: cua-core<0.2.0,>=0.1.0
|
13
13
|
Requires-Dist: pydantic>=2.11.1
|
14
|
+
Provides-Extra: ui
|
15
|
+
Requires-Dist: gradio<6.0.0,>=5.23.3; extra == "ui"
|
16
|
+
Requires-Dist: python-dotenv<2.0.0,>=1.0.1; extra == "ui"
|
14
17
|
Description-Content-Type: text/markdown
|
15
18
|
|
16
19
|
<div align="center">
|
@@ -78,4 +81,82 @@ The `cua-computer` PyPi package pulls automatically the latest executable versio
|
|
78
81
|
|
79
82
|
Refer to this notebook for a step-by-step guide on how to use the Computer-Use Interface (CUI):
|
80
83
|
|
81
|
-
- [Computer-Use Interface (CUI)](../../notebooks/computer_nb.ipynb)
|
84
|
+
- [Computer-Use Interface (CUI)](../../notebooks/computer_nb.ipynb)
|
85
|
+
|
86
|
+
## Using the Gradio Computer UI
|
87
|
+
|
88
|
+
The computer module includes a Gradio UI for creating and sharing demonstration data. Its built-in upload-to-Huggingface feature makes it easy for people to build community datasets for better computer-use models.
|
89
|
+
|
90
|
+
```bash
|
91
|
+
# Install with UI support
|
92
|
+
pip install "cua-computer[ui]"
|
93
|
+
```
|
94
|
+
|
95
|
+
> **Note:** For precise control of the computer, we recommend using VNC or Screen Sharing instead of the Computer Gradio UI.
|
96
|
+
|
97
|
+
### Building and Sharing Demonstrations with Huggingface
|
98
|
+
|
99
|
+
Follow these steps to contribute your own demonstrations:
|
100
|
+
|
101
|
+
#### 1. Set up Huggingface Access
|
102
|
+
|
103
|
+
Set your HF_TOKEN in a .env file or in your environment variables:
|
104
|
+
|
105
|
+
```bash
|
106
|
+
# In .env file
|
107
|
+
HF_TOKEN=your_huggingface_token
|
108
|
+
```
|
109
|
+
|
110
|
+
#### 2. Launch the Computer UI
|
111
|
+
|
112
|
+
```python
|
113
|
+
# launch_ui.py
|
114
|
+
from computer.ui.gradio.app import create_gradio_ui
|
115
|
+
from dotenv import load_dotenv
|
116
|
+
load_dotenv('.env')
|
117
|
+
|
118
|
+
app = create_gradio_ui()
|
119
|
+
app.launch(share=False)
|
120
|
+
```
|
121
|
+
|
122
|
+
For examples, see [Computer UI Examples](../../examples/computer_ui_examples.py)
|
123
|
+
|
124
|
+
#### 3. Record Your Tasks
|
125
|
+
|
126
|
+
<details open>
|
127
|
+
<summary>View demonstration video</summary>
|
128
|
+
<video src="https://github.com/user-attachments/assets/de3c3477-62fe-413c-998d-4063e48de176" controls width="600"></video>
|
129
|
+
</details>
|
130
|
+
|
131
|
+
Record yourself performing various computer tasks using the UI.
|
132
|
+
|
133
|
+
#### 4. Save Your Demonstrations
|
134
|
+
|
135
|
+
<details open>
|
136
|
+
<summary>View demonstration video</summary>
|
137
|
+
<video src="https://github.com/user-attachments/assets/5ad1df37-026a-457f-8b49-922ae805faef" controls width="600"></video>
|
138
|
+
</details>
|
139
|
+
|
140
|
+
Save each task by picking a descriptive name and adding relevant tags (e.g., "office", "web-browsing", "coding").
|
141
|
+
|
142
|
+
#### 5. Record Additional Demonstrations
|
143
|
+
|
144
|
+
Repeat steps 3 and 4 until you have a good number of demonstrations covering different tasks and scenarios.
|
145
|
+
|
146
|
+
#### 6. Upload to Huggingface
|
147
|
+
|
148
|
+
<details open>
|
149
|
+
<summary>View demonstration video</summary>
|
150
|
+
<video src="https://github.com/user-attachments/assets/c586d460-3877-4b5f-a736-3248886d2134" controls width="600"></video>
|
151
|
+
</details>
|
152
|
+
|
153
|
+
Upload your dataset to Huggingface by:
|
154
|
+
- Naming it as `{your_username}/{dataset_name}`
|
155
|
+
- Choosing public or private visibility
|
156
|
+
- Optionally selecting specific tags to upload only tasks with certain tags
|
157
|
+
|
158
|
+
#### Examples and Resources
|
159
|
+
|
160
|
+
- Example Dataset: [ddupont/test-dataset](https://huggingface.co/datasets/ddupont/test-dataset)
|
161
|
+
- Find Community Datasets: 🔍 [Browse CUA Datasets on Huggingface](https://huggingface.co/datasets?other=cua)
|
162
|
+
|
@@ -63,4 +63,82 @@ The `cua-computer` PyPi package pulls automatically the latest executable versio
|
|
63
63
|
|
64
64
|
Refer to this notebook for a step-by-step guide on how to use the Computer-Use Interface (CUI):
|
65
65
|
|
66
|
-
- [Computer-Use Interface (CUI)](../../notebooks/computer_nb.ipynb)
|
66
|
+
- [Computer-Use Interface (CUI)](../../notebooks/computer_nb.ipynb)
|
67
|
+
|
68
|
+
## Using the Gradio Computer UI
|
69
|
+
|
70
|
+
The computer module includes a Gradio UI for creating and sharing demonstration data. Its built-in upload-to-Huggingface feature makes it easy for people to build community datasets for better computer-use models.
|
71
|
+
|
72
|
+
```bash
|
73
|
+
# Install with UI support
|
74
|
+
pip install "cua-computer[ui]"
|
75
|
+
```
|
76
|
+
|
77
|
+
> **Note:** For precise control of the computer, we recommend using VNC or Screen Sharing instead of the Computer Gradio UI.
|
78
|
+
|
79
|
+
### Building and Sharing Demonstrations with Huggingface
|
80
|
+
|
81
|
+
Follow these steps to contribute your own demonstrations:
|
82
|
+
|
83
|
+
#### 1. Set up Huggingface Access
|
84
|
+
|
85
|
+
Set your HF_TOKEN in a .env file or in your environment variables:
|
86
|
+
|
87
|
+
```bash
|
88
|
+
# In .env file
|
89
|
+
HF_TOKEN=your_huggingface_token
|
90
|
+
```
|
91
|
+
|
92
|
+
#### 2. Launch the Computer UI
|
93
|
+
|
94
|
+
```python
|
95
|
+
# launch_ui.py
|
96
|
+
from computer.ui.gradio.app import create_gradio_ui
|
97
|
+
from dotenv import load_dotenv
|
98
|
+
load_dotenv('.env')
|
99
|
+
|
100
|
+
app = create_gradio_ui()
|
101
|
+
app.launch(share=False)
|
102
|
+
```
|
103
|
+
|
104
|
+
For examples, see [Computer UI Examples](../../examples/computer_ui_examples.py)
|
105
|
+
|
106
|
+
#### 3. Record Your Tasks
|
107
|
+
|
108
|
+
<details open>
|
109
|
+
<summary>View demonstration video</summary>
|
110
|
+
<video src="https://github.com/user-attachments/assets/de3c3477-62fe-413c-998d-4063e48de176" controls width="600"></video>
|
111
|
+
</details>
|
112
|
+
|
113
|
+
Record yourself performing various computer tasks using the UI.
|
114
|
+
|
115
|
+
#### 4. Save Your Demonstrations
|
116
|
+
|
117
|
+
<details open>
|
118
|
+
<summary>View demonstration video</summary>
|
119
|
+
<video src="https://github.com/user-attachments/assets/5ad1df37-026a-457f-8b49-922ae805faef" controls width="600"></video>
|
120
|
+
</details>
|
121
|
+
|
122
|
+
Save each task by picking a descriptive name and adding relevant tags (e.g., "office", "web-browsing", "coding").
|
123
|
+
|
124
|
+
#### 5. Record Additional Demonstrations
|
125
|
+
|
126
|
+
Repeat steps 3 and 4 until you have a good number of demonstrations covering different tasks and scenarios.
|
127
|
+
|
128
|
+
#### 6. Upload to Huggingface
|
129
|
+
|
130
|
+
<details open>
|
131
|
+
<summary>View demonstration video</summary>
|
132
|
+
<video src="https://github.com/user-attachments/assets/c586d460-3877-4b5f-a736-3248886d2134" controls width="600"></video>
|
133
|
+
</details>
|
134
|
+
|
135
|
+
Upload your dataset to Huggingface by:
|
136
|
+
- Naming it as `{your_username}/{dataset_name}`
|
137
|
+
- Choosing public or private visibility
|
138
|
+
- Optionally selecting specific tags to upload only tasks with certain tags
|
139
|
+
|
140
|
+
#### Examples and Resources
|
141
|
+
|
142
|
+
- Example Dataset: [ddupont/test-dataset](https://huggingface.co/datasets/ddupont/test-dataset)
|
143
|
+
- Find Community Datasets: 🔍 [Browse CUA Datasets on Huggingface](https://huggingface.co/datasets?other=cua)
|
144
|
+
|
@@ -227,24 +227,18 @@ class Computer:
|
|
227
227
|
self.logger.error(f"Failed to initialize PyLume context: {e}")
|
228
228
|
raise RuntimeError(f"Failed to initialize PyLume: {e}")
|
229
229
|
|
230
|
-
# Try to get the VM, if it doesn't exist,
|
230
|
+
# Try to get the VM, if it doesn't exist, return an error
|
231
231
|
try:
|
232
232
|
vm = await self.config.pylume.get_vm(self.config.name) # type: ignore[attr-defined]
|
233
233
|
self.logger.verbose(f"Found existing VM: {self.config.name}")
|
234
234
|
except Exception as e:
|
235
|
-
self.logger.
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
235
|
+
self.logger.error(f"VM not found: {self.config.name}")
|
236
|
+
self.logger.error(
|
237
|
+
f"Please pull the VM first with lume pull macos-sequoia-cua-sparse:latest: {e}"
|
238
|
+
)
|
239
|
+
raise RuntimeError(
|
240
|
+
f"VM not found: {self.config.name}. Please pull the VM first."
|
241
241
|
)
|
242
|
-
self.logger.info(f"Pulling image {self.config.image}:{self.config.tag}...")
|
243
|
-
try:
|
244
|
-
await self.config.pylume.pull_image(image_ref, name=self.config.name) # type: ignore[attr-defined]
|
245
|
-
except Exception as pull_error:
|
246
|
-
self.logger.error(f"Failed to pull image: {pull_error}")
|
247
|
-
raise RuntimeError(f"Failed to pull VM image: {pull_error}")
|
248
242
|
|
249
243
|
# Convert paths to SharedDirectory objects
|
250
244
|
shared_directories = []
|
@@ -377,17 +377,47 @@ class MacOSComputerInterface(BaseComputerInterface):
|
|
377
377
|
"""
|
378
378
|
await self.press(key)
|
379
379
|
|
380
|
-
async def hotkey(self, *keys:
|
381
|
-
|
380
|
+
async def hotkey(self, *keys: "KeyType") -> None:
|
381
|
+
"""Press multiple keys simultaneously.
|
382
|
+
|
383
|
+
Args:
|
384
|
+
*keys: Multiple keys to press simultaneously. Each key can be any of:
|
385
|
+
- A Key enum value (recommended), e.g. Key.COMMAND
|
386
|
+
- A direct key value string, e.g. 'command'
|
387
|
+
- A single character string, e.g. 'a'
|
388
|
+
|
389
|
+
Examples:
|
390
|
+
```python
|
391
|
+
# Using enums (recommended)
|
392
|
+
await interface.hotkey(Key.COMMAND, Key.C) # Copy
|
393
|
+
await interface.hotkey(Key.COMMAND, Key.V) # Paste
|
394
|
+
|
395
|
+
# Using mixed formats
|
396
|
+
await interface.hotkey(Key.COMMAND, 'a') # Select all
|
397
|
+
```
|
398
|
+
|
399
|
+
Raises:
|
400
|
+
ValueError: If any key type is invalid or not recognized
|
401
|
+
"""
|
402
|
+
actual_keys = []
|
403
|
+
for key in keys:
|
404
|
+
if isinstance(key, Key):
|
405
|
+
actual_keys.append(key.value)
|
406
|
+
elif isinstance(key, str):
|
407
|
+
# Try to convert to enum if it matches a known key
|
408
|
+
key_or_enum = Key.from_string(key)
|
409
|
+
actual_keys.append(key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum)
|
410
|
+
else:
|
411
|
+
raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
|
412
|
+
|
413
|
+
await self._send_command("hotkey", {"keys": actual_keys})
|
382
414
|
|
383
415
|
# Scrolling Actions
|
384
416
|
async def scroll_down(self, clicks: int = 1) -> None:
|
385
|
-
|
386
|
-
|
387
|
-
|
417
|
+
await self._send_command("scroll_down", {"clicks": clicks})
|
418
|
+
|
388
419
|
async def scroll_up(self, clicks: int = 1) -> None:
|
389
|
-
|
390
|
-
await self.hotkey("pageup")
|
420
|
+
await self._send_command("scroll_up", {"clicks": clicks})
|
391
421
|
|
392
422
|
# Screen Actions
|
393
423
|
async def screenshot(
|
@@ -7,6 +7,9 @@ NavigationKey = Literal['pagedown', 'pageup', 'home', 'end', 'left', 'right', 'u
|
|
7
7
|
# Special key literals
|
8
8
|
SpecialKey = Literal['enter', 'esc', 'tab', 'space', 'backspace', 'del']
|
9
9
|
|
10
|
+
# Modifier key literals
|
11
|
+
ModifierKey = Literal['ctrl', 'alt', 'shift', 'win', 'command', 'option']
|
12
|
+
|
10
13
|
# Function key literals
|
11
14
|
FunctionKey = Literal['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12']
|
12
15
|
|
@@ -35,6 +38,14 @@ class Key(Enum):
|
|
35
38
|
BACKSPACE = 'backspace'
|
36
39
|
DELETE = 'del'
|
37
40
|
|
41
|
+
# Modifier keys
|
42
|
+
ALT = 'alt'
|
43
|
+
CTRL = 'ctrl'
|
44
|
+
SHIFT = 'shift'
|
45
|
+
WIN = 'win'
|
46
|
+
COMMAND = 'command'
|
47
|
+
OPTION = 'option'
|
48
|
+
|
38
49
|
# Function keys
|
39
50
|
F1 = 'f1'
|
40
51
|
F2 = 'f2'
|
@@ -73,14 +84,27 @@ class Key(Enum):
|
|
73
84
|
'escape': cls.ESCAPE,
|
74
85
|
'esc': cls.ESC,
|
75
86
|
'delete': cls.DELETE,
|
76
|
-
'del': cls.DELETE
|
87
|
+
'del': cls.DELETE,
|
88
|
+
# Modifier key mappings
|
89
|
+
'alt': cls.ALT,
|
90
|
+
'ctrl': cls.CTRL,
|
91
|
+
'control': cls.CTRL,
|
92
|
+
'shift': cls.SHIFT,
|
93
|
+
'win': cls.WIN,
|
94
|
+
'windows': cls.WIN,
|
95
|
+
'super': cls.WIN,
|
96
|
+
'command': cls.COMMAND,
|
97
|
+
'cmd': cls.COMMAND,
|
98
|
+
'⌘': cls.COMMAND,
|
99
|
+
'option': cls.OPTION,
|
100
|
+
'⌥': cls.OPTION,
|
77
101
|
}
|
78
102
|
|
79
103
|
normalized = key.lower().strip()
|
80
104
|
return key_mapping.get(normalized, key)
|
81
105
|
|
82
106
|
# Combined key type
|
83
|
-
KeyType = Union[Key, NavigationKey, SpecialKey, FunctionKey, str]
|
107
|
+
KeyType = Union[Key, NavigationKey, SpecialKey, ModifierKey, FunctionKey, str]
|
84
108
|
|
85
109
|
class AccessibilityWindow(TypedDict):
|
86
110
|
"""Information about a window in the accessibility tree."""
|
@@ -0,0 +1 @@
|
|
1
|
+
"""UI modules for the Computer Interface."""
|