cua-computer 0.1.23__tar.gz → 0.1.25__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cua_computer-0.1.23 → cua_computer-0.1.25}/PKG-INFO +57 -9
- {cua_computer-0.1.23 → cua_computer-0.1.25}/README.md +56 -8
- {cua_computer-0.1.23 → cua_computer-0.1.25}/computer/computer.py +7 -6
- {cua_computer-0.1.23 → cua_computer-0.1.25}/computer/interface/base.py +11 -0
- {cua_computer-0.1.23 → cua_computer-0.1.25}/computer/interface/macos.py +5 -0
- {cua_computer-0.1.23 → cua_computer-0.1.25}/computer/interface/models.py +26 -2
- {cua_computer-0.1.23 → cua_computer-0.1.25}/computer/ui/gradio/app.py +1 -1
- {cua_computer-0.1.23 → cua_computer-0.1.25}/pyproject.toml +3 -3
- {cua_computer-0.1.23 → cua_computer-0.1.25}/computer/__init__.py +0 -0
- {cua_computer-0.1.23 → cua_computer-0.1.25}/computer/interface/__init__.py +0 -0
- {cua_computer-0.1.23 → cua_computer-0.1.25}/computer/interface/factory.py +0 -0
- {cua_computer-0.1.23 → cua_computer-0.1.25}/computer/interface/linux.py +0 -0
- {cua_computer-0.1.23 → cua_computer-0.1.25}/computer/logger.py +0 -0
- {cua_computer-0.1.23 → cua_computer-0.1.25}/computer/models.py +0 -0
- {cua_computer-0.1.23 → cua_computer-0.1.25}/computer/telemetry.py +0 -0
- {cua_computer-0.1.23 → cua_computer-0.1.25}/computer/ui/__init__.py +0 -0
- {cua_computer-0.1.23 → cua_computer-0.1.25}/computer/ui/gradio/__init__.py +0 -0
- {cua_computer-0.1.23 → cua_computer-0.1.25}/computer/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cua-computer
|
3
|
-
Version: 0.1.23
|
3
|
+
Version: 0.1.25
|
4
4
|
Summary: Computer-Use Interface (CUI) framework powering Cua
|
5
5
|
Author-Email: TryCua <gh@trycua.com>
|
6
6
|
Requires-Python: >=3.10
|
@@ -44,7 +44,7 @@ Description-Content-Type: text/markdown
|
|
44
44
|
```python
|
45
45
|
from computer import Computer
|
46
46
|
|
47
|
-
computer = Computer(
|
47
|
+
computer = Computer(os_type="macos", display="1024x768", memory="8GB", cpu="4")
|
48
48
|
try:
|
49
49
|
await computer.run()
|
50
50
|
|
@@ -85,30 +85,78 @@ Refer to this notebook for a step-by-step guide on how to use the Computer-Use I
|
|
85
85
|
|
86
86
|
## Using the Gradio Computer UI
|
87
87
|
|
88
|
-
The computer module includes a Gradio UI for creating and sharing demonstration data.
|
88
|
+
The computer module includes a Gradio UI for creating and sharing demonstration data. We make it easy for people to build community datasets for better computer use models with an upload to Huggingface feature.
|
89
89
|
|
90
90
|
```bash
|
91
91
|
# Install with UI support
|
92
92
|
pip install "cua-computer[ui]"
|
93
93
|
```
|
94
94
|
|
95
|
+
> **Note:** For precise control of the computer, we recommend using VNC or Screen Sharing instead of the Computer Gradio UI.
|
95
96
|
|
96
|
-
|
97
|
-
<summary>View demonstration video</summary>
|
98
|
-
<video src="https://github.com/user-attachments/assets/7c683b58-f04d-4e8c-b63f-6ef36e9637d5" controls width="600"></video>
|
99
|
-
</details>
|
97
|
+
### Building and Sharing Demonstrations with Huggingface
|
100
98
|
|
101
|
-
|
99
|
+
Follow these steps to contribute your own demonstrations:
|
100
|
+
|
101
|
+
#### 1. Set up Huggingface Access
|
102
102
|
|
103
|
+
Set your HF_TOKEN in a .env file or in your environment variables:
|
104
|
+
|
105
|
+
```bash
|
106
|
+
# In .env file
|
107
|
+
HF_TOKEN=your_huggingface_token
|
108
|
+
```
|
103
109
|
|
104
|
-
|
110
|
+
#### 2. Launch the Computer UI
|
105
111
|
|
106
112
|
```python
|
107
113
|
# launch_ui.py
|
108
114
|
from computer.ui.gradio.app import create_gradio_ui
|
115
|
+
from dotenv import load_dotenv
|
116
|
+
load_dotenv('.env')
|
109
117
|
|
110
118
|
app = create_gradio_ui()
|
111
119
|
app.launch(share=False)
|
112
120
|
```
|
113
121
|
|
114
122
|
For examples, see [Computer UI Examples](../../examples/computer_ui_examples.py)
|
123
|
+
|
124
|
+
#### 3. Record Your Tasks
|
125
|
+
|
126
|
+
<details open>
|
127
|
+
<summary>View demonstration video</summary>
|
128
|
+
<video src="https://github.com/user-attachments/assets/de3c3477-62fe-413c-998d-4063e48de176" controls width="600"></video>
|
129
|
+
</details>
|
130
|
+
|
131
|
+
Record yourself performing various computer tasks using the UI.
|
132
|
+
|
133
|
+
#### 4. Save Your Demonstrations
|
134
|
+
|
135
|
+
<details open>
|
136
|
+
<summary>View demonstration video</summary>
|
137
|
+
<video src="https://github.com/user-attachments/assets/5ad1df37-026a-457f-8b49-922ae805faef" controls width="600"></video>
|
138
|
+
</details>
|
139
|
+
|
140
|
+
Save each task by picking a descriptive name and adding relevant tags (e.g., "office", "web-browsing", "coding").
|
141
|
+
|
142
|
+
#### 5. Record Additional Demonstrations
|
143
|
+
|
144
|
+
Repeat steps 3 and 4 until you have a good amount of demonstrations covering different tasks and scenarios.
|
145
|
+
|
146
|
+
#### 6. Upload to Huggingface
|
147
|
+
|
148
|
+
<details open>
|
149
|
+
<summary>View demonstration video</summary>
|
150
|
+
<video src="https://github.com/user-attachments/assets/c586d460-3877-4b5f-a736-3248886d2134" controls width="600"></video>
|
151
|
+
</details>
|
152
|
+
|
153
|
+
Upload your dataset to Huggingface by:
|
154
|
+
- Naming it as `{your_username}/{dataset_name}`
|
155
|
+
- Choosing public or private visibility
|
156
|
+
- Optionally selecting specific tags to upload only tasks with certain tags
|
157
|
+
|
158
|
+
#### Examples and Resources
|
159
|
+
|
160
|
+
- Example Dataset: [ddupont/test-dataset](https://huggingface.co/datasets/ddupont/test-dataset)
|
161
|
+
- Find Community Datasets: 🔍 [Browse CUA Datasets on Huggingface](https://huggingface.co/datasets?other=cua)
|
162
|
+
|
@@ -26,7 +26,7 @@
|
|
26
26
|
```python
|
27
27
|
from computer import Computer
|
28
28
|
|
29
|
-
computer = Computer(
|
29
|
+
computer = Computer(os_type="macos", display="1024x768", memory="8GB", cpu="4")
|
30
30
|
try:
|
31
31
|
await computer.run()
|
32
32
|
|
@@ -67,30 +67,78 @@ Refer to this notebook for a step-by-step guide on how to use the Computer-Use I
|
|
67
67
|
|
68
68
|
## Using the Gradio Computer UI
|
69
69
|
|
70
|
-
The computer module includes a Gradio UI for creating and sharing demonstration data.
|
70
|
+
The computer module includes a Gradio UI for creating and sharing demonstration data. We make it easy for people to build community datasets for better computer use models with an upload to Huggingface feature.
|
71
71
|
|
72
72
|
```bash
|
73
73
|
# Install with UI support
|
74
74
|
pip install "cua-computer[ui]"
|
75
75
|
```
|
76
76
|
|
77
|
+
> **Note:** For precise control of the computer, we recommend using VNC or Screen Sharing instead of the Computer Gradio UI.
|
77
78
|
|
78
|
-
|
79
|
-
<summary>View demonstration video</summary>
|
80
|
-
<video src="https://github.com/user-attachments/assets/7c683b58-f04d-4e8c-b63f-6ef36e9637d5" controls width="600"></video>
|
81
|
-
</details>
|
79
|
+
### Building and Sharing Demonstrations with Huggingface
|
82
80
|
|
83
|
-
|
81
|
+
Follow these steps to contribute your own demonstrations:
|
82
|
+
|
83
|
+
#### 1. Set up Huggingface Access
|
84
84
|
|
85
|
+
Set your HF_TOKEN in a .env file or in your environment variables:
|
86
|
+
|
87
|
+
```bash
|
88
|
+
# In .env file
|
89
|
+
HF_TOKEN=your_huggingface_token
|
90
|
+
```
|
85
91
|
|
86
|
-
|
92
|
+
#### 2. Launch the Computer UI
|
87
93
|
|
88
94
|
```python
|
89
95
|
# launch_ui.py
|
90
96
|
from computer.ui.gradio.app import create_gradio_ui
|
97
|
+
from dotenv import load_dotenv
|
98
|
+
load_dotenv('.env')
|
91
99
|
|
92
100
|
app = create_gradio_ui()
|
93
101
|
app.launch(share=False)
|
94
102
|
```
|
95
103
|
|
96
104
|
For examples, see [Computer UI Examples](../../examples/computer_ui_examples.py)
|
105
|
+
|
106
|
+
#### 3. Record Your Tasks
|
107
|
+
|
108
|
+
<details open>
|
109
|
+
<summary>View demonstration video</summary>
|
110
|
+
<video src="https://github.com/user-attachments/assets/de3c3477-62fe-413c-998d-4063e48de176" controls width="600"></video>
|
111
|
+
</details>
|
112
|
+
|
113
|
+
Record yourself performing various computer tasks using the UI.
|
114
|
+
|
115
|
+
#### 4. Save Your Demonstrations
|
116
|
+
|
117
|
+
<details open>
|
118
|
+
<summary>View demonstration video</summary>
|
119
|
+
<video src="https://github.com/user-attachments/assets/5ad1df37-026a-457f-8b49-922ae805faef" controls width="600"></video>
|
120
|
+
</details>
|
121
|
+
|
122
|
+
Save each task by picking a descriptive name and adding relevant tags (e.g., "office", "web-browsing", "coding").
|
123
|
+
|
124
|
+
#### 5. Record Additional Demonstrations
|
125
|
+
|
126
|
+
Repeat steps 3 and 4 until you have a good amount of demonstrations covering different tasks and scenarios.
|
127
|
+
|
128
|
+
#### 6. Upload to Huggingface
|
129
|
+
|
130
|
+
<details open>
|
131
|
+
<summary>View demonstration video</summary>
|
132
|
+
<video src="https://github.com/user-attachments/assets/c586d460-3877-4b5f-a736-3248886d2134" controls width="600"></video>
|
133
|
+
</details>
|
134
|
+
|
135
|
+
Upload your dataset to Huggingface by:
|
136
|
+
- Naming it as `{your_username}/{dataset_name}`
|
137
|
+
- Choosing public or private visibility
|
138
|
+
- Optionally selecting specific tags to upload only tasks with certain tags
|
139
|
+
|
140
|
+
#### Examples and Resources
|
141
|
+
|
142
|
+
- Example Dataset: [ddupont/test-dataset](https://huggingface.co/datasets/ddupont/test-dataset)
|
143
|
+
- Find Community Datasets: 🔍 [Browse CUA Datasets on Huggingface](https://huggingface.co/datasets?other=cua)
|
144
|
+
|
@@ -29,7 +29,7 @@ class Computer:
|
|
29
29
|
display: Union[Display, Dict[str, int], str] = "1024x768",
|
30
30
|
memory: str = "8GB",
|
31
31
|
cpu: str = "4",
|
32
|
-
|
32
|
+
os_type: OSType = "macos",
|
33
33
|
name: str = "",
|
34
34
|
image: str = "macos-sequoia-cua:latest",
|
35
35
|
shared_directories: Optional[List[str]] = None,
|
@@ -68,6 +68,7 @@ class Computer:
|
|
68
68
|
self.image = image
|
69
69
|
self.port = port
|
70
70
|
self.host = host
|
71
|
+
self.os_type = os_type
|
71
72
|
|
72
73
|
# Store telemetry preference
|
73
74
|
self._telemetry_enabled = telemetry_enabled
|
@@ -129,8 +130,8 @@ class Computer:
|
|
129
130
|
self.shared_paths = []
|
130
131
|
if shared_directories:
|
131
132
|
for path in shared_directories:
|
132
|
-
abs_path = os.path.abspath(os.path.expanduser(path))
|
133
|
-
if not os.path.exists(abs_path):
|
133
|
+
abs_path = os.path.abspath(os.path.expanduser(path))
|
134
|
+
if not os.path.exists(abs_path):
|
134
135
|
raise ValueError(f"Shared directory does not exist: {path}")
|
135
136
|
self.shared_paths.append(abs_path)
|
136
137
|
self._pylume_context = None
|
@@ -188,7 +189,7 @@ class Computer:
|
|
188
189
|
self._interface = cast(
|
189
190
|
BaseComputerInterface,
|
190
191
|
InterfaceFactory.create_interface_for_os(
|
191
|
-
os=self.
|
192
|
+
os=self.os_type, ip_address=ip_address # type: ignore[arg-type]
|
192
193
|
),
|
193
194
|
)
|
194
195
|
|
@@ -288,13 +289,13 @@ class Computer:
|
|
288
289
|
|
289
290
|
try:
|
290
291
|
# Initialize the interface using the factory with the specified OS
|
291
|
-
self.logger.info(f"Initializing interface for {self.
|
292
|
+
self.logger.info(f"Initializing interface for {self.os_type} at {ip_address}")
|
292
293
|
from .interface.base import BaseComputerInterface
|
293
294
|
|
294
295
|
self._interface = cast(
|
295
296
|
BaseComputerInterface,
|
296
297
|
InterfaceFactory.create_interface_for_os(
|
297
|
-
os=self.
|
298
|
+
os=self.os_type, ip_address=ip_address # type: ignore[arg-type]
|
298
299
|
),
|
299
300
|
)
|
300
301
|
|
@@ -79,6 +79,17 @@ class BaseComputerInterface(ABC):
|
|
79
79
|
"""
|
80
80
|
pass
|
81
81
|
|
82
|
+
@abstractmethod
|
83
|
+
async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> None:
|
84
|
+
"""Drag the cursor along a path of coordinates.
|
85
|
+
|
86
|
+
Args:
|
87
|
+
path: List of (x, y) coordinate tuples defining the drag path
|
88
|
+
button: The mouse button to use ('left', 'middle', 'right')
|
89
|
+
duration: Total time in seconds that the drag operation should take
|
90
|
+
"""
|
91
|
+
pass
|
92
|
+
|
82
93
|
# Keyboard Actions
|
83
94
|
@abstractmethod
|
84
95
|
async def type_text(self, text: str) -> None:
|
@@ -328,6 +328,11 @@ class MacOSComputerInterface(BaseComputerInterface):
|
|
328
328
|
"drag_to", {"x": x, "y": y, "button": button, "duration": duration}
|
329
329
|
)
|
330
330
|
|
331
|
+
async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> None:
|
332
|
+
await self._send_command(
|
333
|
+
"drag", {"path": path, "button": button, "duration": duration}
|
334
|
+
)
|
335
|
+
|
331
336
|
# Keyboard Actions
|
332
337
|
async def type_text(self, text: str) -> None:
|
333
338
|
await self._send_command("type_text", {"text": text})
|
@@ -7,6 +7,9 @@ NavigationKey = Literal['pagedown', 'pageup', 'home', 'end', 'left', 'right', 'u
|
|
7
7
|
# Special key literals
|
8
8
|
SpecialKey = Literal['enter', 'esc', 'tab', 'space', 'backspace', 'del']
|
9
9
|
|
10
|
+
# Modifier key literals
|
11
|
+
ModifierKey = Literal['ctrl', 'alt', 'shift', 'win', 'command', 'option']
|
12
|
+
|
10
13
|
# Function key literals
|
11
14
|
FunctionKey = Literal['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12']
|
12
15
|
|
@@ -35,6 +38,14 @@ class Key(Enum):
|
|
35
38
|
BACKSPACE = 'backspace'
|
36
39
|
DELETE = 'del'
|
37
40
|
|
41
|
+
# Modifier keys
|
42
|
+
ALT = 'alt'
|
43
|
+
CTRL = 'ctrl'
|
44
|
+
SHIFT = 'shift'
|
45
|
+
WIN = 'win'
|
46
|
+
COMMAND = 'command'
|
47
|
+
OPTION = 'option'
|
48
|
+
|
38
49
|
# Function keys
|
39
50
|
F1 = 'f1'
|
40
51
|
F2 = 'f2'
|
@@ -73,14 +84,27 @@ class Key(Enum):
|
|
73
84
|
'escape': cls.ESCAPE,
|
74
85
|
'esc': cls.ESC,
|
75
86
|
'delete': cls.DELETE,
|
76
|
-
'del': cls.DELETE
|
87
|
+
'del': cls.DELETE,
|
88
|
+
# Modifier key mappings
|
89
|
+
'alt': cls.ALT,
|
90
|
+
'ctrl': cls.CTRL,
|
91
|
+
'control': cls.CTRL,
|
92
|
+
'shift': cls.SHIFT,
|
93
|
+
'win': cls.WIN,
|
94
|
+
'windows': cls.WIN,
|
95
|
+
'super': cls.WIN,
|
96
|
+
'command': cls.COMMAND,
|
97
|
+
'cmd': cls.COMMAND,
|
98
|
+
'⌘': cls.COMMAND,
|
99
|
+
'option': cls.OPTION,
|
100
|
+
'⌥': cls.OPTION,
|
77
101
|
}
|
78
102
|
|
79
103
|
normalized = key.lower().strip()
|
80
104
|
return key_mapping.get(normalized, key)
|
81
105
|
|
82
106
|
# Combined key type
|
83
|
-
KeyType = Union[Key, NavigationKey, SpecialKey, FunctionKey, str]
|
107
|
+
KeyType = Union[Key, NavigationKey, SpecialKey, ModifierKey, FunctionKey, str]
|
84
108
|
|
85
109
|
class AccessibilityWindow(TypedDict):
|
86
110
|
"""Information about a window in the accessibility tree."""
|
@@ -532,7 +532,7 @@ async def handle_init_computer():
|
|
532
532
|
"""Initialize the computer instance and tools"""
|
533
533
|
global computer, tool_call_logs, tools
|
534
534
|
|
535
|
-
computer = Computer(
|
535
|
+
computer = Computer(os_type="macos", display="1024x768", memory="8GB", cpu="4")
|
536
536
|
await computer.run()
|
537
537
|
|
538
538
|
# Log computer initialization as a tool call
|
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
|
|
6
6
|
|
7
7
|
[project]
|
8
8
|
name = "cua-computer"
|
9
|
-
version = "0.1.23"
|
9
|
+
version = "0.1.25"
|
10
10
|
description = "Computer-Use Interface (CUI) framework powering Cua"
|
11
11
|
readme = "README.md"
|
12
12
|
authors = [
|
@@ -50,7 +50,7 @@ target-version = [
|
|
50
50
|
|
51
51
|
[tool.ruff]
|
52
52
|
line-length = 100
|
53
|
-
target-version = "0.1.23"
|
53
|
+
target-version = "0.1.25"
|
54
54
|
select = [
|
55
55
|
"E",
|
56
56
|
"F",
|
@@ -64,7 +64,7 @@ docstring-code-format = true
|
|
64
64
|
|
65
65
|
[tool.mypy]
|
66
66
|
strict = true
|
67
|
-
python_version = "0.1.23"
|
67
|
+
python_version = "0.1.25"
|
68
68
|
ignore_missing_imports = true
|
69
69
|
disallow_untyped_defs = true
|
70
70
|
check_untyped_defs = true
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|