cua-computer 0.4.3__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
computer/__init__.py CHANGED
@@ -12,7 +12,6 @@ logger = logging.getLogger("computer")
12
12
  try:
13
13
  # Import from core telemetry
14
14
  from core.telemetry import (
15
- flush,
16
15
  is_telemetry_enabled,
17
16
  record_event,
18
17
  )
@@ -30,9 +29,6 @@ try:
30
29
  "python_version": sys.version,
31
30
  },
32
31
  )
33
-
34
- # Flush events to ensure they're sent
35
- flush()
36
32
  else:
37
33
  logger.info("Telemetry is disabled")
38
34
  except ImportError as e:
computer/computer.py CHANGED
@@ -9,10 +9,18 @@ import re
9
9
  from .logger import Logger, LogLevel
10
10
  import json
11
11
  import logging
12
- from .telemetry import record_computer_initialization
12
+ from core.telemetry import is_telemetry_enabled, record_event
13
13
  import os
14
14
  from . import helpers
15
15
 
16
+ import platform
17
+
18
+ SYSTEM_INFO = {
19
+ "os": platform.system().lower(),
20
+ "os_version": platform.release(),
21
+ "python_version": platform.python_version(),
22
+ }
23
+
16
24
  # Import provider related modules
17
25
  from .providers.base import VMProviderType
18
26
  from .providers.factory import VMProviderFactory
@@ -152,6 +160,8 @@ class Computer:
152
160
  if not name:
153
161
  # Normalize the name to be used for the VM
154
162
  name = image.replace(":", "_")
163
+ # Remove any forward slashes
164
+ name = name.replace("/", "_")
155
165
 
156
166
  # Convert display parameter to Display object
157
167
  if isinstance(display, str):
@@ -190,8 +200,8 @@ class Computer:
190
200
  self.use_host_computer_server = use_host_computer_server
191
201
 
192
202
  # Record initialization in telemetry (if enabled)
193
- if telemetry_enabled:
194
- record_computer_initialization()
203
+ if telemetry_enabled and is_telemetry_enabled():
204
+ record_event("computer_initialized", SYSTEM_INFO)
195
205
  else:
196
206
  self.logger.debug("Telemetry disabled - skipping initialization tracking")
197
207
 
computer/diorama_computer.py CHANGED
@@ -6,16 +6,35 @@ class DioramaComputer:
6
6
  A Computer-compatible proxy for Diorama that sends commands over the ComputerInterface.
7
7
  """
8
8
  def __init__(self, computer, apps):
9
+ """
10
+ Initialize the DioramaComputer with a computer instance and list of apps.
11
+
12
+ Args:
13
+ computer: The computer instance to proxy commands through
14
+ apps: List of applications available in the diorama environment
15
+ """
9
16
  self.computer = computer
10
17
  self.apps = apps
11
18
  self.interface = DioramaComputerInterface(computer, apps)
12
19
  self._initialized = False
13
20
 
14
21
  async def __aenter__(self):
22
+ """
23
+ Async context manager entry point.
24
+
25
+ Returns:
26
+ self: The DioramaComputer instance
27
+ """
15
28
  self._initialized = True
16
29
  return self
17
30
 
18
31
  async def run(self):
32
+ """
33
+ Initialize and run the DioramaComputer if not already initialized.
34
+
35
+ Returns:
36
+ self: The DioramaComputer instance
37
+ """
19
38
  if not self._initialized:
20
39
  await self.__aenter__()
21
40
  return self
@@ -25,11 +44,31 @@ class DioramaComputerInterface:
25
44
  Diorama Interface proxy that sends diorama_cmds via the Computer's interface.
26
45
  """
27
46
  def __init__(self, computer, apps):
47
+ """
48
+ Initialize the DioramaComputerInterface.
49
+
50
+ Args:
51
+ computer: The computer instance to send commands through
52
+ apps: List of applications available in the diorama environment
53
+ """
28
54
  self.computer = computer
29
55
  self.apps = apps
30
56
  self._scene_size = None
31
57
 
32
58
  async def _send_cmd(self, action, arguments=None):
59
+ """
60
+ Send a command to the diorama interface through the computer.
61
+
62
+ Args:
63
+ action (str): The action/command to execute
64
+ arguments (dict, optional): Additional arguments for the command
65
+
66
+ Returns:
67
+ The result from the diorama command execution
68
+
69
+ Raises:
70
+ RuntimeError: If the computer interface is not initialized or command fails
71
+ """
33
72
  arguments = arguments or {}
34
73
  arguments = {"app_list": self.apps, **arguments}
35
74
  # Use the computer's interface (must be initialized)
@@ -42,6 +81,15 @@ class DioramaComputerInterface:
42
81
  return result.get("result")
43
82
 
44
83
  async def screenshot(self, as_bytes=True):
84
+ """
85
+ Take a screenshot of the diorama scene.
86
+
87
+ Args:
88
+ as_bytes (bool): If True, return image as bytes; if False, return PIL Image object
89
+
90
+ Returns:
91
+ bytes or PIL.Image: Screenshot data in the requested format
92
+ """
45
93
  from PIL import Image
46
94
  import base64
47
95
  result = await self._send_cmd("screenshot")
@@ -53,41 +101,122 @@ class DioramaComputerInterface:
53
101
  return img_bytes if as_bytes else img
54
102
 
55
103
  async def get_screen_size(self):
104
+ """
105
+ Get the dimensions of the diorama scene.
106
+
107
+ Returns:
108
+ dict: Dictionary containing 'width' and 'height' keys with pixel dimensions
109
+ """
56
110
  if not self._scene_size:
57
111
  await self.screenshot(as_bytes=False)
58
112
  return {"width": self._scene_size[0], "height": self._scene_size[1]}
59
113
 
60
114
  async def move_cursor(self, x, y):
115
+ """
116
+ Move the cursor to the specified coordinates.
117
+
118
+ Args:
119
+ x (int): X coordinate to move cursor to
120
+ y (int): Y coordinate to move cursor to
121
+ """
61
122
  await self._send_cmd("move_cursor", {"x": x, "y": y})
62
123
 
63
124
  async def left_click(self, x=None, y=None):
125
+ """
126
+ Perform a left mouse click at the specified coordinates or current cursor position.
127
+
128
+ Args:
129
+ x (int, optional): X coordinate to click at. If None, clicks at current cursor position
130
+ y (int, optional): Y coordinate to click at. If None, clicks at current cursor position
131
+ """
64
132
  await self._send_cmd("left_click", {"x": x, "y": y})
65
133
 
66
134
  async def right_click(self, x=None, y=None):
135
+ """
136
+ Perform a right mouse click at the specified coordinates or current cursor position.
137
+
138
+ Args:
139
+ x (int, optional): X coordinate to click at. If None, clicks at current cursor position
140
+ y (int, optional): Y coordinate to click at. If None, clicks at current cursor position
141
+ """
67
142
  await self._send_cmd("right_click", {"x": x, "y": y})
68
143
 
69
144
  async def double_click(self, x=None, y=None):
145
+ """
146
+ Perform a double mouse click at the specified coordinates or current cursor position.
147
+
148
+ Args:
149
+ x (int, optional): X coordinate to double-click at. If None, clicks at current cursor position
150
+ y (int, optional): Y coordinate to double-click at. If None, clicks at current cursor position
151
+ """
70
152
  await self._send_cmd("double_click", {"x": x, "y": y})
71
153
 
72
154
  async def scroll_up(self, clicks=1):
155
+ """
156
+ Scroll up by the specified number of clicks.
157
+
158
+ Args:
159
+ clicks (int): Number of scroll clicks to perform upward. Defaults to 1
160
+ """
73
161
  await self._send_cmd("scroll_up", {"clicks": clicks})
74
162
 
75
163
  async def scroll_down(self, clicks=1):
164
+ """
165
+ Scroll down by the specified number of clicks.
166
+
167
+ Args:
168
+ clicks (int): Number of scroll clicks to perform downward. Defaults to 1
169
+ """
76
170
  await self._send_cmd("scroll_down", {"clicks": clicks})
77
171
 
78
172
  async def drag_to(self, x, y, duration=0.5):
173
+ """
174
+ Drag from the current cursor position to the specified coordinates.
175
+
176
+ Args:
177
+ x (int): X coordinate to drag to
178
+ y (int): Y coordinate to drag to
179
+ duration (float): Duration of the drag operation in seconds. Defaults to 0.5
180
+ """
79
181
  await self._send_cmd("drag_to", {"x": x, "y": y, "duration": duration})
80
182
 
81
183
  async def get_cursor_position(self):
184
+ """
185
+ Get the current cursor position.
186
+
187
+ Returns:
188
+ dict: Dictionary containing the current cursor coordinates
189
+ """
82
190
  return await self._send_cmd("get_cursor_position")
83
191
 
84
192
  async def type_text(self, text):
193
+ """
194
+ Type the specified text at the current cursor position.
195
+
196
+ Args:
197
+ text (str): The text to type
198
+ """
85
199
  await self._send_cmd("type_text", {"text": text})
86
200
 
87
201
  async def press_key(self, key):
202
+ """
203
+ Press a single key.
204
+
205
+ Args:
206
+ key: The key to press
207
+ """
88
208
  await self._send_cmd("press_key", {"key": key})
89
209
 
90
210
  async def hotkey(self, *keys):
211
+ """
212
+ Press multiple keys simultaneously as a hotkey combination.
213
+
214
+ Args:
215
+ *keys: Variable number of keys to press together. Can be Key enum instances or strings
216
+
217
+ Raises:
218
+ ValueError: If any key is not a Key enum or string type
219
+ """
91
220
  actual_keys = []
92
221
  for key in keys:
93
222
  if isinstance(key, Key):
@@ -101,4 +230,14 @@ class DioramaComputerInterface:
101
230
  await self._send_cmd("hotkey", {"keys": actual_keys})
102
231
 
103
232
  async def to_screen_coordinates(self, x, y):
233
+ """
234
+ Convert coordinates to screen coordinates.
235
+
236
+ Args:
237
+ x (int): X coordinate to convert
238
+ y (int): Y coordinate to convert
239
+
240
+ Returns:
241
+ dict: Dictionary containing the converted screen coordinates
242
+ """
104
243
  return await self._send_cmd("to_screen_coordinates", {"x": x, "y": y})
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-computer
3
- Version: 0.4.3
3
+ Version: 0.4.5
4
4
  Summary: Computer-Use Interface (CUI) framework powering Cua
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: >=3.11
@@ -26,8 +26,8 @@ Description-Content-Type: text/markdown
26
26
  <h1>
27
27
  <div class="image-wrapper" style="display: inline-block;">
28
28
  <picture>
29
- <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../img/logo_white.png" style="display: block; margin: auto;">
30
- <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../img/logo_black.png" style="display: block; margin: auto;">
29
+ <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_white.png" style="display: block; margin: auto;">
30
+ <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_black.png" style="display: block; margin: auto;">
31
31
  <img alt="Shows my svg">
32
32
  </picture>
33
33
  </div>
@@ -44,7 +44,7 @@ Description-Content-Type: text/markdown
44
44
  ### Get started with Computer
45
45
 
46
46
  <div align="center">
47
- <img src="../../img/computer.png"/>
47
+ <img src="https://raw.githubusercontent.com/trycua/cua/main/img/computer.png"/>
48
48
  </div>
49
49
 
50
50
  ```python
@@ -87,82 +87,11 @@ The `cua-computer` PyPi package pulls automatically the latest executable versio
87
87
 
88
88
  Refer to this notebook for a step-by-step guide on how to use the Computer-Use Interface (CUI):
89
89
 
90
- - [Computer-Use Interface (CUI)](../../notebooks/computer_nb.ipynb)
90
+ - [Computer-Use Interface (CUI)](https://github.com/trycua/cua/blob/main/notebooks/computer_nb.ipynb)
91
91
 
92
- ## Using the Gradio Computer UI
93
-
94
- The computer module includes a Gradio UI for creating and sharing demonstration data. We make it easy for people to build community datasets for better computer use models with an upload to Huggingface feature.
95
-
96
- ```bash
97
- # Install with UI support
98
- pip install "cua-computer[ui]"
99
- ```
100
-
101
- > **Note:** For precise control of the computer, we recommend using VNC or Screen Sharing instead of the Computer Gradio UI.
102
-
103
- ### Building and Sharing Demonstrations with Huggingface
104
-
105
- Follow these steps to contribute your own demonstrations:
106
-
107
- #### 1. Set up Huggingface Access
108
-
109
- Set your HF_TOKEN in a .env file or in your environment variables:
110
-
111
- ```bash
112
- # In .env file
113
- HF_TOKEN=your_huggingface_token
114
- ```
115
-
116
- #### 2. Launch the Computer UI
117
-
118
- ```python
119
- # launch_ui.py
120
- from computer.ui.gradio.app import create_gradio_ui
121
- from dotenv import load_dotenv
122
- load_dotenv('.env')
123
-
124
- app = create_gradio_ui()
125
- app.launch(share=False)
126
- ```
127
-
128
- For examples, see [Computer UI Examples](../../examples/computer_ui_examples.py)
129
-
130
- #### 3. Record Your Tasks
131
-
132
- <details open>
133
- <summary>View demonstration video</summary>
134
- <video src="https://github.com/user-attachments/assets/de3c3477-62fe-413c-998d-4063e48de176" controls width="600"></video>
135
- </details>
136
-
137
- Record yourself performing various computer tasks using the UI.
138
-
139
- #### 4. Save Your Demonstrations
140
-
141
- <details open>
142
- <summary>View demonstration video</summary>
143
- <video src="https://github.com/user-attachments/assets/5ad1df37-026a-457f-8b49-922ae805faef" controls width="600"></video>
144
- </details>
145
-
146
- Save each task by picking a descriptive name and adding relevant tags (e.g., "office", "web-browsing", "coding").
147
-
148
- #### 5. Record Additional Demonstrations
149
-
150
- Repeat steps 3 and 4 until you have a good amount of demonstrations covering different tasks and scenarios.
151
-
152
- #### 6. Upload to Huggingface
153
-
154
- <details open>
155
- <summary>View demonstration video</summary>
156
- <video src="https://github.com/user-attachments/assets/c586d460-3877-4b5f-a736-3248886d2134" controls width="600"></video>
157
- </details>
158
-
159
- Upload your dataset to Huggingface by:
160
- - Naming it as `{your_username}/{dataset_name}`
161
- - Choosing public or private visibility
162
- - Optionally selecting specific tags to upload only tasks with certain tags
163
-
164
- #### Examples and Resources
165
-
166
- - Example Dataset: [ddupont/test-dataset](https://huggingface.co/datasets/ddupont/test-dataset)
167
- - Find Community Datasets: 🔍 [Browse CUA Datasets on Huggingface](https://huggingface.co/datasets?other=cua)
92
+ ## Docs
168
93
 
94
+ - [Computers](https://trycua.com/docs/computer-sdk/computers)
95
+ - [Commands](https://trycua.com/docs/computer-sdk/commands)
96
+ - [Computer UI](https://trycua.com/docs/computer-sdk/computer-ui)
97
+ - [Sandboxed Python](https://trycua.com/docs/computer-sdk/sandboxed-python)
@@ -1,6 +1,6 @@
1
- computer/__init__.py,sha256=44ZBq815dMihgAHmBKn1S_GFNbElCXyZInh3hle1k9Y,1237
2
- computer/computer.py,sha256=u-M9pZM3Tc0PEtV13M6Dj8_yaW6HqMHJwlJLVbA7XtQ,42398
3
- computer/diorama_computer.py,sha256=jOP7_eXxxU6SMIoE25ni0YXPK0E7p5sZeLKmkYLh6G8,3871
1
+ computer/__init__.py,sha256=HG8dhCmSPjuQ4G-NGAoiXEhzhO37kwrHHmyboNhGWOA,1159
2
+ computer/computer.py,sha256=AjPqUAv1hF2AgK1eODRrQCvMikyxeuE6gxlwsnwNB-s,42701
3
+ computer/diorama_computer.py,sha256=3JaXKpcSi_OAVXtwlmNwQgrcnvqP1AxdlKEQ0XRJ0aQ,8569
4
4
  computer/helpers.py,sha256=iHkO2WhuCLc15g67kfMnpQWxfNRlz2YeJNEvYaL9jlM,1826
5
5
  computer/interface/__init__.py,sha256=xQvYjq5PMn9ZJOmRR5mWtONTl_0HVd8ACvW6AQnzDdw,262
6
6
  computer/interface/base.py,sha256=1beR4T0z5anb9NaNgKJrMJTF0BFIKyiHlokMLesOV5Q,15131
@@ -27,13 +27,12 @@ computer/providers/lumier/provider.py,sha256=BDgnTuik42H9OuCmnd-1TxM8p4vl_ahfrhN
27
27
  computer/providers/winsandbox/__init__.py,sha256=WsMVBBa_qFfqVHPQzg6j4PegQwLiIudkzUedpYkrfXU,244
28
28
  computer/providers/winsandbox/provider.py,sha256=vduDKUB1OuimvjJdUus9RQY8gv3XQfFgVS8fKOzHVME,19306
29
29
  computer/providers/winsandbox/setup_script.ps1,sha256=8aGwR7PEvqnYzCNyXTDKIwJ6pYrwyWYLRjmNT_jYIwQ,4623
30
- computer/telemetry.py,sha256=jHM3LJAgO2ltN3wlQ6mqCPUcmlS8F955KI70no-T3xA,3730
31
30
  computer/ui/__init__.py,sha256=pmo05ek9qiB_x7DPeE6Vf_8RsIOqTD0w1dBLMHfoOnY,45
32
31
  computer/ui/__main__.py,sha256=Jwy2oC_mGZLN0fX7WLqpjaQkbXMeM3ISrUc8WSRUG0c,284
33
32
  computer/ui/gradio/__init__.py,sha256=5_KimixM48-X74FCsLw7LbSt39MQfUMEL8-M9amK3Cw,117
34
33
  computer/ui/gradio/app.py,sha256=_V6FI-g0GJGMEk-C2iPFtxPO1Gn0juCaeCrWsBtjC4E,70395
35
34
  computer/utils.py,sha256=zY50NXB7r51GNLQ6l7lhG_qv0_ufpQ8n0-SDhCei8m4,2838
36
- cua_computer-0.4.3.dist-info/METADATA,sha256=ACsrn0_9Eyqrev62IeSun_AYPWO1mOtx-iHQCkmkIUM,5802
37
- cua_computer-0.4.3.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
38
- cua_computer-0.4.3.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
39
- cua_computer-0.4.3.dist-info/RECORD,,
35
+ cua_computer-0.4.5.dist-info/METADATA,sha256=jhHrPllu_5B85qOHCO75bkc6vGSg9gtq_O7RiRxak6M,3776
36
+ cua_computer-0.4.5.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
37
+ cua_computer-0.4.5.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
38
+ cua_computer-0.4.5.dist-info/RECORD,,
computer/telemetry.py DELETED
@@ -1,116 +0,0 @@
1
- """Computer telemetry for tracking anonymous usage and feature usage."""
2
-
3
- import logging
4
- import platform
5
- from typing import Any
6
-
7
- # Import the core telemetry module
8
- TELEMETRY_AVAILABLE = False
9
-
10
- try:
11
- from core.telemetry import (
12
- increment,
13
- is_telemetry_enabled,
14
- is_telemetry_globally_disabled,
15
- record_event,
16
- )
17
-
18
- def increment_counter(counter_name: str, value: int = 1) -> None:
19
- """Wrapper for increment to maintain backward compatibility."""
20
- if is_telemetry_enabled():
21
- increment(counter_name, value)
22
-
23
- def set_dimension(name: str, value: Any) -> None:
24
- """Set a dimension that will be attached to all events."""
25
- logger = logging.getLogger("computer.telemetry")
26
- logger.debug(f"Setting dimension {name}={value}")
27
-
28
- TELEMETRY_AVAILABLE = True
29
- logger = logging.getLogger("computer.telemetry")
30
- logger.info("Successfully imported telemetry")
31
- except ImportError as e:
32
- logger = logging.getLogger("computer.telemetry")
33
- logger.warning(f"Could not import telemetry: {e}")
34
- TELEMETRY_AVAILABLE = False
35
-
36
-
37
- # Local fallbacks in case core telemetry isn't available
38
- def _noop(*args: Any, **kwargs: Any) -> None:
39
- """No-op function for when telemetry is not available."""
40
- pass
41
-
42
-
43
- logger = logging.getLogger("computer.telemetry")
44
-
45
- # If telemetry isn't available, use no-op functions
46
- if not TELEMETRY_AVAILABLE:
47
- logger.debug("Telemetry not available, using no-op functions")
48
- record_event = _noop # type: ignore
49
- increment_counter = _noop # type: ignore
50
- set_dimension = _noop # type: ignore
51
- get_telemetry_client = lambda: None # type: ignore
52
- flush = _noop # type: ignore
53
- is_telemetry_enabled = lambda: False # type: ignore
54
- is_telemetry_globally_disabled = lambda: True # type: ignore
55
-
56
- # Get system info once to use in telemetry
57
- SYSTEM_INFO = {
58
- "os": platform.system().lower(),
59
- "os_version": platform.release(),
60
- "python_version": platform.python_version(),
61
- }
62
-
63
-
64
- def enable_telemetry() -> bool:
65
- """Enable telemetry if available.
66
-
67
- Returns:
68
- bool: True if telemetry was successfully enabled, False otherwise
69
- """
70
- global TELEMETRY_AVAILABLE
71
-
72
- # Check if globally disabled using core function
73
- if TELEMETRY_AVAILABLE and is_telemetry_globally_disabled():
74
- logger.info("Telemetry is globally disabled via environment variable - cannot enable")
75
- return False
76
-
77
- # Already enabled
78
- if TELEMETRY_AVAILABLE:
79
- return True
80
-
81
- # Try to import and enable
82
- try:
83
- # Verify we can import core telemetry
84
- from core.telemetry import record_event # type: ignore
85
-
86
- TELEMETRY_AVAILABLE = True
87
- logger.info("Telemetry successfully enabled")
88
- return True
89
- except ImportError as e:
90
- logger.warning(f"Could not enable telemetry: {e}")
91
- return False
92
-
93
-
94
- def is_telemetry_enabled() -> bool:
95
- """Check if telemetry is enabled.
96
-
97
- Returns:
98
- bool: True if telemetry is enabled, False otherwise
99
- """
100
- # Use the core function if available, otherwise use our local flag
101
- if TELEMETRY_AVAILABLE:
102
- from core.telemetry import is_telemetry_enabled as core_is_enabled
103
-
104
- return core_is_enabled()
105
- return False
106
-
107
-
108
- def record_computer_initialization() -> None:
109
- """Record when a computer instance is initialized."""
110
- if TELEMETRY_AVAILABLE and is_telemetry_enabled():
111
- record_event("computer_initialized", SYSTEM_INFO)
112
-
113
- # Set dimensions that will be attached to all events
114
- set_dimension("os", SYSTEM_INFO["os"])
115
- set_dimension("os_version", SYSTEM_INFO["os_version"])
116
- set_dimension("python_version", SYSTEM_INFO["python_version"])