PyPI - cua-computer - Versions diffs - 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl - Mend

cua-computer 0.2.8 (py3-none-any.whl) → 0.2.10 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

computer/computer.py +196 -10
computer/diorama_computer.py +13 -2
computer/helpers.py +49 -0
computer/interface/base.py +67 -1
computer/interface/linux.py +121 -57
computer/interface/macos.py +96 -32
computer/interface/models.py +3 -0
computer/providers/cloud/provider.py +3 -3
computer/ui/gradio/app.py +81 -30
{cua_computer-0.2.8.dist-info → cua_computer-0.2.10.dist-info}/METADATA +1 -1
{cua_computer-0.2.8.dist-info → cua_computer-0.2.10.dist-info}/RECORD +13 -12
{cua_computer-0.2.8.dist-info → cua_computer-0.2.10.dist-info}/WHEEL +0 -0
{cua_computer-0.2.8.dist-info → cua_computer-0.2.10.dist-info}/entry_points.txt +0 -0

computer/computer.py CHANGED Viewed

@@ -11,6 +11,7 @@ import json
 import logging
 from .telemetry import record_computer_initialization
 import os
+from . import helpers
 # Import provider related modules
 from .providers.base import VMProviderType
@@ -180,24 +181,25 @@ class Computer:
             self.logger.debug("Telemetry disabled - skipping initialization tracking")
     async def __aenter__(self):
-        """Enter async context manager."""
+        """Start the computer."""
+        await self.run()
         return self
     async def __aexit__(self, exc_type, exc_val, exc_tb):
-        """Exit async context manager."""
-        pass
+        """Stop the computer."""
+        await self.disconnect()
     def __enter__(self):
-        """Enter synchronous context manager."""
-        # Run the event loop to call the async run method
+        """Start the computer."""
+        # Run the event loop to call the async enter method
         loop = asyncio.get_event_loop()
-        loop.run_until_complete(self.run())
+        loop.run_until_complete(self.__aenter__())
         return self
     def __exit__(self, exc_type, exc_val, exc_tb):
-        """Exit synchronous context manager."""
-        # We could add cleanup here if needed in the future
-        pass
+        """Stop the computer."""
+        loop = asyncio.get_event_loop()
+        loop.run_until_complete(self.__aexit__(exc_type, exc_val, exc_tb))
     async def run(self) -> Optional[str]:
         """Initialize the VM and computer interface."""
@@ -460,6 +462,10 @@ class Computer:
             # Set the initialization flag and clear the initializing flag
             self._initialized = True
+            # Set this instance as the default computer for remote decorators
+            helpers.set_default_computer(self)
             self.logger.info("Computer successfully initialized")
         except Exception as e:
             raise
@@ -468,9 +474,14 @@ class Computer:
             duration_ms = (time.time() - start_time) * 1000
             self.logger.debug(f"Computer initialization took {duration_ms:.2f}ms")
         return
+    async def disconnect(self) -> None:
+        """Disconnect from the computer's WebSocket interface."""
+        if self._interface:
+            self._interface.close()
     async def stop(self) -> None:
-        """Stop computer control."""
+        """Disconnect from the computer's WebSocket interface and stop the computer."""
         start_time = time.time()
         try:
@@ -491,6 +502,7 @@ class Computer:
                 await self.config.vm_provider.__aexit__(None, None, None)
                 self._provider_context = None
+            await self.disconnect()
             self.logger.info("Computer stopped")
         except Exception as e:
             self.logger.debug(f"Error during cleanup: {e}")  # Log as debug since this might be expected
@@ -722,3 +734,177 @@ class Computer:
             tuple[float, float]: (x, y) coordinates in screenshot space
         """
         return await self.interface.to_screenshot_coordinates(x, y)
+    # Add virtual environment management functions to computer interface
+    async def venv_install(self, venv_name: str, requirements: list[str]) -> tuple[str, str]:
+        """Install packages in a virtual environment.
+        Args:
+            venv_name: Name of the virtual environment
+            requirements: List of package requirements to install
+        Returns:
+            Tuple of (stdout, stderr) from the installation command
+        """
+        requirements = requirements or []
+        # Create virtual environment if it doesn't exist
+        venv_path = f"~/.venvs/{venv_name}"
+        create_cmd = f"mkdir -p ~/.venvs && python3 -m venv {venv_path}"
+        # Check if venv exists, if not create it
+        check_cmd = f"test -d {venv_path} || ({create_cmd})"
+        _, _ = await self.interface.run_command(check_cmd)
+        # Install packages
+        requirements_str = " ".join(requirements)
+        install_cmd = f". {venv_path}/bin/activate && pip install {requirements_str}"
+        return await self.interface.run_command(install_cmd)
+    async def venv_cmd(self, venv_name: str, command: str) -> tuple[str, str]:
+        """Execute a shell command in a virtual environment.
+        Args:
+            venv_name: Name of the virtual environment
+            command: Shell command to execute in the virtual environment
+        Returns:
+            Tuple of (stdout, stderr) from the command execution
+        """
+        venv_path = f"~/.venvs/{venv_name}"
+        # Check if virtual environment exists
+        check_cmd = f"test -d {venv_path}"
+        stdout, stderr = await self.interface.run_command(check_cmd)
+        if stderr or "test:" in stdout:  # venv doesn't exist
+            return "", f"Virtual environment '{venv_name}' does not exist. Create it first using venv_install."
+        # Activate virtual environment and run command
+        full_command = f". {venv_path}/bin/activate && {command}"
+        return await self.interface.run_command(full_command)
+    async def venv_exec(self, venv_name: str, python_func, *args, **kwargs):
+        """Execute Python function in a virtual environment using source code extraction.
+        Args:
+            venv_name: Name of the virtual environment
+            python_func: A callable function to execute
+            *args: Positional arguments to pass to the function
+            **kwargs: Keyword arguments to pass to the function
+        Returns:
+            The result of the function execution, or raises any exception that occurred
+        """
+        import base64
+        import inspect
+        import json
+        import textwrap
+        try:
+            # Get function source code using inspect.getsource
+            source = inspect.getsource(python_func)
+            # Remove common leading whitespace (dedent)
+            func_source = textwrap.dedent(source).strip()
+            # Remove decorators
+            while func_source.lstrip().startswith("@"):
+                func_source = func_source.split("\n", 1)[1].strip()
+            # Get function name for execution
+            func_name = python_func.__name__
+            # Serialize args and kwargs as JSON (safer than dill for cross-version compatibility)
+            args_json = json.dumps(args, default=str)
+            kwargs_json = json.dumps(kwargs, default=str)
+        except OSError as e:
+            raise Exception(f"Cannot retrieve source code for function {python_func.__name__}: {e}")
+        except Exception as e:
+            raise Exception(f"Failed to reconstruct function source: {e}")
+        # Create Python code that will define and execute the function
+        python_code = f'''
+import json
+import traceback
+try:
+    # Define the function from source
+{textwrap.indent(func_source, "    ")}
+    # Deserialize args and kwargs from JSON
+    args_json = """{args_json}"""
+    kwargs_json = """{kwargs_json}"""
+    args = json.loads(args_json)
+    kwargs = json.loads(kwargs_json)
+    # Execute the function
+    result = {func_name}(*args, **kwargs)
+    # Create success output payload
+    output_payload = {{
+        "success": True,
+        "result": result,
+        "error": None
+    }}
+except Exception as e:
+    # Create error output payload
+    output_payload = {{
+        "success": False,
+        "result": None,
+        "error": {{
+            "type": type(e).__name__,
+            "message": str(e),
+            "traceback": traceback.format_exc()
+        }}
+    }}
+# Serialize the output payload as JSON
+import json
+output_json = json.dumps(output_payload, default=str)
+# Print the JSON output with markers
+print(f"<<<VENV_EXEC_START>>>{{output_json}}<<<VENV_EXEC_END>>>")
+'''
+        # Encode the Python code in base64 to avoid shell escaping issues
+        encoded_code = base64.b64encode(python_code.encode('utf-8')).decode('ascii')
+        # Execute the Python code in the virtual environment
+        python_command = f"python -c \"import base64; exec(base64.b64decode('{encoded_code}').decode('utf-8'))\""
+        stdout, stderr = await self.venv_cmd(venv_name, python_command)
+        # Parse the output to extract the payload
+        start_marker = "<<<VENV_EXEC_START>>>"
+        end_marker = "<<<VENV_EXEC_END>>>"
+        # Print original stdout
+        print(stdout[:stdout.find(start_marker)])
+        if start_marker in stdout and end_marker in stdout:
+            start_idx = stdout.find(start_marker) + len(start_marker)
+            end_idx = stdout.find(end_marker)
+            if start_idx < end_idx:
+                output_json = stdout[start_idx:end_idx]
+                try:
+                    # Decode and deserialize the output payload from JSON
+                    output_payload = json.loads(output_json)
+                except Exception as e:
+                    raise Exception(f"Failed to decode output payload: {e}")
+                if output_payload["success"]:
+                    return output_payload["result"]
+                else:
+                    # Recreate and raise the original exception
+                    error_info = output_payload["error"]
+                    error_class = eval(error_info["type"])
+                    raise error_class(error_info["message"])
+            else:
+                raise Exception("Invalid output format: markers found but no content between them")
+        else:
+            # Fallback: return stdout/stderr if no payload markers found
+            raise Exception(f"No output payload found. stdout: {stdout}, stderr: {stderr}")

computer/diorama_computer.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import asyncio
+from .interface.models import KeyType, Key
 class DioramaComputer:
     """
@@ -37,7 +38,7 @@ class DioramaComputerInterface:
             raise RuntimeError("Computer interface not initialized. Call run() first.")
         result = await iface.diorama_cmd(action, arguments)
         if not result.get("success"):
-            raise RuntimeError(f"Diorama command failed: {result.get('error')}")
+            raise RuntimeError(f"Diorama command failed: {result.get('error')}\n{result.get('trace')}")
         return result.get("result")
     async def screenshot(self, as_bytes=True):
@@ -87,7 +88,17 @@ class DioramaComputerInterface:
         await self._send_cmd("press_key", {"key": key})
     async def hotkey(self, *keys):
-        await self._send_cmd("hotkey", {"keys": list(keys)})
+        actual_keys = []
+        for key in keys:
+            if isinstance(key, Key):
+                actual_keys.append(key.value)
+            elif isinstance(key, str):
+                # Try to convert to enum if it matches a known key
+                key_or_enum = Key.from_string(key)
+                actual_keys.append(key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum)
+            else:
+                raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
+        await self._send_cmd("hotkey", {"keys": actual_keys})
     async def to_screen_coordinates(self, x, y):
         return await self._send_cmd("to_screen_coordinates", {"x": x, "y": y})

computer/helpers.py ADDED Viewed

@@ -0,0 +1,49 @@
+"""
+Helper functions and decorators for the Computer module.
+"""
+import asyncio
+from functools import wraps
+from typing import Any, Callable, Optional, TypeVar, cast
+# Global reference to the default computer instance
+_default_computer = None
+def set_default_computer(computer):
+    """
+    Set the default computer instance to be used by the remote decorator.
+    The instance is stored in the module-level ``_default_computer`` variable,
+    replacing whatever was stored there before.
+    Args:
+        computer: The computer instance to use as default
+    """
+    # Rebind the module-level name rather than creating a local variable
+    global _default_computer
+    _default_computer = computer
+def sandboxed(venv_name: str = "default", computer: str = "default", max_retries: int = 3):
+    """
+    Decorator that wraps a function to be executed remotely via computer.venv_exec
+    Args:
+        venv_name: Name of the virtual environment to execute in
+        computer: The computer instance to use, or "default" to use the globally set default
+        max_retries: Maximum number of retries for the remote execution
+    """
+    def decorator(func):
+        @wraps(func)
+        async def wrapper(*args, **kwargs):
+            # Determine which computer instance to use
+            comp = computer if computer != "default" else _default_computer
+            if comp is None:
+                raise RuntimeError("No computer instance available. Either specify a computer instance or call set_default_computer() first.")
+            for i in range(max_retries):
+                try:
+                    return await comp.venv_exec(venv_name, func, *args, **kwargs)
+                except Exception as e:
+                    print(f"Attempt {i+1} failed: {e}")
+                    await asyncio.sleep(1)
+                    if i == max_retries - 1:
+                        raise e
+        return wrapper
+    return decorator

computer/interface/base.py CHANGED Viewed

@@ -3,6 +3,7 @@
 from abc import ABC, abstractmethod
 from typing import Optional, Dict, Any, Tuple, List
 from ..logger import Logger, LogLevel
+from .models import MouseButton
 class BaseComputerInterface(ABC):
@@ -51,6 +52,16 @@ class BaseComputerInterface(ABC):
         self.close()
     # Mouse Actions
+    @abstractmethod
+    async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
+        """Press and hold a mouse button.
+        Args:
+            x: Optional x coordinate (presumably "current cursor position" when None — confirm with implementations).
+            y: Optional y coordinate; same convention as ``x``.
+            button: Which mouse button to press; defaults to "left".
+        """
+        pass
+    @abstractmethod
+    async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
+        """Release a mouse button.
+        Args:
+            x: Optional x coordinate (presumably "current cursor position" when None — confirm with implementations).
+            y: Optional y coordinate; same convention as ``x``.
+            button: Which mouse button to release; defaults to "left".
+        """
+        pass
     @abstractmethod
     async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
         """Perform a left click."""
@@ -95,6 +106,16 @@ class BaseComputerInterface(ABC):
         pass
     # Keyboard Actions
+    @abstractmethod
+    async def key_down(self, key: str) -> None:
+        """Press and hold a key.
+        Args:
+            key: The key to press, given as a string.
+        """
+        pass
+    @abstractmethod
+    async def key_up(self, key: str) -> None:
+        """Release a key.
+        Args:
+            key: The key to release, given as a string.
+        """
+        pass
     @abstractmethod
     async def type_text(self, text: str) -> None:
         """Type the specified text."""
@@ -111,6 +132,11 @@ class BaseComputerInterface(ABC):
         pass
     # Scrolling Actions
+    @abstractmethod
+    async def scroll(self, x: int, y: int) -> None:
+        """Scroll the mouse wheel.
+        Args:
+            x: Integer scroll argument (presumably the horizontal amount — TODO confirm).
+            y: Integer scroll argument (presumably the vertical amount — TODO confirm).
+        """
+        pass
     @abstractmethod
     async def scroll_down(self, clicks: int = 1) -> None:
         """Scroll down."""
@@ -166,7 +192,47 @@ class BaseComputerInterface(ABC):
     async def directory_exists(self, path: str) -> bool:
         """Check if directory exists."""
         pass
+    @abstractmethod
+    async def list_dir(self, path: str) -> List[str]:
+        """List directory contents.
+        Args:
+            path: Path of the directory to list.
+        Returns:
+            The directory entries as a list of strings.
+        """
+        pass
+    @abstractmethod
+    async def read_text(self, path: str) -> str:
+        """Read file text contents.
+        Args:
+            path: Path of the file to read.
+        Returns:
+            The file contents as a string.
+        """
+        pass
+    @abstractmethod
+    async def write_text(self, path: str, content: str) -> None:
+        """Write file text contents.
+        Args:
+            path: Path of the file to write.
+            content: Text to write to the file.
+        """
+        pass
+    @abstractmethod
+    async def read_bytes(self, path: str) -> bytes:
+        """Read file binary contents.
+        Args:
+            path: Path of the file to read.
+        Returns:
+            The file contents as bytes.
+        """
+        pass
+    @abstractmethod
+    async def write_bytes(self, path: str, content: bytes) -> None:
+        """Write file binary contents.
+        Args:
+            path: Path of the file to write.
+            content: Bytes to write to the file.
+        """
+        pass
+    @abstractmethod
+    async def delete_file(self, path: str) -> None:
+        """Delete file.
+        Args:
+            path: Path of the file to delete.
+        """
+        pass
+    @abstractmethod
+    async def create_dir(self, path: str) -> None:
+        """Create directory.
+        Args:
+            path: Path of the directory to create.
+        """
+        pass
+    @abstractmethod
+    async def delete_dir(self, path: str) -> None:
+        """Delete directory.
+        Args:
+            path: Path of the directory to delete.
+        """
+        pass
     @abstractmethod
     async def run_command(self, command: str) -> Tuple[str, str]:
         """Run shell command."""

computer/interface/linux.py CHANGED Viewed

@@ -8,8 +8,8 @@ import websockets
 from ..logger import Logger, LogLevel
 from .base import BaseComputerInterface
-from ..utils import decode_base64_image, bytes_to_image, draw_box, resize_image
-from .models import Key, KeyType
+from ..utils import decode_base64_image, encode_base64_image, bytes_to_image, draw_box, resize_image
+from .models import Key, KeyType, MouseButton
 class LinuxComputerInterface(BaseComputerInterface):
@@ -22,11 +22,12 @@ class LinuxComputerInterface(BaseComputerInterface):
         self._closed = False
         self._last_ping = 0
         self._ping_interval = 5  # Send ping every 5 seconds
-        self._ping_timeout = 10  # Wait 10 seconds for pong response
+        self._ping_timeout = 120  # Wait 120 seconds for pong response
         self._reconnect_delay = 1  # Start with 1 second delay
         self._max_reconnect_delay = 30  # Maximum delay between reconnection attempts
         self._log_connection_attempts = True  # Flag to control connection attempt logging
         self._authenticated = False  # Track authentication status
+        self._command_lock = asyncio.Lock()  # Lock to ensure only one command at a time
         # Set logger name for Linux interface
         self.logger = Logger("cua.interface.linux", LogLevel.NORMAL)
@@ -86,7 +87,7 @@ class LinuxComputerInterface(BaseComputerInterface):
                                 close_timeout=5,
                                 compression=None,  # Disable compression to reduce overhead
                             ),
-                            timeout=30,
+                            timeout=120,
                         )
                         self.logger.info("WebSocket connection established")
@@ -193,58 +194,62 @@ class LinuxComputerInterface(BaseComputerInterface):
         retry_count = 0
         last_error = None
-        while retry_count < max_retries:
-            try:
-                await self._ensure_connection()
-                if not self._ws:
-                    raise ConnectionError("WebSocket connection is not established")
-                # Handle authentication if needed
-                if self.api_key and self.vm_name and not self._authenticated:
-                    self.logger.info("Performing authentication handshake...")
-                    auth_message = {
-                        "command": "authenticate",
-                        "params": {
-                            "api_key": self.api_key,
-                            "container_name": self.vm_name
+        # Acquire lock to ensure only one command is processed at a time
+        async with self._command_lock:
+            self.logger.debug(f"Acquired lock for command: {command}")
+            while retry_count < max_retries:
+                try:
+                    await self._ensure_connection()
+                    if not self._ws:
+                        raise ConnectionError("WebSocket connection is not established")
+                    # Handle authentication if needed
+                    if self.api_key and self.vm_name and not self._authenticated:
+                        self.logger.info("Performing authentication handshake...")
+                        auth_message = {
+                            "command": "authenticate",
+                            "params": {
+                                "api_key": self.api_key,
+                                "container_name": self.vm_name
+                            }
                         }
-                    }
-                    await self._ws.send(json.dumps(auth_message))
-                    # Wait for authentication response
-                    auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10)
-                    auth_result = json.loads(auth_response)
-                    if not auth_result.get("success"):
-                        error_msg = auth_result.get("error", "Authentication failed")
-                        self.logger.error(f"Authentication failed: {error_msg}")
-                        self._authenticated = False
-                        raise ConnectionError(f"Authentication failed: {error_msg}")
-                    self.logger.info("Authentication successful")
-                    self._authenticated = True
-                message = {"command": command, "params": params or {}}
-                await self._ws.send(json.dumps(message))
-                response = await asyncio.wait_for(self._ws.recv(), timeout=30)
-                return json.loads(response)
-            except Exception as e:
-                last_error = e
-                retry_count += 1
-                if retry_count < max_retries:
-                    # Only log at debug level for intermediate retries
-                    self.logger.debug(
-                        f"Command '{command}' failed (attempt {retry_count}/{max_retries}): {e}"
-                    )
-                    await asyncio.sleep(1)
-                    continue
-                else:
-                    # Only log at error level for the final failure
-                    self.logger.error(
-                        f"Failed to send command '{command}' after {max_retries} retries"
-                    )
-                    self.logger.debug(f"Command failure details: {e}")
-                raise last_error if last_error else RuntimeError("Failed to send command")
+                        await self._ws.send(json.dumps(auth_message))
+                        # Wait for authentication response
+                        auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10)
+                        auth_result = json.loads(auth_response)
+                        if not auth_result.get("success"):
+                            error_msg = auth_result.get("error", "Authentication failed")
+                            self.logger.error(f"Authentication failed: {error_msg}")
+                            self._authenticated = False
+                            raise ConnectionError(f"Authentication failed: {error_msg}")
+                        self.logger.info("Authentication successful")
+                        self._authenticated = True
+                    message = {"command": command, "params": params or {}}
+                    await self._ws.send(json.dumps(message))
+                    response = await asyncio.wait_for(self._ws.recv(), timeout=30)
+                    self.logger.debug(f"Completed command: {command}")
+                    return json.loads(response)
+                except Exception as e:
+                    last_error = e
+                    retry_count += 1
+                    if retry_count < max_retries:
+                        # Only log at debug level for intermediate retries
+                        self.logger.debug(
+                            f"Command '{command}' failed (attempt {retry_count}/{max_retries}): {e}"
+                        )
+                        await asyncio.sleep(1)
+                        continue
+                    else:
+                        # Only log at error level for the final failure
+                        self.logger.error(
+                            f"Failed to send command '{command}' after {max_retries} retries"
+                        )
+                        self.logger.debug(f"Command failure details: {e}")
+                        raise last_error if last_error else RuntimeError("Failed to send command")
     async def wait_for_ready(self, timeout: int = 60, interval: float = 1.0):
         """Wait for WebSocket connection to become available."""
@@ -344,6 +349,12 @@ class LinuxComputerInterface(BaseComputerInterface):
             self._ws = None
     # Mouse Actions
+    async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> None:
+        await self._send_command("mouse_down", {"x": x, "y": y, "button": button})
+    async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: str = "left") -> None:
+        await self._send_command("mouse_up", {"x": x, "y": y, "button": button})
     async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
         await self._send_command("left_click", {"x": x, "y": y})
@@ -356,17 +367,23 @@ class LinuxComputerInterface(BaseComputerInterface):
     async def move_cursor(self, x: int, y: int) -> None:
         await self._send_command("move_cursor", {"x": x, "y": y})
-    async def drag_to(self, x: int, y: int, button: str = "left", duration: float = 0.5) -> None:
+    async def drag_to(self, x: int, y: int, button: "MouseButton" = "left", duration: float = 0.5) -> None:
         await self._send_command(
             "drag_to", {"x": x, "y": y, "button": button, "duration": duration}
         )
-    async def drag(self, path: List[Tuple[int, int]], button: str = "left", duration: float = 0.5) -> None:
+    async def drag(self, path: List[Tuple[int, int]], button: "MouseButton" = "left", duration: float = 0.5) -> None:
         await self._send_command(
             "drag", {"path": path, "button": button, "duration": duration}
         )
     # Keyboard Actions
+    async def key_down(self, key: "KeyType") -> None:
+        await self._send_command("key_down", {"key": key})
+    async def key_up(self, key: "KeyType") -> None:
+        await self._send_command("key_up", {"key": key})
     async def type_text(self, text: str) -> None:
         # Temporary fix for https://github.com/trycua/cua/issues/165
         # Check if text contains Unicode characters
@@ -459,6 +476,9 @@ class LinuxComputerInterface(BaseComputerInterface):
         await self._send_command("hotkey", {"keys": actual_keys})
     # Scrolling Actions
+    async def scroll(self, x: int, y: int) -> None:
+        await self._send_command("scroll", {"x": x, "y": y})
     async def scroll_down(self, clicks: int = 1) -> None:
         await self._send_command("scroll_down", {"clicks": clicks})
@@ -552,6 +572,50 @@ class LinuxComputerInterface(BaseComputerInterface):
         result = await self._send_command("directory_exists", {"path": path})
         return result.get("exists", False)
+    async def list_dir(self, path: str) -> list[str]:
+        result = await self._send_command("list_dir", {"path": path})
+        if not result.get("success", False):
+            raise RuntimeError(result.get("error", "Failed to list directory"))
+        return result.get("files", [])
+    async def read_text(self, path: str) -> str:
+        result = await self._send_command("read_text", {"path": path})
+        if not result.get("success", False):
+            raise RuntimeError(result.get("error", "Failed to read file"))
+        return result.get("content", "")
+    async def write_text(self, path: str, content: str) -> None:
+        result = await self._send_command("write_text", {"path": path, "content": content})
+        if not result.get("success", False):
+            raise RuntimeError(result.get("error", "Failed to write file"))
+    async def read_bytes(self, path: str) -> bytes:
+        result = await self._send_command("read_bytes", {"path": path})
+        if not result.get("success", False):
+            raise RuntimeError(result.get("error", "Failed to read file"))
+        content_b64 = result.get("content_b64", "")
+        return decode_base64_image(content_b64)
+    async def write_bytes(self, path: str, content: bytes) -> None:
+        result = await self._send_command("write_bytes", {"path": path, "content_b64": encode_base64_image(content)})
+        if not result.get("success", False):
+            raise RuntimeError(result.get("error", "Failed to write file"))
+    async def delete_file(self, path: str) -> None:
+        result = await self._send_command("delete_file", {"path": path})
+        if not result.get("success", False):
+            raise RuntimeError(result.get("error", "Failed to delete file"))
+    async def create_dir(self, path: str) -> None:
+        result = await self._send_command("create_dir", {"path": path})
+        if not result.get("success", False):
+            raise RuntimeError(result.get("error", "Failed to create directory"))
+    async def delete_dir(self, path: str) -> None:
+        result = await self._send_command("delete_dir", {"path": path})
+        if not result.get("success", False):
+            raise RuntimeError(result.get("error", "Failed to delete directory"))
     async def run_command(self, command: str) -> Tuple[str, str]:
         result = await self._send_command("run_command", {"command": command})
         if not result.get("success", False):

computer/interface/macos.py CHANGED Viewed

@@ -8,8 +8,8 @@ import websockets
 from ..logger import Logger, LogLevel
 from .base import BaseComputerInterface
-from ..utils import decode_base64_image, bytes_to_image, draw_box, resize_image
-from .models import Key, KeyType
+from ..utils import decode_base64_image, encode_base64_image, bytes_to_image, draw_box, resize_image
+from .models import Key, KeyType, MouseButton
 class MacOSComputerInterface(BaseComputerInterface):
@@ -22,10 +22,11 @@ class MacOSComputerInterface(BaseComputerInterface):
         self._closed = False
         self._last_ping = 0
         self._ping_interval = 5  # Send ping every 5 seconds
-        self._ping_timeout = 10  # Wait 10 seconds for pong response
+        self._ping_timeout = 120  # Wait 120 seconds for pong response
         self._reconnect_delay = 1  # Start with 1 second delay
         self._max_reconnect_delay = 30  # Maximum delay between reconnection attempts
         self._log_connection_attempts = True  # Flag to control connection attempt logging
+        self._command_lock = asyncio.Lock()  # Lock to ensure only one command at a time
         # Set logger name for macOS interface
         self.logger = Logger("cua.interface.macos", LogLevel.NORMAL)
@@ -85,7 +86,7 @@ class MacOSComputerInterface(BaseComputerInterface):
                                 close_timeout=5,
                                 compression=None,  # Disable compression to reduce overhead
                             ),
-                            timeout=30,
+                            timeout=120,
                         )
                         self.logger.info("WebSocket connection established")
@@ -219,35 +220,39 @@ class MacOSComputerInterface(BaseComputerInterface):
         retry_count = 0
         last_error = None
-        while retry_count < max_retries:
-            try:
-                await self._ensure_connection()
-                if not self._ws:
-                    raise ConnectionError("WebSocket connection is not established")
-                message = {"command": command, "params": params or {}}
-                await self._ws.send(json.dumps(message))
-                response = await asyncio.wait_for(self._ws.recv(), timeout=30)
-                return json.loads(response)
-            except Exception as e:
-                last_error = e
-                retry_count += 1
-                if retry_count < max_retries:
-                    # Only log at debug level for intermediate retries
-                    self.logger.debug(
-                        f"Command '{command}' failed (attempt {retry_count}/{max_retries}): {e}"
-                    )
-                    await asyncio.sleep(1)
-                    continue
-                else:
-                    # Only log at error level for the final failure
-                    self.logger.error(
-                        f"Failed to send command '{command}' after {max_retries} retries"
-                    )
-                    self.logger.debug(f"Command failure details: {e}")
-                    raise
+        # Acquire lock to ensure only one command is processed at a time
+        async with self._command_lock:
+            self.logger.debug(f"Acquired lock for command: {command}")
+            while retry_count < max_retries:
+                try:
+                    await self._ensure_connection()
+                    if not self._ws:
+                        raise ConnectionError("WebSocket connection is not established")
+                    message = {"command": command, "params": params or {}}
+                    await self._ws.send(json.dumps(message))
+                    response = await asyncio.wait_for(self._ws.recv(), timeout=120)
+                    self.logger.debug(f"Completed command: {command}")
+                    return json.loads(response)
+                except Exception as e:
+                    last_error = e
+                    retry_count += 1
+                    if retry_count < max_retries:
+                        # Only log at debug level for intermediate retries
+                        self.logger.debug(
+                            f"Command '{command}' failed (attempt {retry_count}/{max_retries}): {e}"
+                        )
+                        await asyncio.sleep(1)
+                        continue
+                    else:
+                        # Only log at error level for the final failure
+                        self.logger.error(
+                            f"Failed to send command '{command}' after {max_retries} retries"
+                        )
+                        self.logger.debug(f"Command failure details: {e}")
+                        raise
-        raise last_error if last_error else RuntimeError("Failed to send command")
+            raise last_error if last_error else RuntimeError("Failed to send command")
     async def wait_for_ready(self, timeout: int = 60, interval: float = 1.0):
         """Wait for WebSocket connection to become available."""
@@ -351,6 +356,12 @@ class MacOSComputerInterface(BaseComputerInterface):
         return await self._send_command("diorama_cmd", {"action": action, "arguments": arguments or {}})
     # Mouse Actions
+    async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
+        await self._send_command("mouse_down", {"x": x, "y": y, "button": button})
+    async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
+        await self._send_command("mouse_up", {"x": x, "y": y, "button": button})
     async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
         await self._send_command("left_click", {"x": x, "y": y})
@@ -374,6 +385,12 @@ class MacOSComputerInterface(BaseComputerInterface):
         )
     # Keyboard Actions
+    async def key_down(self, key: "KeyType") -> None:
+        await self._send_command("key_down", {"key": key})
+    async def key_up(self, key: "KeyType") -> None:
+        await self._send_command("key_up", {"key": key})
     async def type_text(self, text: str) -> None:
         # Temporary fix for https://github.com/trycua/cua/issues/165
         # Check if text contains Unicode characters
@@ -466,6 +483,9 @@ class MacOSComputerInterface(BaseComputerInterface):
         await self._send_command("hotkey", {"keys": actual_keys})
     # Scrolling Actions
+    async def scroll(self, x: int, y: int) -> None:
+        await self._send_command("scroll", {"x": x, "y": y})
     async def scroll_down(self, clicks: int = 1) -> None:
         await self._send_command("scroll_down", {"clicks": clicks})
@@ -559,6 +579,50 @@ class MacOSComputerInterface(BaseComputerInterface):
         result = await self._send_command("directory_exists", {"path": path})
         return result.get("exists", False)
+    async def list_dir(self, path: str) -> list[str]:
+        result = await self._send_command("list_dir", {"path": path})
+        if not result.get("success", False):
+            raise RuntimeError(result.get("error", "Failed to list directory"))
+        return result.get("files", [])
+    async def read_text(self, path: str) -> str:
+        result = await self._send_command("read_text", {"path": path})
+        if not result.get("success", False):
+            raise RuntimeError(result.get("error", "Failed to read file"))
+        return result.get("content", "")
+    async def write_text(self, path: str, content: str) -> None:
+        result = await self._send_command("write_text", {"path": path, "content": content})
+        if not result.get("success", False):
+            raise RuntimeError(result.get("error", "Failed to write file"))
+    async def read_bytes(self, path: str) -> bytes:
+        result = await self._send_command("read_bytes", {"path": path})
+        if not result.get("success", False):
+            raise RuntimeError(result.get("error", "Failed to read file"))
+        content_b64 = result.get("content_b64", "")
+        return decode_base64_image(content_b64)
+    async def write_bytes(self, path: str, content: bytes) -> None:
+        result = await self._send_command("write_bytes", {"path": path, "content_b64": encode_base64_image(content)})
+        if not result.get("success", False):
+            raise RuntimeError(result.get("error", "Failed to write file"))
+    async def delete_file(self, path: str) -> None:
+        result = await self._send_command("delete_file", {"path": path})
+        if not result.get("success", False):
+            raise RuntimeError(result.get("error", "Failed to delete file"))
+    async def create_dir(self, path: str) -> None:
+        result = await self._send_command("create_dir", {"path": path})
+        if not result.get("success", False):
+            raise RuntimeError(result.get("error", "Failed to create directory"))
+    async def delete_dir(self, path: str) -> None:
+        result = await self._send_command("delete_dir", {"path": path})
+        if not result.get("success", False):
+            raise RuntimeError(result.get("error", "Failed to delete directory"))
     async def run_command(self, command: str) -> Tuple[str, str]:
         result = await self._send_command("run_command", {"command": command})
         if not result.get("success", False):

computer/interface/models.py CHANGED Viewed

@@ -106,6 +106,9 @@ class Key(Enum):
 # Combined key type
 KeyType = Union[Key, NavigationKey, SpecialKey, ModifierKey, FunctionKey, str]
+# Key type for mouse actions
+MouseButton = Literal['left', 'right', 'middle']
 class AccessibilityWindow(TypedDict):
     """Information about a window in the accessibility tree."""
     app_name: str

computer/providers/cloud/provider.py CHANGED Viewed

@@ -52,11 +52,11 @@ class CloudProvider(BaseVMProvider):
         return []
     async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]:
-        logger.warning("CloudProvider.run_vm is not implemented")
-        return {"name": name, "status": "unavailable", "message": "CloudProvider is not implemented"}
+        # logger.warning("CloudProvider.run_vm is not implemented")
+        return {"name": name, "status": "unavailable", "message": "CloudProvider.run_vm is not implemented"}
     async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
-        logger.warning("CloudProvider.stop_vm is not implemented")
+        logger.warning("CloudProvider.stop_vm is not implemented. To clean up resources, please use Computer.disconnect()")
         return {"name": name, "status": "stopped", "message": "CloudProvider is not implemented"}
     async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]:

computer/ui/gradio/app.py CHANGED Viewed

@@ -463,7 +463,7 @@ async def execute(name, action, arguments):
         elif action == "left_click":
             if "x" in arguments and "y" in arguments:
                 await computer.interface.move_cursor(arguments["x"], arguments["y"])
-            await computer.interface.left_click()
+            await computer.interface.left_click(arguments["x"], arguments["y"])
             await asyncio.sleep(0.5)
         elif action == "right_click":
             if "x" in arguments and "y" in arguments:
@@ -528,43 +528,75 @@ async def execute(name, action, arguments):
     return results
-async def handle_init_computer(os_choice: str):
-    """Initialize the computer instance and tools for macOS or Ubuntu"""
+async def handle_init_computer(os_choice: str, app_list=None, provider="lume"):
+    """Initialize the computer instance and tools for macOS or Ubuntu
+    Args:
+        os_choice: The OS to use ("macOS" or "Ubuntu")
+        app_list: Optional list of apps to focus on using the app-use experiment
+        provider: The provider to use ("lume" or "self")
+    """
     global computer, tool_call_logs, tools
+    # Check if we should enable app-use experiment
+    use_app_experiment = app_list and len(app_list) > 0
+    experiments = ["app-use"] if use_app_experiment else None
+    # Determine if we should use host computer server
+    use_host_computer_server = provider == "self"
     if os_choice == "Ubuntu":
-        computer = Computer(
-            image="ubuntu-noble-vanilla:latest",
-            os_type="linux",
-            provider_type=VMProviderType.LUME,
-            display="1024x768",
-            memory="8GB",
-            cpu="4"
-        )
         os_type_str = "linux"
         image_str = "ubuntu-noble-vanilla:latest"
+    else:
+        os_type_str = "macos"
+        image_str = "macos-sequoia-cua:latest"
+    # Create computer instance with appropriate configuration
+    if use_host_computer_server:
+        computer = Computer(
+            os_type=os_type_str,
+            use_host_computer_server=True,
+            experiments=experiments
+        )
     else:
         computer = Computer(
-            image="macos-sequoia-cua:latest",
-            os_type="macos",
+            image=image_str,
+            os_type=os_type_str,
             provider_type=VMProviderType.LUME,
             display="1024x768",
             memory="8GB",
-            cpu="4"
+            cpu="4",
+            experiments=experiments
         )
-        os_type_str = "macos"
-        image_str = "macos-sequoia-cua:latest"
     await computer.run()
+    # If app list is provided, create desktop from apps
+    if use_app_experiment:
+        computer = computer.create_desktop_from_apps(app_list)
     # Log computer initialization as a tool call
-    result = await execute("computer", "initialize", {
+    init_params = {
         "os": os_type_str,
-        "image": image_str,
-        "display": "1024x768",
-        "memory": "8GB",
-        "cpu": "4"
-    })
+        "provider": provider
+    }
+    # Add VM-specific parameters if not using host computer server
+    if not use_host_computer_server:
+        init_params.update({
+            "image": image_str,
+            "display": "1024x768",
+            "memory": "8GB",
+            "cpu": "4"
+        })
+    # Add app list to the log if provided
+    if use_app_experiment:
+        init_params["apps"] = app_list
+        init_params["experiments"] = ["app-use"]
+    result = await execute("computer", "initialize", init_params)
     return result["screenshot"], json.dumps(tool_call_logs, indent=2)
@@ -1029,12 +1061,31 @@ def create_gradio_ui():
                     setup_status = gr.Textbox(label="Setup Status", value="")
                 with gr.Group():
-                    os_choice = gr.Radio(
-                        label="OS",
-                        choices=["macOS", "Ubuntu"],
-                        value="macOS",
-                        interactive=False # disable until the ubuntu image is ready
-                    )
+                    with gr.Accordion("Computer Configuration", open=False):
+                        with gr.Row():
+                            os_choice = gr.Radio(
+                                label="OS",
+                                choices=["macOS", "Ubuntu"],
+                                value="macOS",
+                                interactive=False # disable until the ubuntu image is ready
+                            )
+                            # Provider selection radio
+                            provider_choice = gr.Radio(
+                                label="Provider",
+                                choices=["lume", "self"],
+                                value="lume",
+                                info="'lume' uses a VM, 'self' uses the host computer server"
+                            )
+                        # App filtering dropdown for app-use experiment
+                        app_filter = gr.Dropdown(
+                            label="Filter by apps (App-Use)",
+                            multiselect=True,
+                            allow_custom_value=True,
+                            info="When apps are selected, the computer will focus on those apps using the app-use experiment"
+                        )
                     start_btn = gr.Button("Initialize Computer")
                 with gr.Group():
@@ -1199,7 +1250,7 @@ def create_gradio_ui():
         )
         img.select(handle_click, inputs=[img, click_type], outputs=[img, action_log])
-        start_btn.click(handle_init_computer, inputs=[os_choice], outputs=[img, action_log])
+        start_btn.click(handle_init_computer, inputs=[os_choice, app_filter, provider_choice], outputs=[img, action_log])
         wait_btn.click(handle_wait, outputs=[img, action_log])
         # DONE and FAIL buttons just do a placeholder action

{cua_computer-0.2.8.dist-info → cua_computer-0.2.10.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cua-computer
-Version: 0.2.8
+Version: 0.2.10
 Summary: Computer-Use Interface (CUI) framework powering Cua
 Author-Email: TryCua <gh@trycua.com>
 Requires-Python: >=3.11

{cua_computer-0.2.8.dist-info → cua_computer-0.2.10.dist-info}/RECORD RENAMED Viewed

@@ -1,18 +1,19 @@
 computer/__init__.py,sha256=QOxNrrJAuLRnsUC2zIFgRfzVSuDSXiYHlEF-9vkhV0o,1241
-computer/computer.py,sha256=FtHpNcVjQKNclLlQo5Idt2tBYnuf0417nzVjDMGTRxk,33306
-computer/diorama_computer.py,sha256=tOzTCTyARD38Eeoc8gjF1rF3eo0kYCkV-70bsxsKAZE,3297
+computer/computer.py,sha256=vFJEyMkvTlT49SEO1QgLe8yMX6DbvdI9eDWjSd3CwCQ,40555
+computer/diorama_computer.py,sha256=jOP7_eXxxU6SMIoE25ni0YXPK0E7p5sZeLKmkYLh6G8,3871
+computer/helpers.py,sha256=0ob9d9ynVGi0JRxhHCgXTuHPHFpa8AVKldn6k0hvxOo,1766
 computer/interface/__init__.py,sha256=xQvYjq5PMn9ZJOmRR5mWtONTl_0HVd8ACvW6AQnzDdw,262
-computer/interface/base.py,sha256=wmLBiX7rB8cG2Q4fmchdKpjralktzicuYhAh6fDIeqw,6025
+computer/interface/base.py,sha256=Uc3pp-8_9YJpawAwt1ixaVN3N0_MtY6nAOSvuKy0Mpc,7863
 computer/interface/factory.py,sha256=RjAZAB_jFuS8JierYjLbapRX6RqFE0qE3BiIyP5UDOE,1441
-computer/interface/linux.py,sha256=CT1N0QA52TNKBbFG2LXdN6yAGWWJ12_2hTMEI8yNoM4,26865
-computer/interface/macos.py,sha256=SZh3CB_Co9y5xPVfPoo1yIXFqAyDoRSx5nEXNN1od1I,27340
-computer/interface/models.py,sha256=RZKVUdwKrKUoFqwlx2Dk8Egkmq_AInlIu_d0xg7SZzw,3238
+computer/interface/linux.py,sha256=40SXd-xqYWFUaTnx3Tf7lIDEtluNwYoDkCZaESkIvRE,30468
+computer/interface/macos.py,sha256=uFU9bmPJqPPxlUBw9u1TG3ksqXqB4azJ0pYYx9cRM6w,30848
+computer/interface/models.py,sha256=CYbX3PLlWqjFuDiLWMiBzPmmXB8_g9VNLfBFBC6RtvI,3317
 computer/logger.py,sha256=UVvnmZGOWVF9TCsixEbeQnDZ3wBPAJ2anW3Zp-MoJ8Y,2896
 computer/models.py,sha256=iFNM1QfZArD8uf66XJXb2EDIREsfrxqqA5_liLBMfrE,1188
 computer/providers/__init__.py,sha256=hS9lLxmmHa1u82XJJ_xuqSKipClsYUEPx-8OK9ogtVg,194
 computer/providers/base.py,sha256=J_9r6pJsvGAFDRl56jog_atN7e8uzrvlCQEdRRqye_U,3624
 computer/providers/cloud/__init__.py,sha256=SDAcfhI2BlmVBrBZOHxQd3i1bJZjMIfl7QgmqjXa4z8,144
-computer/providers/cloud/provider.py,sha256=gpBl_ZVbwk-0FhYycne-69KslnrAoDSZcyzetpLfiKE,2864
+computer/providers/cloud/provider.py,sha256=XEdCrnZzRwvvkPHIwfhfJl3xB6W7tZKdBI0duKEXLw4,2930
 computer/providers/factory.py,sha256=9qVdt-fIovSNOokGMZ_2B1VPCLSZeDky4edcXyelZy4,4616
 computer/providers/lume/__init__.py,sha256=E6hTbVQF5lLZD8JyG4rTwUnCBO4q9K8UkYNQ31R0h7c,193
 computer/providers/lume/provider.py,sha256=grLZeXd4Y8iYsNq2gfNGcQq1bnTcNYNepEv-mxmROG4,20562
@@ -22,9 +23,9 @@ computer/providers/lumier/provider.py,sha256=CXwAKwJfR9ALFGM5u7UIZ-YrFwPvew_01wT
 computer/telemetry.py,sha256=FvNFpxgeRuCMdNpREuSL7bOMZy9gSzY4J0rLeNDw0CU,3746
 computer/ui/__init__.py,sha256=pmo05ek9qiB_x7DPeE6Vf_8RsIOqTD0w1dBLMHfoOnY,45
 computer/ui/gradio/__init__.py,sha256=5_KimixM48-X74FCsLw7LbSt39MQfUMEL8-M9amK3Cw,117
-computer/ui/gradio/app.py,sha256=o31nphBcb6zM5OKPuODTjuOzSJ3lt61kQHpUeMBBs70,65077
+computer/ui/gradio/app.py,sha256=pLMoMpxyKsGhg9wlsiqyKiRujd-lzubs0nGWAtkleL0,67316
 computer/utils.py,sha256=zY50NXB7r51GNLQ6l7lhG_qv0_ufpQ8n0-SDhCei8m4,2838
-cua_computer-0.2.8.dist-info/METADATA,sha256=DU7TQmx6VicwaYyE6faehHUy6oLweVKJ1nGBOctOSGY,5844
-cua_computer-0.2.8.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
-cua_computer-0.2.8.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
-cua_computer-0.2.8.dist-info/RECORD,,
+cua_computer-0.2.10.dist-info/METADATA,sha256=dC5av4YtGJH20X77m7FPpn8J1bUFmU1p1J7qJX32HGs,5845
+cua_computer-0.2.10.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
+cua_computer-0.2.10.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+cua_computer-0.2.10.dist-info/RECORD,,

{cua_computer-0.2.8.dist-info → cua_computer-0.2.10.dist-info}/WHEEL RENAMED Viewed

File without changes

{cua_computer-0.2.8.dist-info → cua_computer-0.2.10.dist-info}/entry_points.txt RENAMED Viewed

File without changes

cua-computer 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl

cua-computer 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl