cua-agent 0.1.29__py3-none-any.whl → 0.1.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

@@ -0,0 +1,283 @@
1
+ """Computer tool for UI-TARS."""
2
+
3
+ import asyncio
4
+ import base64
5
+ import logging
6
+ import re
7
+ from typing import Any, Dict, List, Optional, Literal, Union
8
+
9
+ from computer import Computer
10
+ from ....core.tools.base import ToolResult, ToolFailure
11
+ from ....core.tools.computer import BaseComputerTool
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class ComputerTool(BaseComputerTool):
    """
    A tool that allows the UI-TARS agent to interact with the screen, keyboard, and mouse.
    """

    name: str = "computer"
    width: Optional[int] = None
    height: Optional[int] = None
    computer: Computer

    def __init__(self, computer: Computer):
        """Initialize the computer tool.

        Args:
            computer: Computer instance
        """
        super().__init__(computer)
        self.computer = computer
        self.width = None
        self.height = None
        self.logger = logging.getLogger(__name__)

    def to_params(self) -> Dict[str, Any]:
        """Convert tool to API parameters.

        Returns:
            Dictionary with tool parameters

        Raises:
            RuntimeError: If screen dimensions have not been initialized yet.
        """
        if self.width is None or self.height is None:
            raise RuntimeError(
                "Screen dimensions not initialized. Call initialize_dimensions() first."
            )
        return {
            "type": "computer",
            "display_width": self.width,
            "display_height": self.height,
        }

    async def initialize_dimensions(self) -> None:
        """Initialize screen dimensions from the computer interface."""
        try:
            display_size = await self.computer.interface.get_screen_size()
            self.width = display_size["width"]
            self.height = display_size["height"]
            self.logger.info(f"Initialized screen dimensions to {self.width}x{self.height}")
        except Exception as e:
            # Fall back to defaults if we can't get accurate dimensions
            self.width = 1024
            self.height = 768
            self.logger.warning(
                f"Failed to get screen dimensions, using defaults: {self.width}x{self.height}. Error: {e}"
            )

    async def _result_with_screenshot(self, output: str, delay: float = 0.5) -> ToolResult:
        """Wait for the UI to settle, then build a ToolResult with a fresh screenshot.

        Args:
            output: Human-readable description of the action just performed.
            delay: Seconds to sleep before capturing, so the UI can update.

        Returns:
            ToolResult carrying the output text and a base64-encoded screenshot.
        """
        await asyncio.sleep(delay)
        screenshot = await self.computer.interface.screenshot()
        base64_screenshot = base64.b64encode(screenshot).decode("utf-8")
        return ToolResult(output=output, base64_image=base64_screenshot)

    async def __call__(
        self,
        *,
        action: str,
        **kwargs,
    ) -> ToolResult:
        """Execute a computer action.

        Args:
            action: The action to perform (based on UI-TARS action space)
            **kwargs: Additional parameters for the action

        Returns:
            ToolResult containing action output and possibly a base64 image
        """
        try:
            # Ensure dimensions are initialized
            if self.width is None or self.height is None:
                await self.initialize_dimensions()
                if self.width is None or self.height is None:
                    return ToolFailure(error="Failed to initialize screen dimensions")

            # Handle actions defined in UI-TARS action space (from prompts.py)
            # Handle standard click (left click)
            if action == "click":
                if "x" in kwargs and "y" in kwargs:
                    x, y = kwargs["x"], kwargs["y"]
                    await self.computer.interface.left_click(x, y)
                    return await self._result_with_screenshot(f"Clicked at ({x}, {y})")
                else:
                    return ToolFailure(error="Missing coordinates for click action")

            # Handle double click
            elif action == "left_double":
                if "x" in kwargs and "y" in kwargs:
                    x, y = kwargs["x"], kwargs["y"]
                    await self.computer.interface.double_click(x, y)
                    return await self._result_with_screenshot(f"Double-clicked at ({x}, {y})")
                else:
                    return ToolFailure(error="Missing coordinates for left_double action")

            # Handle right click
            elif action == "right_single":
                if "x" in kwargs and "y" in kwargs:
                    x, y = kwargs["x"], kwargs["y"]
                    await self.computer.interface.right_click(x, y)
                    return await self._result_with_screenshot(f"Right-clicked at ({x}, {y})")
                else:
                    return ToolFailure(error="Missing coordinates for right_single action")

            # Handle typing text
            elif action == "type_text":
                if "text" in kwargs:
                    text = kwargs["text"]
                    await self.computer.interface.type_text(text)
                    # Typing settles faster than pointer actions, so wait less.
                    return await self._result_with_screenshot(f"Typed: {text}", delay=0.3)
                else:
                    return ToolFailure(error="Missing text for type action")

            # Handle hotkey
            elif action == "hotkey":
                if "keys" in kwargs:
                    keys = kwargs["keys"]

                    if len(keys) > 1:
                        await self.computer.interface.hotkey(*keys)
                    else:
                        # Single key press
                        await self.computer.interface.press_key(keys[0])

                    return await self._result_with_screenshot(
                        f"Pressed hotkey: {', '.join(keys)}", delay=0.3
                    )
                else:
                    return ToolFailure(error="Missing keys for hotkey action")

            # Handle drag action
            elif action == "drag":
                if all(k in kwargs for k in ["start_x", "start_y", "end_x", "end_y"]):
                    start_x, start_y = kwargs["start_x"], kwargs["start_y"]
                    end_x, end_y = kwargs["end_x"], kwargs["end_y"]

                    # Perform drag: move to the start point first, then drag.
                    await self.computer.interface.move_cursor(start_x, start_y)
                    await self.computer.interface.drag_to(end_x, end_y)

                    return await self._result_with_screenshot(
                        f"Dragged from ({start_x}, {start_y}) to ({end_x}, {end_y})"
                    )
                else:
                    return ToolFailure(error="Missing coordinates for drag action")

            # Handle scroll action
            elif action == "scroll":
                if all(k in kwargs for k in ["x", "y", "direction"]):
                    x, y = kwargs["x"], kwargs["y"]
                    direction = kwargs["direction"]

                    # Move cursor to position before scrolling
                    await self.computer.interface.move_cursor(x, y)

                    # Scroll based on direction
                    if direction == "down":
                        await self.computer.interface.scroll_down(5)
                    elif direction == "up":
                        await self.computer.interface.scroll_up(5)
                    elif direction == "right":
                        # Horizontal scrolling is not supported by the interface yet.
                        pass  # await self.computer.interface.scroll_right(5)
                    elif direction == "left":
                        pass  # await self.computer.interface.scroll_left(5)
                    else:
                        return ToolFailure(error=f"Invalid scroll direction: {direction}")

                    return await self._result_with_screenshot(f"Scrolled {direction} at ({x}, {y})")
                else:
                    return ToolFailure(error="Missing parameters for scroll action")

            # Handle wait action
            elif action == "wait":
                # Sleep for 5 seconds as specified in the action space,
                # then capture the resulting screen state.
                return await self._result_with_screenshot("Waited for 5 seconds", delay=5)

            # Handle finished action (task completion) — no screenshot needed.
            elif action == "finished":
                content = kwargs.get("content", "Task completed")
                return ToolResult(
                    output=f"Task finished: {content}",
                )

            else:
                return ToolFailure(error=f"Unsupported action: {action}")

        except Exception as e:
            self.logger.error(f"Error in ComputerTool.__call__: {str(e)}")
            return ToolFailure(error=f"Failed to execute {action}: {str(e)}")
@@ -0,0 +1,60 @@
1
+ """Tool manager for the UI-TARS provider."""
2
+
3
+ import logging
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ from computer import Computer
7
+ from ....core.tools import BaseToolManager
8
+ from ....core.tools.collection import ToolCollection
9
+ from .computer import ComputerTool
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class ToolManager(BaseToolManager):
    """Manages UI-TARS provider tool initialization and execution."""

    def __init__(self, computer: Computer):
        """Initialize the tool manager.

        Args:
            computer: Computer instance for computer-related tools
        """
        super().__init__(computer)
        # Initialize UI-TARS-specific tools
        self.computer_tool = ComputerTool(self.computer)
        self._initialized = False

    def _initialize_tools(self) -> ToolCollection:
        """Initialize all available tools.

        Returns:
            ToolCollection containing the UI-TARS computer tool.
        """
        return ToolCollection(self.computer_tool)

    async def _initialize_tools_specific(self) -> None:
        """Initialize UI-TARS provider-specific tool requirements.

        Queries the computer for its screen size so the computer tool can
        report accurate display dimensions.
        """
        await self.computer_tool.initialize_dimensions()

    def get_tool_params(self) -> List[Dict[str, Any]]:
        """Get tool parameters for API calls.

        Returns:
            List of tool parameters for the current provider's API

        Raises:
            RuntimeError: If initialize() has not been called yet.
        """
        if self.tools is None:
            raise RuntimeError("Tools not initialized. Call initialize() first.")

        return self.tools.to_params()

    async def execute_tool(self, name: str, tool_input: Dict[str, Any]) -> Any:
        """Execute a tool with the given input.

        Args:
            name: Name of the tool to execute
            tool_input: Input parameters for the tool

        Returns:
            Result of the tool execution

        Raises:
            RuntimeError: If initialize() has not been called yet.
        """
        if self.tools is None:
            raise RuntimeError("Tools not initialized. Call initialize() first.")

        return await self.tools.run(name=name, tool_input=tool_input)
@@ -0,0 +1,153 @@
1
+ """Utility functions for the UI-TARS provider."""
2
+
3
+ import logging
4
+ import base64
5
+ import re
6
+ from typing import Any, Dict, List, Optional, Union, Tuple
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
def add_box_token(input_string: str) -> str:
    """Add box tokens to the coordinates in the model response.

    Wraps every start_box/end_box coordinate pair in <|box_start|>/<|box_end|>
    markers, e.g. start_box='(1,2)' -> start_box='<|box_start|>(1,2)<|box_end|>'.

    Args:
        input_string: Raw model response

    Returns:
        String with box tokens added (unchanged when no action/box is present)
    """
    if "Action: " not in input_string or "start_box=" not in input_string:
        return input_string

    suffix = input_string.split("Action: ")[0] + "Action: "
    actions = input_string.split("Action: ")[1:]

    # Substitute in one pass with re.sub: the previous findall + str.replace
    # approach missed coordinates written with whitespace after the comma
    # (e.g. "(10, 20)"), because the replacement target was rebuilt without it.
    box_pattern = re.compile(r"(start_box|end_box)='\((\d+),\s*(\d+)\)'")
    processed_actions = [
        box_pattern.sub(
            lambda m: f"{m.group(1)}='<|box_start|>({m.group(2)},{m.group(3)})<|box_end|>'",
            action.strip(),
        )
        for action in actions
    ]

    return suffix + "\n\n".join(processed_actions)
40
+
41
+
42
def parse_actions(response: str) -> List[str]:
    """Extract the individual action strings from a UI-TARS model response.

    Args:
        response: The raw model response text

    Returns:
        List of parsed actions (empty when no "Action:" marker is present)
    """
    if "Action:" not in response:
        return []

    # Everything after the last "Action:" marker holds the action text.
    action_text = response.split("Action:")[-1].strip()

    # Multiple actions are separated by blank lines; drop empty fragments.
    return [part.strip() for part in action_text.split("\n\n") if part.strip()]
64
+
65
+
66
def parse_action_parameters(action: str) -> Tuple[str, Dict[str, Any]]:
    """Parse an action string into a tool-invocation name and arguments.

    Args:
        action: The action string to parse, e.g. "click(start_box='(10,20)')"

    Returns:
        Tuple of (action_name, tool_args), where tool_args is ready to pass
        to the computer tool. An unparseable action yields ("", {}).
    """
    # Handle "finished" action (task completion carries no parameters here)
    if action.startswith("finished"):
        return "finished", {}

    # Parse action parameters of the form name(param='...', ...)
    action_match = re.match(r"(\w+)\((.*)\)", action)
    if not action_match:
        logger.warning(f"Could not parse action: {action}")
        return "", {}

    action_name = action_match.group(1)
    action_params_str = action_match.group(2)

    tool_args: Dict[str, Any] = {"action": action_name}

    # Extract coordinate values from the action
    if "start_box" in action_params_str:
        # Coordinates may or may not be wrapped in <|box_start|>/<|box_end|> tokens.
        box_pattern = r"(start_box|end_box)='(?:<\|box_start\|>)?\((\d+),\s*(\d+)\)(?:<\|box_end\|>)?'"
        box_matches = re.findall(box_pattern, action_params_str)

        # Handle click-type actions: take coordinates from the first start_box.
        if action_name in ["click", "left_double", "right_single"]:
            for box_type, x, y in box_matches:
                if box_type == "start_box":
                    tool_args["x"] = int(x)
                    tool_args["y"] = int(y)
                    break

        # Handle drag action: needs both endpoints.
        elif action_name == "drag":
            start_x, start_y = None, None
            end_x, end_y = None, None

            for box_type, x, y in box_matches:
                if box_type == "start_box":
                    start_x, start_y = int(x), int(y)
                elif box_type == "end_box":
                    end_x, end_y = int(x), int(y)

            # Only emit drag coordinates when both endpoints were found.
            if None not in (start_x, start_y, end_x, end_y):
                tool_args["start_x"] = start_x
                tool_args["start_y"] = start_y
                tool_args["end_x"] = end_x
                tool_args["end_y"] = end_y

        # Handle scroll action: anchor point plus a direction.
        elif action_name == "scroll":
            for box_type, x, y in box_matches:
                if box_type == "start_box":
                    tool_args["x"] = int(x)
                    tool_args["y"] = int(y)
                    break

            direction_match = re.search(r"direction='([^']+)'", action_params_str)
            if direction_match:
                tool_args["direction"] = direction_match.group(1)

    # Handle typing text (no box coordinates in this action).
    elif action_name == "type":
        content_match = re.search(r"content='([^']*)'", action_params_str)
        if content_match:
            # Unescape characters the model escapes inside the quoted content.
            text = content_match.group(1).replace("\\'", "'").replace('\\"', '"').replace("\\n", "\n")
            # The computer tool exposes this as "type_text".
            tool_args = {"action": "type_text", "text": text}

    # Handle hotkey (space-separated key combination).
    elif action_name == "hotkey":
        key_match = re.search(r"key='([^']*)'", action_params_str)
        if key_match:
            keys = key_match.group(1).split()
            tool_args = {"action": "hotkey", "keys": keys}

    return action_name, tool_args
agent/ui/gradio/app.py CHANGED
@@ -162,6 +162,10 @@ MODEL_MAPPINGS = {
162
162
  "claude-3-5-sonnet-20240620": "claude-3-5-sonnet-20240620",
163
163
  "claude-3-7-sonnet-20250219": "claude-3-7-sonnet-20250219",
164
164
  },
165
+ "uitars": {
166
+ # UI-TARS models default to custom endpoint
167
+ "default": "ByteDance-Seed/UI-TARS-1.5-7B",
168
+ },
165
169
  "ollama": {
166
170
  # For Ollama models, we keep the original name
167
171
  "default": "llama3", # A common default model
@@ -191,6 +195,7 @@ def get_provider_and_model(model_name: str, loop_provider: str) -> tuple:
191
195
  "ANTHROPIC": AgentLoop.ANTHROPIC,
192
196
  "OMNI": AgentLoop.OMNI,
193
197
  "OMNI-OLLAMA": AgentLoop.OMNI, # Special case for Ollama models with OMNI parser
198
+ "UITARS": AgentLoop.UITARS, # UI-TARS implementation
194
199
  }
195
200
  agent_loop = loop_provider_map.get(loop_provider, AgentLoop.OPENAI)
196
201
 
@@ -281,7 +286,9 @@ def get_provider_and_model(model_name: str, loop_provider: str) -> tuple:
281
286
  # Assign the determined model name
282
287
  model_name_to_use = cleaned_model_name
283
288
  # agent_loop remains AgentLoop.OMNI
284
-
289
+ elif agent_loop == AgentLoop.UITARS:
290
+ provider = LLMProvider.OAICOMPAT
291
+ model_name_to_use = MODEL_MAPPINGS["uitars"]["default"] # Default
285
292
  else:
286
293
  # Default to OpenAI if unrecognized loop
287
294
  provider = LLMProvider.OPENAI
@@ -551,6 +558,7 @@ def create_gradio_ui(
551
558
  "OPENAI": openai_models,
552
559
  "ANTHROPIC": anthropic_models,
553
560
  "OMNI": omni_models + ["Custom model..."], # Add custom model option
561
+ "UITARS": ["Custom model..."], # UI-TARS options
554
562
  }
555
563
 
556
564
  # --- Apply Saved Settings (override defaults if available) ---
@@ -692,7 +700,7 @@ def create_gradio_ui(
692
700
  with gr.Accordion("Configuration", open=True):
693
701
  # Configuration options
694
702
  agent_loop = gr.Dropdown(
695
- choices=["OPENAI", "ANTHROPIC", "OMNI"],
703
+ choices=["OPENAI", "ANTHROPIC", "OMNI", "UITARS"],
696
704
  label="Agent Loop",
697
705
  value=initial_loop,
698
706
  info="Select the agent loop provider",
@@ -807,6 +815,8 @@ def create_gradio_ui(
807
815
  provider, cleaned_model_name_from_func, agent_loop_type = (
808
816
  get_provider_and_model(model_string_to_analyze, agent_loop_choice)
809
817
  )
818
+
819
+ print(f"provider={provider} cleaned_model_name_from_func={cleaned_model_name_from_func} agent_loop_type={agent_loop_type} agent_loop_choice={agent_loop_choice}")
810
820
 
811
821
  # Determine the final model name to send to the agent
812
822
  # If custom selected, use the custom text box value, otherwise use the cleaned name
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-agent
3
- Version: 0.1.29
3
+ Version: 0.1.31
4
4
  Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: >=3.10
@@ -21,6 +21,8 @@ Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "anthropic"
21
21
  Provides-Extra: openai
22
22
  Requires-Dist: openai<2.0.0,>=1.14.0; extra == "openai"
23
23
  Requires-Dist: httpx<0.29.0,>=0.27.0; extra == "openai"
24
+ Provides-Extra: uitars
25
+ Requires-Dist: httpx<0.29.0,>=0.27.0; extra == "uitars"
24
26
  Provides-Extra: ui
25
27
  Requires-Dist: gradio<6.0.0,>=5.23.3; extra == "ui"
26
28
  Requires-Dist: python-dotenv<2.0.0,>=1.0.1; extra == "ui"
@@ -99,6 +101,7 @@ pip install "cua-agent[all]"
99
101
  # or install specific loop providers
100
102
  pip install "cua-agent[openai]" # OpenAI Cua Loop
101
103
  pip install "cua-agent[anthropic]" # Anthropic Cua Loop
104
+ pip install "cua-agent[uitars]" # UI-Tars support
102
105
  pip install "cua-agent[omni]" # Cua Loop based on OmniParser (includes Ollama for local models)
103
106
  pip install "cua-agent[ui]" # Gradio UI for the agent
104
107
  ```
@@ -118,6 +121,9 @@ async with Computer() as macos_computer:
118
121
  # or
119
122
  # loop=AgentLoop.OMNI,
120
123
  # model=LLM(provider=LLMProvider.OLLAMA, model="gemma3")
124
+ # or
125
+ # loop=AgentLoop.UITARS,
126
+ # model=LLM(provider=LLMProvider.OAICOMPAT, model="tgi", provider_base_url="https://**************.us-east-1.aws.endpoints.huggingface.cloud/v1")
121
127
  )
122
128
 
123
129
  tasks = [
@@ -143,7 +149,13 @@ Refer to these notebooks for step-by-step guides on how to use the Computer-Use
143
149
 
144
150
  ## Using the Gradio UI
145
151
 
146
- The agent includes a Gradio-based user interface for easy interaction. To use it:
152
+ The agent includes a Gradio-based user interface for easier interaction.
153
+
154
+ <div align="center">
155
+ <img src="../../img/agent_gradio_ui.png"/>
156
+ </div>
157
+
158
+ To use it:
147
159
 
148
160
  ```bash
149
161
  # Install with Gradio support
@@ -192,6 +204,10 @@ The Gradio UI provides:
192
204
  - Configuration of agent parameters
193
205
  - Chat interface for interacting with the agent
194
206
 
207
+ ### Using UI-TARS
208
+
209
+ You can use UI-TARS by first following the [deployment guide](https://github.com/bytedance/UI-TARS/blob/main/README_deploy.md). This will give you a provider URL like this: `https://**************.us-east-1.aws.endpoints.huggingface.cloud/v1` which you can use in the gradio UI.
210
+
195
211
  ## Agent Loops
196
212
 
197
213
 The `cua-agent` package provides four agent loop variations, based on different CUA model providers and techniques:
@@ -200,6 +216,7 @@ The `cua-agent` package provides three agent loops variations, based on differen
200
216
  |:-----------|:-----------------|:------------|:-------------|
201
217
  | `AgentLoop.OPENAI` | • `computer_use_preview` | Use OpenAI Operator CUA model | Not Required |
202
218
  | `AgentLoop.ANTHROPIC` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219` | Use Anthropic Computer-Use | Not Required |
219
+ | `AgentLoop.UITARS` | • `ByteDance-Seed/UI-TARS-1.5-7B` | Uses ByteDance's UI-TARS 1.5 model | Not Required |
203
220
  | `AgentLoop.OMNI` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `phi4`<br>• `phi4-mini`<br>• `gemma3`<br>• `...`<br>• `Any Ollama or OpenAI-compatible model` | Use OmniParser for element pixel-detection (SoM) and any VLMs for UI Grounding and Reasoning | OmniParser |
204
221
 
205
222
  ## AgentResponse
@@ -241,25 +258,9 @@ async for result in agent.run(task):
241
258
  print(output)
242
259
  ```
243
260
 
244
- ### Gradio UI
245
-
246
- You can also interact with the agent using a Gradio interface.
247
-
248
- ```python
249
- # Ensure environment variables (e.g., API keys) are loaded
250
- # You might need a helper function like load_dotenv_files() if using .env
251
- # from utils import load_dotenv_files
252
- # load_dotenv_files()
253
-
254
- from agent.ui.gradio.app import create_gradio_ui
255
-
256
- app = create_gradio_ui()
257
- app.launch(share=False)
258
- ```
259
-
260
261
  **Note on Settings Persistence:**
261
262
 
262
263
  * The Gradio UI automatically saves your configuration (Agent Loop, Model Choice, Custom Base URL, Save Trajectory state, Recent Images count) to a file named `.gradio_settings.json` in the project's root directory when you successfully run a task.
263
264
  * This allows your preferences to persist between sessions.
264
265
  * API keys entered into the custom provider field are **not** saved in this file for security reasons. Manage API keys using environment variables (e.g., `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`) or a `.env` file.
265
- * It's recommended to add `.gradio_settings.json` to your `.gitignore` file.
266
+ * It's recommended to add `.gradio_settings.json` to your `.gitignore` file.