cua-agent 0.4.14__py3-none-any.whl → 0.7.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (82)
  1. agent/__init__.py +4 -19
  2. agent/__main__.py +2 -1
  3. agent/adapters/__init__.py +6 -0
  4. agent/adapters/azure_ml_adapter.py +283 -0
  5. agent/adapters/cua_adapter.py +161 -0
  6. agent/adapters/huggingfacelocal_adapter.py +67 -125
  7. agent/adapters/human_adapter.py +116 -114
  8. agent/adapters/mlxvlm_adapter.py +370 -0
  9. agent/adapters/models/__init__.py +41 -0
  10. agent/adapters/models/generic.py +78 -0
  11. agent/adapters/models/internvl.py +290 -0
  12. agent/adapters/models/opencua.py +115 -0
  13. agent/adapters/models/qwen2_5_vl.py +78 -0
  14. agent/agent.py +431 -241
  15. agent/callbacks/__init__.py +10 -3
  16. agent/callbacks/base.py +45 -31
  17. agent/callbacks/budget_manager.py +22 -10
  18. agent/callbacks/image_retention.py +54 -98
  19. agent/callbacks/logging.py +55 -42
  20. agent/callbacks/operator_validator.py +140 -0
  21. agent/callbacks/otel.py +291 -0
  22. agent/callbacks/pii_anonymization.py +19 -16
  23. agent/callbacks/prompt_instructions.py +47 -0
  24. agent/callbacks/telemetry.py +106 -69
  25. agent/callbacks/trajectory_saver.py +178 -70
  26. agent/cli.py +269 -119
  27. agent/computers/__init__.py +14 -9
  28. agent/computers/base.py +32 -19
  29. agent/computers/cua.py +52 -25
  30. agent/computers/custom.py +78 -71
  31. agent/decorators.py +23 -14
  32. agent/human_tool/__init__.py +2 -7
  33. agent/human_tool/__main__.py +6 -2
  34. agent/human_tool/server.py +48 -37
  35. agent/human_tool/ui.py +359 -235
  36. agent/integrations/hud/__init__.py +164 -74
  37. agent/integrations/hud/agent.py +338 -342
  38. agent/integrations/hud/proxy.py +297 -0
  39. agent/loops/__init__.py +44 -14
  40. agent/loops/anthropic.py +590 -492
  41. agent/loops/base.py +19 -15
  42. agent/loops/composed_grounded.py +142 -144
  43. agent/loops/fara/__init__.py +8 -0
  44. agent/loops/fara/config.py +506 -0
  45. agent/loops/fara/helpers.py +357 -0
  46. agent/loops/fara/schema.py +143 -0
  47. agent/loops/gelato.py +183 -0
  48. agent/loops/gemini.py +935 -0
  49. agent/loops/generic_vlm.py +601 -0
  50. agent/loops/glm45v.py +140 -135
  51. agent/loops/gta1.py +48 -51
  52. agent/loops/holo.py +218 -0
  53. agent/loops/internvl.py +180 -0
  54. agent/loops/moondream3.py +493 -0
  55. agent/loops/omniparser.py +326 -226
  56. agent/loops/openai.py +63 -56
  57. agent/loops/opencua.py +134 -0
  58. agent/loops/uiins.py +175 -0
  59. agent/loops/uitars.py +262 -212
  60. agent/loops/uitars2.py +951 -0
  61. agent/playground/__init__.py +5 -0
  62. agent/playground/server.py +301 -0
  63. agent/proxy/examples.py +196 -0
  64. agent/proxy/handlers.py +255 -0
  65. agent/responses.py +486 -339
  66. agent/tools/__init__.py +24 -0
  67. agent/tools/base.py +253 -0
  68. agent/tools/browser_tool.py +423 -0
  69. agent/types.py +20 -5
  70. agent/ui/__init__.py +1 -1
  71. agent/ui/__main__.py +1 -1
  72. agent/ui/gradio/app.py +25 -22
  73. agent/ui/gradio/ui_components.py +314 -167
  74. cua_agent-0.7.16.dist-info/METADATA +85 -0
  75. cua_agent-0.7.16.dist-info/RECORD +79 -0
  76. {cua_agent-0.4.14.dist-info → cua_agent-0.7.16.dist-info}/WHEEL +1 -1
  77. agent/integrations/hud/adapter.py +0 -121
  78. agent/integrations/hud/computer_handler.py +0 -187
  79. agent/telemetry.py +0 -142
  80. cua_agent-0.4.14.dist-info/METADATA +0 -436
  81. cua_agent-0.4.14.dist-info/RECORD +0 -50
  82. {cua_agent-0.4.14.dist-info → cua_agent-0.7.16.dist-info}/entry_points.txt +0 -0
agent/playground/__init__.py
@@ -0,0 +1,5 @@
+"""Playground server for Cua agents."""
+
+from .server import PlaygroundServer
+
+__all__ = ["PlaygroundServer"]
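
The hunk above adds agent/playground/__init__.py, which simply re-exports the PlaygroundServer class implemented in agent/playground/server.py (the next hunk). A minimal usage sketch, not taken from the package itself, assuming the wheel is installed so that the agent package is importable:

    # Sketch: start the playground HTTP server locally (assumes cua-agent is installed).
    from agent.playground import PlaygroundServer

    server = PlaygroundServer()   # optionally pass a pre-configured agent instance
    server.start(port=8000)       # blocking; a free port is chosen automatically when port is None
    # From inside an existing event loop, the async variant can be awaited instead:
    #     await server.start_async(port=8000, open_browser=False)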
agent/playground/server.py
@@ -0,0 +1,301 @@
+"""Playground server implementation for Cua agents."""
+
+import asyncio
+import logging
+import os
+import platform
+import socket
+import traceback
+import webbrowser
+from typing import Any, Dict, List, Optional, Union
+from urllib.parse import quote
+
+import uvicorn
+from fastapi import FastAPI, HTTPException, Request
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+
+logger = logging.getLogger(__name__)
+
+
+class PlaygroundServer:
+    """Playground server for running Cua agents via HTTP API."""
+
+    def __init__(self, agent_instance=None):
+        """
+        Initialize the playground server.
+
+        Args:
+            agent_instance: Optional pre-configured agent instance to use
+        """
+        self.agent_instance = agent_instance
+        self.app = FastAPI(
+            title="Cua Playground Server",
+            description="Playground server for Cua agents",
+            version="0.1.0",
+        )
+        self._setup_middleware()
+        self._setup_routes()
+        self.server = None
+        self.port = None
+
+    def _setup_middleware(self):
+        """Setup CORS middleware."""
+        self.app.add_middleware(
+            CORSMiddleware,
+            allow_origins=["*"],
+            allow_credentials=True,
+            allow_methods=["*"],
+            allow_headers=["*"],
+        )
+
+    def _setup_routes(self):
+        """Setup API routes."""
+
+        @self.app.get("/status")
+        async def status():
+            """Health check endpoint."""
+            sys = platform.system().lower()
+            if "darwin" in sys or sys in ("macos", "mac"):
+                os_type = "macos"
+            elif "windows" in sys:
+                os_type = "windows"
+            else:
+                os_type = "linux"
+
+            return {
+                "status": "ok",
+                "os_type": os_type,
+                "features": ["agent", "playground"],
+            }
+
+        @self.app.post("/responses")
+        async def responses_endpoint(request: Request):
+            """
+            Run ComputerAgent for up to 2 turns.
+
+            Body JSON:
+            {
+                "model": "...",                # required
+                "input": "... or messages[]",  # required
+                "agent_kwargs": { ... },       # optional, passed directly to ComputerAgent
+                "env": { ... }                 # optional env overrides for agent
+            }
+            """
+            # Import here to avoid circular imports
+            try:
+                from agent import ComputerAgent
+            except ImportError:
+                raise HTTPException(status_code=501, detail="ComputerAgent not available")
+
+            # Parse request body
+            try:
+                body = await request.json()
+            except Exception as e:
+                raise HTTPException(status_code=400, detail=f"Invalid JSON body: {str(e)}")
+
+            model = body.get("model")
+            input_data = body.get("input")
+            if not model or input_data is None:
+                raise HTTPException(status_code=400, detail="'model' and 'input' are required")
+
+            agent_kwargs: Dict[str, Any] = body.get("agent_kwargs") or {}
+            env_overrides: Dict[str, str] = body.get("env") or {}
+
+            # Simple env override context
+            class _EnvOverride:
+                def __init__(self, overrides: Dict[str, str]):
+                    self.overrides = overrides
+                    self._original: Dict[str, Optional[str]] = {}
+
+                def __enter__(self):
+                    for k, v in (self.overrides or {}).items():
+                        self._original[k] = os.environ.get(k)
+                        os.environ[k] = str(v)
+
+                def __exit__(self, exc_type, exc, tb):
+                    for k, old in self._original.items():
+                        if old is None:
+                            os.environ.pop(k, None)
+                        else:
+                            os.environ[k] = old
+
+            # Convert input to messages
+            def _to_messages(data: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
+                if isinstance(data, str):
+                    return [{"role": "user", "content": data}]
+                if isinstance(data, list):
+                    return data
+                return []
+
+            messages = _to_messages(input_data)
+
+            error = None
+
+            with _EnvOverride(env_overrides):
+                # Use pre-configured agent if available, otherwise create new one
+                if self.agent_instance:
+                    agent = self.agent_instance
+                else:
+                    agent = ComputerAgent(model=model, **agent_kwargs)  # type: ignore[arg-type]
+
+                total_output: List[Any] = []
+                total_usage: Dict[str, Any] = {}
+
+                pending_computer_call_ids = set()
+                try:
+                    async for result in agent.run(messages):
+                        total_output += result["output"]
+                        # Try to collect usage if present
+                        if (
+                            isinstance(result, dict)
+                            and "usage" in result
+                            and isinstance(result["usage"], dict)
+                        ):
+                            # Merge usage counters
+                            for k, v in result["usage"].items():
+                                if isinstance(v, (int, float)):
+                                    total_usage[k] = total_usage.get(k, 0) + v
+                                else:
+                                    total_usage[k] = v
+                        for msg in result.get("output", []):
+                            if msg.get("type") == "computer_call":
+                                pending_computer_call_ids.add(msg["call_id"])
+                            elif msg.get("type") == "computer_call_output":
+                                pending_computer_call_ids.discard(msg["call_id"])
+                            elif msg.get("type") == "function_call":
+                                pending_computer_call_ids.add(msg["call_id"])
+                            elif msg.get("type") == "function_call_output":
+                                pending_computer_call_ids.discard(msg["call_id"])
+                        # exit if no pending computer calls
+                        if not pending_computer_call_ids:
+                            break
+                except Exception as e:
+                    logger.error(f"Error running agent: {str(e)}")
+                    logger.error(traceback.format_exc())
+                    error = str(e)
+
+            # Build response payload
+            payload = {
+                "model": model,
+                "error": error,
+                "output": total_output,
+                "usage": total_usage,
+                "status": "completed" if not error else "failed",
+            }
+
+            # CORS: allow any origin
+            headers = {
+                "Cache-Control": "no-cache",
+                "Connection": "keep-alive",
+            }
+
+            return JSONResponse(content=payload, headers=headers)
+
+    def _find_available_port(self, start_port: int = 8000, max_attempts: int = 100) -> int:
+        """Find an available port starting from start_port."""
+        for port in range(start_port, start_port + max_attempts):
+            try:
+                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+                    s.bind(("127.0.0.1", port))
+                    return port
+            except OSError:
+                continue
+        raise RuntimeError(
+            f"Could not find an available port in range {start_port}-{start_port + max_attempts}"
+        )
+
+    async def start_async(self, port: Optional[int] = None, open_browser: bool = False):
+        """
+        Start the playground server asynchronously.
+
+        Args:
+            port: Port to run the server on. If None, finds an available port.
+            open_browser: Whether to open the browser automatically.
+        """
+        if port is None:
+            port = self._find_available_port()
+
+        self.port = port
+        host = f"http://localhost:{port}"
+
+        logger.info(f"Starting playground server on {host}")
+
+        if open_browser:
+            # Construct the playground URL
+            encoded_host = quote(host, safe="")
+            encoded_model = quote(self.agent_instance.model, safe="")
+            encoded_vnc_url = quote("http://localhost:8006/?autoconnect=true", safe="")
+
+            # Build URL with custom_model if agent instance is configured
+            playground_url = (
+                # f"http://cua.ai/dashboard/playground"
+                f"http://localhost:3000/dashboard/playground"
+                f"?host={encoded_host}"
+                f"&port={port}"
+                f"&id=localhost"
+                f"&name=localhost"
+                f"&custom_model={encoded_model}"
+                f"&custom_vnc_url={encoded_vnc_url}"
+                f"&vnc_password=null"
+                f"&resize=scale"
+                f"&fullscreen=true"
+            )
+
+            logger.info(f"Opening browser at: {playground_url}")
+            webbrowser.open(playground_url)
+
+        config = uvicorn.Config(
+            self.app,
+            host="0.0.0.0",
+            port=port,
+            log_level="info",
+        )
+        self.server = uvicorn.Server(config)
+        await self.server.serve()
+
+    def start(self, port: Optional[int] = None, open_browser: bool = False):
+        """
+        Start the playground server (blocking).
+
+        Args:
+            port: Port to run the server on. If None, finds an available port.
+            open_browser: Whether to open the browser automatically.
+        """
+        # Check if there's already a running event loop
+        try:
+            loop = asyncio.get_running_loop()
+            # If we're in an async context, schedule as a task
+            import threading
+
+            # Run the server in a separate thread to avoid blocking
+            server_thread = threading.Thread(
+                target=self._run_in_new_loop,
+                args=(port, open_browser),
+                daemon=True,
+            )
+            server_thread.start()
+
+            # Give the server a moment to start and open browser
+            import time
+
+            time.sleep(1)
+
+        except RuntimeError:
+            # No running loop, can use asyncio.run() safely
+            asyncio.run(self.start_async(port=port, open_browser=open_browser))
+
+    def _run_in_new_loop(self, port: Optional[int] = None, open_browser: bool = False):
+        """Helper to run server in a new event loop (for threading)."""
+        new_loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(new_loop)
+        try:
+            new_loop.run_until_complete(self.start_async(port=port, open_browser=open_browser))
+        finally:
+            new_loop.close()
+
+    async def stop(self):
+        """Stop the playground server."""
+        if self.server:
+            logger.info("Stopping playground server")
+            await self.server.shutdown()
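
For reference, the /responses route above expects a JSON body with "model" and "input" (a string or a list of messages), plus optional "agent_kwargs" and "env" keys, and returns model, error, output, usage, and status fields. A hedged client sketch against a locally running PlaygroundServer, not part of the package; it assumes the server is listening on port 8000 and that the requests library is available:

    # Sketch: call the playground /responses endpoint documented in the route's docstring.
    import requests

    resp = requests.post(
        "http://localhost:8000/responses",
        json={
            "model": "anthropic/claude-sonnet-4-5-20250929",
            "input": "Tell me a three sentence bedtime story about a unicorn.",
            "agent_kwargs": {"verbosity": 20},        # forwarded to ComputerAgent
            "env": {"ANTHROPIC_API_KEY": "sk-..."},   # temporary env overrides for this request
        },
        timeout=300,
    )
    payload = resp.json()
    print(payload["status"], payload.get("error"))
    for item in payload["output"]:
        print(item.get("type"))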
agent/proxy/examples.py
@@ -0,0 +1,196 @@
+"""
+Example usage of the proxy server and client requests.
+"""
+
+import dotenv
+
+dotenv.load_dotenv()
+
+import asyncio
+import json
+import os
+from typing import Any, Dict
+
+import aiohttp
+
+
+async def test_http_endpoint():
+    """Test the HTTP /responses endpoint."""
+
+    anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
+    assert isinstance(anthropic_api_key, str), "ANTHROPIC_API_KEY environment variable must be set"
+
+    # Example 1: Simple text request
+    simple_request = {
+        "model": "anthropic/claude-sonnet-4-5-20250929",
+        "input": "Tell me a three sentence bedtime story about a unicorn.",
+        "env": {"ANTHROPIC_API_KEY": anthropic_api_key},
+    }
+
+    # Example 2: Multi-modal request with image
+    multimodal_request = {
+        "model": "anthropic/claude-sonnet-4-5-20250929",
+        "input": [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "input_text", "text": "what is in this image?"},
+                    {
+                        "type": "input_image",
+                        "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
+                    },
+                ],
+            }
+        ],
+        "env": {"ANTHROPIC_API_KEY": anthropic_api_key},
+    }
+
+    # Example 3: Request with custom agent and computer kwargs
+    custom_request = {
+        "model": "anthropic/claude-sonnet-4-5-20250929",
+        "input": "Take a screenshot and tell me what you see",
+        "env": {"ANTHROPIC_API_KEY": anthropic_api_key},
+    }
+
+    # Test requests
+    base_url = "https://m-linux-96lcxd2c2k.containers.cloud.trycua.com:8443"
+    # base_url = "http://localhost:8000"
+    api_key = os.getenv("CUA_API_KEY")
+    assert isinstance(api_key, str), "CUA_API_KEY environment variable must be set"
+
+    async with aiohttp.ClientSession() as session:
+        for i, request_data in enumerate(
+            [
+                simple_request,
+                # multimodal_request,
+                custom_request,
+            ],
+            1,
+        ):
+            print(f"\n--- Test {i} ---")
+            print(f"Request: {json.dumps(request_data, indent=2)}")
+
+            try:
+                print(f"Sending request to {base_url}/responses")
+                async with session.post(
+                    f"{base_url}/responses",
+                    json=request_data,
+                    headers={"Content-Type": "application/json", "X-API-Key": api_key},
+                ) as response:
+                    result = await response.json()
+                    print(f"Status: {response.status}")
+                    print(f"Response: {json.dumps(result, indent=2)}")
+
+            except Exception as e:
+                print(f"Error: {e}")
+
+
+def curl_examples():
+    """Print curl command examples."""
+
+    print("=== CURL Examples ===\n")
+
+    print("1. Simple text request:")
+    print(
+        """curl http://localhost:8000/responses \\
+  -H "Content-Type: application/json" \\
+  -d '{
+    "model": "anthropic/claude-sonnet-4-5-20250929",
+    "input": "Tell me a three sentence bedtime story about a unicorn."
+  }'"""
+    )
+
+    print("\n2. Multi-modal request with image:")
+    print(
+        """curl http://localhost:8000/responses \\
+  -H "Content-Type: application/json" \\
+  -d '{
+    "model": "anthropic/claude-sonnet-4-5-20250929",
+    "input": [
+      {
+        "role": "user",
+        "content": [
+          {"type": "input_text", "text": "what is in this image?"},
+          {
+            "type": "input_image",
+            "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+          }
+        ]
+      }
+    ]
+  }'"""
+    )
+
+    print("\n3. Request with custom configuration:")
+    print(
+        """curl http://localhost:8000/responses \\
+  -H "Content-Type: application/json" \\
+  -d '{
+    "model": "anthropic/claude-sonnet-4-5-20250929",
+    "input": "Take a screenshot and tell me what you see",
+    "agent_kwargs": {
+      "save_trajectory": true,
+      "verbosity": 20
+    },
+    "computer_kwargs": {
+      "os_type": "linux",
+      "provider_type": "cloud"
+    }
+  }'"""
+    )
+
+
+async def test_p2p_client():
+    """Example P2P client using peerjs-python."""
+    try:
+        from aiortc import RTCConfiguration, RTCIceServer
+        from peerjs import ConnectionEventType, Peer, PeerOptions
+
+        # Set up client peer
+        options = PeerOptions(
+            host="0.peerjs.com",
+            port=443,
+            secure=True,
+            config=RTCConfiguration(iceServers=[RTCIceServer(urls="stun:stun.l.google.com:19302")]),
+        )
+
+        client_peer = Peer(id="test-client", peer_options=options)
+        await client_peer.start()
+
+        # Connect to proxy server
+        connection = client_peer.connect("computer-agent-proxy")
+
+        @connection.on(ConnectionEventType.Open)
+        async def connection_open():
+            print("Connected to proxy server")
+
+            # Send a test request
+            request = {
+                "model": "anthropic/claude-sonnet-4-5-20250929",
+                "input": "Hello from P2P client!",
+            }
+            await connection.send(json.dumps(request))
+
+        @connection.on(ConnectionEventType.Data)
+        async def connection_data(data):
+            print(f"Received response: {data}")
+            await client_peer.destroy()
+
+        # Wait for connection
+        await asyncio.sleep(10)
+
+    except ImportError:
+        print("P2P dependencies not available. Install peerjs-python for P2P testing.")
+    except Exception as e:
+        print(f"P2P test error: {e}")
+
+
+if __name__ == "__main__":
+    import sys
+
+    if len(sys.argv) > 1 and sys.argv[1] == "curl":
+        curl_examples()
+    elif len(sys.argv) > 1 and sys.argv[1] == "p2p":
+        asyncio.run(test_p2p_client())
+    else:
+        asyncio.run(test_http_endpoint())
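
The examples above target the cloud proxy (note the X-API-Key header). Against a local PlaygroundServer, the /status route from server.py makes a convenient smoke test. A minimal stdlib-only sketch, assuming the playground server is up on localhost:8000:

    # Sketch: health check against the playground server's /status route.
    import json
    from urllib.request import urlopen

    with urlopen("http://localhost:8000/status", timeout=10) as r:
        info = json.load(r)
    print(info["status"], info["os_type"], info["features"])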