PyPI - cua-agent - Versions diffs - 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl - Mend

cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (79) hide show

agent/__init__.py +4 -10
agent/__main__.py +2 -1
agent/adapters/__init__.py +4 -0
agent/adapters/azure_ml_adapter.py +283 -0
agent/adapters/cua_adapter.py +161 -0
agent/adapters/huggingfacelocal_adapter.py +67 -125
agent/adapters/human_adapter.py +116 -114
agent/adapters/mlxvlm_adapter.py +110 -99
agent/adapters/models/__init__.py +41 -0
agent/adapters/models/generic.py +78 -0
agent/adapters/models/internvl.py +290 -0
agent/adapters/models/opencua.py +115 -0
agent/adapters/models/qwen2_5_vl.py +78 -0
agent/agent.py +337 -185
agent/callbacks/__init__.py +9 -4
agent/callbacks/base.py +45 -31
agent/callbacks/budget_manager.py +22 -10
agent/callbacks/image_retention.py +54 -98
agent/callbacks/logging.py +55 -42
agent/callbacks/operator_validator.py +35 -33
agent/callbacks/otel.py +291 -0
agent/callbacks/pii_anonymization.py +19 -16
agent/callbacks/prompt_instructions.py +47 -0
agent/callbacks/telemetry.py +99 -61
agent/callbacks/trajectory_saver.py +95 -69
agent/cli.py +269 -119
agent/computers/__init__.py +14 -9
agent/computers/base.py +32 -19
agent/computers/cua.py +52 -25
agent/computers/custom.py +78 -71
agent/decorators.py +23 -14
agent/human_tool/__init__.py +2 -7
agent/human_tool/__main__.py +6 -2
agent/human_tool/server.py +48 -37
agent/human_tool/ui.py +359 -235
agent/integrations/hud/__init__.py +38 -99
agent/integrations/hud/agent.py +369 -0
agent/integrations/hud/proxy.py +166 -52
agent/loops/__init__.py +44 -14
agent/loops/anthropic.py +579 -492
agent/loops/base.py +19 -15
agent/loops/composed_grounded.py +136 -150
agent/loops/fara/__init__.py +8 -0
agent/loops/fara/config.py +506 -0
agent/loops/fara/helpers.py +357 -0
agent/loops/fara/schema.py +143 -0
agent/loops/gelato.py +183 -0
agent/loops/gemini.py +935 -0
agent/loops/generic_vlm.py +601 -0
agent/loops/glm45v.py +140 -135
agent/loops/gta1.py +48 -51
agent/loops/holo.py +218 -0
agent/loops/internvl.py +180 -0
agent/loops/moondream3.py +493 -0
agent/loops/omniparser.py +326 -226
agent/loops/openai.py +50 -51
agent/loops/opencua.py +134 -0
agent/loops/uiins.py +175 -0
agent/loops/uitars.py +247 -206
agent/loops/uitars2.py +951 -0
agent/playground/__init__.py +5 -0
agent/playground/server.py +301 -0
agent/proxy/examples.py +61 -57
agent/proxy/handlers.py +46 -39
agent/responses.py +447 -347
agent/tools/__init__.py +24 -0
agent/tools/base.py +253 -0
agent/tools/browser_tool.py +423 -0
agent/types.py +11 -5
agent/ui/__init__.py +1 -1
agent/ui/__main__.py +1 -1
agent/ui/gradio/app.py +25 -22
agent/ui/gradio/ui_components.py +314 -167
cua_agent-0.7.16.dist-info/METADATA +85 -0
cua_agent-0.7.16.dist-info/RECORD +79 -0
{cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/WHEEL +1 -1
cua_agent-0.4.22.dist-info/METADATA +0 -436
cua_agent-0.4.22.dist-info/RECORD +0 -51
{cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/entry_points.txt +0 -0

agent/playground/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Playground server for Cua agents."""
+from .server import PlaygroundServer
+__all__ = ["PlaygroundServer"]

agent/playground/server.py ADDED Viewed

@@ -0,0 +1,301 @@
+"""Playground server implementation for Cua agents."""
+import asyncio
+import logging
+import os
+import platform
+import socket
+import traceback
+import webbrowser
+from typing import Any, Dict, List, Optional, Union
+from urllib.parse import quote
+import uvicorn
+from fastapi import FastAPI, HTTPException, Request
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+logger = logging.getLogger(__name__)
+class PlaygroundServer:
+    """Playground server for running Cua agents via HTTP API."""
+    def __init__(self, agent_instance=None):
+        """
+        Initialize the playground server.
+        Args:
+            agent_instance: Optional pre-configured agent instance to use
+        """
+        self.agent_instance = agent_instance
+        self.app = FastAPI(
+            title="Cua Playground Server",
+            description="Playground server for Cua agents",
+            version="0.1.0",
+        )
+        self._setup_middleware()
+        self._setup_routes()
+        self.server = None
+        self.port = None
+    def _setup_middleware(self):
+        """Setup CORS middleware."""
+        self.app.add_middleware(
+            CORSMiddleware,
+            allow_origins=["*"],
+            allow_credentials=True,
+            allow_methods=["*"],
+            allow_headers=["*"],
+        )
+    def _setup_routes(self):
+        """Setup API routes."""
+        @self.app.get("/status")
+        async def status():
+            """Health check endpoint."""
+            sys = platform.system().lower()
+            if "darwin" in sys or sys in ("macos", "mac"):
+                os_type = "macos"
+            elif "windows" in sys:
+                os_type = "windows"
+            else:
+                os_type = "linux"
+            return {
+                "status": "ok",
+                "os_type": os_type,
+                "features": ["agent", "playground"],
+            }
+        @self.app.post("/responses")
+        async def responses_endpoint(request: Request):
+            """
+            Run ComputerAgent for up to 2 turns.
+            Body JSON:
+            {
+              "model": "...",                 # required
+              "input": "... or messages[]",   # required
+              "agent_kwargs": { ... },         # optional, passed directly to ComputerAgent
+              "env": { ... }                   # optional env overrides for agent
+            }
+            """
+            # Import here to avoid circular imports
+            try:
+                from agent import ComputerAgent
+            except ImportError:
+                raise HTTPException(status_code=501, detail="ComputerAgent not available")
+            # Parse request body
+            try:
+                body = await request.json()
+            except Exception as e:
+                raise HTTPException(status_code=400, detail=f"Invalid JSON body: {str(e)}")
+            model = body.get("model")
+            input_data = body.get("input")
+            if not model or input_data is None:
+                raise HTTPException(status_code=400, detail="'model' and 'input' are required")
+            agent_kwargs: Dict[str, Any] = body.get("agent_kwargs") or {}
+            env_overrides: Dict[str, str] = body.get("env") or {}
+            # Simple env override context
+            class _EnvOverride:
+                def __init__(self, overrides: Dict[str, str]):
+                    self.overrides = overrides
+                    self._original: Dict[str, Optional[str]] = {}
+                def __enter__(self):
+                    for k, v in (self.overrides or {}).items():
+                        self._original[k] = os.environ.get(k)
+                        os.environ[k] = str(v)
+                def __exit__(self, exc_type, exc, tb):
+                    for k, old in self._original.items():
+                        if old is None:
+                            os.environ.pop(k, None)
+                        else:
+                            os.environ[k] = old
+            # Convert input to messages
+            def _to_messages(data: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
+                if isinstance(data, str):
+                    return [{"role": "user", "content": data}]
+                if isinstance(data, list):
+                    return data
+                return []
+            messages = _to_messages(input_data)
+            error = None
+            with _EnvOverride(env_overrides):
+                # Use pre-configured agent if available, otherwise create new one
+                if self.agent_instance:
+                    agent = self.agent_instance
+                else:
+                    agent = ComputerAgent(model=model, **agent_kwargs)  # type: ignore[arg-type]
+                total_output: List[Any] = []
+                total_usage: Dict[str, Any] = {}
+                pending_computer_call_ids = set()
+                try:
+                    async for result in agent.run(messages):
+                        total_output += result["output"]
+                        # Try to collect usage if present
+                        if (
+                            isinstance(result, dict)
+                            and "usage" in result
+                            and isinstance(result["usage"], dict)
+                        ):
+                            # Merge usage counters
+                            for k, v in result["usage"].items():
+                                if isinstance(v, (int, float)):
+                                    total_usage[k] = total_usage.get(k, 0) + v
+                                else:
+                                    total_usage[k] = v
+                        for msg in result.get("output", []):
+                            if msg.get("type") == "computer_call":
+                                pending_computer_call_ids.add(msg["call_id"])
+                            elif msg.get("type") == "computer_call_output":
+                                pending_computer_call_ids.discard(msg["call_id"])
+                            elif msg.get("type") == "function_call":
+                                pending_computer_call_ids.add(msg["call_id"])
+                            elif msg.get("type") == "function_call_output":
+                                pending_computer_call_ids.discard(msg["call_id"])
+                        # exit if no pending computer calls
+                        if not pending_computer_call_ids:
+                            break
+                except Exception as e:
+                    logger.error(f"Error running agent: {str(e)}")
+                    logger.error(traceback.format_exc())
+                    error = str(e)
+            # Build response payload
+            payload = {
+                "model": model,
+                "error": error,
+                "output": total_output,
+                "usage": total_usage,
+                "status": "completed" if not error else "failed",
+            }
+            # CORS: allow any origin
+            headers = {
+                "Cache-Control": "no-cache",
+                "Connection": "keep-alive",
+            }
+            return JSONResponse(content=payload, headers=headers)
+    def _find_available_port(self, start_port: int = 8000, max_attempts: int = 100) -> int:
+        """Find an available port starting from start_port."""
+        for port in range(start_port, start_port + max_attempts):
+            try:
+                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+                    s.bind(("127.0.0.1", port))
+                    return port
+            except OSError:
+                continue
+        raise RuntimeError(
+            f"Could not find an available port in range {start_port}-{start_port + max_attempts}"
+        )
+    async def start_async(self, port: Optional[int] = None, open_browser: bool = False):
+        """
+        Start the playground server asynchronously.
+        Args:
+            port: Port to run the server on. If None, finds an available port.
+            open_browser: Whether to open the browser automatically.
+        """
+        if port is None:
+            port = self._find_available_port()
+        self.port = port
+        host = f"http://localhost:{port}"
+        logger.info(f"Starting playground server on {host}")
+        if open_browser:
+            # Construct the playground URL
+            encoded_host = quote(host, safe="")
+            encoded_model = quote(self.agent_instance.model, safe="")
+            encoded_vnc_url = quote("http://localhost:8006/?autoconnect=true", safe="")
+            # Build URL with custom_model if agent instance is configured
+            playground_url = (
+                # f"http://cua.ai/dashboard/playground"
+                f"http://localhost:3000/dashboard/playground"
+                f"?host={encoded_host}"
+                f"&port={port}"
+                f"&id=localhost"
+                f"&name=localhost"
+                f"&custom_model={encoded_model}"
+                f"&custom_vnc_url={encoded_vnc_url}"
+                f"&vnc_password=null"
+                f"&resize=scale"
+                f"&fullscreen=true"
+            )
+            logger.info(f"Opening browser at: {playground_url}")
+            webbrowser.open(playground_url)
+        config = uvicorn.Config(
+            self.app,
+            host="0.0.0.0",
+            port=port,
+            log_level="info",
+        )
+        self.server = uvicorn.Server(config)
+        await self.server.serve()
+    def start(self, port: Optional[int] = None, open_browser: bool = False):
+        """
+        Start the playground server (blocking).
+        Args:
+            port: Port to run the server on. If None, finds an available port.
+            open_browser: Whether to open the browser automatically.
+        """
+        # Check if there's already a running event loop
+        try:
+            loop = asyncio.get_running_loop()
+            # If we're in an async context, schedule as a task
+            import threading
+            # Run the server in a separate thread to avoid blocking
+            server_thread = threading.Thread(
+                target=self._run_in_new_loop,
+                args=(port, open_browser),
+                daemon=True,
+            )
+            server_thread.start()
+            # Give the server a moment to start and open browser
+            import time
+            time.sleep(1)
+        except RuntimeError:
+            # No running loop, can use asyncio.run() safely
+            asyncio.run(self.start_async(port=port, open_browser=open_browser))
+    def _run_in_new_loop(self, port: Optional[int] = None, open_browser: bool = False):
+        """Helper to run server in a new event loop (for threading)."""
+        new_loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(new_loop)
+        try:
+            new_loop.run_until_complete(self.start_async(port=port, open_browser=open_browser))
+        finally:
+            new_loop.close()
+    async def stop(self):
+        """Stop the playground server."""
+        if self.server:
+            logger.info("Stopping playground server")
+            await self.server.shutdown()

agent/proxy/examples.py CHANGED Viewed

@@ -1,34 +1,35 @@
 """
 Example usage of the proxy server and client requests.
 """
 import dotenv
 dotenv.load_dotenv()
 import asyncio
 import json
 import os
+from typing import Any, Dict
 import aiohttp
-from typing import Dict, Any
 async def test_http_endpoint():
     """Test the HTTP /responses endpoint."""
     anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
     assert isinstance(anthropic_api_key, str), "ANTHROPIC_API_KEY environment variable must be set"
     # Example 1: Simple text request
     simple_request = {
-        "model": "anthropic/claude-3-5-sonnet-20241022",
+        "model": "anthropic/claude-sonnet-4-5-20250929",
         "input": "Tell me a three sentence bedtime story about a unicorn.",
-        "env": {
-            "ANTHROPIC_API_KEY": anthropic_api_key
-        }
+        "env": {"ANTHROPIC_API_KEY": anthropic_api_key},
     }
     # Example 2: Multi-modal request with image
     multimodal_request = {
-        "model": "anthropic/claude-3-5-sonnet-20241022",
+        "model": "anthropic/claude-sonnet-4-5-20250929",
         "input": [
             {
                 "role": "user",
@@ -36,73 +37,75 @@ async def test_http_endpoint():
                     {"type": "input_text", "text": "what is in this image?"},
                     {
                         "type": "input_image",
-                        "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
-                    }
-                ]
+                        "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
+                    },
+                ],
             }
         ],
-        "env": {
-            "ANTHROPIC_API_KEY": anthropic_api_key
-        }
+        "env": {"ANTHROPIC_API_KEY": anthropic_api_key},
     }
     # Example 3: Request with custom agent and computer kwargs
     custom_request = {
-        "model": "anthropic/claude-3-5-sonnet-20241022",
+        "model": "anthropic/claude-sonnet-4-5-20250929",
         "input": "Take a screenshot and tell me what you see",
-        "env": {
-            "ANTHROPIC_API_KEY": anthropic_api_key
-        }
+        "env": {"ANTHROPIC_API_KEY": anthropic_api_key},
     }
     # Test requests
     base_url = "https://m-linux-96lcxd2c2k.containers.cloud.trycua.com:8443"
     # base_url = "http://localhost:8000"
     api_key = os.getenv("CUA_API_KEY")
     assert isinstance(api_key, str), "CUA_API_KEY environment variable must be set"
     async with aiohttp.ClientSession() as session:
-        for i, request_data in enumerate([
-            simple_request,
-            # multimodal_request,
-            custom_request
-        ], 1):
+        for i, request_data in enumerate(
+            [
+                simple_request,
+                # multimodal_request,
+                custom_request,
+            ],
+            1,
+        ):
             print(f"\n--- Test {i} ---")
             print(f"Request: {json.dumps(request_data, indent=2)}")
             try:
                 print(f"Sending request to {base_url}/responses")
                 async with session.post(
                     f"{base_url}/responses",
                     json=request_data,
-                    headers={"Content-Type": "application/json", "X-API-Key": api_key}
+                    headers={"Content-Type": "application/json", "X-API-Key": api_key},
                 ) as response:
                     result = await response.json()
                     print(f"Status: {response.status}")
                     print(f"Response: {json.dumps(result, indent=2)}")
             except Exception as e:
                 print(f"Error: {e}")
 def curl_examples():
     """Print curl command examples."""
     print("=== CURL Examples ===\n")
     print("1. Simple text request:")
-    print("""curl http://localhost:8000/responses \\
+    print(
+        """curl http://localhost:8000/responses \\
   -H "Content-Type: application/json" \\
   -d '{
-    "model": "anthropic/claude-3-5-sonnet-20241022",
+    "model": "anthropic/claude-sonnet-4-5-20250929",
     "input": "Tell me a three sentence bedtime story about a unicorn."
-  }'""")
+  }'"""
+    )
     print("\n2. Multi-modal request with image:")
-    print("""curl http://localhost:8000/responses \\
+    print(
+        """curl http://localhost:8000/responses \\
   -H "Content-Type: application/json" \\
   -d '{
-    "model": "anthropic/claude-3-5-sonnet-20241022",
+    "model": "anthropic/claude-sonnet-4-5-20250929",
     "input": [
       {
         "role": "user",
@@ -115,13 +118,15 @@ def curl_examples():
         ]
       }
     ]
-  }'""")
+  }'"""
+    )
     print("\n3. Request with custom configuration:")
-    print("""curl http://localhost:8000/responses \\
+    print(
+        """curl http://localhost:8000/responses \\
   -H "Content-Type: application/json" \\
   -d '{
-    "model": "anthropic/claude-3-5-sonnet-20241022",
+    "model": "anthropic/claude-sonnet-4-5-20250929",
     "input": "Take a screenshot and tell me what you see",
     "agent_kwargs": {
       "save_trajectory": true,
@@ -131,50 +136,49 @@ def curl_examples():
       "os_type": "linux",
       "provider_type": "cloud"
     }
-  }'""")
+  }'"""
+    )
 async def test_p2p_client():
     """Example P2P client using peerjs-python."""
     try:
-        from peerjs import Peer, PeerOptions, ConnectionEventType
         from aiortc import RTCConfiguration, RTCIceServer
+        from peerjs import ConnectionEventType, Peer, PeerOptions
         # Set up client peer
         options = PeerOptions(
             host="0.peerjs.com",
             port=443,
             secure=True,
-            config=RTCConfiguration(
-                iceServers=[RTCIceServer(urls="stun:stun.l.google.com:19302")]
-            )
+            config=RTCConfiguration(iceServers=[RTCIceServer(urls="stun:stun.l.google.com:19302")]),
         )
         client_peer = Peer(id="test-client", peer_options=options)
         await client_peer.start()
         # Connect to proxy server
         connection = client_peer.connect("computer-agent-proxy")
         @connection.on(ConnectionEventType.Open)
         async def connection_open():
             print("Connected to proxy server")
             # Send a test request
             request = {
-                "model": "anthropic/claude-3-5-sonnet-20241022",
-                "input": "Hello from P2P client!"
+                "model": "anthropic/claude-sonnet-4-5-20250929",
+                "input": "Hello from P2P client!",
             }
             await connection.send(json.dumps(request))
         @connection.on(ConnectionEventType.Data)
         async def connection_data(data):
             print(f"Received response: {data}")
             await client_peer.destroy()
         # Wait for connection
         await asyncio.sleep(10)
     except ImportError:
         print("P2P dependencies not available. Install peerjs-python for P2P testing.")
     except Exception as e:
@@ -183,7 +187,7 @@ async def test_p2p_client():
 if __name__ == "__main__":
     import sys
     if len(sys.argv) > 1 and sys.argv[1] == "curl":
         curl_examples()
     elif len(sys.argv) > 1 and sys.argv[1] == "p2p":

cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl

Potentially problematic release.

cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl