cua-agent 0.4.34__py3-none-any.whl → 0.4.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (61)
  1. agent/__init__.py +4 -10
  2. agent/__main__.py +2 -1
  3. agent/adapters/huggingfacelocal_adapter.py +54 -61
  4. agent/adapters/human_adapter.py +116 -114
  5. agent/adapters/mlxvlm_adapter.py +110 -99
  6. agent/adapters/models/__init__.py +14 -6
  7. agent/adapters/models/generic.py +7 -4
  8. agent/adapters/models/internvl.py +66 -30
  9. agent/adapters/models/opencua.py +23 -8
  10. agent/adapters/models/qwen2_5_vl.py +7 -4
  11. agent/agent.py +184 -158
  12. agent/callbacks/__init__.py +4 -4
  13. agent/callbacks/base.py +45 -31
  14. agent/callbacks/budget_manager.py +22 -10
  15. agent/callbacks/image_retention.py +18 -13
  16. agent/callbacks/logging.py +55 -42
  17. agent/callbacks/operator_validator.py +3 -1
  18. agent/callbacks/pii_anonymization.py +19 -16
  19. agent/callbacks/telemetry.py +67 -61
  20. agent/callbacks/trajectory_saver.py +90 -70
  21. agent/cli.py +115 -110
  22. agent/computers/__init__.py +13 -8
  23. agent/computers/base.py +32 -19
  24. agent/computers/cua.py +33 -25
  25. agent/computers/custom.py +78 -71
  26. agent/decorators.py +23 -14
  27. agent/human_tool/__init__.py +2 -7
  28. agent/human_tool/__main__.py +6 -2
  29. agent/human_tool/server.py +48 -37
  30. agent/human_tool/ui.py +235 -185
  31. agent/integrations/hud/__init__.py +15 -21
  32. agent/integrations/hud/agent.py +101 -83
  33. agent/integrations/hud/proxy.py +90 -57
  34. agent/loops/__init__.py +25 -21
  35. agent/loops/anthropic.py +537 -483
  36. agent/loops/base.py +13 -14
  37. agent/loops/composed_grounded.py +135 -149
  38. agent/loops/gemini.py +31 -12
  39. agent/loops/glm45v.py +135 -133
  40. agent/loops/gta1.py +47 -50
  41. agent/loops/holo.py +4 -2
  42. agent/loops/internvl.py +6 -11
  43. agent/loops/moondream3.py +36 -12
  44. agent/loops/omniparser.py +215 -210
  45. agent/loops/openai.py +49 -50
  46. agent/loops/opencua.py +29 -41
  47. agent/loops/qwen.py +510 -0
  48. agent/loops/uitars.py +237 -202
  49. agent/proxy/examples.py +54 -50
  50. agent/proxy/handlers.py +27 -34
  51. agent/responses.py +330 -330
  52. agent/types.py +11 -5
  53. agent/ui/__init__.py +1 -1
  54. agent/ui/__main__.py +1 -1
  55. agent/ui/gradio/app.py +23 -18
  56. agent/ui/gradio/ui_components.py +310 -161
  57. {cua_agent-0.4.34.dist-info → cua_agent-0.4.36.dist-info}/METADATA +18 -10
  58. cua_agent-0.4.36.dist-info/RECORD +64 -0
  59. cua_agent-0.4.34.dist-info/RECORD +0 -63
  60. {cua_agent-0.4.34.dist-info → cua_agent-0.4.36.dist-info}/WHEEL +0 -0
  61. {cua_agent-0.4.34.dist-info → cua_agent-0.4.36.dist-info}/entry_points.txt +0 -0
agent/proxy/examples.py CHANGED
@@ -1,19 +1,22 @@
 """
 Example usage of the proxy server and client requests.
 """
+
 import dotenv
+
 dotenv.load_dotenv()
 
 import asyncio
 import json
 import os
+from typing import Any, Dict
+
 import aiohttp
-from typing import Dict, Any
 
 
 async def test_http_endpoint():
     """Test the HTTP /responses endpoint."""
-
+
     anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
     assert isinstance(anthropic_api_key, str), "ANTHROPIC_API_KEY environment variable must be set"
 
@@ -21,11 +24,9 @@ async def test_http_endpoint():
     simple_request = {
         "model": "anthropic/claude-3-5-sonnet-20241022",
         "input": "Tell me a three sentence bedtime story about a unicorn.",
-        "env": {
-            "ANTHROPIC_API_KEY": anthropic_api_key
-        }
+        "env": {"ANTHROPIC_API_KEY": anthropic_api_key},
     }
-
+
     # Example 2: Multi-modal request with image
     multimodal_request = {
         "model": "anthropic/claude-3-5-sonnet-20241022",
@@ -36,70 +37,72 @@ async def test_http_endpoint():
                     {"type": "input_text", "text": "what is in this image?"},
                     {
                         "type": "input_image",
-                        "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
-                    }
-                ]
+                        "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
+                    },
+                ],
             }
         ],
-        "env": {
-            "ANTHROPIC_API_KEY": anthropic_api_key
-        }
+        "env": {"ANTHROPIC_API_KEY": anthropic_api_key},
     }
-
+
     # Example 3: Request with custom agent and computer kwargs
     custom_request = {
         "model": "anthropic/claude-3-5-sonnet-20241022",
         "input": "Take a screenshot and tell me what you see",
-        "env": {
-            "ANTHROPIC_API_KEY": anthropic_api_key
-        }
+        "env": {"ANTHROPIC_API_KEY": anthropic_api_key},
     }
-
+
     # Test requests
     base_url = "https://m-linux-96lcxd2c2k.containers.cloud.trycua.com:8443"
     # base_url = "http://localhost:8000"
     api_key = os.getenv("CUA_API_KEY")
     assert isinstance(api_key, str), "CUA_API_KEY environment variable must be set"
-
+
     async with aiohttp.ClientSession() as session:
-        for i, request_data in enumerate([
-            simple_request,
-            # multimodal_request,
-            custom_request
-        ], 1):
+        for i, request_data in enumerate(
+            [
+                simple_request,
+                # multimodal_request,
+                custom_request,
+            ],
+            1,
+        ):
             print(f"\n--- Test {i} ---")
             print(f"Request: {json.dumps(request_data, indent=2)}")
-
+
             try:
                 print(f"Sending request to {base_url}/responses")
                 async with session.post(
                     f"{base_url}/responses",
                     json=request_data,
-                    headers={"Content-Type": "application/json", "X-API-Key": api_key}
+                    headers={"Content-Type": "application/json", "X-API-Key": api_key},
                 ) as response:
                     result = await response.json()
                     print(f"Status: {response.status}")
                    print(f"Response: {json.dumps(result, indent=2)}")
-
+
             except Exception as e:
                 print(f"Error: {e}")
 
 
 def curl_examples():
     """Print curl command examples."""
-
+
     print("=== CURL Examples ===\n")
-
+
     print("1. Simple text request:")
-    print("""curl http://localhost:8000/responses \\
+    print(
+        """curl http://localhost:8000/responses \\
   -H "Content-Type: application/json" \\
   -d '{
     "model": "anthropic/claude-3-5-sonnet-20241022",
     "input": "Tell me a three sentence bedtime story about a unicorn."
-  }'""")
-
+  }'"""
+    )
+
     print("\n2. Multi-modal request with image:")
-    print("""curl http://localhost:8000/responses \\
+    print(
+        """curl http://localhost:8000/responses \\
   -H "Content-Type: application/json" \\
   -d '{
     "model": "anthropic/claude-3-5-sonnet-20241022",
@@ -115,10 +118,12 @@ def curl_examples():
       ]
     }
   ]
-  }'""")
-
+  }'"""
+    )
+
     print("\n3. Request with custom configuration:")
-    print("""curl http://localhost:8000/responses \\
+    print(
+        """curl http://localhost:8000/responses \\
   -H "Content-Type: application/json" \\
   -d '{
     "model": "anthropic/claude-3-5-sonnet-20241022",
@@ -131,50 +136,49 @@ def curl_examples():
       "os_type": "linux",
      "provider_type": "cloud"
     }
-  }'""")
+  }'"""
+    )
 
 
 async def test_p2p_client():
     """Example P2P client using peerjs-python."""
     try:
-        from peerjs import Peer, PeerOptions, ConnectionEventType
         from aiortc import RTCConfiguration, RTCIceServer
-
+        from peerjs import ConnectionEventType, Peer, PeerOptions
+
         # Set up client peer
         options = PeerOptions(
             host="0.peerjs.com",
             port=443,
             secure=True,
-            config=RTCConfiguration(
-                iceServers=[RTCIceServer(urls="stun:stun.l.google.com:19302")]
-            )
+            config=RTCConfiguration(iceServers=[RTCIceServer(urls="stun:stun.l.google.com:19302")]),
         )
-
+
         client_peer = Peer(id="test-client", peer_options=options)
         await client_peer.start()
-
+
         # Connect to proxy server
         connection = client_peer.connect("computer-agent-proxy")
-
+
         @connection.on(ConnectionEventType.Open)
         async def connection_open():
             print("Connected to proxy server")
-
+
             # Send a test request
             request = {
                 "model": "anthropic/claude-3-5-sonnet-20241022",
-                "input": "Hello from P2P client!"
+                "input": "Hello from P2P client!",
             }
             await connection.send(json.dumps(request))
-
+
         @connection.on(ConnectionEventType.Data)
         async def connection_data(data):
            print(f"Received response: {data}")
             await client_peer.destroy()
-
+
         # Wait for connection
         await asyncio.sleep(10)
-
+
     except ImportError:
         print("P2P dependencies not available. Install peerjs-python for P2P testing.")
     except Exception as e:
@@ -183,7 +187,7 @@ async def test_p2p_client():
 
 if __name__ == "__main__":
     import sys
-
+
     if len(sys.argv) > 1 and sys.argv[1] == "curl":
         curl_examples()
     elif len(sys.argv) > 1 and sys.argv[1] == "p2p":
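
For orientation, the request shape exercised above is small: a JSON body with model, input, and an env block, POSTed to /responses with Content-Type and X-API-Key headers. A minimal standalone sketch of the same call follows, assuming a proxy reachable at http://localhost:8000 and the ANTHROPIC_API_KEY / CUA_API_KEY environment variables are set; the base URL and credentials are placeholders, not part of this release.

# Minimal sketch of a /responses call, mirroring the request shape shown in the
# examples above. Base URL and credentials are placeholders.
import asyncio
import json
import os

import aiohttp


async def send_simple_request() -> None:
    payload = {
        "model": "anthropic/claude-3-5-sonnet-20241022",
        "input": "Tell me a three sentence bedtime story about a unicorn.",
        # Provider credentials are forwarded to the proxy via the "env" block.
        "env": {"ANTHROPIC_API_KEY": os.environ["ANTHROPIC_API_KEY"]},
    }
    headers = {"Content-Type": "application/json", "X-API-Key": os.environ["CUA_API_KEY"]}
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "http://localhost:8000/responses", json=payload, headers=headers
        ) as response:
            print(response.status)
            print(json.dumps(await response.json(), indent=2))


if __name__ == "__main__":
    asyncio.run(send_simple_request())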
agent/proxy/handlers.py CHANGED
@@ -7,24 +7,25 @@ import json
 import logging
 import os
 from contextlib import contextmanager
-from typing import Dict, Any, List, Union, Optional
+from typing import Any, Dict, List, Optional, Union
 
-from ..agent import ComputerAgent
 from computer import Computer
 
+from ..agent import ComputerAgent
+
 logger = logging.getLogger(__name__)
 
 
 class ResponsesHandler:
     """Handler for /responses endpoint that processes agent requests."""
-
+
     def __init__(self):
         self.computer = None
         self.agent = None
         # Simple in-memory caches
         self._computer_cache: Dict[str, Any] = {}
         self._agent_cache: Dict[str, Any] = {}
-
+
     async def setup_computer_agent(
         self,
         model: str,
@@ -75,7 +76,9 @@ class ResponsesHandler:
                 computer = Computer(**default_c_config)
                 await computer.__aenter__()
                 self._computer_cache[comp_key] = computer
-                logger.info(f"Computer created and cached with key={comp_key} config={default_c_config}")
+                logger.info(
+                    f"Computer created and cached with key={comp_key} config={default_c_config}"
+                )
             else:
                 logger.info(f"Reusing cached computer for key={comp_key}")
 
@@ -115,14 +118,14 @@
 
         # Bind current agent reference
         self.agent = agent
-
+
     async def process_request(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
         """
         Process a /responses request and return the result.
-
+
         Args:
             request_data: Dictionary containing model, input, and optional kwargs
-
+
         Returns:
             Dictionary with the agent's response
         """
@@ -133,12 +136,12 @@
             agent_kwargs = request_data.get("agent_kwargs", {})
             computer_kwargs = request_data.get("computer_kwargs", {})
             env_overrides = request_data.get("env", {}) or {}
-
+
             if not model:
                 raise ValueError("Model is required")
             if not input_data:
                 raise ValueError("Input is required")
-
+
             # Apply env overrides for the duration of this request
             with self._env_overrides(env_overrides):
                 # Set up (and possibly reuse) computer and agent via caches
@@ -155,28 +158,22 @@
                 # Run agent and get first result
                 async for result in agent.run(messages):
                     # Return the first result and break
-                    return {
-                        "success": True,
-                        "result": result,
-                        "model": model
-                    }
-
+                    return {"success": True, "result": result, "model": model}
+
                 # If no results were yielded
-                return {
-                    "success": False,
-                    "error": "No results from agent",
-                    "model": model
-                }
-
+                return {"success": False, "error": "No results from agent", "model": model}
+
         except Exception as e:
             logger.error(f"Error processing request: {e}")
             return {
                 "success": False,
                 "error": str(e),
-                "model": request_data.get("model", "unknown")
+                "model": request_data.get("model", "unknown"),
             }
-
-    def _convert_input_to_messages(self, input_data: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
+
+    def _convert_input_to_messages(
+        self, input_data: Union[str, List[Dict[str, Any]]]
+    ) -> List[Dict[str, Any]]:
         """Convert input data to messages format."""
         if isinstance(input_data, str):
             # Simple string input
@@ -192,22 +189,18 @@ class ResponsesHandler:
                     if part.get("type") == "input_text":
                         content_parts.append({"type": "text", "text": part["text"]})
                     elif part.get("type") == "input_image":
-                        content_parts.append({
-                            "type": "image_url",
-                            "image_url": {"url": part["image_url"]}
-                        })
+                        content_parts.append(
+                            {"type": "image_url", "image_url": {"url": part["image_url"]}}
+                        )
                     else:
                         content_parts.append(part)
-                messages.append({
-                    "role": msg["role"],
-                    "content": content_parts
-                })
+                messages.append({"role": msg["role"], "content": content_parts})
             else:
                 messages.append(msg)
             return messages
         else:
             raise ValueError("Input must be string or list of messages")
-
+
     async def cleanup(self):
         """Clean up resources."""
         if self.computer:
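
The last two hunks show how ResponsesHandler._convert_input_to_messages normalizes the /responses input field: a bare string becomes a single user message, and input_text / input_image parts are mapped to text / image_url content parts. A standalone sketch of that conversion, reconstructed from the hunks above (the string branch is not visible in the diff, so its exact return value is assumed):

# Illustrative reimplementation of the input -> messages normalization shown in
# the hunks above; not imported from the packaged module.
from typing import Any, Dict, List, Union


def convert_input_to_messages(
    input_data: Union[str, List[Dict[str, Any]]],
) -> List[Dict[str, Any]]:
    if isinstance(input_data, str):
        # Assumed shape for the string branch (not visible in the hunk).
        return [{"role": "user", "content": input_data}]
    if isinstance(input_data, list):
        messages: List[Dict[str, Any]] = []
        for msg in input_data:
            if isinstance(msg.get("content"), list):
                content_parts: List[Dict[str, Any]] = []
                for part in msg["content"]:
                    if part.get("type") == "input_text":
                        content_parts.append({"type": "text", "text": part["text"]})
                    elif part.get("type") == "input_image":
                        # Responses-style image parts become chat-style image_url parts.
                        content_parts.append(
                            {"type": "image_url", "image_url": {"url": part["image_url"]}}
                        )
                    else:
                        content_parts.append(part)
                messages.append({"role": msg["role"], "content": content_parts})
            else:
                messages.append(msg)
        return messages
    raise ValueError("Input must be string or list of messages")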