cua_agent-0.4.34-py3-none-any.whl → cua_agent-0.4.35-py3-none-any.whl

This diff compares the contents of two package versions publicly released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the registry.

This release has been flagged as potentially problematic.

Files changed (61)
  1. agent/__init__.py +4 -10
  2. agent/__main__.py +2 -1
  3. agent/adapters/huggingfacelocal_adapter.py +54 -61
  4. agent/adapters/human_adapter.py +116 -114
  5. agent/adapters/mlxvlm_adapter.py +110 -99
  6. agent/adapters/models/__init__.py +14 -6
  7. agent/adapters/models/generic.py +7 -4
  8. agent/adapters/models/internvl.py +66 -30
  9. agent/adapters/models/opencua.py +23 -8
  10. agent/adapters/models/qwen2_5_vl.py +7 -4
  11. agent/agent.py +184 -158
  12. agent/callbacks/__init__.py +4 -4
  13. agent/callbacks/base.py +45 -31
  14. agent/callbacks/budget_manager.py +22 -10
  15. agent/callbacks/image_retention.py +18 -13
  16. agent/callbacks/logging.py +55 -42
  17. agent/callbacks/operator_validator.py +3 -1
  18. agent/callbacks/pii_anonymization.py +19 -16
  19. agent/callbacks/telemetry.py +67 -61
  20. agent/callbacks/trajectory_saver.py +90 -70
  21. agent/cli.py +115 -110
  22. agent/computers/__init__.py +13 -8
  23. agent/computers/base.py +26 -17
  24. agent/computers/cua.py +27 -23
  25. agent/computers/custom.py +72 -69
  26. agent/decorators.py +23 -14
  27. agent/human_tool/__init__.py +2 -7
  28. agent/human_tool/__main__.py +6 -2
  29. agent/human_tool/server.py +48 -37
  30. agent/human_tool/ui.py +235 -185
  31. agent/integrations/hud/__init__.py +15 -21
  32. agent/integrations/hud/agent.py +101 -83
  33. agent/integrations/hud/proxy.py +90 -57
  34. agent/loops/__init__.py +25 -21
  35. agent/loops/anthropic.py +537 -483
  36. agent/loops/base.py +13 -14
  37. agent/loops/composed_grounded.py +135 -149
  38. agent/loops/gemini.py +31 -12
  39. agent/loops/glm45v.py +135 -133
  40. agent/loops/gta1.py +47 -50
  41. agent/loops/holo.py +4 -2
  42. agent/loops/internvl.py +6 -11
  43. agent/loops/moondream3.py +36 -12
  44. agent/loops/omniparser.py +212 -209
  45. agent/loops/openai.py +49 -50
  46. agent/loops/opencua.py +29 -41
  47. agent/loops/qwen.py +475 -0
  48. agent/loops/uitars.py +237 -202
  49. agent/proxy/examples.py +54 -50
  50. agent/proxy/handlers.py +27 -34
  51. agent/responses.py +330 -330
  52. agent/types.py +11 -5
  53. agent/ui/__init__.py +1 -1
  54. agent/ui/__main__.py +1 -1
  55. agent/ui/gradio/app.py +23 -18
  56. agent/ui/gradio/ui_components.py +310 -161
  57. {cua_agent-0.4.34.dist-info → cua_agent-0.4.35.dist-info}/METADATA +18 -10
  58. cua_agent-0.4.35.dist-info/RECORD +64 -0
  59. cua_agent-0.4.34.dist-info/RECORD +0 -63
  60. {cua_agent-0.4.34.dist-info → cua_agent-0.4.35.dist-info}/WHEEL +0 -0
  61. {cua_agent-0.4.34.dist-info → cua_agent-0.4.35.dist-info}/entry_points.txt +0 -0
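
The diff reproduced below covers item 4 above, agent/adapters/human_adapter.py. Nearly every hunk is mechanical formatter output: import sorting, single-to-double quote normalization, wrapping long calls with trailing commas, and stripping trailing whitespace from blank lines. The adapter's behavior is unchanged; two illustrative sketches of how it fits together follow the diff.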
--- a/agent/adapters/human_adapter.py
+++ b/agent/adapters/human_adapter.py
@@ -1,22 +1,23 @@
-import os
 import asyncio
+import os
+from typing import Any, AsyncIterator, Dict, Iterator, List
+
 import requests
-from typing import List, Dict, Any, Iterator, AsyncIterator
-from litellm.types.utils import GenericStreamingChunk, ModelResponse
+from litellm import acompletion, completion
 from litellm.llms.custom_llm import CustomLLM
-from litellm import completion, acompletion
+from litellm.types.utils import GenericStreamingChunk, ModelResponse
 
 
 class HumanAdapter(CustomLLM):
     """Human Adapter for human-in-the-loop completions.
-
+
     This adapter sends completion requests to a human completion server
     where humans can review and respond to AI requests.
     """
-
+
     def __init__(self, base_url: str | None = None, timeout: float = 300.0, **kwargs):
         """Initialize the human adapter.
-
+
         Args:
             base_url: Base URL for the human completion server.
                 Defaults to HUMAN_BASE_URL environment variable or http://localhost:8002
@@ -24,60 +25,58 @@ class HumanAdapter(CustomLLM):
             **kwargs: Additional arguments
         """
         super().__init__()
-        self.base_url = base_url or os.getenv('HUMAN_BASE_URL', 'http://localhost:8002')
+        self.base_url = base_url or os.getenv("HUMAN_BASE_URL", "http://localhost:8002")
         self.timeout = timeout
-
+
         # Ensure base_url doesn't end with slash
-        self.base_url = self.base_url.rstrip('/')
-
+        self.base_url = self.base_url.rstrip("/")
+
     def _queue_completion(self, messages: List[Dict[str, Any]], model: str) -> str:
         """Queue a completion request and return the call ID.
-
+
         Args:
             messages: Messages in OpenAI format
            model: Model name
-
+
         Returns:
             Call ID for tracking the request
-
+
         Raises:
             Exception: If queueing fails
         """
         try:
             response = requests.post(
-                f"{self.base_url}/queue",
-                json={"messages": messages, "model": model},
-                timeout=10
+                f"{self.base_url}/queue", json={"messages": messages, "model": model}, timeout=10
             )
             response.raise_for_status()
             return response.json()["id"]
         except requests.RequestException as e:
             raise Exception(f"Failed to queue completion request: {e}")
-
+
     def _wait_for_completion(self, call_id: str) -> Dict[str, Any]:
         """Wait for human to complete the call.
-
+
         Args:
             call_id: ID of the queued completion call
-
+
         Returns:
             Dict containing response and/or tool_calls
-
+
         Raises:
             TimeoutError: If timeout is exceeded
             Exception: If completion fails
         """
         import time
-
+
         start_time = time.time()
-
+
         while True:
             try:
                 # Check status
                 status_response = requests.get(f"{self.base_url}/status/{call_id}")
                 status_response.raise_for_status()
                 status_data = status_response.json()
-
+
                 if status_data["status"] == "completed":
                     result = {}
                     if "response" in status_data and status_data["response"]:
@@ -88,38 +87,41 @@ class HumanAdapter(CustomLLM):
                 elif status_data["status"] == "failed":
                     error_msg = status_data.get("error", "Unknown error")
                     raise Exception(f"Completion failed: {error_msg}")
-
+
                 # Check timeout
                 if time.time() - start_time > self.timeout:
-                    raise TimeoutError(f"Timeout waiting for human response after {self.timeout} seconds")
-
+                    raise TimeoutError(
+                        f"Timeout waiting for human response after {self.timeout} seconds"
+                    )
+
                 # Wait before checking again
                 time.sleep(1.0)
-
+
             except requests.RequestException as e:
                 if time.time() - start_time > self.timeout:
                     raise TimeoutError(f"Timeout waiting for human response: {e}")
                 # Continue trying if we haven't timed out
                 time.sleep(1.0)
-
+
     async def _async_wait_for_completion(self, call_id: str) -> Dict[str, Any]:
         """Async version of wait_for_completion.
-
+
         Args:
             call_id: ID of the queued completion call
-
+
         Returns:
             Dict containing response and/or tool_calls
-
+
         Raises:
             TimeoutError: If timeout is exceeded
             Exception: If completion fails
         """
-        import aiohttp
         import time
-
+
+        import aiohttp
+
         start_time = time.time()
-
+
         async with aiohttp.ClientSession() as session:
             while True:
                 try:
@@ -127,7 +129,7 @@ class HumanAdapter(CustomLLM):
                     async with session.get(f"{self.base_url}/status/{call_id}") as response:
                         response.raise_for_status()
                         status_data = await response.json()
-
+
                     if status_data["status"] == "completed":
                         result = {}
                         if "response" in status_data and status_data["response"]:
@@ -138,166 +140,158 @@ class HumanAdapter(CustomLLM):
                     elif status_data["status"] == "failed":
                         error_msg = status_data.get("error", "Unknown error")
                         raise Exception(f"Completion failed: {error_msg}")
-
+
                     # Check timeout
                     if time.time() - start_time > self.timeout:
-                        raise TimeoutError(f"Timeout waiting for human response after {self.timeout} seconds")
-
+                        raise TimeoutError(
+                            f"Timeout waiting for human response after {self.timeout} seconds"
+                        )
+
                     # Wait before checking again
                     await asyncio.sleep(1.0)
-
+
                 except Exception as e:
                     if time.time() - start_time > self.timeout:
                         raise TimeoutError(f"Timeout waiting for human response: {e}")
                     # Continue trying if we haven't timed out
                     await asyncio.sleep(1.0)
-
+
     def _generate_response(self, messages: List[Dict[str, Any]], model: str) -> Dict[str, Any]:
         """Generate a human response for the given messages.
-
+
         Args:
             messages: Messages in OpenAI format
             model: Model name
-
+
         Returns:
             Dict containing response and/or tool_calls
         """
         # Queue the completion request
         call_id = self._queue_completion(messages, model)
-
+
         # Wait for human response
         response = self._wait_for_completion(call_id)
-
+
         return response
-
-    async def _async_generate_response(self, messages: List[Dict[str, Any]], model: str) -> Dict[str, Any]:
+
+    async def _async_generate_response(
+        self, messages: List[Dict[str, Any]], model: str
+    ) -> Dict[str, Any]:
         """Async version of _generate_response.
-
+
         Args:
             messages: Messages in OpenAI format
             model: Model name
-
+
         Returns:
             Dict containing response and/or tool_calls
         """
         # Queue the completion request (sync operation)
         call_id = self._queue_completion(messages, model)
-
+
         # Wait for human response (async)
         response = await self._async_wait_for_completion(call_id)
-
+
         return response
-
+
     def completion(self, *args, **kwargs) -> ModelResponse:
         """Synchronous completion method.
-
+
         Returns:
             ModelResponse with human-generated text or tool calls
         """
-        messages = kwargs.get('messages', [])
-        model = kwargs.get('model', 'human')
-
+        messages = kwargs.get("messages", [])
+        model = kwargs.get("model", "human")
+
         # Generate human response
         human_response_data = self._generate_response(messages, model)
-
+
         # Create ModelResponse with proper structure
-        from litellm.types.utils import ModelResponse, Choices, Message
-        import uuid
         import time
-
+        import uuid
+
+        from litellm.types.utils import Choices, Message, ModelResponse
+
         # Create message content based on response type
         if "tool_calls" in human_response_data and human_response_data["tool_calls"]:
             # Tool calls response
             message = Message(
                 role="assistant",
                 content=human_response_data.get("response", ""),
-                tool_calls=human_response_data["tool_calls"]
+                tool_calls=human_response_data["tool_calls"],
             )
         else:
             # Text response
-            message = Message(
-                role="assistant",
-                content=human_response_data.get("response", "")
-            )
-
-        choice = Choices(
-            finish_reason="stop",
-            index=0,
-            message=message
-        )
-
+            message = Message(role="assistant", content=human_response_data.get("response", ""))
+
+        choice = Choices(finish_reason="stop", index=0, message=message)
+
         result = ModelResponse(
             id=f"human-{uuid.uuid4()}",
             choices=[choice],
             created=int(time.time()),
             model=f"human/{model}",
-            object="chat.completion"
+            object="chat.completion",
         )
-
+
         return result
-
+
     async def acompletion(self, *args, **kwargs) -> ModelResponse:
         """Asynchronous completion method.
-
+
         Returns:
             ModelResponse with human-generated text or tool calls
         """
-        messages = kwargs.get('messages', [])
-        model = kwargs.get('model', 'human')
-
+        messages = kwargs.get("messages", [])
+        model = kwargs.get("model", "human")
+
         # Generate human response
         human_response_data = await self._async_generate_response(messages, model)
-
+
         # Create ModelResponse with proper structure
-        from litellm.types.utils import ModelResponse, Choices, Message
-        import uuid
         import time
-
+        import uuid
+
+        from litellm.types.utils import Choices, Message, ModelResponse
+
         # Create message content based on response type
         if "tool_calls" in human_response_data and human_response_data["tool_calls"]:
             # Tool calls response
             message = Message(
                 role="assistant",
                 content=human_response_data.get("response", ""),
-                tool_calls=human_response_data["tool_calls"]
+                tool_calls=human_response_data["tool_calls"],
            )
         else:
             # Text response
-            message = Message(
-                role="assistant",
-                content=human_response_data.get("response", "")
-            )
-
-        choice = Choices(
-            finish_reason="stop",
-            index=0,
-            message=message
-        )
-
+            message = Message(role="assistant", content=human_response_data.get("response", ""))
+
+        choice = Choices(finish_reason="stop", index=0, message=message)
+
         result = ModelResponse(
             id=f"human-{uuid.uuid4()}",
             choices=[choice],
             created=int(time.time()),
             model=f"human/{model}",
-            object="chat.completion"
+            object="chat.completion",
         )
-
+
         return result
-
+
     def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]:
         """Synchronous streaming method.
-
+
         Yields:
             Streaming chunks with human-generated text or tool calls
         """
-        messages = kwargs.get('messages', [])
-        model = kwargs.get('model', 'human')
-
+        messages = kwargs.get("messages", [])
+        model = kwargs.get("model", "human")
+
         # Generate human response
         human_response_data = self._generate_response(messages, model)
-
+
         import time
-
+
         # Handle tool calls vs text response
         if "tool_calls" in human_response_data and human_response_data["tool_calls"]:
             # Stream tool calls as a single chunk
@@ -319,22 +313,26 @@ class HumanAdapter(CustomLLM):
                 "is_finished": True,
                 "text": response_text,
                 "tool_use": None,
-                "usage": {"completion_tokens": len(response_text.split()), "prompt_tokens": 0, "total_tokens": len(response_text.split())},
+                "usage": {
+                    "completion_tokens": len(response_text.split()),
+                    "prompt_tokens": 0,
+                    "total_tokens": len(response_text.split()),
+                },
             }
             yield generic_chunk
-
+
     async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]:
         """Asynchronous streaming method.
-
+
         Yields:
             Streaming chunks with human-generated text or tool calls
         """
-        messages = kwargs.get('messages', [])
-        model = kwargs.get('model', 'human')
-
+        messages = kwargs.get("messages", [])
+        model = kwargs.get("model", "human")
+
         # Generate human response
         human_response = await self._async_generate_response(messages, model)
-
+
         # Return as single streaming chunk
         generic_streaming_chunk: GenericStreamingChunk = {
             "finish_reason": "stop",
@@ -342,7 +340,11 @@ class HumanAdapter(CustomLLM):
             "is_finished": True,
             "text": human_response,
             "tool_use": None,
-            "usage": {"completion_tokens": len(human_response.split()), "prompt_tokens": 0, "total_tokens": len(human_response.split())},
+            "usage": {
+                "completion_tokens": len(human_response.split()),
+                "prompt_tokens": 0,
+                "total_tokens": len(human_response.split()),
+            },
         }
-
-        yield generic_streaming_chunk
+
+        yield generic_streaming_chunk
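
Taken together, the diff pins down the adapter's entire contract with the human completion server: POST /queue with {"messages", "model"} returns a call ID, and GET /status/{call_id} is polled once a second until "status" becomes "completed" (carrying "response" and/or "tool_calls") or "failed" (carrying "error"). A minimal in-memory stub of that contract, handy for exercising the adapter without the real agent/human_tool/server.py (whose implementation may differ), might look like the sketch below; the FastAPI choice and the /respond helper route are assumptions, not part of the package:

```python
# Hypothetical stand-in for the human completion server the adapter polls.
# Only POST /queue and GET /status/{id} are taken from the diff above;
# the /respond route and FastAPI itself are illustrative choices.
import uuid
from typing import Any, Dict

from fastapi import FastAPI, HTTPException

app = FastAPI()
calls: Dict[str, Dict[str, Any]] = {}  # call_id -> status record


@app.post("/queue")
def queue_completion(payload: Dict[str, Any]) -> Dict[str, str]:
    # The adapter sends {"messages": [...], "model": "..."} and reads back "id".
    call_id = str(uuid.uuid4())
    calls[call_id] = {"status": "pending", "request": payload}
    return {"id": call_id}


@app.get("/status/{call_id}")
def get_status(call_id: str) -> Dict[str, Any]:
    # Polled by the adapter until "status" is "completed" or "failed";
    # it then reads "response" and/or "tool_calls" (or "error" on failure).
    record = calls.get(call_id)
    if record is None:
        raise HTTPException(status_code=404, detail="unknown call id")
    return record


@app.post("/respond/{call_id}")
def respond(call_id: str, payload: Dict[str, Any]) -> Dict[str, str]:
    # Hypothetical helper so a human (or a test) can resolve a pending call.
    calls[call_id].update(status="completed", response=payload.get("response", ""))
    return {"id": call_id}
```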
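On the client side, because HumanAdapter subclasses litellm's CustomLLM, it should plug into litellm's custom-provider registration. A minimal sketch under that assumption; the "human" provider prefix and the model name are illustrative, not taken from the diff:

```python
# Route completions through the adapter via litellm's custom_provider_map hook.
import litellm

from agent.adapters.human_adapter import HumanAdapter

litellm.custom_provider_map = [
    {"provider": "human", "custom_handler": HumanAdapter(base_url="http://localhost:8002")}
]

response = litellm.completion(
    model="human/operator",  # hypothetical model name; the suffix is passed through
    messages=[{"role": "user", "content": "Approve launching the deploy script?"}],
)
print(response.choices[0].message.content)
```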