PyPI - cua-agent - Versions diffs - 0.4.34__py3-none-any.whl → 0.4.35__py3-none-any.whl - Mend

cua-agent 0.4.34__py3-none-any.whl → 0.4.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (61) hide show

agent/__init__.py +4 -10
agent/__main__.py +2 -1
agent/adapters/huggingfacelocal_adapter.py +54 -61
agent/adapters/human_adapter.py +116 -114
agent/adapters/mlxvlm_adapter.py +110 -99
agent/adapters/models/__init__.py +14 -6
agent/adapters/models/generic.py +7 -4
agent/adapters/models/internvl.py +66 -30
agent/adapters/models/opencua.py +23 -8
agent/adapters/models/qwen2_5_vl.py +7 -4
agent/agent.py +184 -158
agent/callbacks/__init__.py +4 -4
agent/callbacks/base.py +45 -31
agent/callbacks/budget_manager.py +22 -10
agent/callbacks/image_retention.py +18 -13
agent/callbacks/logging.py +55 -42
agent/callbacks/operator_validator.py +3 -1
agent/callbacks/pii_anonymization.py +19 -16
agent/callbacks/telemetry.py +67 -61
agent/callbacks/trajectory_saver.py +90 -70
agent/cli.py +115 -110
agent/computers/__init__.py +13 -8
agent/computers/base.py +26 -17
agent/computers/cua.py +27 -23
agent/computers/custom.py +72 -69
agent/decorators.py +23 -14
agent/human_tool/__init__.py +2 -7
agent/human_tool/__main__.py +6 -2
agent/human_tool/server.py +48 -37
agent/human_tool/ui.py +235 -185
agent/integrations/hud/__init__.py +15 -21
agent/integrations/hud/agent.py +101 -83
agent/integrations/hud/proxy.py +90 -57
agent/loops/__init__.py +25 -21
agent/loops/anthropic.py +537 -483
agent/loops/base.py +13 -14
agent/loops/composed_grounded.py +135 -149
agent/loops/gemini.py +31 -12
agent/loops/glm45v.py +135 -133
agent/loops/gta1.py +47 -50
agent/loops/holo.py +4 -2
agent/loops/internvl.py +6 -11
agent/loops/moondream3.py +36 -12
agent/loops/omniparser.py +212 -209
agent/loops/openai.py +49 -50
agent/loops/opencua.py +29 -41
agent/loops/qwen.py +475 -0
agent/loops/uitars.py +237 -202
agent/proxy/examples.py +54 -50
agent/proxy/handlers.py +27 -34
agent/responses.py +330 -330
agent/types.py +11 -5
agent/ui/__init__.py +1 -1
agent/ui/__main__.py +1 -1
agent/ui/gradio/app.py +23 -18
agent/ui/gradio/ui_components.py +310 -161
{cua_agent-0.4.34.dist-info → cua_agent-0.4.35.dist-info}/METADATA +18 -10
cua_agent-0.4.35.dist-info/RECORD +64 -0
cua_agent-0.4.34.dist-info/RECORD +0 -63
{cua_agent-0.4.34.dist-info → cua_agent-0.4.35.dist-info}/WHEEL +0 -0
{cua_agent-0.4.34.dist-info → cua_agent-0.4.35.dist-info}/entry_points.txt +0 -0

agent/types.py CHANGED Viewed

@@ -2,37 +2,43 @@
 Type definitions for agent
 """
-from typing import Dict, List, Any, Optional, Callable, Protocol, Literal
-from pydantic import BaseModel
 import re
-from litellm import ResponseInputParam, ResponsesAPIResponse, ToolParam
 from collections.abc import Iterable
+from typing import Any, Callable, Dict, List, Literal, Optional, Protocol
+from litellm import ResponseInputParam, ResponsesAPIResponse, ToolParam
+from pydantic import BaseModel
 # Agent input types
 Messages = str | ResponseInputParam | List[Dict[str, Any]]
 Tools = Optional[Iterable[ToolParam]]
 # Agent output types
-AgentResponse = ResponsesAPIResponse
+AgentResponse = ResponsesAPIResponse
 AgentCapability = Literal["step", "click"]
 # Exception types
 class ToolError(RuntimeError):
     """Base exception for tool-related errors"""
     pass
 class IllegalArgumentError(ToolError):
     """Exception raised when function arguments are invalid"""
     pass
 # Agent config registration
 class AgentConfigInfo(BaseModel):
     """Information about a registered agent config"""
     agent_class: type
     models_regex: str
     priority: int = 0
     def matches_model(self, model: str) -> bool:
         """Check if this agent config matches the given model"""
         return bool(re.match(self.models_regex, model))

agent/ui/__init__.py CHANGED Viewed

@@ -2,6 +2,6 @@
 UI components for agent
 """
-from .gradio import launch_ui, create_gradio_ui
+from .gradio import create_gradio_ui, launch_ui
 __all__ = ["launch_ui", "create_gradio_ui"]

agent/ui/__main__.py CHANGED Viewed

@@ -1,4 +1,4 @@
 from .gradio import launch_ui
 if __name__ == "__main__":
-    launch_ui()
+    launch_ui()

agent/ui/gradio/app.py CHANGED Viewed

@@ -18,21 +18,21 @@ Requirements:
     - OpenAI or Anthropic API key
 """
-import os
 import asyncio
-import logging
 import json
+import logging
+import os
 import platform
 from pathlib import Path
-from typing import Dict, List, Optional, AsyncGenerator, Any, Tuple, Union
+from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union, cast
 import gradio as gr
-from gradio.components.chatbot import MetadataDict
-from typing import cast
 # Import from agent package
 from agent import ComputerAgent
-from agent.types import Messages, AgentResponse
+from agent.types import AgentResponse, Messages
 from computer import Computer
+from gradio.components.chatbot import MetadataDict
 # Global variables
 global_agent = None
@@ -42,11 +42,13 @@ SETTINGS_FILE = Path(".gradio_settings.json")
 logging.basicConfig(level=logging.INFO)
 import dotenv
 if dotenv.load_dotenv():
     print(f"DEBUG - Loaded environment variables from {dotenv.find_dotenv()}")
 else:
     print("DEBUG - No .env file found")
 # --- Settings Load/Save Functions ---
 def load_settings() -> Dict[str, Any]:
     """Loads settings from the JSON file."""
@@ -84,7 +86,7 @@ def save_settings(settings: Dict[str, Any]):
 #     async def on_screenshot(self, screenshot_base64: str, action_type: str = "") -> None:
 #         """Add screenshot to chatbot when a screenshot is taken."""
 #         image_markdown = f"![Screenshot after {action_type}](data:image/png;base64,{screenshot_base64})"
 #         if self.chatbot_history is not None:
 #             self.chatbot_history.append(
 #                 gr.ChatMessage(
@@ -141,7 +143,7 @@ def get_model_string(model_name: str, loop_provider: str) -> str:
             ollama_model = model_name.split("OMNI: Ollama ", 1)[1]
             return f"omniparser+ollama_chat/{ollama_model}"
         return "omniparser+ollama_chat/llama3"
     # Map based on loop provider
     mapping = MODEL_MAPPINGS.get(loop_provider.lower(), MODEL_MAPPINGS["openai"])
     return mapping.get(model_name, mapping["default"])
@@ -151,6 +153,7 @@ def get_ollama_models() -> List[str]:
     """Get available models from Ollama if installed."""
     try:
         import subprocess
         result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
         if result.returncode == 0:
             lines = result.stdout.strip().split("\n")
@@ -174,16 +177,14 @@ def create_computer_instance(
     os_type: str = "macos",
     provider_type: str = "lume",
     name: Optional[str] = None,
-    api_key: Optional[str] = None
+    api_key: Optional[str] = None,
 ) -> Computer:
     """Create or get the global Computer instance."""
     global global_computer
     if global_computer is None:
         if provider_type == "localhost":
             global_computer = Computer(
-                verbosity=verbosity,
-                os_type=os_type,
-                use_host_computer_server=True
+                verbosity=verbosity, os_type=os_type, use_host_computer_server=True
             )
         else:
             global_computer = Computer(
@@ -191,7 +192,7 @@ def create_computer_instance(
                 os_type=os_type,
                 provider_type=provider_type,
                 name=name if name else "",
-                api_key=api_key
+                api_key=api_key,
             )
     return global_computer
@@ -217,7 +218,7 @@ def create_agent(
         os_type=computer_os,
         provider_type=computer_provider,
         name=computer_name,
-        api_key=computer_api_key
+        api_key=computer_api_key,
     )
     # Handle custom models
@@ -233,12 +234,15 @@ def create_agent(
         "only_n_most_recent_images": only_n_most_recent_images,
         "verbosity": verbosity,
     }
     if save_trajectory:
         agent_kwargs["trajectory_dir"] = "trajectories"
     if max_trajectory_budget:
-        agent_kwargs["max_trajectory_budget"] = {"max_budget": max_trajectory_budget, "raise_error": True}
+        agent_kwargs["max_trajectory_budget"] = {
+            "max_budget": max_trajectory_budget,
+            "raise_error": True,
+        }
     global_agent = ComputerAgent(**agent_kwargs)
     return global_agent
@@ -247,7 +251,8 @@ def create_agent(
 def launch_ui():
     """Standalone function to launch the Gradio app."""
     from agent.ui.gradio.ui_components import create_gradio_ui
-    print(f"Starting Gradio app for CUA Agent...")
+    print("Starting Gradio app for CUA Agent...")
     demo = create_gradio_ui()
     demo.launch(share=False, inbrowser=True)

cua-agent 0.4.34__py3-none-any.whl → 0.4.35__py3-none-any.whl

Potentially problematic release.

cua-agent 0.4.34__py3-none-any.whl → 0.4.35__py3-none-any.whl