PyPI - cua-agent - Versions diffs - 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl - Mend

cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (79) hide show

agent/__init__.py +4 -10
agent/__main__.py +2 -1
agent/adapters/__init__.py +4 -0
agent/adapters/azure_ml_adapter.py +283 -0
agent/adapters/cua_adapter.py +161 -0
agent/adapters/huggingfacelocal_adapter.py +67 -125
agent/adapters/human_adapter.py +116 -114
agent/adapters/mlxvlm_adapter.py +110 -99
agent/adapters/models/__init__.py +41 -0
agent/adapters/models/generic.py +78 -0
agent/adapters/models/internvl.py +290 -0
agent/adapters/models/opencua.py +115 -0
agent/adapters/models/qwen2_5_vl.py +78 -0
agent/agent.py +337 -185
agent/callbacks/__init__.py +9 -4
agent/callbacks/base.py +45 -31
agent/callbacks/budget_manager.py +22 -10
agent/callbacks/image_retention.py +54 -98
agent/callbacks/logging.py +55 -42
agent/callbacks/operator_validator.py +35 -33
agent/callbacks/otel.py +291 -0
agent/callbacks/pii_anonymization.py +19 -16
agent/callbacks/prompt_instructions.py +47 -0
agent/callbacks/telemetry.py +99 -61
agent/callbacks/trajectory_saver.py +95 -69
agent/cli.py +269 -119
agent/computers/__init__.py +14 -9
agent/computers/base.py +32 -19
agent/computers/cua.py +52 -25
agent/computers/custom.py +78 -71
agent/decorators.py +23 -14
agent/human_tool/__init__.py +2 -7
agent/human_tool/__main__.py +6 -2
agent/human_tool/server.py +48 -37
agent/human_tool/ui.py +359 -235
agent/integrations/hud/__init__.py +38 -99
agent/integrations/hud/agent.py +369 -0
agent/integrations/hud/proxy.py +166 -52
agent/loops/__init__.py +44 -14
agent/loops/anthropic.py +579 -492
agent/loops/base.py +19 -15
agent/loops/composed_grounded.py +136 -150
agent/loops/fara/__init__.py +8 -0
agent/loops/fara/config.py +506 -0
agent/loops/fara/helpers.py +357 -0
agent/loops/fara/schema.py +143 -0
agent/loops/gelato.py +183 -0
agent/loops/gemini.py +935 -0
agent/loops/generic_vlm.py +601 -0
agent/loops/glm45v.py +140 -135
agent/loops/gta1.py +48 -51
agent/loops/holo.py +218 -0
agent/loops/internvl.py +180 -0
agent/loops/moondream3.py +493 -0
agent/loops/omniparser.py +326 -226
agent/loops/openai.py +50 -51
agent/loops/opencua.py +134 -0
agent/loops/uiins.py +175 -0
agent/loops/uitars.py +247 -206
agent/loops/uitars2.py +951 -0
agent/playground/__init__.py +5 -0
agent/playground/server.py +301 -0
agent/proxy/examples.py +61 -57
agent/proxy/handlers.py +46 -39
agent/responses.py +447 -347
agent/tools/__init__.py +24 -0
agent/tools/base.py +253 -0
agent/tools/browser_tool.py +423 -0
agent/types.py +11 -5
agent/ui/__init__.py +1 -1
agent/ui/__main__.py +1 -1
agent/ui/gradio/app.py +25 -22
agent/ui/gradio/ui_components.py +314 -167
cua_agent-0.7.16.dist-info/METADATA +85 -0
cua_agent-0.7.16.dist-info/RECORD +79 -0
{cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/WHEEL +1 -1
cua_agent-0.4.22.dist-info/METADATA +0 -436
cua_agent-0.4.22.dist-info/RECORD +0 -51
{cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/entry_points.txt +0 -0

agent/proxy/handlers.py CHANGED Viewed

@@ -2,29 +2,34 @@
 Request handlers for the proxy endpoints.
 """
-import asyncio
 import json
 import logging
 import os
+import re
 from contextlib import contextmanager
-from typing import Dict, Any, List, Union, Optional
+from typing import Any, Dict, List, Optional, Union
-from ..agent import ComputerAgent
 from computer import Computer
+from ..agent import ComputerAgent
+from ..tools.browser_tool import BrowserTool
 logger = logging.getLogger(__name__)
+# Pattern to detect FARA models (case-insensitive)
+FARA_MODEL_PATTERN = re.compile(r"(?i).*fara.*")
 class ResponsesHandler:
     """Handler for /responses endpoint that processes agent requests."""
     def __init__(self):
         self.computer = None
         self.agent = None
         # Simple in-memory caches
         self._computer_cache: Dict[str, Any] = {}
         self._agent_cache: Dict[str, Any] = {}
     async def setup_computer_agent(
         self,
         model: str,
@@ -75,7 +80,9 @@ class ResponsesHandler:
                 computer = Computer(**default_c_config)
                 await computer.__aenter__()
                 self._computer_cache[comp_key] = computer
-                logger.info(f"Computer created and cached with key={comp_key} config={default_c_config}")
+                logger.info(
+                    f"Computer created and cached with key={comp_key} config={default_c_config}"
+                )
             else:
                 logger.info(f"Reusing cached computer for key={comp_key}")
@@ -88,41 +95,51 @@ class ResponsesHandler:
         agent_key_payload = {"model": model, **agent_kwargs_for_key}
         agent_key = _stable_key(agent_key_payload)
+        # Determine the appropriate tool based on model type
+        # FARA models require BrowserTool instead of Computer for browser-specific actions
+        # (visit_url, web_search, terminate, history_back, etc.)
+        is_fara_model = bool(FARA_MODEL_PATTERN.match(model))
+        if is_fara_model and computer is not None:
+            tool = BrowserTool(interface=computer.interface)
+            logger.info(f"Using BrowserTool for FARA model: {model}")
+        else:
+            tool = computer
         agent = self._agent_cache.get(agent_key)
         if agent is None:
             # Default agent configuration
             default_a_config: Dict[str, Any] = {"model": model}
             if not has_custom_tools:
-                default_a_config["tools"] = [computer]
+                default_a_config["tools"] = [tool]
             # Apply user overrides, but keep tools unless user explicitly sets
             if agent_kwargs:
                 if not has_custom_tools:
-                    agent_kwargs.setdefault("tools", [computer])
+                    agent_kwargs.setdefault("tools", [tool])
                 default_a_config.update(agent_kwargs)
             # JSON-derived kwargs may have loose types; ignore static arg typing here
             agent = ComputerAgent(**default_a_config)  # type: ignore[arg-type]
             self._agent_cache[agent_key] = agent
             logger.info(f"Agent created and cached with key={agent_key} model={model}")
         else:
-            # Ensure cached agent uses the current computer tool (in case object differs)
+            # Ensure cached agent uses the current tool (in case object differs)
             # Only update if tools not explicitly provided in agent_kwargs
             if not has_custom_tools:
                 try:
-                    agent.tools = [computer]
+                    agent.tools = [tool]
                 except Exception:
                     pass
             logger.info(f"Reusing cached agent for key={agent_key}")
         # Bind current agent reference
         self.agent = agent
     async def process_request(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
         """
         Process a /responses request and return the result.
         Args:
             request_data: Dictionary containing model, input, and optional kwargs
         Returns:
             Dictionary with the agent's response
         """
@@ -133,12 +150,12 @@ class ResponsesHandler:
             agent_kwargs = request_data.get("agent_kwargs", {})
             computer_kwargs = request_data.get("computer_kwargs", {})
             env_overrides = request_data.get("env", {}) or {}
             if not model:
                 raise ValueError("Model is required")
             if not input_data:
                 raise ValueError("Input is required")
             # Apply env overrides for the duration of this request
             with self._env_overrides(env_overrides):
                 # Set up (and possibly reuse) computer and agent via caches
@@ -155,28 +172,22 @@ class ResponsesHandler:
                 # Run agent and get first result
                 async for result in agent.run(messages):
                     # Return the first result and break
-                    return {
-                        "success": True,
-                        "result": result,
-                        "model": model
-                    }
+                    return {"success": True, "result": result, "model": model}
             # If no results were yielded
-            return {
-                "success": False,
-                "error": "No results from agent",
-                "model": model
-            }
+            return {"success": False, "error": "No results from agent", "model": model}
         except Exception as e:
             logger.error(f"Error processing request: {e}")
             return {
                 "success": False,
                 "error": str(e),
-                "model": request_data.get("model", "unknown")
+                "model": request_data.get("model", "unknown"),
             }
-    def _convert_input_to_messages(self, input_data: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
+    def _convert_input_to_messages(
+        self, input_data: Union[str, List[Dict[str, Any]]]
+    ) -> List[Dict[str, Any]]:
         """Convert input data to messages format."""
         if isinstance(input_data, str):
             # Simple string input
@@ -192,22 +203,18 @@ class ResponsesHandler:
                         if part.get("type") == "input_text":
                             content_parts.append({"type": "text", "text": part["text"]})
                         elif part.get("type") == "input_image":
-                            content_parts.append({
-                                "type": "image_url",
-                                "image_url": {"url": part["image_url"]}
-                            })
+                            content_parts.append(
+                                {"type": "image_url", "image_url": {"url": part["image_url"]}}
+                            )
                         else:
                             content_parts.append(part)
-                    messages.append({
-                        "role": msg["role"],
-                        "content": content_parts
-                    })
+                    messages.append({"role": msg["role"], "content": content_parts})
                 else:
                     messages.append(msg)
             return messages
         else:
             raise ValueError("Input must be string or list of messages")
     async def cleanup(self):
         """Clean up resources."""
         if self.computer:

cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl

Potentially problematic release.

cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl