PyPI - cua-agent - Versions diffs - 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl - Mend

cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (79)

agent/__init__.py +4 -10
agent/__main__.py +2 -1
agent/adapters/__init__.py +4 -0
agent/adapters/azure_ml_adapter.py +283 -0
agent/adapters/cua_adapter.py +161 -0
agent/adapters/huggingfacelocal_adapter.py +67 -125
agent/adapters/human_adapter.py +116 -114
agent/adapters/mlxvlm_adapter.py +110 -99
agent/adapters/models/__init__.py +41 -0
agent/adapters/models/generic.py +78 -0
agent/adapters/models/internvl.py +290 -0
agent/adapters/models/opencua.py +115 -0
agent/adapters/models/qwen2_5_vl.py +78 -0
agent/agent.py +337 -185
agent/callbacks/__init__.py +9 -4
agent/callbacks/base.py +45 -31
agent/callbacks/budget_manager.py +22 -10
agent/callbacks/image_retention.py +54 -98
agent/callbacks/logging.py +55 -42
agent/callbacks/operator_validator.py +35 -33
agent/callbacks/otel.py +291 -0
agent/callbacks/pii_anonymization.py +19 -16
agent/callbacks/prompt_instructions.py +47 -0
agent/callbacks/telemetry.py +99 -61
agent/callbacks/trajectory_saver.py +95 -69
agent/cli.py +269 -119
agent/computers/__init__.py +14 -9
agent/computers/base.py +32 -19
agent/computers/cua.py +52 -25
agent/computers/custom.py +78 -71
agent/decorators.py +23 -14
agent/human_tool/__init__.py +2 -7
agent/human_tool/__main__.py +6 -2
agent/human_tool/server.py +48 -37
agent/human_tool/ui.py +359 -235
agent/integrations/hud/__init__.py +38 -99
agent/integrations/hud/agent.py +369 -0
agent/integrations/hud/proxy.py +166 -52
agent/loops/__init__.py +44 -14
agent/loops/anthropic.py +579 -492
agent/loops/base.py +19 -15
agent/loops/composed_grounded.py +136 -150
agent/loops/fara/__init__.py +8 -0
agent/loops/fara/config.py +506 -0
agent/loops/fara/helpers.py +357 -0
agent/loops/fara/schema.py +143 -0
agent/loops/gelato.py +183 -0
agent/loops/gemini.py +935 -0
agent/loops/generic_vlm.py +601 -0
agent/loops/glm45v.py +140 -135
agent/loops/gta1.py +48 -51
agent/loops/holo.py +218 -0
agent/loops/internvl.py +180 -0
agent/loops/moondream3.py +493 -0
agent/loops/omniparser.py +326 -226
agent/loops/openai.py +50 -51
agent/loops/opencua.py +134 -0
agent/loops/uiins.py +175 -0
agent/loops/uitars.py +247 -206
agent/loops/uitars2.py +951 -0
agent/playground/__init__.py +5 -0
agent/playground/server.py +301 -0
agent/proxy/examples.py +61 -57
agent/proxy/handlers.py +46 -39
agent/responses.py +447 -347
agent/tools/__init__.py +24 -0
agent/tools/base.py +253 -0
agent/tools/browser_tool.py +423 -0
agent/types.py +11 -5
agent/ui/__init__.py +1 -1
agent/ui/__main__.py +1 -1
agent/ui/gradio/app.py +25 -22
agent/ui/gradio/ui_components.py +314 -167
cua_agent-0.7.16.dist-info/METADATA +85 -0
cua_agent-0.7.16.dist-info/RECORD +79 -0
{cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/WHEEL +1 -1
cua_agent-0.4.22.dist-info/METADATA +0 -436
cua_agent-0.4.22.dist-info/RECORD +0 -51
{cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/entry_points.txt +0 -0

agent/callbacks/__init__.py CHANGED Viewed

@@ -3,19 +3,24 @@ Callback system for ComputerAgent preprocessing and postprocessing hooks.
 """
 from .base import AsyncCallbackHandler
+from .budget_manager import BudgetManagerCallback
 from .image_retention import ImageRetentionCallback
 from .logging import LoggingCallback
-from .trajectory_saver import TrajectorySaverCallback
-from .budget_manager import BudgetManagerCallback
-from .telemetry import TelemetryCallback
+from .otel import OtelCallback, OtelErrorCallback
 from .operator_validator import OperatorNormalizerCallback
+from .prompt_instructions import PromptInstructionsCallback
+from .telemetry import TelemetryCallback
+from .trajectory_saver import TrajectorySaverCallback
 __all__ = [
     "AsyncCallbackHandler",
-    "ImageRetentionCallback",
+    "ImageRetentionCallback",
     "LoggingCallback",
     "TrajectorySaverCallback",
     "BudgetManagerCallback",
     "TelemetryCallback",
+    "OtelCallback",
+    "OtelErrorCallback",
     "OperatorNormalizerCallback",
+    "PromptInstructionsCallback",
 ]

agent/callbacks/base.py CHANGED Viewed

@@ -3,7 +3,7 @@ Base callback handler interface for ComputerAgent preprocessing and postprocessi
 """
 from abc import ABC, abstractmethod
-from typing import List, Dict, Any, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 class AsyncCallbackHandler(ABC):
@@ -16,42 +16,52 @@ class AsyncCallbackHandler(ABC):
         """Called at the start of an agent run loop."""
         pass
-    async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
+    async def on_run_end(
+        self,
+        kwargs: Dict[str, Any],
+        old_items: List[Dict[str, Any]],
+        new_items: List[Dict[str, Any]],
+    ) -> None:
         """Called at the end of an agent run loop."""
         pass
-    async def on_run_continue(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> bool:
+    async def on_run_continue(
+        self,
+        kwargs: Dict[str, Any],
+        old_items: List[Dict[str, Any]],
+        new_items: List[Dict[str, Any]],
+    ) -> bool:
         """Called during agent run loop to determine if execution should continue.
         Args:
             kwargs: Run arguments
             old_items: Original messages
             new_items: New messages generated during run
         Returns:
             True to continue execution, False to stop
         """
         return True
     async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """
         Called before messages are sent to the agent loop.
         Args:
             messages: List of message dictionaries to preprocess
         Returns:
             List of preprocessed message dictionaries
         """
         return messages
     async def on_llm_end(self, output: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """
         Called after the agent loop returns output.
         Args:
             output: List of output message dictionaries to postprocess
         Returns:
             List of postprocessed output dictionaries
         """
@@ -60,63 +70,67 @@ class AsyncCallbackHandler(ABC):
     async def on_computer_call_start(self, item: Dict[str, Any]) -> None:
         """
         Called when a computer call is about to start.
         Args:
             item: The computer call item dictionary
         """
         pass
-    async def on_computer_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None:
+    async def on_computer_call_end(
+        self, item: Dict[str, Any], result: List[Dict[str, Any]]
+    ) -> None:
         """
         Called when a computer call has completed.
         Args:
             item: The computer call item dictionary
             result: The result of the computer call
         """
         pass
     async def on_function_call_start(self, item: Dict[str, Any]) -> None:
         """
         Called when a function call is about to start.
         Args:
             item: The function call item dictionary
         """
         pass
-    async def on_function_call_end(self, item: Dict[str, Any], result: List[Dict[str, Any]]) -> None:
+    async def on_function_call_end(
+        self, item: Dict[str, Any], result: List[Dict[str, Any]]
+    ) -> None:
         """
         Called when a function call has completed.
         Args:
             item: The function call item dictionary
             result: The result of the function call
         """
         pass
     async def on_text(self, item: Dict[str, Any]) -> None:
         """
         Called when a text message is encountered.
         Args:
             item: The message item dictionary
         """
         pass
     async def on_api_start(self, kwargs: Dict[str, Any]) -> None:
         """
         Called when an API call is about to start.
         Args:
             kwargs: The kwargs being passed to the API call
         """
         pass
     async def on_api_end(self, kwargs: Dict[str, Any], result: Any) -> None:
         """
         Called when an API call has completed.
         Args:
             kwargs: The kwargs that were passed to the API call
             result: The result of the API call
@@ -126,7 +140,7 @@ class AsyncCallbackHandler(ABC):
     async def on_usage(self, usage: Dict[str, Any]) -> None:
         """
         Called when usage information is received.
         Args:
             usage: The usage information
         """
@@ -135,7 +149,7 @@ class AsyncCallbackHandler(ABC):
     async def on_screenshot(self, screenshot: Union[str, bytes], name: str = "screenshot") -> None:
         """
         Called when a screenshot is taken.
         Args:
             screenshot: The screenshot image
             name: The name of the screenshot
@@ -145,9 +159,9 @@ class AsyncCallbackHandler(ABC):
     async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
         """
         Called when responses are received.
         Args:
             kwargs: The kwargs being passed to the agent loop
             responses: The responses received
         """
-        pass
+        pass

agent/callbacks/budget_manager.py CHANGED Viewed

@@ -1,17 +1,23 @@
-from typing import Dict, List, Any
+from typing import Any, Dict, List
 from .base import AsyncCallbackHandler
 class BudgetExceededError(Exception):
     """Exception raised when budget is exceeded."""
     pass
 class BudgetManagerCallback(AsyncCallbackHandler):
     """Budget manager callback that tracks usage costs and can stop execution when budget is exceeded."""
-    def __init__(self, max_budget: float, reset_after_each_run: bool = True, raise_error: bool = False):
+    def __init__(
+        self, max_budget: float, reset_after_each_run: bool = True, raise_error: bool = False
+    ):
         """
         Initialize BudgetManagerCallback.
         Args:
             max_budget: Maximum budget allowed
             reset_after_each_run: Whether to reset budget after each run
@@ -21,24 +27,30 @@ class BudgetManagerCallback(AsyncCallbackHandler):
         self.reset_after_each_run = reset_after_each_run
         self.raise_error = raise_error
         self.total_cost = 0.0
     async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
         """Reset budget if configured to do so."""
         if self.reset_after_each_run:
             self.total_cost = 0.0
     async def on_usage(self, usage: Dict[str, Any]) -> None:
         """Track usage costs."""
         if "response_cost" in usage:
             self.total_cost += usage["response_cost"]
-    async def on_run_continue(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> bool:
+    async def on_run_continue(
+        self,
+        kwargs: Dict[str, Any],
+        old_items: List[Dict[str, Any]],
+        new_items: List[Dict[str, Any]],
+    ) -> bool:
         """Check if budget allows continuation."""
         if self.total_cost >= self.max_budget:
             if self.raise_error:
-                raise BudgetExceededError(f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}")
+                raise BudgetExceededError(
+                    f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}"
+                )
             else:
                 print(f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}")
             return False
         return True

agent/callbacks/image_retention.py CHANGED Viewed

@@ -2,7 +2,8 @@
 Image retention callback handler that limits the number of recent images in message history.
 """
-from typing import List, Dict, Any, Optional
+from typing import Any, Dict, List, Optional
 from .base import AsyncCallbackHandler
@@ -11,129 +12,84 @@ class ImageRetentionCallback(AsyncCallbackHandler):
     Callback handler that applies image retention policy to limit the number
     of recent images in message history to prevent context window overflow.
     """
     def __init__(self, only_n_most_recent_images: Optional[int] = None):
         """
         Initialize the image retention callback.
         Args:
             only_n_most_recent_images: If set, only keep the N most recent images in message history
         """
         self.only_n_most_recent_images = only_n_most_recent_images
     async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """
         Apply image retention policy to messages before sending to agent loop.
         Args:
             messages: List of message dictionaries
         Returns:
             List of messages with image retention policy applied
         """
         if self.only_n_most_recent_images is None:
             return messages
         return self._apply_image_retention(messages)
     def _apply_image_retention(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """Apply image retention policy to keep only the N most recent images.
         Removes computer_call_output items with image_url and their corresponding computer_call items,
         keeping only the most recent N image pairs based on only_n_most_recent_images setting.
         Args:
             messages: List of message dictionaries
         Returns:
             Filtered list of messages with image retention applied
         """
         if self.only_n_most_recent_images is None:
             return messages
-        # First pass: Assign call_id to reasoning items based on the next computer_call
-        messages_with_call_ids = []
-        for i, msg in enumerate(messages):
-            msg_copy = msg.copy() if isinstance(msg, dict) else msg
-            # If this is a reasoning item without a call_id, find the next computer_call
-            if (msg_copy.get("type") == "reasoning" and
-                not msg_copy.get("call_id")):
-                # Look ahead for the next computer_call
-                for j in range(i + 1, len(messages)):
-                    next_msg = messages[j]
-                    if (next_msg.get("type") == "computer_call" and
-                        next_msg.get("call_id")):
-                        msg_copy["call_id"] = next_msg.get("call_id")
-                        break
-            messages_with_call_ids.append(msg_copy)
-        # Find all computer_call_output items with images and their call_ids
-        image_call_ids = []
-        for msg in reversed(messages_with_call_ids):  # Process in reverse to get most recent first
-            if (msg.get("type") == "computer_call_output" and
-                isinstance(msg.get("output"), dict) and
-                "image_url" in msg.get("output", {})):
-                call_id = msg.get("call_id")
-                if call_id and call_id not in image_call_ids:
-                    image_call_ids.append(call_id)
-                    if len(image_call_ids) >= self.only_n_most_recent_images:
-                        break
-        # Keep the most recent N image call_ids (reverse to get chronological order)
-        keep_call_ids = set(image_call_ids[:self.only_n_most_recent_images])
-        # Filter messages: remove computer_call, computer_call_output, and reasoning for old images
-        filtered_messages = []
-        for msg in messages_with_call_ids:
-            msg_type = msg.get("type")
-            call_id = msg.get("call_id")
-            # Remove old computer_call items
-            if msg_type == "computer_call" and call_id not in keep_call_ids:
-                # Check if this call_id corresponds to an image call
-                has_image_output = any(
-                    m.get("type") == "computer_call_output" and
-                    m.get("call_id") == call_id and
-                    isinstance(m.get("output"), dict) and
-                    "image_url" in m.get("output", {})
-                    for m in messages_with_call_ids
-                )
-                if has_image_output:
-                    continue  # Skip this computer_call
-            # Remove old computer_call_output items with images
-            if (msg_type == "computer_call_output" and
-                call_id not in keep_call_ids and
-                isinstance(msg.get("output"), dict) and
-                "image_url" in msg.get("output", {})):
-                continue  # Skip this computer_call_output
-            # Remove old reasoning items that are paired with removed computer calls
-            if (msg_type == "reasoning" and
-                call_id and call_id not in keep_call_ids):
-                # Check if this call_id corresponds to an image call that's being removed
-                has_image_output = any(
-                    m.get("type") == "computer_call_output" and
-                    m.get("call_id") == call_id and
-                    isinstance(m.get("output"), dict) and
-                    "image_url" in m.get("output", {})
-                    for m in messages_with_call_ids
-                )
-                if has_image_output:
-                    continue  # Skip this reasoning item
-            filtered_messages.append(msg)
-        # Clean up: Remove call_id from reasoning items before returning
-        final_messages = []
-        for msg in filtered_messages:
-            if msg.get("type") == "reasoning" and "call_id" in msg:
-                # Create a copy without call_id for reasoning items
-                cleaned_msg = {k: v for k, v in msg.items() if k != "call_id"}
-                final_messages.append(cleaned_msg)
-            else:
-                final_messages.append(msg)
-        return final_messages
+        # Gather indices of all computer_call_output messages that contain an image_url
+        output_indices: List[int] = []
+        for idx, msg in enumerate(messages):
+            if msg.get("type") == "computer_call_output":
+                out = msg.get("output")
+                if isinstance(out, dict) and ("image_url" in out):
+                    output_indices.append(idx)
+        # Nothing to trim
+        if len(output_indices) <= self.only_n_most_recent_images:
+            return messages
+        # Determine which outputs to keep (most recent N)
+        keep_output_indices = set(output_indices[-self.only_n_most_recent_images :])
+        # Build set of indices to remove in one pass
+        to_remove: set[int] = set()
+        for idx in output_indices:
+            if idx in keep_output_indices:
+                continue  # keep this screenshot and its context
+            to_remove.add(idx)  # remove the computer_call_output itself
+            # Remove the immediately preceding computer_call with matching call_id (if present)
+            call_id = messages[idx].get("call_id")
+            prev_idx = idx - 1
+            if (
+                prev_idx >= 0
+                and messages[prev_idx].get("type") == "computer_call"
+                and messages[prev_idx].get("call_id") == call_id
+            ):
+                to_remove.add(prev_idx)
+                # Check a single reasoning immediately before that computer_call
+                r_idx = prev_idx - 1
+                if r_idx >= 0 and messages[r_idx].get("type") == "reasoning":
+                    to_remove.add(r_idx)
+        # Construct filtered list
+        filtered = [m for i, m in enumerate(messages) if i not in to_remove]
+        return filtered

cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl

Potentially problematic release.

cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl