PyPI - cua-agent - Versions diffs - 0.1.6__py3-none-any.whl → 0.1.17__py3-none-any.whl - Mend

cua-agent 0.1.6__py3-none-any.whl → 0.1.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (42) hide show

agent/__init__.py +3 -2
agent/core/__init__.py +0 -5
agent/core/computer_agent.py +21 -28
agent/core/loop.py +78 -124
agent/core/messages.py +279 -125
agent/core/types.py +35 -0
agent/core/visualization.py +197 -0
agent/providers/anthropic/api/client.py +142 -1
agent/providers/anthropic/api_handler.py +140 -0
agent/providers/anthropic/callbacks/__init__.py +5 -0
agent/providers/anthropic/loop.py +206 -220
agent/providers/anthropic/response_handler.py +229 -0
agent/providers/anthropic/tools/bash.py +0 -97
agent/providers/anthropic/utils.py +370 -0
agent/providers/omni/__init__.py +1 -20
agent/providers/omni/api_handler.py +42 -0
agent/providers/omni/clients/anthropic.py +4 -0
agent/providers/omni/image_utils.py +0 -72
agent/providers/omni/loop.py +490 -606
agent/providers/omni/parser.py +58 -4
agent/providers/omni/tools/__init__.py +25 -7
agent/providers/omni/tools/base.py +29 -0
agent/providers/omni/tools/bash.py +43 -38
agent/providers/omni/tools/computer.py +144 -182
agent/providers/omni/tools/manager.py +25 -45
agent/providers/omni/types.py +0 -4
agent/providers/omni/utils.py +224 -145
{cua_agent-0.1.6.dist-info → cua_agent-0.1.17.dist-info}/METADATA +6 -36
cua_agent-0.1.17.dist-info/RECORD +63 -0
agent/providers/omni/callbacks.py +0 -78
agent/providers/omni/clients/groq.py +0 -101
agent/providers/omni/experiment.py +0 -276
agent/providers/omni/messages.py +0 -171
agent/providers/omni/tool_manager.py +0 -91
agent/providers/omni/visualization.py +0 -130
agent/types/__init__.py +0 -23
agent/types/base.py +0 -41
agent/types/messages.py +0 -36
cua_agent-0.1.6.dist-info/RECORD +0 -64
/agent/{types → core}/tools.py +0 -0
{cua_agent-0.1.6.dist-info → cua_agent-0.1.17.dist-info}/WHEEL +0 -0
{cua_agent-0.1.6.dist-info → cua_agent-0.1.17.dist-info}/entry_points.txt +0 -0

agent/providers/anthropic/response_handler.py ADDED Viewed

@@ -0,0 +1,229 @@
+"""Response and tool handling for Anthropic provider."""
+import logging
+from typing import Any, Dict, List, Optional, Tuple, cast
+from anthropic.types.beta import (
+    BetaMessage,
+    BetaMessageParam,
+    BetaTextBlock,
+    BetaTextBlockParam,
+    BetaToolUseBlockParam,
+    BetaContentBlockParam,
+)
+from .tools import ToolResult
+logger = logging.getLogger(__name__)
+class AnthropicResponseHandler:
+    """Handles Anthropic API responses and tool execution results."""
+    def __init__(self, loop):
+        """Initialize the response handler.
+        Args:
+            loop: Reference to the parent loop instance that provides context
+        """
+        self.loop = loop
+    async def handle_response(
+        self, response: BetaMessage, messages: List[Dict[str, Any]]
+    ) -> Tuple[List[Dict[str, Any]], bool]:
+        """Handle the Anthropic API response.
+        Args:
+            response: API response
+            messages: List of messages for context
+        Returns:
+            Tuple containing:
+            - List of new messages to be added
+            - Boolean indicating if the loop should continue
+        """
+        try:
+            new_messages = []
+            # Convert response to parameter format
+            response_params = self.response_to_params(response)
+            # Collect all existing tool_use IDs from previous messages for validation
+            existing_tool_use_ids = set()
+            for msg in messages:
+                if msg.get("role") == "assistant" and isinstance(msg.get("content"), list):
+                    for block in msg.get("content", []):
+                        if (
+                            isinstance(block, dict)
+                            and block.get("type") == "tool_use"
+                            and "id" in block
+                        ):
+                            existing_tool_use_ids.add(block["id"])
+            # Also add new tool_use IDs from the current response
+            current_tool_use_ids = set()
+            for block in response_params:
+                if isinstance(block, dict) and block.get("type") == "tool_use" and "id" in block:
+                    current_tool_use_ids.add(block["id"])
+                    existing_tool_use_ids.add(block["id"])
+            logger.info(f"Existing tool_use IDs in conversation: {existing_tool_use_ids}")
+            logger.info(f"New tool_use IDs in current response: {current_tool_use_ids}")
+            # Create assistant message
+            new_messages.append(
+                {
+                    "role": "assistant",
+                    "content": response_params,
+                }
+            )
+            if self.loop.callback_manager is None:
+                raise RuntimeError(
+                    "Callback manager not initialized. Call initialize_client() first."
+                )
+            # Handle tool use blocks and collect results
+            tool_result_content = []
+            for content_block in response_params:
+                # Notify callback of content
+                self.loop.callback_manager.on_content(cast(BetaContentBlockParam, content_block))
+                # Handle tool use
+                if content_block.get("type") == "tool_use":
+                    if self.loop.tool_manager is None:
+                        raise RuntimeError(
+                            "Tool manager not initialized. Call initialize_client() first."
+                        )
+                    # Execute the tool
+                    result = await self.loop.tool_manager.execute_tool(
+                        name=content_block["name"],
+                        tool_input=cast(Dict[str, Any], content_block["input"]),
+                    )
+                    # Verify the tool_use ID exists in the conversation (which it should now)
+                    tool_use_id = content_block["id"]
+                    if tool_use_id in existing_tool_use_ids:
+                        # Create tool result and add to content
+                        tool_result = self.make_tool_result(cast(ToolResult, result), tool_use_id)
+                        tool_result_content.append(tool_result)
+                        # Notify callback of tool result
+                        self.loop.callback_manager.on_tool_result(
+                            cast(ToolResult, result), content_block["id"]
+                        )
+                    else:
+                        logger.warning(
+                            f"Tool use ID {tool_use_id} not found in previous messages. Skipping tool result."
+                        )
+            # If no tool results, we're done
+            if not tool_result_content:
+                # Signal completion
+                self.loop.callback_manager.on_content({"type": "text", "text": "<DONE>"})
+                return new_messages, False
+            # Add tool results as user message
+            new_messages.append({"content": tool_result_content, "role": "user"})
+            return new_messages, True
+        except Exception as e:
+            logger.error(f"Error handling response: {str(e)}")
+            new_messages.append(
+                {
+                    "role": "assistant",
+                    "content": f"Error: {str(e)}",
+                }
+            )
+            return new_messages, False
+    def response_to_params(
+        self,
+        response: BetaMessage,
+    ) -> List[Dict[str, Any]]:
+        """Convert API response to message parameters.
+        Args:
+            response: API response message
+        Returns:
+            List of content blocks
+        """
+        result = []
+        for block in response.content:
+            if isinstance(block, BetaTextBlock):
+                result.append({"type": "text", "text": block.text})
+            else:
+                result.append(cast(Dict[str, Any], block.model_dump()))
+        return result
+    def make_tool_result(self, result: ToolResult, tool_use_id: str) -> Dict[str, Any]:
+        """Convert a tool result to API format.
+        Args:
+            result: Tool execution result
+            tool_use_id: ID of the tool use
+        Returns:
+            Formatted tool result
+        """
+        if result.content:
+            return {
+                "type": "tool_result",
+                "content": result.content,
+                "tool_use_id": tool_use_id,
+                "is_error": bool(result.error),
+            }
+        tool_result_content = []
+        is_error = False
+        if result.error:
+            is_error = True
+            tool_result_content = [
+                {
+                    "type": "text",
+                    "text": self.maybe_prepend_system_tool_result(result, result.error),
+                }
+            ]
+        else:
+            if result.output:
+                tool_result_content.append(
+                    {
+                        "type": "text",
+                        "text": self.maybe_prepend_system_tool_result(result, result.output),
+                    }
+                )
+            if result.base64_image:
+                tool_result_content.append(
+                    {
+                        "type": "image",
+                        "source": {
+                            "type": "base64",
+                            "media_type": "image/png",
+                            "data": result.base64_image,
+                        },
+                    }
+                )
+        return {
+            "type": "tool_result",
+            "content": tool_result_content,
+            "tool_use_id": tool_use_id,
+            "is_error": is_error,
+        }
+    def maybe_prepend_system_tool_result(self, result: ToolResult, result_text: str) -> str:
+        """Prepend system information to tool result if available.
+        Args:
+            result: Tool execution result
+            result_text: Text to prepend to
+        Returns:
+            Text with system information prepended if available
+        """
+        if result.system:
+            result_text = f"<s>{result.system}</s>\n{result_text}"
+        return result_text

agent/providers/anthropic/tools/bash.py CHANGED Viewed

@@ -7,102 +7,6 @@ from .base import BaseAnthropicTool, CLIResult, ToolError, ToolResult
 from ....core.tools.bash import BaseBashTool
-class _BashSession:
-    """A session of a bash shell."""
-    _started: bool  # True once start() has created the subprocess
-    _process: asyncio.subprocess.Process  # assigned by start()
-    command: str = "/bin/bash"  # command line handed to create_subprocess_shell in start()
-    _output_delay: float = 0.2  # seconds
-    _timeout: float = 120.0  # seconds
-    _sentinel: str = "<<exit>>"  # echoed after every command so run() can tell when output is complete
-    def __init__(self):
-        self._started = False
-        self._timed_out = False  # set by run() on timeout; later run() calls then raise ToolError
-    async def start(self):  # spawn the shell subprocess; a no-op when already started
-        if self._started:
-            return
-        self._process = await asyncio.create_subprocess_shell(
-            self.command,
-            preexec_fn=os.setsid,  # child calls setsid() before exec, i.e. runs in its own session
-            shell=True,
-            bufsize=0,
-            stdin=asyncio.subprocess.PIPE,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-        )
-        self._started = True
-    def stop(self):
-        """Terminate the bash shell."""
-        if not self._started:
-            raise ToolError("Session has not started.")
-        if self._process.returncode is not None:  # process has already exited; nothing to terminate
-            return
-        self._process.terminate()
-    async def run(self, command: str):
-        """Execute a command in the bash shell."""
-        if not self._started:
-            raise ToolError("Session has not started.")
-        if self._process.returncode is not None:  # shell has exited: returned as a ToolResult, not raised
-            return ToolResult(
-                system="tool must be restarted",
-                error=f"bash has exited with returncode {self._process.returncode}",
-            )
-        if self._timed_out:  # an earlier run() timed out; refuse further commands
-            raise ToolError(
-                f"timed out: bash has not returned in {self._timeout} seconds and must be restarted",
-            )
-        # we know these are not None because we created the process with PIPEs
-        assert self._process.stdin
-        assert self._process.stdout
-        assert self._process.stderr
-        # send command to the process
-        self._process.stdin.write(command.encode() + f"; echo '{self._sentinel}'\n".encode())
-        await self._process.stdin.drain()
-        # read output from the process, until the sentinel is found
-        try:
-            async with asyncio.timeout(self._timeout):  # NOTE(review): asyncio.timeout needs Python 3.11+
-                while True:
-                    await asyncio.sleep(self._output_delay)  # wait between polls of the buffer
-                    # if we read directly from stdout/stderr, it will wait forever for
-                    # EOF. use the StreamReader buffer directly instead.
-                    output = (
-                        self._process.stdout._buffer.decode()
-                    )  # pyright: ignore[reportAttributeAccessIssue]
-                    if self._sentinel in output:
-                        # strip the sentinel and break
-                        output = output[: output.index(self._sentinel)]
-                        break
-        except asyncio.TimeoutError:
-            self._timed_out = True
-            raise ToolError(
-                f"timed out: bash has not returned in {self._timeout} seconds and must be restarted",
-            ) from None
-        if output.endswith("\n"):  # drop one trailing newline only
-            output = output[:-1]
-        error = self._process.stderr._buffer.decode()  # pyright: ignore[reportAttributeAccessIssue]
-        if error.endswith("\n"):  # drop one trailing newline only
-            error = error[:-1]
-        # clear the buffers so that the next output can be read correctly
-        self._process.stdout._buffer.clear()  # pyright: ignore[reportAttributeAccessIssue]
-        self._process.stderr._buffer.clear()  # pyright: ignore[reportAttributeAccessIssue]
-        return CLIResult(output=output, error=error)
 class BashTool(BaseBashTool, BaseAnthropicTool):
     """
     A tool that allows the agent to run bash commands.
@@ -124,7 +28,6 @@ class BashTool(BaseBashTool, BaseAnthropicTool):
         # Then initialize the Anthropic tool
         BaseAnthropicTool.__init__(self)
         # Initialize bash session
-        self._session = _BashSession()
     async def __call__(self, command: str | None = None, restart: bool = False, **kwargs):
         """Execute a bash command.

cua-agent 0.1.6__py3-none-any.whl → 0.1.17__py3-none-any.whl

Potentially problematic release.

cua-agent 0.1.6__py3-none-any.whl → 0.1.17__py3-none-any.whl