cua-agent 0.1.31.tar.gz → 0.1.33.tar.gz

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

This release of cua-agent was flagged as potentially problematic by the registry's automated analysis.

Files changed (84)
  1. {cua_agent-0.1.31 → cua_agent-0.1.33}/PKG-INFO +3 -3
  2. {cua_agent-0.1.31 → cua_agent-0.1.33}/README.md +2 -2
  3. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/loop.py +2 -0
  4. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/tools/computer.py +11 -9
  5. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/loop.py +2 -0
  6. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/openai/loop.py +7 -10
  7. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/openai/tools/computer.py +41 -0
  8. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/uitars/clients/oaicompat.py +8 -12
  9. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/uitars/loop.py +12 -36
  10. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/uitars/utils.py +112 -1
  11. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/ui/gradio/app.py +58 -127
  12. {cua_agent-0.1.31 → cua_agent-0.1.33}/pyproject.toml +3 -3
  13. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/__init__.py +0 -0
  14. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/__init__.py +0 -0
  15. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/agent.py +0 -0
  16. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/base.py +0 -0
  17. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/callbacks.py +0 -0
  18. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/experiment.py +0 -0
  19. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/factory.py +0 -0
  20. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/messages.py +0 -0
  21. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/provider_config.py +0 -0
  22. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/telemetry.py +0 -0
  23. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/tools/__init__.py +0 -0
  24. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/tools/base.py +0 -0
  25. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/tools/bash.py +0 -0
  26. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/tools/collection.py +0 -0
  27. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/tools/computer.py +0 -0
  28. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/tools/edit.py +0 -0
  29. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/tools/manager.py +0 -0
  30. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/tools.py +0 -0
  31. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/types.py +0 -0
  32. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/core/visualization.py +0 -0
  33. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/__init__.py +0 -0
  34. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/__init__.py +0 -0
  35. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/api/client.py +0 -0
  36. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/api/logging.py +0 -0
  37. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/api_handler.py +0 -0
  38. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/callbacks/__init__.py +0 -0
  39. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/callbacks/manager.py +0 -0
  40. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/prompts.py +0 -0
  41. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/response_handler.py +0 -0
  42. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/tools/__init__.py +0 -0
  43. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/tools/base.py +0 -0
  44. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/tools/bash.py +0 -0
  45. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/tools/collection.py +0 -0
  46. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/tools/edit.py +0 -0
  47. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/tools/manager.py +0 -0
  48. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/tools/run.py +0 -0
  49. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/types.py +0 -0
  50. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/anthropic/utils.py +0 -0
  51. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/__init__.py +0 -0
  52. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/api_handler.py +0 -0
  53. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/clients/anthropic.py +0 -0
  54. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/clients/base.py +0 -0
  55. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/clients/oaicompat.py +0 -0
  56. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/clients/ollama.py +0 -0
  57. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/clients/openai.py +0 -0
  58. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/clients/utils.py +0 -0
  59. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/image_utils.py +0 -0
  60. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/parser.py +0 -0
  61. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/prompts.py +0 -0
  62. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/tools/__init__.py +0 -0
  63. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/tools/base.py +0 -0
  64. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/tools/bash.py +0 -0
  65. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/tools/computer.py +0 -0
  66. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/tools/manager.py +0 -0
  67. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/omni/utils.py +0 -0
  68. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/openai/__init__.py +0 -0
  69. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/openai/api_handler.py +0 -0
  70. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/openai/response_handler.py +0 -0
  71. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/openai/tools/__init__.py +0 -0
  72. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/openai/tools/base.py +0 -0
  73. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/openai/tools/manager.py +0 -0
  74. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/openai/types.py +0 -0
  75. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/openai/utils.py +0 -0
  76. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/uitars/__init__.py +0 -0
  77. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/uitars/clients/base.py +0 -0
  78. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/uitars/prompts.py +0 -0
  79. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/uitars/tools/__init__.py +0 -0
  80. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/uitars/tools/computer.py +0 -0
  81. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/providers/uitars/tools/manager.py +0 -0
  82. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/telemetry.py +0 -0
  83. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/ui/__init__.py +0 -0
  84. {cua_agent-0.1.31 → cua_agent-0.1.33}/agent/ui/gradio/__init__.py +0 -0
--- cua_agent-0.1.31/PKG-INFO
+++ cua_agent-0.1.33/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cua-agent
-Version: 0.1.31
+Version: 0.1.33
 Summary: CUA (Computer Use) Agent for AI-driven computer interaction
 Author-Email: TryCua <gh@trycua.com>
 Requires-Python: >=3.10
@@ -120,10 +120,10 @@ async with Computer() as macos_computer:
 # model=LLM(provider=LLMProvider.ANTHROPIC)
 # or
 # loop=AgentLoop.OMNI,
-# model=LLM(provider=LLMProvider.OLLAMA, model="gemma3")
+# model=LLM(provider=LLMProvider.OLLAMA, name="gemma3")
 # or
 # loop=AgentLoop.UITARS,
-# model=LLM(provider=LLMProvider.OAICOMPAT, model="tgi", provider_base_url="https://**************.us-east-1.aws.endpoints.huggingface.cloud/v1")
+# model=LLM(provider=LLMProvider.OAICOMPAT, name="ByteDance-Seed/UI-TARS-1.5-7B", provider_base_url="https://**************.us-east-1.aws.endpoints.huggingface.cloud/v1")
 )

 tasks = [
--- cua_agent-0.1.31/README.md
+++ cua_agent-0.1.33/README.md
@@ -50,10 +50,10 @@ async with Computer() as macos_computer:
 # model=LLM(provider=LLMProvider.ANTHROPIC)
 # or
 # loop=AgentLoop.OMNI,
-# model=LLM(provider=LLMProvider.OLLAMA, model="gemma3")
+# model=LLM(provider=LLMProvider.OLLAMA, name="gemma3")
 # or
 # loop=AgentLoop.UITARS,
-# model=LLM(provider=LLMProvider.OAICOMPAT, model="tgi", provider_base_url="https://**************.us-east-1.aws.endpoints.huggingface.cloud/v1")
+# model=LLM(provider=LLMProvider.OAICOMPAT, name="ByteDance-Seed/UI-TARS-1.5-7B", provider_base_url="https://**************.us-east-1.aws.endpoints.huggingface.cloud/v1")
 )

 tasks = [
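Note: the README hunks above rename the LLM keyword that carries the model identifier from model= to name=. A minimal sketch of an agent built against the new signature, assuming the ComputerAgent constructor and import paths implied by the surrounding README example (the endpoint URL stays masked as in the diff):

    from computer import Computer
    from agent import ComputerAgent, AgentLoop, LLM, LLMProvider

    async def main():
        async with Computer() as macos_computer:
            # Sketch only: `name=` now carries the model identifier.
            agent = ComputerAgent(
                computer=macos_computer,
                loop=AgentLoop.OMNI,
                model=LLM(provider=LLMProvider.OLLAMA, name="gemma3"),
            )
            async for result in agent.run("Open Safari"):
                print(result)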
--- cua_agent-0.1.31/agent/providers/anthropic/loop.py
+++ cua_agent-0.1.33/agent/providers/anthropic/loop.py
@@ -279,6 +279,8 @@ class AnthropicLoop(BaseLoop):
                 messages,
                 model=self.model,
             )
+            # Log standardized response for ease of parsing
+            self._log_api_call("agent_response", request=None, response=openai_compatible_response)
             await queue.put(openai_compatible_response)

             if not should_continue:
--- cua_agent-0.1.31/agent/providers/anthropic/tools/computer.py
+++ cua_agent-0.1.33/agent/providers/anthropic/tools/computer.py
@@ -161,15 +161,17 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
             self.logger.info(f"Moving cursor to ({x}, {y})")
             await self.computer.interface.move_cursor(x, y)
         elif action == "left_click_drag":
-            self.logger.info(f"Dragging from ({x}, {y})")
-            # First move to the position
-            await self.computer.interface.move_cursor(x, y)
-            # Then perform drag operation - check if drag_to exists or we need to use other methods
-            try:
-                await self.computer.interface.drag_to(x, y)
-            except Exception as e:
-                self.logger.error(f"Error during drag operation: {str(e)}")
-                raise ToolError(f"Failed to perform drag: {str(e)}")
+            # Get the start coordinate from kwargs
+            start_coordinate = kwargs.get("start_coordinate")
+            if not start_coordinate:
+                raise ToolError("start_coordinate is required for left_click_drag action")
+
+            start_x, start_y = start_coordinate
+            end_x, end_y = x, y
+
+            self.logger.info(f"Dragging from ({start_x}, {start_y}) to ({end_x}, {end_y})")
+            await self.computer.interface.move_cursor(start_x, start_y)
+            await self.computer.interface.drag_to(end_x, end_y)

         # Wait briefly for any UI changes
         await asyncio.sleep(0.5)
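Note: the reworked left_click_drag branch now reads the drag origin from a start_coordinate kwarg and raises ToolError when it is missing. A hedged sketch of the call shape (the tool instance and the coordinate kwarg name are assumptions based on the Anthropic computer-use tool convention; values illustrative):

    # `tool` is assumed to be an initialized ComputerTool.
    result = await tool(
        action="left_click_drag",
        coordinate=[640, 400],        # drag end point, mapped to (x, y)
        start_coordinate=[120, 300],  # now required; omitting it raises ToolError
    )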
--- cua_agent-0.1.31/agent/providers/omni/loop.py
+++ cua_agent-0.1.33/agent/providers/omni/loop.py
@@ -670,6 +670,8 @@ class OmniLoop(BaseLoop):
                 parsed_screen=parsed_screen,
                 parser=self.parser
             )
+            # Log standardized response for ease of parsing
+            self._log_api_call("agent_response", request=None, response=openai_compatible_response)

             # Yield the response to the caller
             yield openai_compatible_response
--- cua_agent-0.1.31/agent/providers/openai/loop.py
+++ cua_agent-0.1.33/agent/providers/openai/loop.py
@@ -201,16 +201,7 @@ class OpenAILoop(BaseLoop):

         # Emit screenshot callbacks
         await self.handle_screenshot(screenshot_base64, action_type="initial_state")
-
-        # Save screenshot if requested
-        if self.save_trajectory:
-            # Ensure screenshot_base64 is a string
-            if not isinstance(screenshot_base64, str):
-                logger.warning(
-                    "Converting non-string screenshot_base64 to string for _save_screenshot"
-                )
-            self._save_screenshot(screenshot_base64, action_type="state")
-            logger.info("Screenshot saved to trajectory")
+        self._save_screenshot(screenshot_base64, action_type="state")

         # First add any existing user messages that were passed to run()
         user_query = None
@@ -276,6 +267,10 @@ class OpenAILoop(BaseLoop):
         )
         # Don't reset last_response_id to None - keep the previous value if available

+
+        # Log standardized response for ease of parsing
+        # Since this is the openAI responses format, we don't need to convert it to agent response format
+        self._log_api_call("agent_response", request=None, response=response)
         # Process API response
         await queue.put(response)

@@ -347,6 +342,7 @@ class OpenAILoop(BaseLoop):
         # Process screenshot through hooks
         action_type = f"after_{action.get('type', 'action')}"
         await self.handle_screenshot(screenshot_base64, action_type=action_type)
+        self._save_screenshot(screenshot_base64, action_type=action_type)

         # Create computer_call_output
         computer_call_output = {
@@ -393,6 +389,7 @@ class OpenAILoop(BaseLoop):

         # Process the response
         # await self.response_handler.process_response(response, queue)
+        self._log_api_call("agent_response", request=None, response=response)
         await queue.put(response)
     except Exception as e:
         logger.error(f"Error executing computer action: {str(e)}")
--- cua_agent-0.1.31/agent/providers/openai/tools/computer.py
+++ cua_agent-0.1.33/agent/providers/openai/tools/computer.py
@@ -44,6 +44,7 @@ Action = Literal[
     "double_click",
     "screenshot",
     "scroll",
+    "drag",
 ]


@@ -165,6 +166,11 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
             scroll_x = kwargs.get("scroll_x", 0) // 50
             scroll_y = kwargs.get("scroll_y", 0) // 50
             return await self.handle_scroll(x, y, scroll_x, scroll_y)
+        elif type == "drag":
+            path = kwargs.get("path")
+            if not path or not isinstance(path, list) or len(path) < 2:
+                raise ToolError("path is required for drag action and must contain at least 2 points")
+            return await self.handle_drag(path)
         elif type == "screenshot":
             return await self.screenshot()
         elif type == "wait":
@@ -302,6 +308,41 @@ class ComputerTool(BaseComputerTool, BaseOpenAITool):
             self.logger.error(f"Error in handle_scroll: {str(e)}")
             raise ToolError(f"Failed to scroll at ({x}, {y}): {str(e)}")

+    async def handle_drag(self, path: List[Dict[str, int]]) -> ToolResult:
+        """Handle mouse drag operation using a path of coordinates.
+
+        Args:
+            path: List of coordinate points {"x": int, "y": int} defining the drag path
+
+        Returns:
+            ToolResult with the operation result and screenshot
+        """
+        try:
+            # Convert from [{"x": x, "y": y}, ...] format to [(x, y), ...] format
+            points = [(p["x"], p["y"]) for p in path]
+
+            # Perform drag action
+            if len(points) == 2:
+                await self.computer.interface.move_cursor(points[0][0], points[0][1])
+                await self.computer.interface.drag_to(points[1][0], points[1][1])
+            else:
+                await self.computer.interface.drag(points, button="left")
+
+            # Wait for UI to update
+            await asyncio.sleep(0.5)
+
+            # Take screenshot after action
+            screenshot = await self.computer.interface.screenshot()
+            base64_screenshot = base64.b64encode(screenshot).decode("utf-8")
+
+            return ToolResult(
+                output=f"Dragged from ({path[0]['x']}, {path[0]['y']}) to ({path[-1]['x']}, {path[-1]['y']})",
+                base64_image=base64_screenshot,
+            )
+        except Exception as e:
+            self.logger.error(f"Error in handle_drag: {str(e)}")
+            raise ToolError(f"Failed to perform drag operation: {str(e)}")
+
     async def screenshot(self) -> ToolResult:
         """Take a screenshot."""
         try:
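Note: the new "drag" branch accepts a path of at least two points and routes it to handle_drag, which falls back to a plain move-then-drag when exactly two points are given. A hedged sketch of the payload and dispatch (values illustrative; the tool instance and keyword-call shape are assumptions based on the dispatcher above):

    # An OpenAI computer_call-style drag action with a three-point path.
    drag_path = [
        {"x": 100, "y": 200},
        {"x": 300, "y": 200},
        {"x": 300, "y": 400},
    ]
    # Fewer than 2 points would raise ToolError in the dispatcher above.
    result = await tool(type="drag", path=drag_path)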
--- cua_agent-0.1.31/agent/providers/uitars/clients/oaicompat.py
+++ cua_agent-0.1.33/agent/providers/uitars/clients/oaicompat.py
@@ -190,25 +190,21 @@ class OAICompatClient(BaseUITarsClient):
                 response_text = await response.text()
                 logger.debug(f"Response content: {response_text}")

+                # if 503, then the endpoint is still warming up
+                if response.status == 503:
+                    logger.error(f"Endpoint is still warming up, please try again later")
+                    raise Exception(f"Endpoint is still warming up: {response_text}")
+
                 # Try to parse as JSON if the content type is appropriate
                 if "application/json" in response.headers.get('Content-Type', ''):
                     response_json = await response.json()
                 else:
                     raise Exception(f"Response is not JSON format")
-                    # # Optionally try to parse it anyway
-                    # try:
-                    #     import json
-                    #     response_json = json.loads(response_text)
-                    # except json.JSONDecodeError as e:
-                    #     print(f"Failed to parse response as JSON: {e}")

                 if response.status != 200:
-                    error_msg = response_json.get("error", {}).get(
-                        "message", str(response_json)
-                    )
-                    logger.error(f"Error in API call: {error_msg}")
-                    raise Exception(f"API error: {error_msg}")
-
+                    logger.error(f"Error in API call: {response_text}")
+                    raise Exception(f"API error: {response_text}")
+
                 return response_json

         except Exception as e:
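Note: because a 503 now raises immediately instead of falling through to the JSON check, callers can distinguish a cold serverless endpoint from a real failure and retry. A minimal retry sketch under that assumption (make_request is a hypothetical stand-in for whatever coroutine issues the API call):

    import asyncio

    async def call_with_warmup_retry(make_request, attempts: int = 5, delay: float = 30.0):
        """Retry while the endpoint reports it is still warming up."""
        for attempt in range(attempts):
            try:
                return await make_request()
            except Exception as e:
                # Matches the message raised by the 503 branch above.
                if "warming up" in str(e) and attempt < attempts - 1:
                    await asyncio.sleep(delay)
                    continue
                raise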
--- cua_agent-0.1.31/agent/providers/uitars/loop.py
+++ cua_agent-0.1.33/agent/providers/uitars/loop.py
@@ -17,7 +17,7 @@ from ...core.types import AgentResponse, LLMProvider
 from ...core.visualization import VisualizationHelper
 from computer import Computer

-from .utils import add_box_token, parse_actions, parse_action_parameters
+from .utils import add_box_token, parse_actions, parse_action_parameters, to_agent_response_format
 from .tools.manager import ToolManager
 from .tools.computer import ToolResult
 from .prompts import COMPUTER_USE, SYSTEM_PROMPT, MAC_SPECIFIC_NOTES
@@ -440,7 +440,7 @@ class UITARSLoop(BaseLoop):
     # MAIN LOOP - IMPLEMENTING ABSTRACT METHOD
     ###########################################

-    async def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[Dict[str, Any], None]:
+    async def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[AgentResponse, None]:
         """Run the agent loop with provided messages.

         Args:
@@ -507,41 +507,16 @@ class UITARSLoop(BaseLoop):

             # Update whether an action screenshot was saved this turn
             action_screenshot_saved = action_screenshot_saved or new_screenshot_saved
-
-            # Parse actions from the raw response
-            raw_response = response["choices"][0]["message"]["content"]
-            parsed_actions = parse_actions(raw_response)
-
-            # Extract thought content if available
-            thought = ""
-            if "Thought:" in raw_response:
-                thought_match = re.search(r"Thought: (.*?)(?=\s*Action:|$)", raw_response, re.DOTALL)
-                if thought_match:
-                    thought = thought_match.group(1).strip()

-            # Create standardized thought response format
-            thought_response = {
-                "role": "assistant",
-                "content": thought or raw_response,
-                "metadata": {
-                    "title": "🧠 UI-TARS Thoughts"
-                }
-            }
+            agent_response = await to_agent_response_format(
+                response,
+                messages,
+                model=self.model,
+            )
+            # Log standardized response for ease of parsing
+            self._log_api_call("agent_response", request=None, response=agent_response)
+            yield agent_response

-            # Create action response format
-            action_response = {
-                "role": "assistant",
-                "content": str(parsed_actions),
-                "metadata": {
-                    "title": "🖱️ UI-TARS Actions",
-                }
-            }
-
-            # Yield both responses to the caller (thoughts first, then actions)
-            yield thought_response
-            if parsed_actions:
-                yield action_response
-
             # Check if we should continue this conversation
             running = should_continue
@@ -562,7 +537,8 @@ class UITARSLoop(BaseLoop):
             logger.error(f"Maximum retry attempts reached. Last error was: {str(e)}")

             yield {
-                "error": str(e),
+                "role": "assistant",
+                "content": f"Error: {str(e)}",
                 "metadata": {"title": "❌ Error"},
             }
--- cua_agent-0.1.31/agent/providers/uitars/utils.py
+++ cua_agent-0.1.33/agent/providers/uitars/utils.py
@@ -4,9 +4,114 @@ import logging
 import base64
 import re
 from typing import Any, Dict, List, Optional, Union, Tuple
+from datetime import datetime

 logger = logging.getLogger(__name__)

+from ...core.types import AgentResponse
+
+async def to_agent_response_format(
+    response: Dict[str, Any],
+    messages: List[Dict[str, Any]],
+    model: Optional[str] = None,
+) -> AgentResponse:
+    """Convert raw UI-TARS response to agent response format.
+
+    Args:
+        response: Raw UI-TARS response
+        messages: List of messages in standard format
+        model: Optional model name
+
+    Returns:
+        AgentResponse: Standardized agent response format
+    """
+    # Create unique IDs for this response
+    response_id = f"resp_{datetime.now().strftime('%Y%m%d%H%M%S')}_{id(response)}"
+    reasoning_id = f"rs_{response_id}"
+    action_id = f"cu_{response_id}"
+    call_id = f"call_{response_id}"
+
+    # Parse actions from the raw response
+    content = response["choices"][0]["message"]["content"]
+    actions = parse_actions(content)
+
+    # Extract thought content if available
+    reasoning_text = ""
+    if "Thought:" in content:
+        thought_match = re.search(r"Thought: (.*?)(?=\s*Action:|$)", content, re.DOTALL)
+        if thought_match:
+            reasoning_text = thought_match.group(1).strip()
+
+    # Create output items
+    output_items = []
+    if reasoning_text:
+        output_items.append({
+            "type": "reasoning",
+            "id": reasoning_id,
+            "text": reasoning_text
+        })
+    if actions:
+        for i, action in enumerate(actions):
+            action_name, tool_args = parse_action_parameters(action)
+            if action_name == "finished":
+                output_items.append({
+                    "type": "message",
+                    "role": "assistant",
+                    "content": [{
+                        "type": "output_text",
+                        "text": tool_args["content"]
+                    }],
+                    "id": f"action_{i}_{action_id}",
+                    "status": "completed"
+                })
+            else:
+                if tool_args.get("action") == action_name:
+                    del tool_args["action"]
+                output_items.append({
+                    "type": "computer_call",
+                    "id": f"{action}_{i}_{action_id}",
+                    "call_id": f"call_{i}_{action_id}",
+                    "action": { "type": action_name, **tool_args },
+                    "pending_safety_checks": [],
+                    "status": "completed"
+                })
+
+    # Create agent response
+    agent_response = AgentResponse(
+        id=response_id,
+        object="response",
+        created_at=int(datetime.now().timestamp()),
+        status="completed",
+        error=None,
+        incomplete_details=None,
+        instructions=None,
+        max_output_tokens=None,
+        model=model or response["model"],
+        output=output_items,
+        parallel_tool_calls=True,
+        previous_response_id=None,
+        reasoning={"effort": "medium"},
+        store=True,
+        temperature=0.0,
+        top_p=0.7,
+        text={"format": {"type": "text"}},
+        tool_choice="auto",
+        tools=[
+            {
+                "type": "computer_use_preview",
+                "display_height": 768,
+                "display_width": 1024,
+                "environment": "mac",
+            }
+        ],
+        truncation="auto",
+        usage=response["usage"],
+        user=None,
+        metadata={},
+        response=response
+    )
+    return agent_response
+

 def add_box_token(input_string: str) -> str:
     """Add box tokens to the coordinates in the model response.
@@ -74,7 +179,13 @@ def parse_action_parameters(action: str) -> Tuple[str, Dict[str, Any]]:
     """
     # Handle "finished" action
     if action.startswith("finished"):
-        return "finished", {}
+        # Parse content if it exists
+        content_match = re.search(r"content='([^']*)'", action)
+        if content_match:
+            content = content_match.group(1)
+            return "finished", {"content": content}
+        else:
+            return "finished", {}

     # Parse action parameters
     action_match = re.match(r'(\w+)\((.*)\)', action)
--- cua_agent-0.1.31/agent/ui/gradio/app.py
+++ cua_agent-0.1.33/agent/ui/gradio/app.py
@@ -35,6 +35,7 @@ from pathlib import Path
 from typing import Dict, List, Optional, AsyncGenerator, Any, Tuple, Union
 import gradio as gr
 from gradio.components.chatbot import MetadataDict
+from typing import cast

 # Import from agent package
 from agent.core.types import AgentResponse
@@ -322,63 +323,6 @@ def get_ollama_models() -> List[str]:
         logging.error(f"Error getting Ollama models: {e}")
     return []

-
-def extract_synthesized_text(
-    result: Union[AgentResponse, Dict[str, Any]],
-) -> Tuple[str, MetadataDict]:
-    """Extract synthesized text from the agent result."""
-    synthesized_text = ""
-    metadata = MetadataDict()
-
-    if "output" in result and result["output"]:
-        for output in result["output"]:
-            if output.get("type") == "reasoning":
-                metadata["title"] = "🧠 Reasoning"
-                content = output.get("content", "")
-                if content:
-                    synthesized_text += f"{content}\n"
-            elif output.get("type") == "message":
-                # Handle message type outputs - can contain rich content
-                content = output.get("content", [])
-
-                # Content is usually an array of content blocks
-                if isinstance(content, list):
-                    for block in content:
-                        if isinstance(block, dict) and block.get("type") == "output_text":
-                            text_value = block.get("text", "")
-                            if text_value:
-                                synthesized_text += f"{text_value}\n"
-
-            elif output.get("type") == "computer_call":
-                action = output.get("action", {})
-                action_type = action.get("type", "")
-
-                # Create a descriptive text about the action
-                if action_type == "click":
-                    button = action.get("button", "")
-                    x = action.get("x", "")
-                    y = action.get("y", "")
-                    synthesized_text += f"Clicked {button} at position ({x}, {y}).\n"
-                elif action_type == "type":
-                    text = action.get("text", "")
-                    synthesized_text += f"Typed: {text}.\n"
-                elif action_type == "keypress":
-                    # Extract key correctly from either keys array or key field
-                    if isinstance(action.get("keys"), list):
-                        key = ", ".join(action.get("keys"))
-                    else:
-                        key = action.get("key", "")
-
-                    synthesized_text += f"Pressed key: {key}\n"
-                else:
-                    synthesized_text += f"Performed {action_type} action.\n"
-
-        metadata["status"] = "done"
-        metadata["title"] = f"🛠️ {synthesized_text.strip().splitlines()[-1]}"
-
-    return synthesized_text.strip(), metadata
-
-
 def create_computer_instance(verbosity: int = logging.INFO) -> Computer:
     """Create or get the global Computer instance."""
     global global_computer
@@ -447,66 +391,6 @@ def create_agent(

     return global_agent

-
-def process_agent_result(result: Union[AgentResponse, Dict[str, Any]]) -> Tuple[str, MetadataDict]:
-    """Process agent results for the Gradio UI."""
-    # Extract text content
-    text_obj = result.get("text", {})
-    metadata = result.get("metadata", {})
-
-    # Create a properly typed MetadataDict
-    metadata_dict = MetadataDict()
-    metadata_dict["title"] = metadata.get("title", "")
-    metadata_dict["status"] = "done"
-    metadata = metadata_dict
-
-    # For OpenAI's Computer-Use Agent, text field is an object with format property
-    if (
-        text_obj
-        and isinstance(text_obj, dict)
-        and "format" in text_obj
-        and not text_obj.get("value", "")
-    ):
-        content, metadata = extract_synthesized_text(result)
-    else:
-        if not text_obj:
-            text_obj = result
-
-        # For other types of results, try to get text directly
-        if isinstance(text_obj, dict):
-            if "value" in text_obj:
-                content = text_obj["value"]
-            elif "text" in text_obj:
-                content = text_obj["text"]
-            elif "content" in text_obj:
-                content = text_obj["content"]
-            else:
-                content = ""
-        else:
-            content = str(text_obj) if text_obj else ""
-
-        # If still no content but we have outputs, create a summary
-        if not content and "output" in result and result["output"]:
-            output = result["output"]
-            for out in output:
-                if out.get("type") == "reasoning":
-                    content = out.get("content", "")
-                    if content:
-                        break
-                elif out.get("type") == "computer_call":
-                    action = out.get("action", {})
-                    action_type = action.get("type", "")
-                    if action_type:
-                        content = f"Performing action: {action_type}"
-                        break
-
-    # Clean up the text - ensure content is a string
-    if not isinstance(content, str):
-        content = str(content) if content else ""
-
-    return content, metadata
-
-
 def create_gradio_ui(
     provider_name: str = "openai",
     model_name: str = "gpt-4o",
@@ -907,17 +791,64 @@

                 # Stream responses from the agent
                 async for result in global_agent.run(last_user_message):
-                    # Process result
-                    content, metadata = process_agent_result(result)
-
-                    # Skip empty content
-                    if content or metadata.get("title"):
-                        history.append(
-                            gr.ChatMessage(
-                                role="assistant", content=content, metadata=metadata
-                            )
-                        )
-                        yield history
+                    print(f"DEBUG - Agent response ------- START")
+                    from pprint import pprint
+                    pprint(result)
+                    print(f"DEBUG - Agent response ------- END")
+
+                    def generate_gradio_messages():
+                        if result.get("content"):
+                            yield gr.ChatMessage(
+                                role="assistant",
+                                content=result.get("content", ""),
+                                metadata=cast(MetadataDict, result.get("metadata", {}))
+                            )
+                        else:
+                            outputs = result.get("output", [])
+                            for output in outputs:
+                                if output.get("type") == "message":
+                                    content = output.get("content", [])
+                                    for content_part in content:
+                                        if content_part.get("text"):
+                                            yield gr.ChatMessage(
+                                                role=output.get("role", "assistant"),
+                                                content=content_part.get("text", ""),
+                                                metadata=content_part.get("metadata", {})
+                                            )
+                                elif output.get("type") == "reasoning":
+                                    # if it's openAI, we only have access to a summary of the reasoning
+                                    summary_content = output.get("summary", [])
+                                    if summary_content:
+                                        for summary_part in summary_content:
+                                            if summary_part.get("type") == "summary_text":
+                                                yield gr.ChatMessage(
+                                                    role="assistant",
+                                                    content=summary_part.get("text", "")
+                                                )
+                                    else:
+                                        summary_content = output.get("text", "")
+                                        if summary_content:
+                                            yield gr.ChatMessage(
+                                                role="assistant",
+                                                content=summary_content,
+                                            )
+                                elif output.get("type") == "computer_call":
+                                    action = output.get("action", {})
+                                    action_type = action.get("type", "")
+                                    if action_type:
+                                        action_title = f"🛠️ Performing {action_type}"
+                                        if action.get("x") and action.get("y"):
+                                            action_title += f" at ({action['x']}, {action['y']})"
+                                        yield gr.ChatMessage(
+                                            role="assistant",
+                                            content=f"```json\n{json.dumps(action)}\n```",
+                                            metadata={"title": action_title}
+                                        )
+
+                    for message in generate_gradio_messages():
+                        history.append(message)
+                        yield history
+
             except Exception as e:
                 import traceback
--- cua_agent-0.1.31/pyproject.toml
+++ cua_agent-0.1.33/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"

 [project]
 name = "cua-agent"
-version = "0.1.31"
+version = "0.1.33"
 description = "CUA (Computer Use) Agent for AI-driven computer interaction"
 readme = "README.md"
 authors = [
@@ -108,7 +108,7 @@ target-version = [

 [tool.ruff]
 line-length = 100
-target-version = "0.1.31"
+target-version = "0.1.33"
 select = [
     "E",
     "F",
@@ -122,7 +122,7 @@ docstring-code-format = true

 [tool.mypy]
 strict = true
-python_version = "0.1.31"
+python_version = "0.1.33"
 ignore_missing_imports = true
 disallow_untyped_defs = true
 check_untyped_defs = true
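Note: while bumping [project] version is correct, ruff's target-version and mypy's python_version take Python version identifiers, not package versions; the "0.1.33" values above look like a blanket search-and-replace. For comparison, a conventional configuration for a >=3.10 package would resemble:

    [tool.ruff]
    line-length = 100
    target-version = "py310"   # Python syntax target, not the package version

    [tool.mypy]
    strict = true
    python_version = "3.10"    # Python version mypy type-checks against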
The remaining files listed above (13-84) are unchanged.