PyPI - cua-agent - Versions diffs - 0.4.0b4__py3-none-any.whl → 0.4.2__py3-none-any.whl - Mend

cua-agent 0.4.0b4__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (19) hide show

agent/__init__.py +46 -1
agent/agent.py +18 -1
agent/callbacks/__init__.py +2 -0
agent/callbacks/budget_manager.py +43 -43
agent/callbacks/telemetry.py +210 -0
agent/cli.py +27 -15
agent/loops/anthropic.py +659 -18
agent/responses.py +207 -207
agent/telemetry.py +142 -0
agent/ui/__init__.py +2 -2
agent/ui/__main__.py +4 -0
agent/ui/gradio/__init__.py +2 -2
agent/ui/gradio/app.py +19 -19
agent/ui/gradio/ui_components.py +28 -10
{cua_agent-0.4.0b4.dist-info → cua_agent-0.4.2.dist-info}/METADATA +2 -2
cua_agent-0.4.2.dist-info/RECORD +33 -0
cua_agent-0.4.0b4.dist-info/RECORD +0 -30
{cua_agent-0.4.0b4.dist-info → cua_agent-0.4.2.dist-info}/WHEEL +0 -0
{cua_agent-0.4.0b4.dist-info → cua_agent-0.4.2.dist-info}/entry_points.txt +0 -0

agent/responses.py CHANGED Viewed

@@ -1,207 +1,207 @@
-"""
-Functions for making various Responses API items from different types of responses.
-Based on the OpenAI spec for Responses API items.
-"""
-import base64
-import json
-import uuid
-from typing import List, Dict, Any, Literal, Union, Optional
-from openai.types.responses.response_computer_tool_call_param import (
-    ResponseComputerToolCallParam,
-    ActionClick,
-    ActionDoubleClick,
-    ActionDrag,
-    ActionDragPath,
-    ActionKeypress,
-    ActionMove,
-    ActionScreenshot,
-    ActionScroll,
-    ActionType as ActionTypeAction,
-    ActionWait,
-    PendingSafetyCheck
-)
-from openai.types.responses.response_function_tool_call_param import ResponseFunctionToolCallParam
-from openai.types.responses.response_output_text_param import ResponseOutputTextParam
-from openai.types.responses.response_reasoning_item_param import ResponseReasoningItemParam, Summary
-from openai.types.responses.response_output_message_param import ResponseOutputMessageParam
-from openai.types.responses.easy_input_message_param import EasyInputMessageParam
-from openai.types.responses.response_input_image_param import ResponseInputImageParam
-def random_id():
-    return str(uuid.uuid4())
-# User message items
-def make_input_image_item(image_data: Union[str, bytes]) -> EasyInputMessageParam:
-    return EasyInputMessageParam(
-        content=[
-            ResponseInputImageParam(
-                type="input_image",
-                image_url=f"data:image/png;base64,{base64.b64encode(image_data).decode('utf-8') if isinstance(image_data, bytes) else image_data}"
-            )
-        ],
-        role="user",
-        type="message"
-    )
-# Text items
-def make_reasoning_item(reasoning: str) -> ResponseReasoningItemParam:
-    return ResponseReasoningItemParam(
-        id=random_id(),
-        summary=[
-            Summary(text=reasoning, type="summary_text")
-        ],
-        type="reasoning"
-    )
-def make_output_text_item(content: str) -> ResponseOutputMessageParam:
-    return ResponseOutputMessageParam(
-        id=random_id(),
-        content=[
-            ResponseOutputTextParam(
-                text=content,
-                type="output_text",
-                annotations=[]
-            )
-        ],
-        role="assistant",
-        status="completed",
-        type="message"
-    )
-# Function call items
-def make_function_call_item(function_name: str, arguments: Dict[str, Any], call_id: Optional[str] = None) -> ResponseFunctionToolCallParam:
-    return ResponseFunctionToolCallParam(
-        id=random_id(),
-        call_id=call_id if call_id else random_id(),
-        name=function_name,
-        arguments=json.dumps(arguments),
-        status="completed",
-        type="function_call"
-    )
-# Computer tool call items
-def make_click_item(x: int, y: int, button: Literal["left", "right", "wheel", "back", "forward"] = "left", call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
-    return ResponseComputerToolCallParam(
-        id=random_id(),
-        call_id=call_id if call_id else random_id(),
-        action=ActionClick(
-            button=button,
-            type="click",
-            x=x,
-            y=y
-        ),
-        pending_safety_checks=[],
-        status="completed",
-        type="computer_call"
-    )
-def make_double_click_item(x: int, y: int, call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
-    return ResponseComputerToolCallParam(
-        id=random_id(),
-        call_id=call_id if call_id else random_id(),
-        action=ActionDoubleClick(
-            type="double_click",
-            x=x,
-            y=y
-        ),
-        pending_safety_checks=[],
-        status="completed",
-        type="computer_call"
-    )
-def make_drag_item(path: List[Dict[str, int]], call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
-    drag_path = [ActionDragPath(x=point["x"], y=point["y"]) for point in path]
-    return ResponseComputerToolCallParam(
-        id=random_id(),
-        call_id=call_id if call_id else random_id(),
-        action=ActionDrag(
-            path=drag_path,
-            type="drag"
-        ),
-        pending_safety_checks=[],
-        status="completed",
-        type="computer_call"
-    )
-def make_keypress_item(keys: List[str], call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
-    return ResponseComputerToolCallParam(
-        id=random_id(),
-        call_id=call_id if call_id else random_id(),
-        action=ActionKeypress(
-            keys=keys,
-            type="keypress"
-        ),
-        pending_safety_checks=[],
-        status="completed",
-        type="computer_call"
-    )
-def make_move_item(x: int, y: int, call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
-    return ResponseComputerToolCallParam(
-        id=random_id(),
-        call_id=call_id if call_id else random_id(),
-        action=ActionMove(
-            type="move",
-            x=x,
-            y=y
-        ),
-        pending_safety_checks=[],
-        status="completed",
-        type="computer_call"
-    )
-def make_screenshot_item(call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
-    return ResponseComputerToolCallParam(
-        id=random_id(),
-        call_id=call_id if call_id else random_id(),
-        action=ActionScreenshot(
-            type="screenshot"
-        ),
-        pending_safety_checks=[],
-        status="completed",
-        type="computer_call"
-    )
-def make_scroll_item(x: int, y: int, scroll_x: int, scroll_y: int, call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
-    return ResponseComputerToolCallParam(
-        id=random_id(),
-        call_id=call_id if call_id else random_id(),
-        action=ActionScroll(
-            scroll_x=scroll_x,
-            scroll_y=scroll_y,
-            type="scroll",
-            x=x,
-            y=y
-        ),
-        pending_safety_checks=[],
-        status="completed",
-        type="computer_call"
-    )
-def make_type_item(text: str, call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
-    return ResponseComputerToolCallParam(
-        id=random_id(),
-        call_id=call_id if call_id else random_id(),
-        action=ActionTypeAction(
-            text=text,
-            type="type"
-        ),
-        pending_safety_checks=[],
-        status="completed",
-        type="computer_call"
-    )
-def make_wait_item(call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
-    return ResponseComputerToolCallParam(
-        id=random_id(),
-        call_id=call_id if call_id else random_id(),
-        action=ActionWait(
-            type="wait"
-        ),
-        pending_safety_checks=[],
-        status="completed",
-        type="computer_call"
-    )
+"""
+Functions for making various Responses API items from different types of responses.
+Based on the OpenAI spec for Responses API items.
+"""
+import base64
+import json
+import uuid
+from typing import List, Dict, Any, Literal, Union, Optional
+from openai.types.responses.response_computer_tool_call_param import (
+    ResponseComputerToolCallParam,
+    ActionClick,
+    ActionDoubleClick,
+    ActionDrag,
+    ActionDragPath,
+    ActionKeypress,
+    ActionMove,
+    ActionScreenshot,
+    ActionScroll,
+    ActionType as ActionTypeAction,
+    ActionWait,
+    PendingSafetyCheck
+)
+from openai.types.responses.response_function_tool_call_param import ResponseFunctionToolCallParam
+from openai.types.responses.response_output_text_param import ResponseOutputTextParam
+from openai.types.responses.response_reasoning_item_param import ResponseReasoningItemParam, Summary
+from openai.types.responses.response_output_message_param import ResponseOutputMessageParam
+from openai.types.responses.easy_input_message_param import EasyInputMessageParam
+from openai.types.responses.response_input_image_param import ResponseInputImageParam
+def random_id():
+    return str(uuid.uuid4())
+# User message items
+def make_input_image_item(image_data: Union[str, bytes]) -> EasyInputMessageParam:
+    return EasyInputMessageParam(
+        content=[
+            ResponseInputImageParam(
+                type="input_image",
+                image_url=f"data:image/png;base64,{base64.b64encode(image_data).decode('utf-8') if isinstance(image_data, bytes) else image_data}"
+            )
+        ],
+        role="user",
+        type="message"
+    )
+# Text items
+def make_reasoning_item(reasoning: str) -> ResponseReasoningItemParam:
+    return ResponseReasoningItemParam(
+        id=random_id(),
+        summary=[
+            Summary(text=reasoning, type="summary_text")
+        ],
+        type="reasoning"
+    )
+def make_output_text_item(content: str) -> ResponseOutputMessageParam:
+    return ResponseOutputMessageParam(
+        id=random_id(),
+        content=[
+            ResponseOutputTextParam(
+                text=content,
+                type="output_text",
+                annotations=[]
+            )
+        ],
+        role="assistant",
+        status="completed",
+        type="message"
+    )
+# Function call items
+def make_function_call_item(function_name: str, arguments: Dict[str, Any], call_id: Optional[str] = None) -> ResponseFunctionToolCallParam:
+    return ResponseFunctionToolCallParam(
+        id=random_id(),
+        call_id=call_id if call_id else random_id(),
+        name=function_name,
+        arguments=json.dumps(arguments),
+        status="completed",
+        type="function_call"
+    )
+# Computer tool call items
+def make_click_item(x: int, y: int, button: Literal["left", "right", "wheel", "back", "forward"] = "left", call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
+    return ResponseComputerToolCallParam(
+        id=random_id(),
+        call_id=call_id if call_id else random_id(),
+        action=ActionClick(
+            button=button,
+            type="click",
+            x=x,
+            y=y
+        ),
+        pending_safety_checks=[],
+        status="completed",
+        type="computer_call"
+    )
+def make_double_click_item(x: int, y: int, call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
+    return ResponseComputerToolCallParam(
+        id=random_id(),
+        call_id=call_id if call_id else random_id(),
+        action=ActionDoubleClick(
+            type="double_click",
+            x=x,
+            y=y
+        ),
+        pending_safety_checks=[],
+        status="completed",
+        type="computer_call"
+    )
+def make_drag_item(path: List[Dict[str, int]], call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
+    drag_path = [ActionDragPath(x=point["x"], y=point["y"]) for point in path]
+    return ResponseComputerToolCallParam(
+        id=random_id(),
+        call_id=call_id if call_id else random_id(),
+        action=ActionDrag(
+            path=drag_path,
+            type="drag"
+        ),
+        pending_safety_checks=[],
+        status="completed",
+        type="computer_call"
+    )
+def make_keypress_item(keys: List[str], call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
+    return ResponseComputerToolCallParam(
+        id=random_id(),
+        call_id=call_id if call_id else random_id(),
+        action=ActionKeypress(
+            keys=keys,
+            type="keypress"
+        ),
+        pending_safety_checks=[],
+        status="completed",
+        type="computer_call"
+    )
+def make_move_item(x: int, y: int, call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
+    return ResponseComputerToolCallParam(
+        id=random_id(),
+        call_id=call_id if call_id else random_id(),
+        action=ActionMove(
+            type="move",
+            x=x,
+            y=y
+        ),
+        pending_safety_checks=[],
+        status="completed",
+        type="computer_call"
+    )
+def make_screenshot_item(call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
+    return ResponseComputerToolCallParam(
+        id=random_id(),
+        call_id=call_id if call_id else random_id(),
+        action=ActionScreenshot(
+            type="screenshot"
+        ),
+        pending_safety_checks=[],
+        status="completed",
+        type="computer_call"
+    )
+def make_scroll_item(x: int, y: int, scroll_x: int, scroll_y: int, call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
+    return ResponseComputerToolCallParam(
+        id=random_id(),
+        call_id=call_id if call_id else random_id(),
+        action=ActionScroll(
+            scroll_x=scroll_x,
+            scroll_y=scroll_y,
+            type="scroll",
+            x=x,
+            y=y
+        ),
+        pending_safety_checks=[],
+        status="completed",
+        type="computer_call"
+    )
+def make_type_item(text: str, call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
+    return ResponseComputerToolCallParam(
+        id=random_id(),
+        call_id=call_id if call_id else random_id(),
+        action=ActionTypeAction(
+            text=text,
+            type="type"
+        ),
+        pending_safety_checks=[],
+        status="completed",
+        type="computer_call"
+    )
+def make_wait_item(call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
+    return ResponseComputerToolCallParam(
+        id=random_id(),
+        call_id=call_id if call_id else random_id(),
+        action=ActionWait(
+            type="wait"
+        ),
+        pending_safety_checks=[],
+        status="completed",
+        type="computer_call"
+    )

agent/telemetry.py ADDED Viewed

@@ -0,0 +1,142 @@
+"""Agent telemetry for tracking anonymous usage and feature usage."""
+import logging
+import os
+import platform
+import sys
+from typing import Dict, Any, Callable
+# Import the core telemetry module
+TELEMETRY_AVAILABLE = False
+# Local fallbacks in case core telemetry isn't available
+def _noop(*args: Any, **kwargs: Any) -> None:
+    """No-op function for when telemetry is not available."""
+    pass
+# Define default functions with unique names to avoid shadowing
+_default_record_event = _noop
+_default_increment_counter = _noop
+_default_set_dimension = _noop
+_default_get_telemetry_client = lambda: None
+_default_flush = _noop
+_default_is_telemetry_enabled = lambda: False
+_default_is_telemetry_globally_disabled = lambda: True
+# Set the actual functions to the defaults initially
+record_event = _default_record_event
+increment_counter = _default_increment_counter
+set_dimension = _default_set_dimension
+get_telemetry_client = _default_get_telemetry_client
+flush = _default_flush
+is_telemetry_enabled = _default_is_telemetry_enabled
+is_telemetry_globally_disabled = _default_is_telemetry_globally_disabled
+logger = logging.getLogger("agent.telemetry")
+try:
+    # Import from core telemetry
+    from core.telemetry import (
+        record_event as core_record_event,
+        increment as core_increment,
+        get_telemetry_client as core_get_telemetry_client,
+        flush as core_flush,
+        is_telemetry_enabled as core_is_telemetry_enabled,
+        is_telemetry_globally_disabled as core_is_telemetry_globally_disabled,
+    )
+    # Override the default functions with actual implementations
+    record_event = core_record_event
+    get_telemetry_client = core_get_telemetry_client
+    flush = core_flush
+    is_telemetry_enabled = core_is_telemetry_enabled
+    is_telemetry_globally_disabled = core_is_telemetry_globally_disabled
+    def increment_counter(counter_name: str, value: int = 1) -> None:
+        """Wrapper for increment to maintain backward compatibility."""
+        if is_telemetry_enabled():
+            core_increment(counter_name, value)
+    def set_dimension(name: str, value: Any) -> None:
+        """Set a dimension that will be attached to all events."""
+        logger.debug(f"Setting dimension {name}={value}")
+    TELEMETRY_AVAILABLE = True
+    logger.info("Successfully imported telemetry")
+except ImportError as e:
+    logger.warning(f"Could not import telemetry: {e}")
+    logger.debug("Telemetry not available, using no-op functions")
+# Get system info once to use in telemetry
+SYSTEM_INFO = {
+    "os": platform.system().lower(),
+    "os_version": platform.release(),
+    "python_version": platform.python_version(),
+}
+def enable_telemetry() -> bool:
+    """Enable telemetry if available.
+    Returns:
+        bool: True if telemetry was successfully enabled, False otherwise
+    """
+    global TELEMETRY_AVAILABLE, record_event, increment_counter, get_telemetry_client, flush, is_telemetry_enabled, is_telemetry_globally_disabled
+    # Check if globally disabled using core function
+    if TELEMETRY_AVAILABLE and is_telemetry_globally_disabled():
+        logger.info("Telemetry is globally disabled via environment variable - cannot enable")
+        return False
+    # Already enabled
+    if TELEMETRY_AVAILABLE:
+        return True
+    # Try to import and enable
+    try:
+        from core.telemetry import (
+            record_event,
+            increment,
+            get_telemetry_client,
+            flush,
+            is_telemetry_globally_disabled,
+        )
+        # Check again after import
+        if is_telemetry_globally_disabled():
+            logger.info("Telemetry is globally disabled via environment variable - cannot enable")
+            return False
+        TELEMETRY_AVAILABLE = True
+        logger.info("Telemetry successfully enabled")
+        return True
+    except ImportError as e:
+        logger.warning(f"Could not enable telemetry: {e}")
+        return False
+def is_telemetry_enabled() -> bool:
+    """Check if telemetry is enabled.
+    Returns:
+        bool: True if telemetry is enabled, False otherwise
+    """
+    # Use the core function if available, otherwise use our local flag
+    if TELEMETRY_AVAILABLE:
+        from core.telemetry import is_telemetry_enabled as core_is_enabled
+        return core_is_enabled()
+    return False
+def record_agent_initialization() -> None:
+    """Record when an agent instance is initialized."""
+    if TELEMETRY_AVAILABLE and is_telemetry_enabled():
+        record_event("agent_initialized", SYSTEM_INFO)
+        # Set dimensions that will be attached to all events
+        set_dimension("os", SYSTEM_INFO["os"])
+        set_dimension("os_version", SYSTEM_INFO["os_version"])
+        set_dimension("python_version", SYSTEM_INFO["python_version"])

agent/ui/__init__.py CHANGED Viewed

@@ -2,6 +2,6 @@
 UI components for agent
 """
-from .gradio import test_cua, create_gradio_ui
+from .gradio import launch_ui, create_gradio_ui
-__all__ = ["test_cua", "create_gradio_ui"]
+__all__ = ["launch_ui", "create_gradio_ui"]

agent/ui/__main__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .gradio import launch_ui
+if __name__ == "__main__":
+    launch_ui()

agent/ui/gradio/__init__.py CHANGED Viewed

@@ -2,7 +2,7 @@
 Gradio UI for agent
 """
-from .app import test_cua
+from .app import launch_ui
 from .ui_components import create_gradio_ui
-__all__ = ["test_cua", "create_gradio_ui"]
+__all__ = ["launch_ui", "create_gradio_ui"]

agent/ui/gradio/app.py CHANGED Viewed

@@ -72,26 +72,26 @@ def save_settings(settings: Dict[str, Any]):
         print(f"Warning: Could not save settings to {SETTINGS_FILE}: {e}")
-# Custom Screenshot Handler for Gradio chat
-class GradioChatScreenshotHandler:
-    """Custom handler that adds screenshots to the Gradio chatbot."""
+# # Custom Screenshot Handler for Gradio chat
+# class GradioChatScreenshotHandler:
+#     """Custom handler that adds screenshots to the Gradio chatbot."""
-    def __init__(self, chatbot_history: List[gr.ChatMessage]):
-        self.chatbot_history = chatbot_history
-        print("GradioChatScreenshotHandler initialized")
+#     def __init__(self, chatbot_history: List[gr.ChatMessage]):
+#         self.chatbot_history = chatbot_history
+#         print("GradioChatScreenshotHandler initialized")
-    async def on_screenshot(self, screenshot_base64: str, action_type: str = "") -> None:
-        """Add screenshot to chatbot when a screenshot is taken."""
-        image_markdown = f"![Screenshot after {action_type}](data:image/png;base64,{screenshot_base64})"
+#     async def on_screenshot(self, screenshot_base64: str, action_type: str = "") -> None:
+#         """Add screenshot to chatbot when a screenshot is taken."""
+#         image_markdown = f"![Screenshot after {action_type}](data:image/png;base64,{screenshot_base64})"
-        if self.chatbot_history is not None:
-            self.chatbot_history.append(
-                gr.ChatMessage(
-                    role="assistant",
-                    content=image_markdown,
-                    metadata={"title": f"🖥️ Screenshot - {action_type}", "status": "done"},
-                )
-            )
+#         if self.chatbot_history is not None:
+#             self.chatbot_history.append(
+#                 gr.ChatMessage(
+#                     role="assistant",
+#                     content=image_markdown,
+#                     metadata={"title": f"🖥️ Screenshot - {action_type}", "status": "done"},
+#                 )
+#             )
 # Detect platform capabilities
@@ -236,7 +236,7 @@ def create_agent(
     return global_agent
-def test_cua():
+def launch_ui():
     """Standalone function to launch the Gradio app."""
     from agent.ui.gradio.ui_components import create_gradio_ui
     print(f"Starting Gradio app for CUA Agent...")
@@ -245,4 +245,4 @@ def test_cua():
 if __name__ == "__main__":
-    test_cua()
+    launch_ui()

cua-agent 0.4.0b4__py3-none-any.whl → 0.4.2__py3-none-any.whl

Potentially problematic release.

cua-agent 0.4.0b4__py3-none-any.whl → 0.4.2__py3-none-any.whl