PyPI - cua-agent - Versions diffs - 0.4.34__py3-none-any.whl → 0.4.35__py3-none-any.whl - Mend

cua-agent 0.4.34-py3-none-any.whl → 0.4.35-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (61)

agent/__init__.py +4 -10
agent/__main__.py +2 -1
agent/adapters/huggingfacelocal_adapter.py +54 -61
agent/adapters/human_adapter.py +116 -114
agent/adapters/mlxvlm_adapter.py +110 -99
agent/adapters/models/__init__.py +14 -6
agent/adapters/models/generic.py +7 -4
agent/adapters/models/internvl.py +66 -30
agent/adapters/models/opencua.py +23 -8
agent/adapters/models/qwen2_5_vl.py +7 -4
agent/agent.py +184 -158
agent/callbacks/__init__.py +4 -4
agent/callbacks/base.py +45 -31
agent/callbacks/budget_manager.py +22 -10
agent/callbacks/image_retention.py +18 -13
agent/callbacks/logging.py +55 -42
agent/callbacks/operator_validator.py +3 -1
agent/callbacks/pii_anonymization.py +19 -16
agent/callbacks/telemetry.py +67 -61
agent/callbacks/trajectory_saver.py +90 -70
agent/cli.py +115 -110
agent/computers/__init__.py +13 -8
agent/computers/base.py +26 -17
agent/computers/cua.py +27 -23
agent/computers/custom.py +72 -69
agent/decorators.py +23 -14
agent/human_tool/__init__.py +2 -7
agent/human_tool/__main__.py +6 -2
agent/human_tool/server.py +48 -37
agent/human_tool/ui.py +235 -185
agent/integrations/hud/__init__.py +15 -21
agent/integrations/hud/agent.py +101 -83
agent/integrations/hud/proxy.py +90 -57
agent/loops/__init__.py +25 -21
agent/loops/anthropic.py +537 -483
agent/loops/base.py +13 -14
agent/loops/composed_grounded.py +135 -149
agent/loops/gemini.py +31 -12
agent/loops/glm45v.py +135 -133
agent/loops/gta1.py +47 -50
agent/loops/holo.py +4 -2
agent/loops/internvl.py +6 -11
agent/loops/moondream3.py +36 -12
agent/loops/omniparser.py +212 -209
agent/loops/openai.py +49 -50
agent/loops/opencua.py +29 -41
agent/loops/qwen.py +475 -0
agent/loops/uitars.py +237 -202
agent/proxy/examples.py +54 -50
agent/proxy/handlers.py +27 -34
agent/responses.py +330 -330
agent/types.py +11 -5
agent/ui/__init__.py +1 -1
agent/ui/__main__.py +1 -1
agent/ui/gradio/app.py +23 -18
agent/ui/gradio/ui_components.py +310 -161
{cua_agent-0.4.34.dist-info → cua_agent-0.4.35.dist-info}/METADATA +18 -10
cua_agent-0.4.35.dist-info/RECORD +64 -0
cua_agent-0.4.34.dist-info/RECORD +0 -63
{cua_agent-0.4.34.dist-info → cua_agent-0.4.35.dist-info}/WHEEL +0 -0
{cua_agent-0.4.34.dist-info → cua_agent-0.4.35.dist-info}/entry_points.txt +0 -0

agent/human_tool/ui.py CHANGED Viewed

@@ -1,14 +1,17 @@
-import gradio as gr
+import base64
+import io
 import json
 import time
-from typing import List, Dict, Any, Optional
 from datetime import datetime
+from typing import Any, Dict, List, Optional
+import gradio as gr
 import requests
-from .server import completion_queue
-import base64
-import io
 from PIL import Image
+from .server import completion_queue
 class HumanCompletionUI:
     def __init__(self, server_url: str = "http://localhost:8002"):
         self.server_url = server_url
@@ -20,7 +23,7 @@ class HumanCompletionUI:
         self.current_button: str = "left"
         self.current_scroll_x: int = 0
         self.current_scroll_y: int = -120
     def format_messages_for_chatbot(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """Format messages for display in gr.Chatbot with type='messages'."""
         formatted = []
@@ -28,7 +31,7 @@ class HumanCompletionUI:
             role = msg.get("role", "user")
             content = msg.get("content", "")
             tool_calls = msg.get("tool_calls", [])
             # Handle different content formats
             if isinstance(content, list):
                 # Multi-modal content - can include text and images
@@ -55,7 +58,7 @@ class HumanCompletionUI:
                             else:
                                 # For URL images, create gr.Image with URL
                                 formatted_content.append(gr.Image(value=image_url))
                 # Determine final content format
                 if len(formatted_content) == 1:
                     content = formatted_content[0]
@@ -63,28 +66,28 @@ class HumanCompletionUI:
                     content = formatted_content
                 else:
                     content = "[Empty content]"
             # Ensure role is valid for Gradio Chatbot
             if role not in ["user", "assistant"]:
                 role = "assistant" if role == "system" else "user"
             # Invert roles for better display in human UI context
             # (what the AI says becomes "user", what human should respond becomes "assistant")
             if role == "user":
                 role = "assistant"
             else:
                 role = "user"
             # Add the main message if it has content
             if content and str(content).strip():
                 formatted.append({"role": role, "content": content})
             # Handle tool calls - create separate messages for each tool call
             if tool_calls:
                 for tool_call in tool_calls:
                     function_name = tool_call.get("function", {}).get("name", "unknown")
                     arguments_str = tool_call.get("function", {}).get("arguments", "{}")
                     try:
                         # Parse arguments to format them nicely
                         arguments = json.loads(arguments_str)
@@ -92,18 +95,20 @@ class HumanCompletionUI:
                     except json.JSONDecodeError:
                         # If parsing fails, use the raw string
                         formatted_args = arguments_str
                     # Create a formatted message for the tool call
                     tool_call_content = f"```json\n{formatted_args}\n```"
-                    formatted.append({
-                        "role": role,
-                        "content": tool_call_content,
-                        "metadata": {"title": f"🛠️ Used {function_name}"}
-                    })
+                    formatted.append(
+                        {
+                            "role": role,
+                            "content": tool_call_content,
+                            "metadata": {"title": f"🛠️ Used {function_name}"},
+                        }
+                    )
         return formatted
     def get_pending_calls(self) -> List[Dict[str, Any]]:
         """Get pending calls from the server."""
         try:
@@ -113,38 +118,39 @@ class HumanCompletionUI:
         except Exception as e:
             print(f"Error fetching pending calls: {e}")
         return []
     def complete_call_with_response(self, call_id: str, response: str) -> bool:
         """Complete a call with a text response."""
         try:
             response_data = {"response": response}
             response_obj = requests.post(
-                f"{self.server_url}/complete/{call_id}",
-                json=response_data,
-                timeout=10
+                f"{self.server_url}/complete/{call_id}", json=response_data, timeout=10
             )
             response_obj.raise_for_status()
             return True
         except requests.RequestException as e:
             print(f"Error completing call: {e}")
             return False
     def complete_call_with_tool_calls(self, call_id: str, tool_calls: List[Dict[str, Any]]) -> bool:
         """Complete a call with tool calls."""
         try:
             response_data = {"tool_calls": tool_calls}
             response_obj = requests.post(
-                f"{self.server_url}/complete/{call_id}",
-                json=response_data,
-                timeout=10
+                f"{self.server_url}/complete/{call_id}", json=response_data, timeout=10
             )
             response_obj.raise_for_status()
             return True
         except requests.RequestException as e:
             print(f"Error completing call: {e}")
             return False
-    def complete_call(self, call_id: str, response: Optional[str] = None, tool_calls: Optional[List[Dict[str, Any]]] = None) -> bool:
+    def complete_call(
+        self,
+        call_id: str,
+        response: Optional[str] = None,
+        tool_calls: Optional[List[Dict[str, Any]]] = None,
+    ) -> bool:
         """Complete a call with either a response or tool calls."""
         try:
             response_data = {}
@@ -152,25 +158,23 @@ class HumanCompletionUI:
                 response_data["response"] = response
             if tool_calls:
                 response_data["tool_calls"] = tool_calls
             response_obj = requests.post(
-                f"{self.server_url}/complete/{call_id}",
-                json=response_data,
-                timeout=10
+                f"{self.server_url}/complete/{call_id}", json=response_data, timeout=10
             )
             response_obj.raise_for_status()
             return True
         except requests.RequestException as e:
             print(f"Error completing call: {e}")
             return False
     def get_last_image_from_messages(self, messages: List[Dict[str, Any]]) -> Optional[Any]:
         """Extract the last image from the messages for display above conversation."""
         last_image = None
         for msg in reversed(messages):  # Start from the last message
             content = msg.get("content", "")
             if isinstance(content, list):
                 for item in reversed(content):  # Get the last image in the message
                     if item.get("type") == "image_url":
@@ -189,13 +193,13 @@ class HumanCompletionUI:
                             else:
                                 # For URL images, return the URL
                                 return image_url
         return last_image
     def refresh_pending_calls(self):
         """Refresh the list of pending calls."""
         pending_calls = self.get_pending_calls()
         if not pending_calls:
             return (
                 gr.update(choices=["latest"], value="latest"),  # dropdown
@@ -205,27 +209,27 @@ class HumanCompletionUI:
                 gr.update(visible=False),  # click_actions_group hidden
                 gr.update(visible=False),  # actions_group hidden
             )
         # Sort pending calls by created_at to get oldest first
         sorted_calls = sorted(pending_calls, key=lambda x: x.get("created_at", ""))
         # Create choices for dropdown
         choices = [("latest", "latest")]  # Add "latest" option first
         for call in sorted_calls:
             call_id = call["id"]
             model = call.get("model", "unknown")
             created_at = call.get("created_at", "")
             # Format timestamp
             try:
-                dt = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
+                dt = datetime.fromisoformat(created_at.replace("Z", "+00:00"))
                 time_str = dt.strftime("%H:%M:%S")
             except:
                 time_str = created_at
             choice_label = f"{call_id[:8]}... ({model}) - {time_str}"
             choices.append((choice_label, call_id))
         # Default to "latest" which shows the oldest pending conversation
         selected_call_id = "latest"
         if selected_call_id == "latest" and sorted_calls:
@@ -239,7 +243,7 @@ class HumanCompletionUI:
             conversation = []
             self.current_call_id = None
             self.last_image = None
         return (
             gr.update(choices=choices, value="latest"),
             gr.update(value=self.last_image),
@@ -248,7 +252,7 @@ class HumanCompletionUI:
             gr.update(visible=True),  # click_actions_group visible when there is a call
             gr.update(visible=True),  # actions_group visible when there is a call
         )
     def on_call_selected(self, selected_choice):
         """Handle when a call is selected from the dropdown."""
         if not selected_choice:
@@ -259,7 +263,7 @@ class HumanCompletionUI:
                 gr.update(visible=False),  # click_actions_group hidden
                 gr.update(visible=False),  # actions_group hidden
             )
         pending_calls = self.get_pending_calls()
         if not pending_calls:
             return (
@@ -269,7 +273,7 @@ class HumanCompletionUI:
                 gr.update(visible=False),  # click_actions_group hidden
                 gr.update(visible=False),  # actions_group hidden
             )
         # Handle "latest" option
         if selected_choice == "latest":
             # Sort calls by created_at to get oldest first
@@ -284,17 +288,17 @@ class HumanCompletionUI:
                 if call_id_short in selected_choice:
                     call_id = call["id"]
                     break
             if not call_id:
                 return (
                     gr.update(value=None),  # no image
                     gr.update(value=[]),  # empty chatbot
-                    gr.update(interactive=False)
+                    gr.update(interactive=False),
                 )
             # Find the selected call
             selected_call = next((c for c in pending_calls if c["id"] == call_id), None)
         if not selected_call:
             return (
                 gr.update(value=None),  # no image
@@ -303,12 +307,12 @@ class HumanCompletionUI:
                 gr.update(visible=False),  # click_actions_group hidden
                 gr.update(visible=False),  # actions_group hidden
             )
         conversation = self.format_messages_for_chatbot(selected_call.get("messages", []))
         self.current_call_id = call_id
         # Get the last image from messages
         self.last_image = self.get_last_image_from_messages(selected_call.get("messages", []))
         return (
             gr.update(value=self.last_image),
             gr.update(value=conversation),
@@ -316,110 +320,111 @@ class HumanCompletionUI:
             gr.update(visible=True),  # click_actions_group visible
             gr.update(visible=True),  # actions_group visible
         )
     def submit_response(self, response_text: str):
         """Submit a text response to the current call."""
         if not self.current_call_id:
             return (
                 gr.update(value=response_text),  # keep response text
-                gr.update(value="❌ No call selected")  # status
+                gr.update(value="❌ No call selected"),  # status
             )
         if not response_text.strip():
             return (
                 gr.update(value=response_text),  # keep response text
-                gr.update(value="❌ Response cannot be empty")  # status
+                gr.update(value="❌ Response cannot be empty"),  # status
             )
         success = self.complete_call_with_response(self.current_call_id, response_text)
         if success:
             status_msg = "✅ Response submitted successfully!"
             return (
                 gr.update(value=""),  # clear response text
-                gr.update(value=status_msg)  # status
+                gr.update(value=status_msg),  # status
             )
         else:
             return (
                 gr.update(value=response_text),  # keep response text
-                gr.update(value="❌ Failed to submit response")  # status
+                gr.update(value="❌ Failed to submit response"),  # status
             )
     def submit_action(self, action_type: str, **kwargs) -> str:
         """Submit a computer action as a tool call."""
         if not self.current_call_id:
             return "❌ No call selected"
         import uuid
         # Create tool call structure
         action_data = {"type": action_type, **kwargs}
         tool_call = {
             "id": f"call_{uuid.uuid4().hex[:24]}",
             "type": "function",
-            "function": {
-                "name": "computer",
-                "arguments": json.dumps(action_data)
-            }
+            "function": {"name": "computer", "arguments": json.dumps(action_data)},
         }
         success = self.complete_call_with_tool_calls(self.current_call_id, [tool_call])
         if success:
             return f"✅ {action_type.capitalize()} action submitted as tool call"
         else:
             return f"❌ Failed to submit {action_type} action"
-    def submit_click_action(self, x: int, y: int, action_type: str = "click", button: str = "left") -> str:
+    def submit_click_action(
+        self, x: int, y: int, action_type: str = "click", button: str = "left"
+    ) -> str:
         """Submit a coordinate-based action."""
         if action_type == "click":
             return self.submit_action(action_type, x=x, y=y, button=button)
         else:
             return self.submit_action(action_type, x=x, y=y)
     def submit_type_action(self, text: str) -> str:
         """Submit a type action."""
         return self.submit_action("type", text=text)
     def submit_hotkey_action(self, keys: str) -> str:
         """Submit a hotkey action."""
         return self.submit_action("keypress", keys=keys)
     def submit_wait_action(self) -> str:
         """Submit a wait action with no kwargs."""
         return self.submit_action("wait")
-    def submit_description_click(self, description: str, action_type: str = "click", button: str = "left") -> str:
+    def submit_description_click(
+        self, description: str, action_type: str = "click", button: str = "left"
+    ) -> str:
         """Submit a description-based action."""
         if action_type == "click":
             return self.submit_action(action_type, element_description=description, button=button)
         else:
             return self.submit_action(action_type, element_description=description)
     def wait_for_pending_calls(self, max_seconds: float = 10.0, check_interval: float = 0.2):
         """Wait for pending calls to appear or until max_seconds elapsed.
         This method loops and checks for pending calls at regular intervals,
         returning as soon as a pending call is found or the maximum wait time is reached.
         Args:
             max_seconds: Maximum number of seconds to wait
             check_interval: How often to check for pending calls (in seconds)
         """
         import time
         start_time = time.time()
         while time.time() - start_time < max_seconds:
             # Check if there are any pending calls
             pending_calls = self.get_pending_calls()
             if pending_calls:
                 # Found pending calls, return immediately
                 return self.refresh_pending_calls()
             # Wait before checking again
             time.sleep(check_interval)
         # Max wait time reached, return current state
         return self.refresh_pending_calls()
@@ -427,79 +432,73 @@ class HumanCompletionUI:
 def create_ui():
     """Create the Gradio interface."""
     ui_handler = HumanCompletionUI()
     with gr.Blocks(title="Human-in-the-Loop Agent Tool", fill_width=True) as demo:
         gr.Markdown("# 🤖 Human-in-the-Loop Agent Tool")
         gr.Markdown("Review AI conversation requests and provide human responses.")
         with gr.Row():
             with gr.Column(scale=2):
                 with gr.Group():
                     screenshot_image = gr.Image(
-                        label="Interactive Screenshot",
-                        interactive=False,
-                        height=600
+                        label="Interactive Screenshot", interactive=False, height=600
                     )
                     # Action type selection for image clicks (wrapped for visibility control)
                     with gr.Group(visible=False) as click_actions_group:
                         with gr.Row():
                             action_type_radio = gr.Dropdown(
                                 label="Interactive Action",
-                                choices=["click", "double_click", "move", "left_mouse_up", "left_mouse_down", "scroll"],
+                                choices=[
+                                    "click",
+                                    "double_click",
+                                    "move",
+                                    "left_mouse_up",
+                                    "left_mouse_down",
+                                    "scroll",
+                                ],
                                 value="click",
-                                scale=2
+                                scale=2,
                             )
                             action_button_radio = gr.Dropdown(
                                 label="Button",
                                 choices=["left", "right", "wheel", "back", "forward"],
                                 value="left",
                                 visible=True,
-                                scale=1
+                                scale=1,
                             )
                             scroll_x_input = gr.Number(
-                                label="scroll_x",
-                                value=0,
-                                visible=False,
-                                scale=1
+                                label="scroll_x", value=0, visible=False, scale=1
                             )
                             scroll_y_input = gr.Number(
-                                label="scroll_y",
-                                value=-120,
-                                visible=False,
-                                scale=1
+                                label="scroll_y", value=-120, visible=False, scale=1
                             )
                     conversation_chatbot = gr.Chatbot(
-                        label="Conversation",
-                        type="messages",
-                        height=500,
-                        show_copy_button=True
+                        label="Conversation", type="messages", height=500, show_copy_button=True
                     )
             with gr.Column(scale=1):
                 with gr.Group():
                     call_dropdown = gr.Dropdown(
                         label="Select a pending conversation request",
                         choices=["latest"],
                         interactive=True,
-                        value="latest"
+                        value="latest",
                     )
                     refresh_btn = gr.Button("🔄 Refresh", variant="secondary")
                     status_display = gr.Textbox(
-                        label="Status",
-                        interactive=False,
-                        value="Ready to receive requests..."
+                        label="Status", interactive=False, value="Ready to receive requests..."
                     )
                 with gr.Group():
                     response_text = gr.Textbox(
-                        label="Message",
-                        lines=3,
-                        placeholder="Enter your message here..."
+                        label="Message", lines=3, placeholder="Enter your message here..."
                     )
-                    submit_btn = gr.Button("📤 Submit Message", variant="primary", interactive=False)
+                    submit_btn = gr.Button(
+                        "📤 Submit Message", variant="primary", interactive=False
+                    )
                 # Action Accordions (wrapped for visibility control)
                 with gr.Group(visible=False) as actions_group:
                     with gr.Tabs():
@@ -507,58 +506,73 @@ def create_ui():
                             with gr.Group():
                                 description_text = gr.Textbox(
                                     label="Element Description",
-                                    placeholder="e.g., 'Privacy and security option in left sidebar'"
+                                    placeholder="e.g., 'Privacy and security option in left sidebar'",
                                 )
                                 with gr.Row():
                                     description_action_type = gr.Dropdown(
                                         label="Action",
-                                        choices=["click", "double_click", "move", "left_mouse_up", "left_mouse_down"],
-                                        value="click"
+                                        choices=[
+                                            "click",
+                                            "double_click",
+                                            "move",
+                                            "left_mouse_up",
+                                            "left_mouse_down",
+                                        ],
+                                        value="click",
                                     )
                                     description_button = gr.Dropdown(
                                         label="Button",
                                         choices=["left", "right", "wheel", "back", "forward"],
-                                        value="left"
+                                        value="left",
                                     )
                                 description_submit_btn = gr.Button("Submit Click Action")
                         with gr.Tab("📝 Type Action"):
                             with gr.Group():
                                 type_text = gr.Textbox(
-                                    label="Text to Type",
-                                    placeholder="Enter text to type..."
+                                    label="Text to Type", placeholder="Enter text to type..."
                                 )
                                 type_submit_btn = gr.Button("Submit Type")
                         with gr.Tab("⌨️ Keypress Action"):
                             with gr.Group():
                                 keypress_text = gr.Textbox(
-                                    label="Keys",
-                                    placeholder="e.g., ctrl+c, alt+tab"
+                                    label="Keys", placeholder="e.g., ctrl+c, alt+tab"
                                 )
                                 keypress_submit_btn = gr.Button("Submit Keypress")
                         with gr.Tab("🧰 Misc Actions"):
                             with gr.Group():
                                 misc_action_dropdown = gr.Dropdown(
-                                    label="Action",
-                                    choices=["wait"],
-                                    value="wait"
+                                    label="Action", choices=["wait"], value="wait"
                                 )
                                 misc_submit_btn = gr.Button("Submit Action")
         # Event handlers
         refresh_btn.click(
             fn=ui_handler.refresh_pending_calls,
-            outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
+            outputs=[
+                call_dropdown,
+                screenshot_image,
+                conversation_chatbot,
+                submit_btn,
+                click_actions_group,
+                actions_group,
+            ],
         )
         call_dropdown.change(
             fn=ui_handler.on_call_selected,
             inputs=[call_dropdown],
-            outputs=[screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
+            outputs=[
+                screenshot_image,
+                conversation_chatbot,
+                submit_btn,
+                click_actions_group,
+                actions_group,
+            ],
         )
         def handle_image_click(evt: gr.SelectData):
             if evt.index is not None:
                 x, y = evt.index
@@ -568,31 +582,44 @@ def create_ui():
                     sx_i = int(ui_handler.current_scroll_x or 0)
                     sy_i = int(ui_handler.current_scroll_y or 0)
                     # Submit a scroll action with x,y position and scroll deltas
-                    result = ui_handler.submit_action("scroll", x=x, y=y, scroll_x=sx_i, scroll_y=sy_i)
+                    result = ui_handler.submit_action(
+                        "scroll", x=x, y=y, scroll_x=sx_i, scroll_y=sy_i
+                    )
                 else:
                     result = ui_handler.submit_click_action(x, y, action_type, button)
                 ui_handler.wait_for_pending_calls()
                 return result
             return "No coordinates selected"
-        screenshot_image.select(
-            fn=handle_image_click,
-            outputs=[status_display]
-        ).then(
+        screenshot_image.select(fn=handle_image_click, outputs=[status_display]).then(
             fn=ui_handler.wait_for_pending_calls,
-            outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
+            outputs=[
+                call_dropdown,
+                screenshot_image,
+                conversation_chatbot,
+                submit_btn,
+                click_actions_group,
+                actions_group,
+            ],
         )
         # Response submission
         submit_btn.click(
             fn=ui_handler.submit_response,
             inputs=[response_text],
-            outputs=[response_text, status_display]
+            outputs=[response_text, status_display],
         ).then(
             fn=ui_handler.refresh_pending_calls,
-            outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
+            outputs=[
+                call_dropdown,
+                screenshot_image,
+                conversation_chatbot,
+                submit_btn,
+                click_actions_group,
+                actions_group,
+            ],
         )
         # Toggle visibility of controls based on action type
         def toggle_action_controls(action_type):
             # Button visible only for click
@@ -603,59 +630,63 @@ def create_ui():
             # Update state
             ui_handler.current_action_type = action_type or "click"
             return button_vis, scroll_x_vis, scroll_y_vis
         action_type_radio.change(
             fn=toggle_action_controls,
             inputs=[action_type_radio],
-            outputs=[action_button_radio, scroll_x_input, scroll_y_input]
+            outputs=[action_button_radio, scroll_x_input, scroll_y_input],
         )
         # Keep other control values in ui_handler state
         def on_button_change(val):
-            ui_handler.current_button = (val or "left")
-        action_button_radio.change(
-            fn=on_button_change,
-            inputs=[action_button_radio]
-        )
+            ui_handler.current_button = val or "left"
+        action_button_radio.change(fn=on_button_change, inputs=[action_button_radio])
         def on_scroll_x_change(val):
             try:
                 ui_handler.current_scroll_x = int(val) if val is not None else 0
             except Exception:
                 ui_handler.current_scroll_x = 0
-        scroll_x_input.change(
-            fn=on_scroll_x_change,
-            inputs=[scroll_x_input]
-        )
+        scroll_x_input.change(fn=on_scroll_x_change, inputs=[scroll_x_input])
         def on_scroll_y_change(val):
             try:
                 ui_handler.current_scroll_y = int(val) if val is not None else 0
             except Exception:
                 ui_handler.current_scroll_y = 0
-        scroll_y_input.change(
-            fn=on_scroll_y_change,
-            inputs=[scroll_y_input]
-        )
+        scroll_y_input.change(fn=on_scroll_y_change, inputs=[scroll_y_input])
         type_submit_btn.click(
-            fn=ui_handler.submit_type_action,
-            inputs=[type_text],
-            outputs=[status_display]
+            fn=ui_handler.submit_type_action, inputs=[type_text], outputs=[status_display]
         ).then(
             fn=ui_handler.wait_for_pending_calls,
-            outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
+            outputs=[
+                call_dropdown,
+                screenshot_image,
+                conversation_chatbot,
+                submit_btn,
+                click_actions_group,
+                actions_group,
+            ],
         )
         keypress_submit_btn.click(
-            fn=ui_handler.submit_hotkey_action,
-            inputs=[keypress_text],
-            outputs=[status_display]
+            fn=ui_handler.submit_hotkey_action, inputs=[keypress_text], outputs=[status_display]
         ).then(
             fn=ui_handler.wait_for_pending_calls,
-            outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
+            outputs=[
+                call_dropdown,
+                screenshot_image,
+                conversation_chatbot,
+                submit_btn,
+                click_actions_group,
+                actions_group,
+            ],
         )
         def handle_description_submit(description, action_type, button):
             if description:
                 result = ui_handler.submit_description_click(description, action_type, button)
@@ -666,12 +697,19 @@ def create_ui():
         description_submit_btn.click(
             fn=handle_description_submit,
             inputs=[description_text, description_action_type, description_button],
-            outputs=[status_display]
+            outputs=[status_display],
         ).then(
             fn=ui_handler.wait_for_pending_calls,
-            outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
+            outputs=[
+                call_dropdown,
+                screenshot_image,
+                conversation_chatbot,
+                submit_btn,
+                click_actions_group,
+                actions_group,
+            ],
         )
         # Misc action handler
         def handle_misc_submit(selected_action):
             if selected_action == "wait":
@@ -681,20 +719,32 @@ def create_ui():
             return f"Unsupported misc action: {selected_action}"
         misc_submit_btn.click(
-            fn=handle_misc_submit,
-            inputs=[misc_action_dropdown],
-            outputs=[status_display]
+            fn=handle_misc_submit, inputs=[misc_action_dropdown], outputs=[status_display]
         ).then(
             fn=ui_handler.wait_for_pending_calls,
-            outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
+            outputs=[
+                call_dropdown,
+                screenshot_image,
+                conversation_chatbot,
+                submit_btn,
+                click_actions_group,
+                actions_group,
+            ],
         )
         # Load initial data
         demo.load(
             fn=ui_handler.refresh_pending_calls,
-            outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
+            outputs=[
+                call_dropdown,
+                screenshot_image,
+                conversation_chatbot,
+                submit_btn,
+                click_actions_group,
+                actions_group,
+            ],
         )
     return demo

cua-agent 0.4.34__py3-none-any.whl → 0.4.35__py3-none-any.whl

Potentially problematic release.

cua-agent 0.4.34__py3-none-any.whl → 0.4.35__py3-none-any.whl