PyPI - cua-agent - Versions diffs - 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl - Mend

cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (79) hide show

agent/__init__.py +4 -10
agent/__main__.py +2 -1
agent/adapters/__init__.py +4 -0
agent/adapters/azure_ml_adapter.py +283 -0
agent/adapters/cua_adapter.py +161 -0
agent/adapters/huggingfacelocal_adapter.py +67 -125
agent/adapters/human_adapter.py +116 -114
agent/adapters/mlxvlm_adapter.py +110 -99
agent/adapters/models/__init__.py +41 -0
agent/adapters/models/generic.py +78 -0
agent/adapters/models/internvl.py +290 -0
agent/adapters/models/opencua.py +115 -0
agent/adapters/models/qwen2_5_vl.py +78 -0
agent/agent.py +337 -185
agent/callbacks/__init__.py +9 -4
agent/callbacks/base.py +45 -31
agent/callbacks/budget_manager.py +22 -10
agent/callbacks/image_retention.py +54 -98
agent/callbacks/logging.py +55 -42
agent/callbacks/operator_validator.py +35 -33
agent/callbacks/otel.py +291 -0
agent/callbacks/pii_anonymization.py +19 -16
agent/callbacks/prompt_instructions.py +47 -0
agent/callbacks/telemetry.py +99 -61
agent/callbacks/trajectory_saver.py +95 -69
agent/cli.py +269 -119
agent/computers/__init__.py +14 -9
agent/computers/base.py +32 -19
agent/computers/cua.py +52 -25
agent/computers/custom.py +78 -71
agent/decorators.py +23 -14
agent/human_tool/__init__.py +2 -7
agent/human_tool/__main__.py +6 -2
agent/human_tool/server.py +48 -37
agent/human_tool/ui.py +359 -235
agent/integrations/hud/__init__.py +38 -99
agent/integrations/hud/agent.py +369 -0
agent/integrations/hud/proxy.py +166 -52
agent/loops/__init__.py +44 -14
agent/loops/anthropic.py +579 -492
agent/loops/base.py +19 -15
agent/loops/composed_grounded.py +136 -150
agent/loops/fara/__init__.py +8 -0
agent/loops/fara/config.py +506 -0
agent/loops/fara/helpers.py +357 -0
agent/loops/fara/schema.py +143 -0
agent/loops/gelato.py +183 -0
agent/loops/gemini.py +935 -0
agent/loops/generic_vlm.py +601 -0
agent/loops/glm45v.py +140 -135
agent/loops/gta1.py +48 -51
agent/loops/holo.py +218 -0
agent/loops/internvl.py +180 -0
agent/loops/moondream3.py +493 -0
agent/loops/omniparser.py +326 -226
agent/loops/openai.py +50 -51
agent/loops/opencua.py +134 -0
agent/loops/uiins.py +175 -0
agent/loops/uitars.py +247 -206
agent/loops/uitars2.py +951 -0
agent/playground/__init__.py +5 -0
agent/playground/server.py +301 -0
agent/proxy/examples.py +61 -57
agent/proxy/handlers.py +46 -39
agent/responses.py +447 -347
agent/tools/__init__.py +24 -0
agent/tools/base.py +253 -0
agent/tools/browser_tool.py +423 -0
agent/types.py +11 -5
agent/ui/__init__.py +1 -1
agent/ui/__main__.py +1 -1
agent/ui/gradio/app.py +25 -22
agent/ui/gradio/ui_components.py +314 -167
cua_agent-0.7.16.dist-info/METADATA +85 -0
cua_agent-0.7.16.dist-info/RECORD +79 -0
{cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/WHEEL +1 -1
cua_agent-0.4.22.dist-info/METADATA +0 -436
cua_agent-0.4.22.dist-info/RECORD +0 -51
{cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/entry_points.txt +0 -0

agent/ui/gradio/ui_components.py CHANGED Viewed

@@ -2,19 +2,25 @@
 UI Components for the Gradio interface
 """
-import os
 import asyncio
-import logging
 import json
+import logging
+import os
 import platform
 from pathlib import Path
-from typing import Dict, List, Optional, Any, cast
+from typing import Any, Dict, List, Optional, cast
 import gradio as gr
 from gradio.components.chatbot import MetadataDict
 from .app import (
-    load_settings, save_settings, create_agent, get_model_string,
-    get_ollama_models, global_agent, global_computer
+    create_agent,
+    get_model_string,
+    get_ollama_models,
+    global_agent,
+    global_computer,
+    load_settings,
+    save_settings,
 )
 # Global messages array to maintain conversation history
@@ -23,30 +29,28 @@ global_messages = []
 def create_gradio_ui() -> gr.Blocks:
     """Create a Gradio UI for the Computer-Use Agent."""
     # Load settings
     saved_settings = load_settings()
     # Check for API keys
     openai_api_key = os.environ.get("OPENAI_API_KEY", "")
     anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY", "")
     cua_api_key = os.environ.get("CUA_API_KEY", "")
     # Model choices
     openai_models = ["OpenAI: Computer-Use Preview"]
     anthropic_models = [
         "Anthropic: Claude 4 Opus (20250514)",
         "Anthropic: Claude 4 Sonnet (20250514)",
         "Anthropic: Claude 3.7 Sonnet (20250219)",
-        "Anthropic: Claude 3.5 Sonnet (20241022)",
     ]
     omni_models = [
         "OMNI: OpenAI GPT-4o",
         "OMNI: OpenAI GPT-4o mini",
-        "OMNI: Claude 3.7 Sonnet (20250219)",
-        "OMNI: Claude 3.5 Sonnet (20241022)"
+        "OMNI: Claude 3.7 Sonnet (20250219)",
     ]
     # Check if API keys are available
     has_openai_key = bool(openai_api_key)
     has_anthropic_key = bool(anthropic_api_key)
@@ -59,15 +63,20 @@ def create_gradio_ui() -> gr.Blocks:
     # Detect platform
     is_mac = platform.system().lower() == "darwin"
     # Format model choices
     provider_to_models = {
         "OPENAI": openai_models,
         "ANTHROPIC": anthropic_models,
         "OMNI": omni_models + ["Custom model (OpenAI compatible API)", "Custom model (ollama)"],
-        "UITARS": ([
-            "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
-        ] if is_mac else []) + ["Custom model (OpenAI compatible API)"],
+        "UITARS": (
+            [
+                "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
+            ]
+            if is_mac
+            else []
+        )
+        + ["Custom model (OpenAI compatible API)"],
     }
     # Apply saved settings
@@ -82,7 +91,9 @@ def create_gradio_ui() -> gr.Blocks:
         elif initial_loop == "ANTHROPIC":
             initial_model = anthropic_models[0] if anthropic_models else "No models available"
         else:  # OMNI
-            initial_model = omni_models[0] if omni_models else "Custom model (OpenAI compatible API)"
+            initial_model = (
+                omni_models[0] if omni_models else "Custom model (OpenAI compatible API)"
+            )
     initial_custom_model = saved_settings.get("custom_model", "Qwen2.5-VL-7B-Instruct")
     initial_provider_base_url = saved_settings.get("provider_base_url", "http://localhost:1234/v1")
@@ -96,16 +107,27 @@ def create_gradio_ui() -> gr.Blocks:
         "Open Safari, search for 'macOS automation tools', and save the first three results as bookmarks",
         "Configure SSH keys and set up a connection to a remote server",
     ]
-    def generate_python_code(agent_loop_choice, model_name, tasks, recent_images=3, save_trajectory=True, computer_os="linux", computer_provider="cloud", container_name="", cua_cloud_api_key="", max_budget=None):
+    def generate_python_code(
+        agent_loop_choice,
+        model_name,
+        tasks,
+        recent_images=3,
+        save_trajectory=True,
+        computer_os="linux",
+        computer_provider="cloud",
+        container_name="",
+        cua_cloud_api_key="",
+        max_budget=None,
+    ):
         """Generate Python code for the current configuration and tasks."""
         tasks_str = ""
         for task in tasks:
             if task and task.strip():
                 tasks_str += f'            "{task}",\n'
         model_string = get_model_string(model_name, agent_loop_choice)
         computer_args = []
         if computer_os != "macos":
             computer_args.append(f'os_type="{computer_os}"')
@@ -115,14 +137,14 @@ def create_gradio_ui() -> gr.Blocks:
             computer_args.append(f'name="{container_name}"')
         if cua_cloud_api_key:
             computer_args.append(f'api_key="{cua_cloud_api_key}"')
         computer_args_str = ", ".join(computer_args)
         if computer_args_str:
             computer_args_str = f"({computer_args_str})"
         else:
             computer_args_str = "()"
-        code = f'''import asyncio
+        code = f"""import asyncio
 from computer import Computer
 from agent import ComputerAgent
@@ -131,22 +153,22 @@ async def main():
         agent = ComputerAgent(
             model="{model_string}",
             tools=[computer],
-            only_n_most_recent_images={recent_images},'''
+            only_n_most_recent_images={recent_images},"""
         if save_trajectory:
-            code += '''
-            trajectory_dir="trajectories",'''
+            code += """
+            trajectory_dir="trajectories","""
         if max_budget:
-            code += f'''
-            max_trajectory_budget={{"max_budget": {max_budget}, "raise_error": True}},'''
-        code += '''
+            code += f"""
+            max_trajectory_budget={{"max_budget": {max_budget}, "raise_error": True}},"""
+        code += """
         )
-        '''
+        """
         if tasks_str:
-            code += f'''
+            code += f"""
         # Prompts for the computer-use agent
         tasks = [
 {tasks_str.rstrip()}
@@ -158,23 +180,23 @@ async def main():
             async for result in agent.run(messages):
                 for item in result["output"]:
                     if item["type"] == "message":
-                        print(item["content"][0]["text"])'''
+                        print(item["content"][0]["text"])"""
         else:
-            code += f'''
+            code += """
         # Execute a single task
-        task = "Search for information about CUA on GitHub"
-        print(f"Executing task: {{task}}")
-        messages = [{{"role": "user", "content": task}}]
+        task = "Search for information about Cua on GitHub"
+        print(f"Executing task: {task}")
+        messages = [{"role": "user", "content": task}]
         async for result in agent.run(messages):
             for item in result["output"]:
                 if item["type"] == "message":
-                    print(item["content"][0]["text"])'''
+                    print(item["content"][0]["text"])"""
-        code += '''
+        code += """
 if __name__ == "__main__":
-    asyncio.run(main())'''
+    asyncio.run(main())"""
         return code
     # Create the Gradio interface
@@ -186,7 +208,7 @@ if __name__ == "__main__":
                 gr.HTML(
                     """
                     <div style="display: flex; justify-content: center; margin-bottom: 0.5em">
-                        <img alt="CUA Logo" style="width: 80px;"
+                        <img alt="Cua Logo" style="width: 80px;"
                              src="https://github.com/trycua/cua/blob/main/img/logo_white.png?raw=true" />
                     </div>
                     """
@@ -199,11 +221,11 @@ if __name__ == "__main__":
                         value=generate_python_code(initial_loop, "gpt-4o", []),
                         interactive=False,
                     )
                 with gr.Accordion("Computer Configuration", open=True):
                     is_windows = platform.system().lower() == "windows"
                     is_mac = platform.system().lower() == "darwin"
                     providers = ["cloud", "localhost", "docker"]
                     if is_mac:
                         providers += ["lume"]
@@ -227,30 +249,30 @@ if __name__ == "__main__":
                         value=computer_choices[0],
                         info="Select the operating system for the computer",
                     )
                     computer_provider = gr.Radio(
                         choices=providers,
                         label="Provider",
                         value="lume" if is_mac else "cloud",
                         info="Select the computer provider",
                     )
                     container_name = gr.Textbox(
                         label="Container Name",
                         placeholder="Enter container name (optional)",
                         value=os.environ.get("CUA_CONTAINER_NAME", ""),
                         info="Optional name for the container",
                     )
                     cua_cloud_api_key = gr.Textbox(
-                        label="CUA Cloud API Key",
-                        placeholder="Enter your CUA Cloud API key",
+                        label="Cua Cloud API Key",
+                        placeholder="Enter your Cua Cloud API key",
                         value=os.environ.get("CUA_API_KEY", ""),
                         type="password",
                         info="Required for cloud provider",
-                        visible=(not has_cua_key)
+                        visible=(not has_cua_key),
                     )
                 with gr.Accordion("Agent Configuration", open=True):
                     agent_loop = gr.Dropdown(
                         choices=["OPENAI", "ANTHROPIC", "OMNI", "UITARS"],
@@ -267,90 +289,113 @@ if __name__ == "__main__":
                             value=openai_models[0] if openai_models else "No models available",
                             info="Select OpenAI model",
                             interactive=True,
-                            visible=(initial_loop == "OPENAI")
+                            visible=(initial_loop == "OPENAI"),
                         )
                         anthropic_model_choice = gr.Dropdown(
                             choices=anthropic_models,
                             label="Anthropic Model",
-                            value=anthropic_models[0] if anthropic_models else "No models available",
+                            value=(
+                                anthropic_models[0] if anthropic_models else "No models available"
+                            ),
                             info="Select Anthropic model",
                             interactive=True,
-                            visible=(initial_loop == "ANTHROPIC")
+                            visible=(initial_loop == "ANTHROPIC"),
                         )
                         omni_model_choice = gr.Dropdown(
-                            choices=omni_models + ["Custom model (OpenAI compatible API)", "Custom model (ollama)"],
+                            choices=omni_models
+                            + ["Custom model (OpenAI compatible API)", "Custom model (ollama)"],
                             label="OMNI Model",
-                            value=omni_models[0] if omni_models else "Custom model (OpenAI compatible API)",
+                            value=(
+                                omni_models[0]
+                                if omni_models
+                                else "Custom model (OpenAI compatible API)"
+                            ),
                             info="Select OMNI model or choose a custom model option",
                             interactive=True,
-                            visible=(initial_loop == "OMNI")
+                            visible=(initial_loop == "OMNI"),
                         )
                         uitars_model_choice = gr.Dropdown(
                             choices=provider_to_models.get("UITARS", ["No models available"]),
                             label="UITARS Model",
-                            value=provider_to_models.get("UITARS", ["No models available"])[0] if provider_to_models.get("UITARS") else "No models available",
+                            value=(
+                                provider_to_models.get("UITARS", ["No models available"])[0]
+                                if provider_to_models.get("UITARS")
+                                else "No models available"
+                            ),
                             info="Select UITARS model",
                             interactive=True,
-                            visible=(initial_loop == "UITARS")
+                            visible=(initial_loop == "UITARS"),
                         )
                         model_choice = gr.Textbox(visible=False)
                     # API key inputs
-                    with gr.Group(visible=not has_openai_key and (initial_loop == "OPENAI" or initial_loop == "OMNI")) as openai_key_group:
+                    with gr.Group(
+                        visible=not has_openai_key
+                        and (initial_loop == "OPENAI" or initial_loop == "OMNI")
+                    ) as openai_key_group:
                         openai_api_key_input = gr.Textbox(
                             label="OpenAI API Key",
                             placeholder="Enter your OpenAI API key",
                             value=os.environ.get("OPENAI_API_KEY", ""),
                             interactive=True,
                             type="password",
-                            info="Required for OpenAI models"
+                            info="Required for OpenAI models",
                         )
-                    with gr.Group(visible=not has_anthropic_key and (initial_loop == "ANTHROPIC" or initial_loop == "OMNI")) as anthropic_key_group:
+                    with gr.Group(
+                        visible=not has_anthropic_key
+                        and (initial_loop == "ANTHROPIC" or initial_loop == "OMNI")
+                    ) as anthropic_key_group:
                         anthropic_api_key_input = gr.Textbox(
                             label="Anthropic API Key",
                             placeholder="Enter your Anthropic API key",
                             value=os.environ.get("ANTHROPIC_API_KEY", ""),
                             interactive=True,
                             type="password",
-                            info="Required for Anthropic models"
+                            info="Required for Anthropic models",
                         )
                     # API key handlers
                     def set_openai_api_key(key):
                         if key and key.strip():
                             os.environ["OPENAI_API_KEY"] = key.strip()
-                            print(f"DEBUG - Set OpenAI API key environment variable")
+                            print("DEBUG - Set OpenAI API key environment variable")
                         return key
                     def set_anthropic_api_key(key):
                         if key and key.strip():
                             os.environ["ANTHROPIC_API_KEY"] = key.strip()
-                            print(f"DEBUG - Set Anthropic API key environment variable")
+                            print("DEBUG - Set Anthropic API key environment variable")
                         return key
                     openai_api_key_input.change(
                         fn=set_openai_api_key,
                         inputs=[openai_api_key_input],
                         outputs=[openai_api_key_input],
-                        queue=False
+                        queue=False,
                     )
                     anthropic_api_key_input.change(
                         fn=set_anthropic_api_key,
                         inputs=[anthropic_api_key_input],
                         outputs=[anthropic_api_key_input],
-                        queue=False
+                        queue=False,
                     )
                     # UI update function
-                    def update_ui(loop=None, openai_model=None, anthropic_model=None, omni_model=None, uitars_model=None):
+                    def update_ui(
+                        loop=None,
+                        openai_model=None,
+                        anthropic_model=None,
+                        omni_model=None,
+                        uitars_model=None,
+                    ):
                         loop = loop or agent_loop.value
                         model_value = None
                         if loop == "OPENAI" and openai_model:
                             model_value = openai_model
@@ -360,21 +405,37 @@ if __name__ == "__main__":
                             model_value = omni_model
                         elif loop == "UITARS" and uitars_model:
                             model_value = uitars_model
-                        openai_visible = (loop == "OPENAI")
-                        anthropic_visible = (loop == "ANTHROPIC")
-                        omni_visible = (loop == "OMNI")
-                        uitars_visible = (loop == "UITARS")
-                        show_openai_key = not has_openai_key and (loop == "OPENAI" or (loop == "OMNI" and model_value and "OpenAI" in model_value and "Custom" not in model_value))
-                        show_anthropic_key = not has_anthropic_key and (loop == "ANTHROPIC" or (loop == "OMNI" and model_value and "Claude" in model_value and "Custom" not in model_value))
+                        openai_visible = loop == "OPENAI"
+                        anthropic_visible = loop == "ANTHROPIC"
+                        omni_visible = loop == "OMNI"
+                        uitars_visible = loop == "UITARS"
+                        show_openai_key = not has_openai_key and (
+                            loop == "OPENAI"
+                            or (
+                                loop == "OMNI"
+                                and model_value
+                                and "OpenAI" in model_value
+                                and "Custom" not in model_value
+                            )
+                        )
+                        show_anthropic_key = not has_anthropic_key and (
+                            loop == "ANTHROPIC"
+                            or (
+                                loop == "OMNI"
+                                and model_value
+                                and "Claude" in model_value
+                                and "Custom" not in model_value
+                            )
+                        )
                         is_custom_openai_api = model_value == "Custom model (OpenAI compatible API)"
                         is_custom_ollama = model_value == "Custom model (ollama)"
                         is_any_custom = is_custom_openai_api or is_custom_ollama
                         model_choice_value = model_value if model_value else ""
                         return [
                             gr.update(visible=openai_visible),
                             gr.update(visible=anthropic_visible),
@@ -385,15 +446,18 @@ if __name__ == "__main__":
                             gr.update(visible=is_any_custom),
                             gr.update(visible=is_custom_openai_api),
                             gr.update(visible=is_custom_openai_api),
-                            gr.update(value=model_choice_value)
+                            gr.update(value=model_choice_value),
                         ]
                     # Custom model inputs
                     custom_model = gr.Textbox(
                         label="Custom Model Name",
                         placeholder="Enter custom model name (e.g., Qwen2.5-VL-7B-Instruct or llama3)",
                         value=initial_custom_model,
-                        visible=(initial_model == "Custom model (OpenAI compatible API)" or initial_model == "Custom model (ollama)"),
+                        visible=(
+                            initial_model == "Custom model (OpenAI compatible API)"
+                            or initial_model == "Custom model (ollama)"
+                        ),
                         interactive=True,
                     )
@@ -413,36 +477,56 @@ if __name__ == "__main__":
                         interactive=True,
                         type="password",
                     )
                     # Provider visibility update function
                     def update_provider_visibility(provider):
                         """Update visibility of container name and API key based on selected provider."""
                         is_localhost = provider == "localhost"
                         return [
                             gr.update(visible=not is_localhost),  # container_name
-                            gr.update(visible=not is_localhost and not has_cua_key)  # cua_cloud_api_key
+                            gr.update(
+                                visible=not is_localhost and not has_cua_key
+                            ),  # cua_cloud_api_key
                         ]
                     # Connect provider change event
                     computer_provider.change(
                         fn=update_provider_visibility,
                         inputs=[computer_provider],
                         outputs=[container_name, cua_cloud_api_key],
-                        queue=False
+                        queue=False,
                     )
                     # Connect UI update events
-                    for dropdown in [agent_loop, omni_model_choice, uitars_model_choice, openai_model_choice, anthropic_model_choice]:
+                    for dropdown in [
+                        agent_loop,
+                        omni_model_choice,
+                        uitars_model_choice,
+                        openai_model_choice,
+                        anthropic_model_choice,
+                    ]:
                         dropdown.change(
                             fn=update_ui,
-                            inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice],
+                            inputs=[
+                                agent_loop,
+                                openai_model_choice,
+                                anthropic_model_choice,
+                                omni_model_choice,
+                                uitars_model_choice,
+                            ],
                             outputs=[
-                                openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice,
-                                openai_key_group, anthropic_key_group,
-                                custom_model, provider_base_url, provider_api_key,
-                                model_choice
+                                openai_model_choice,
+                                anthropic_model_choice,
+                                omni_model_choice,
+                                uitars_model_choice,
+                                openai_key_group,
+                                anthropic_key_group,
+                                custom_model,
+                                provider_base_url,
+                                provider_api_key,
+                                model_choice,
                             ],
-                            queue=False
+                            queue=False,
                         )
                     save_trajectory = gr.Checkbox(
@@ -461,7 +545,7 @@ if __name__ == "__main__":
                         info="Number of recent images to keep in context",
                         interactive=True,
                     )
                     max_budget = gr.Number(
                         label="Max Budget ($)",
                         value=lambda: None,
@@ -478,10 +562,8 @@ if __name__ == "__main__":
                     "Ask me to perform tasks in a virtual environment.<br>Built with <a href='https://github.com/trycua/cua' target='_blank'>github.com/trycua/cua</a>."
                 )
-                chatbot_history = gr.Chatbot(type="messages")
-                msg = gr.Textbox(
-                    placeholder="Ask me to perform tasks in a virtual environment"
-                )
+                chatbot_history = gr.Chatbot()
+                msg = gr.Textbox(placeholder="Ask me to perform tasks in a virtual environment")
                 clear = gr.Button("Clear")
                 cancel_button = gr.Button("Cancel", variant="stop")
@@ -498,11 +580,23 @@ if __name__ == "__main__":
                     global global_agent
                     if global_agent:
                         print("DEBUG - Cancelling agent task")
-                        history.append(gr.ChatMessage(role="assistant", content="Task cancelled by user", metadata={"title": "❌ Cancelled"}))
+                        history.append(
+                            gr.ChatMessage(
+                                role="assistant",
+                                content="Task cancelled by user",
+                                metadata={"title": "❌ Cancelled"},
+                            )
+                        )
                     else:
-                        history.append(gr.ChatMessage(role="assistant", content="No active agent task to cancel", metadata={"title": "ℹ️ Info"}))
+                        history.append(
+                            gr.ChatMessage(
+                                role="assistant",
+                                content="No active agent task to cancel",
+                                metadata={"title": "ℹ️ Info"},
+                            )
+                        )
                     return history
                 # Process response function
                 async def process_response(
                     history,
@@ -542,10 +636,13 @@ if __name__ == "__main__":
                         model_choice_value = uitars_model_value
                     else:
                         model_choice_value = "No models available"
                     # Determine if this is a custom model selection
-                    is_custom_model_selected = model_choice_value in ["Custom model (OpenAI compatible API)", "Custom model (ollama)"]
+                    is_custom_model_selected = model_choice_value in [
+                        "Custom model (OpenAI compatible API)",
+                        "Custom model (ollama)",
+                    ]
                     # Determine the model name string to analyze
                     if is_custom_model_selected:
                         model_string_to_analyze = custom_model_value
@@ -583,13 +680,19 @@ if __name__ == "__main__":
                             model_string=model_string,
                             save_trajectory=save_traj,
                             only_n_most_recent_images=recent_imgs,
-                            custom_model_name=custom_model_value if is_custom_model_selected else None,
+                            custom_model_name=(
+                                custom_model_value if is_custom_model_selected else None
+                            ),
                             computer_os=computer_os,
                             computer_provider=computer_provider,
                             computer_name=container_name,
                             computer_api_key=cua_cloud_api_key,
                             verbosity=logging.DEBUG,
-                            max_trajectory_budget=max_budget_value if max_budget_value and max_budget_value > 0 else None,
+                            max_trajectory_budget=(
+                                max_budget_value
+                                if max_budget_value and max_budget_value > 0
+                                else None
+                            ),
                         )
                         if global_agent is None:
@@ -605,7 +708,7 @@ if __name__ == "__main__":
                         # Add user message to global history
                         global global_messages
                         global_messages.append({"role": "user", "content": last_user_message})
                         # Stream responses from the agent
                         async for result in global_agent.run(global_messages):
                             global_messages += result.get("output", [])
@@ -613,18 +716,20 @@ if __name__ == "__main__":
                             # from pprint import pprint
                             # pprint(result)
                             # print(f"DEBUG - Agent response ------- END")
                             # Process the result output
                             for item in result.get("output", []):
                                 if item.get("type") == "message":
                                     content = item.get("content", [])
                                     for content_part in content:
                                         if content_part.get("text"):
-                                            history.append(gr.ChatMessage(
-                                                role=item.get("role", "assistant"),
-                                                content=content_part.get("text", ""),
-                                                metadata=content_part.get("metadata", {})
-                                            ))
+                                            history.append(
+                                                gr.ChatMessage(
+                                                    role=item.get("role", "assistant"),
+                                                    content=content_part.get("text", ""),
+                                                    metadata=content_part.get("metadata", {}),
+                                                )
+                                            )
                                 elif item.get("type") == "computer_call":
                                     action = item.get("action", {})
                                     action_type = action.get("type", "")
@@ -632,43 +737,52 @@ if __name__ == "__main__":
                                         action_title = f"🛠️ Performing {action_type}"
                                         if action.get("x") and action.get("y"):
                                             action_title += f" at ({action['x']}, {action['y']})"
-                                        history.append(gr.ChatMessage(
-                                            role="assistant",
-                                            content=f"```json\n{json.dumps(action)}\n```",
-                                            metadata={"title": action_title}
-                                        ))
+                                        history.append(
+                                            gr.ChatMessage(
+                                                role="assistant",
+                                                content=f"```json\n{json.dumps(action)}\n```",
+                                                metadata={"title": action_title},
+                                            )
+                                        )
                                 elif item.get("type") == "function_call":
                                     function_name = item.get("name", "")
                                     arguments = item.get("arguments", "{}")
-                                    history.append(gr.ChatMessage(
-                                        role="assistant",
-                                        content=f"🔧 Calling function: {function_name}\n```json\n{arguments}\n```",
-                                        metadata={"title": f"Function Call: {function_name}"}
-                                    ))
+                                    history.append(
+                                        gr.ChatMessage(
+                                            role="assistant",
+                                            content=f"🔧 Calling function: {function_name}\n```json\n{arguments}\n```",
+                                            metadata={"title": f"Function Call: {function_name}"},
+                                        )
+                                    )
                                 elif item.get("type") == "function_call_output":
                                     output = item.get("output", "")
-                                    history.append(gr.ChatMessage(
-                                        role="assistant",
-                                        content=f"📤 Function output:\n```\n{output}\n```",
-                                        metadata={"title": "Function Output"}
-                                    ))
+                                    history.append(
+                                        gr.ChatMessage(
+                                            role="assistant",
+                                            content=f"📤 Function output:\n```\n{output}\n```",
+                                            metadata={"title": "Function Output"},
+                                        )
+                                    )
                                 elif item.get("type") == "computer_call_output":
                                     output = item.get("output", {}).get("image_url", "")
                                     image_markdown = f"![Computer output]({output})"
-                                    history.append(gr.ChatMessage(
-                                        role="assistant",
-                                        content=image_markdown,
-                                        metadata={"title": "🖥️ Computer Output"}
-                                    ))
+                                    history.append(
+                                        gr.ChatMessage(
+                                            role="assistant",
+                                            content=image_markdown,
+                                            metadata={"title": "🖥️ Computer Output"},
+                                        )
+                                    )
                             yield history
                     except Exception as e:
                         import traceback
                         traceback.print_exc()
                         history.append(gr.ChatMessage(role="assistant", content=f"Error: {str(e)}"))
                         yield history
                 # Connect the submit button
                 submit_event = msg.submit(
                     fn=chat_submit,
@@ -706,44 +820,77 @@ if __name__ == "__main__":
                     global global_messages
                     global_messages.clear()
                     return None
                 clear.click(clear_chat, None, chatbot_history, queue=False)
                 # Connect cancel button
                 cancel_button.click(
-                    cancel_agent_task,
-                    [chatbot_history],
-                    [chatbot_history],
-                    queue=False
+                    cancel_agent_task, [chatbot_history], [chatbot_history], queue=False
                 )
                 # Code display update function
-                def update_code_display(agent_loop, model_choice_val, custom_model_val, chat_history, recent_images_val, save_trajectory_val, computer_os, computer_provider, container_name, cua_cloud_api_key, max_budget_val):
+                def update_code_display(
+                    agent_loop,
+                    model_choice_val,
+                    custom_model_val,
+                    chat_history,
+                    recent_images_val,
+                    save_trajectory_val,
+                    computer_os,
+                    computer_provider,
+                    container_name,
+                    cua_cloud_api_key,
+                    max_budget_val,
+                ):
                     messages = []
                     if chat_history:
                         for msg in chat_history:
                             if isinstance(msg, dict) and msg.get("role") == "user":
                                 messages.append(msg.get("content", ""))
                     return generate_python_code(
-                        agent_loop,
-                        model_choice_val or custom_model_val or "gpt-4o",
-                        messages,
+                        agent_loop,
+                        model_choice_val or custom_model_val or "gpt-4o",
+                        messages,
                         recent_images_val,
                         save_trajectory_val,
                         computer_os,
                         computer_provider,
                         container_name,
                         cua_cloud_api_key,
-                        max_budget_val
+                        max_budget_val,
                     )
                 # Update code display when configuration changes
-                for component in [agent_loop, model_choice, custom_model, chatbot_history, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key, max_budget]:
+                for component in [
+                    agent_loop,
+                    model_choice,
+                    custom_model,
+                    chatbot_history,
+                    recent_images,
+                    save_trajectory,
+                    computer_os,
+                    computer_provider,
+                    container_name,
+                    cua_cloud_api_key,
+                    max_budget,
+                ]:
                     component.change(
                         update_code_display,
-                        inputs=[agent_loop, model_choice, custom_model, chatbot_history, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key, max_budget],
-                        outputs=[code_display]
+                        inputs=[
+                            agent_loop,
+                            model_choice,
+                            custom_model,
+                            chatbot_history,
+                            recent_images,
+                            save_trajectory,
+                            computer_os,
+                            computer_provider,
+                            container_name,
+                            cua_cloud_api_key,
+                            max_budget,
+                        ],
+                        outputs=[code_display],
                     )
     return demo

cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl

Potentially problematic release.

cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl