cua-agent 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of cua-agent has been flagged as potentially problematic.

agent/ui/gradio/app.py CHANGED
@@ -290,7 +290,7 @@ def get_provider_and_model(model_name: str, loop_provider: str) -> tuple:
             model_name_to_use = cleaned_model_name
             # agent_loop remains AgentLoop.OMNI
     elif agent_loop == AgentLoop.UITARS:
-        # For UITARS, use MLXVLM provider for the MLX models, OAICOMPAT for custom
+        # For UITARS, use MLXVLM for mlx-community models, OAICOMPAT for custom
         if model_name == "Custom model (OpenAI compatible API)":
             provider = LLMProvider.OAICOMPAT
             model_name_to_use = "tgi"
@@ -333,12 +333,25 @@ def get_ollama_models() -> List[str]:
         logging.error(f"Error getting Ollama models: {e}")
         return []

-def create_computer_instance(verbosity: int = logging.INFO) -> Computer:
+
+def create_computer_instance(
+    verbosity: int = logging.INFO,
+    os_type: str = "macos",
+    provider_type: str = "lume",
+    name: Optional[str] = None,
+    api_key: Optional[str] = None
+) -> Computer:
     """Create or get the global Computer instance."""
     global global_computer

     if global_computer is None:
-        global_computer = Computer(verbosity=verbosity)
+        global_computer = Computer(
+            verbosity=verbosity,
+            os_type=os_type,
+            provider_type=provider_type,
+            name=name if name else "",
+            api_key=api_key
+        )

     return global_computer

@@ -353,12 +366,22 @@ def create_agent(
     verbosity: int = logging.INFO,
     use_oaicompat: bool = False,
     provider_base_url: Optional[str] = None,
+    computer_os: str = "macos",
+    computer_provider: str = "lume",
+    computer_name: Optional[str] = None,
+    computer_api_key: Optional[str] = None,
 ) -> ComputerAgent:
     """Create or update the global agent with the specified parameters."""
     global global_agent

     # Create the computer if not already done
-    computer = create_computer_instance(verbosity=verbosity)
+    computer = create_computer_instance(
+        verbosity=verbosity,
+        os_type=computer_os,
+        provider_type=computer_provider,
+        name=computer_name,
+        api_key=computer_api_key
+    )

     # Get API key from environment if not provided
     if api_key is None:
@@ -401,6 +424,7 @@ def create_agent(

     return global_agent

+
 def create_gradio_ui(
     provider_name: str = "openai",
     model_name: str = "gpt-4o",
@@ -439,6 +463,9 @@ def create_gradio_ui(
     # Check if API keys are available
     has_openai_key = bool(openai_api_key)
     has_anthropic_key = bool(anthropic_api_key)
+
+    print("has_openai_key", has_openai_key)
+    print("has_anthropic_key", has_anthropic_key)

     # Get Ollama models for OMNI
     ollama_models = get_ollama_models()
@@ -473,7 +500,7 @@ def create_gradio_ui(
     elif initial_loop == "ANTHROPIC":
         initial_model = anthropic_models[0] if anthropic_models else "No models available"
     else:  # OMNI
-        initial_model = omni_models[0] if omni_models else "No models available"
+        initial_model = omni_models[0] if omni_models else "Custom model (OpenAI compatible API)"
         if "Custom model (OpenAI compatible API)" in available_models_for_loop:
             initial_model = (
                 "Custom model (OpenAI compatible API)"  # Default to custom if available and no other default fits
@@ -494,7 +521,7 @@ def create_gradio_ui(
     ]

     # Function to generate Python code based on configuration and tasks
-    def generate_python_code(agent_loop_choice, provider, model_name, tasks, provider_url, recent_images=3, save_trajectory=True):
+    def generate_python_code(agent_loop_choice, provider, model_name, tasks, provider_url, recent_images=3, save_trajectory=True, computer_os="macos", computer_provider="lume", container_name="", cua_cloud_api_key=""):
        """Generate Python code for the current configuration and tasks.

        Args:
@@ -505,6 +532,10 @@ def create_gradio_ui(
            provider_url: The provider base URL for OAICOMPAT providers
            recent_images: Number of recent images to keep in context
            save_trajectory: Whether to save the agent trajectory
+            computer_os: Operating system type for the computer
+            computer_provider: Provider type for the computer
+            container_name: Optional VM name
+            cua_cloud_api_key: Optional CUA Cloud API key

        Returns:
            Formatted Python code as a string
@@ -515,13 +546,29 @@ def create_gradio_ui(
            if task and task.strip():
                tasks_str += f' "{task}",\n'

-        # Create the Python code template
+        # Create the Python code template with computer configuration
+        computer_args = []
+        if computer_os != "macos":
+            computer_args.append(f'os_type="{computer_os}"')
+        if computer_provider != "lume":
+            computer_args.append(f'provider_type="{computer_provider}"')
+        if container_name:
+            computer_args.append(f'name="{container_name}"')
+        if cua_cloud_api_key:
+            computer_args.append(f'api_key="{cua_cloud_api_key}"')
+
+        computer_args_str = ", ".join(computer_args)
+        if computer_args_str:
+            computer_args_str = f"({computer_args_str})"
+        else:
+            computer_args_str = "()"
+
         code = f'''import asyncio
 from computer import Computer
 from agent import ComputerAgent, LLM, AgentLoop, LLMProvider

 async def main():
-    async with Computer() as macos_computer:
+    async with Computer{computer_args_str} as macos_computer:
         agent = ComputerAgent(
             computer=macos_computer,
             loop=AgentLoop.{agent_loop_choice},
@@ -660,12 +707,49 @@ if __name__ == "__main__":
                 LLMProvider.OPENAI,
                 "gpt-4o",
                 [],
-                "https://openrouter.ai/api/v1"
+                "https://openrouter.ai/api/v1",
+                3,  # recent_images default
+                True,  # save_trajectory default
+                "macos",
+                "lume",
+                "",
+                ""
             ),
             interactive=False,
         )

-        with gr.Accordion("Configuration", open=True):
+        with gr.Accordion("Computer Configuration", open=True):
+            # Computer configuration options
+            computer_os = gr.Radio(
+                choices=["macos", "linux"],
+                label="Operating System",
+                value="macos",
+                info="Select the operating system for the computer",
+            )
+
+            computer_provider = gr.Radio(
+                choices=["cloud", "lume"],
+                label="Provider",
+                value="lume",
+                info="Select the computer provider",
+            )
+
+            container_name = gr.Textbox(
+                label="Container Name",
+                placeholder="Enter container name (optional)",
+                value="",
+                info="Optional name for the container",
+            )
+
+            cua_cloud_api_key = gr.Textbox(
+                label="CUA Cloud API Key",
+                placeholder="Enter your CUA Cloud API key",
+                value="",
+                type="password",
+                info="Required for cloud provider",
+            )
+
+        with gr.Accordion("Agent Configuration", open=True):
             # Configuration options
             agent_loop = gr.Dropdown(
                 choices=["OPENAI", "ANTHROPIC", "OMNI", "UITARS"],
@@ -986,6 +1070,10 @@ if __name__ == "__main__":
            custom_api_key=None,
            openai_key_input=None,
            anthropic_key_input=None,
+            computer_os="macos",
+            computer_provider="lume",
+            container_name="",
+            cua_cloud_api_key="",
        ):
            if not history:
                yield history
@@ -1092,6 +1180,10 @@ if __name__ == "__main__":
                "provider_base_url": custom_url_value,
                "save_trajectory": save_traj,
                "recent_images": recent_imgs,
+                "computer_os": computer_os,
+                "computer_provider": computer_provider,
+                "container_name": container_name,
+                "cua_cloud_api_key": cua_cloud_api_key,
            }
            save_settings(current_settings)
            # --- End Save Settings ---
@@ -1109,6 +1201,10 @@ if __name__ == "__main__":
                use_oaicompat=is_oaicompat,  # Set flag if custom model was selected
                # Pass custom URL only if custom model was selected
                provider_base_url=custom_url_value if is_oaicompat else None,
+                computer_os=computer_os,
+                computer_provider=computer_provider,
+                computer_name=container_name,
+                computer_api_key=cua_cloud_api_key,
                verbosity=logging.DEBUG,  # Added verbosity here
            )

@@ -1235,6 +1331,10 @@ if __name__ == "__main__":
                provider_api_key,
                openai_api_key_input,
                anthropic_api_key_input,
+                computer_os,
+                computer_provider,
+                container_name,
+                cua_cloud_api_key,
            ],
            outputs=[chatbot_history],
            queue=True,
@@ -1253,82 +1353,20 @@ if __name__ == "__main__":


        # Function to update the code display based on configuration and chat history
-        def update_code_display(agent_loop, model_choice_val, custom_model_val, chat_history, provider_base_url, recent_images_val, save_trajectory_val):
+        def update_code_display(agent_loop, model_choice_val, custom_model_val, chat_history, provider_base_url, recent_images_val, save_trajectory_val, computer_os, computer_provider, container_name, cua_cloud_api_key):
            # Extract messages from chat history
            messages = []
            if chat_history:
                for msg in chat_history:
-                    if msg.get("role") == "user":
+                    if isinstance(msg, dict) and msg.get("role") == "user":
                        messages.append(msg.get("content", ""))

-            # Determine if this is a custom model selection and which type
-            is_custom_openai_api = model_choice_val == "Custom model (OpenAI compatible API)"
-            is_custom_ollama = model_choice_val == "Custom model (ollama)"
-            is_custom_model_selected = is_custom_openai_api or is_custom_ollama
-
-            # Determine provider and model name based on agent loop
-            if agent_loop == "OPENAI":
-                # For OPENAI loop, always use OPENAI provider with computer-use-preview
-                provider = LLMProvider.OPENAI
-                model_name = "computer-use-preview"
-            elif agent_loop == "ANTHROPIC":
-                # For ANTHROPIC loop, always use ANTHROPIC provider
-                provider = LLMProvider.ANTHROPIC
-                # Extract model name from the UI string
-                if model_choice_val.startswith("Anthropic: Claude "):
-                    # Extract the model name based on the UI string
-                    model_parts = model_choice_val.replace("Anthropic: Claude ", "").split(" (")
-                    version = model_parts[0]  # e.g., "3.7 Sonnet"
-                    date = model_parts[1].replace(")", "") if len(model_parts) > 1 else ""  # e.g., "20250219"
-
-                    # Format as claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20240620
-                    version = version.replace(".", "-").replace(" ", "-").lower()
-                    model_name = f"claude-{version}-{date}"
-                else:
-                    # Use the model_choice_val directly if it doesn't match the expected format
-                    model_name = model_choice_val
-            elif agent_loop == "UITARS":
-                # For UITARS, use MLXVLM for mlx-community models, OAICOMPAT for custom
-                if model_choice_val == "Custom model (OpenAI compatible API)":
-                    provider = LLMProvider.OAICOMPAT
-                    model_name = custom_model_val
-                else:
-                    provider = LLMProvider.MLXVLM
-                    model_name = model_choice_val
-            elif agent_loop == "OMNI":
-                # For OMNI, provider can be OPENAI, ANTHROPIC, OLLAMA, or OAICOMPAT
-                if is_custom_openai_api:
-                    provider = LLMProvider.OAICOMPAT
-                    model_name = custom_model_val
-                elif is_custom_ollama:
-                    provider = LLMProvider.OLLAMA
-                    model_name = custom_model_val
-                elif model_choice_val.startswith("OMNI: OpenAI "):
-                    provider = LLMProvider.OPENAI
-                    # Extract model name from UI string (e.g., "OMNI: OpenAI GPT-4o" -> "gpt-4o")
-                    model_name = model_choice_val.replace("OMNI: OpenAI ", "").lower().replace(" ", "-")
-                elif model_choice_val.startswith("OMNI: Claude "):
-                    provider = LLMProvider.ANTHROPIC
-                    # Extract model name from UI string (similar to ANTHROPIC loop case)
-                    model_parts = model_choice_val.replace("OMNI: Claude ", "").split(" (")
-                    version = model_parts[0]  # e.g., "3.7 Sonnet"
-                    date = model_parts[1].replace(")", "") if len(model_parts) > 1 else ""  # e.g., "20250219"
-
-                    # Format as claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20240620
-                    version = version.replace(".", "-").replace(" ", "-").lower()
-                    model_name = f"claude-{version}-{date}"
-                elif model_choice_val.startswith("OMNI: Ollama "):
-                    provider = LLMProvider.OLLAMA
-                    # Extract model name from UI string (e.g., "OMNI: Ollama llama3" -> "llama3")
-                    model_name = model_choice_val.replace("OMNI: Ollama ", "")
-                else:
-                    # Fallback to get_provider_and_model for any other cases
-                    provider, model_name, _ = get_provider_and_model(model_choice_val, agent_loop)
-            else:
-                # Fallback for any other agent loop
-                provider, model_name, _ = get_provider_and_model(model_choice_val, agent_loop)
+            # Determine provider and model based on current selection
+            provider, model_name, _ = get_provider_and_model(
+                model_choice_val or custom_model_val or "gpt-4o",
+                agent_loop
+            )

-            # Generate and return the code
            return generate_python_code(
                agent_loop,
                provider,
@@ -1336,38 +1374,62 @@ if __name__ == "__main__":
                messages,
                provider_base_url,
                recent_images_val,
-                save_trajectory_val
+                save_trajectory_val,
+                computer_os,
+                computer_provider,
+                container_name,
+                cua_cloud_api_key
            )

        # Update code display when configuration changes
        agent_loop.change(
            update_code_display,
-            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
            outputs=[code_display]
        )
        model_choice.change(
            update_code_display,
-            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
            outputs=[code_display]
        )
        custom_model.change(
            update_code_display,
-            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
            outputs=[code_display]
        )
        chatbot_history.change(
            update_code_display,
-            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
            outputs=[code_display]
        )
        recent_images.change(
            update_code_display,
-            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
            outputs=[code_display]
        )
        save_trajectory.change(
            update_code_display,
-            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
+            outputs=[code_display]
+        )
+        computer_os.change(
+            update_code_display,
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
+            outputs=[code_display]
+        )
+        computer_provider.change(
+            update_code_display,
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
+            outputs=[code_display]
+        )
+        container_name.change(
+            update_code_display,
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
+            outputs=[code_display]
+        )
+        cua_cloud_api_key.change(
+            update_code_display,
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
            outputs=[code_display]
        )

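Taken together, the app.py changes above thread four new computer-configuration options (operating system, provider, container name, CUA Cloud API key) from the new Gradio controls through create_computer_instance() and create_agent() and into the template emitted by generate_python_code(). As a rough illustration, here is a minimal sketch of the kind of script that template now produces when a cloud container is configured; the container name and API key are placeholders, the OMNI loop is just an example choice, and the remaining ComputerAgent arguments are elided exactly as they are in the hunk above.

import asyncio
from computer import Computer
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider

async def main():
    # New in 0.2.2: os_type, provider_type, name and api_key are forwarded to Computer
    # (omitting them keeps the previous defaults, i.e. a local macOS VM via lume).
    async with Computer(
        os_type="linux",
        provider_type="cloud",
        name="my-container",            # placeholder container name
        api_key="<cua-cloud-api-key>",  # placeholder CUA Cloud API key
    ) as macos_computer:
        agent = ComputerAgent(
            computer=macos_computer,
            loop=AgentLoop.OMNI,
            # ... LLM/model arguments as in the generated template ...
        )

if __name__ == "__main__":
    asyncio.run(main())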
cua_agent-0.2.1.dist-info/METADATA → cua_agent-0.2.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cua-agent
-Version: 0.2.1
+Version: 0.2.2
 Summary: CUA (Computer Use) Agent for AI-driven computer interaction
 Author-Email: TryCua <gh@trycua.com>
 Requires-Python: >=3.10
cua_agent-0.2.1.dist-info/RECORD → cua_agent-0.2.2.dist-info/RECORD CHANGED
@@ -79,8 +79,8 @@ agent/providers/uitars/utils.py,sha256=493STTEEJcVhVbQgR0e8rNTI1DjkxUx8IgIv3wkJ1
 agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
 agent/ui/__init__.py,sha256=ohhxJLBin6k1hl5sKcmBST8mgh23WXgAXz3pN4f470E,45
 agent/ui/gradio/__init__.py,sha256=ANKZhv1HqsLheWbLVBlyRQ7Q5qGeXuPi5jDs8vu-ZMo,579
-agent/ui/gradio/app.py,sha256=-ccsE6LrXFfxnPeMlEqm49QGvdjCgm-l6TudZZEM9r0,68241
-cua_agent-0.2.1.dist-info/METADATA,sha256=g3ca5FEJpxPobVoOrOW2ysqNFnEzwFQhTPvtq4zyLNs,12688
-cua_agent-0.2.1.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
-cua_agent-0.2.1.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
-cua_agent-0.2.1.dist-info/RECORD,,
+agent/ui/gradio/app.py,sha256=c6K5Pb-iP4N6PuXCkrkbB6g6FFTOjAcYAz7pj-fbBlY,69915
+cua_agent-0.2.2.dist-info/METADATA,sha256=Z5JMKgdDMFXKgOg4-NWPohgSS0pRJdOLdVPzu7J52kc,12688
+cua_agent-0.2.2.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
+cua_agent-0.2.2.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+cua_agent-0.2.2.dist-info/RECORD,,