cua-agent 0.1.23__py3-none-any.whl → 0.1.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for reference purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Review the details below before upgrading.

agent/core/types.py CHANGED
@@ -54,23 +54,6 @@ LLMModel = LLM
54
54
  Model = LLM
55
55
 
56
56
 
57
- # Default models for each provider
58
- PROVIDER_TO_DEFAULT_MODEL: Dict[LLMProvider, str] = {
59
- LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
60
- LLMProvider.OPENAI: "gpt-4o",
61
- LLMProvider.OLLAMA: "gemma3:4b-it-q4_K_M",
62
- LLMProvider.OAICOMPAT: "Qwen2.5-VL-7B-Instruct",
63
- }
64
-
65
- # Environment variable names for each provider
66
- PROVIDER_TO_ENV_VAR: Dict[LLMProvider, str] = {
67
- LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
68
- LLMProvider.OPENAI: "OPENAI_API_KEY",
69
- LLMProvider.OLLAMA: "none",
70
- LLMProvider.OAICOMPAT: "none",
71
- }
72
-
73
-
74
57
  class AgentResponse(TypedDict, total=False):
75
58
  """Agent response format."""
76
59
 
@@ -443,6 +443,8 @@ class OmniLoop(BaseLoop):
443
443
  except (json.JSONDecodeError, IndexError):
444
444
  try:
445
445
  # Look for JSON object pattern
446
+ import re # Local import to ensure availability
447
+
446
448
  json_pattern = r"\{[^}]+\}"
447
449
  json_match = re.search(json_pattern, raw_text)
448
450
  if json_match:
@@ -453,8 +455,20 @@ class OmniLoop(BaseLoop):
453
455
  logger.error(f"No JSON found in content")
454
456
  return True, action_screenshot_saved
455
457
  except json.JSONDecodeError as e:
456
- logger.error(f"Failed to parse JSON from text: {str(e)}")
457
- return True, action_screenshot_saved
458
+ # Try to sanitize the JSON string and retry
459
+ try:
460
+ # Remove or replace invalid control characters
461
+ import re # Local import to ensure availability
462
+
463
+ sanitized_text = re.sub(r"[\x00-\x1F\x7F]", "", raw_text)
464
+ # Try parsing again with sanitized text
465
+ parsed_content = json.loads(sanitized_text)
466
+ logger.info(
467
+ "Successfully parsed JSON after sanitizing control characters"
468
+ )
469
+ except json.JSONDecodeError:
470
+ logger.error(f"Failed to parse JSON from text: {str(e)}")
471
+ return True, action_screenshot_saved
458
472
 
459
473
  # Step 4: Process the parsed content if available
460
474
  if parsed_content:
agent/ui/gradio/app.py CHANGED
@@ -271,16 +271,19 @@ def create_agent(
271
271
  api_key = os.environ.get("ANTHROPIC_API_KEY", "")
272
272
 
273
273
  # Create LLM model object with appropriate parameters
274
- provider_base_url = "http://localhost:8000/v1" if use_oaicompat else None
274
+ provider_base_url = "http://localhost:1234/v1" if use_oaicompat else None
275
275
 
276
276
  if use_oaicompat:
277
- # Special handling for OAICOMPAT - use OPENAI provider with custom base URL
278
- print(f"DEBUG - Creating OAICOMPAT agent with model: {model_name}")
277
+ # Special handling for OAICOMPAT - use OAICOMPAT provider with custom base URL
278
+ print(
279
+ f"DEBUG - Creating OAICOMPAT agent with model: {model_name}, URL: {provider_base_url}"
280
+ )
279
281
  llm = LLM(
280
- provider=provider, # Already set to OPENAI
282
+ provider=LLMProvider.OAICOMPAT, # Set to OAICOMPAT instead of using original provider
281
283
  name=model_name,
282
284
  provider_base_url=provider_base_url,
283
285
  )
286
+ print(f"DEBUG - LLM provider is now: {llm.provider}, base URL: {llm.provider_base_url}")
284
287
  # Note: Don't pass use_oaicompat to the agent, as it doesn't accept this parameter
285
288
  elif provider == LLMProvider.OAICOMPAT:
286
289
  # This path is unlikely to be taken with our current approach
@@ -461,8 +464,10 @@ def respond(
461
464
  # Special handling for OAICOMPAT to bypass provider-specific errors
462
465
  # Creates the agent with OPENAI provider but using custom model name and provider base URL
463
466
  is_oaicompat = str(provider) == "oaicompat"
464
- if is_oaicompat:
465
- provider = LLMProvider.OPENAI
467
+
468
+ # Don't override the provider for OAICOMPAT - instead pass it through
469
+ # if is_oaicompat:
470
+ # provider = LLMProvider.OPENAI
466
471
 
467
472
  # Get API key based on provider
468
473
  if provider == LLMProvider.OPENAI:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-agent
3
- Version: 0.1.23
3
+ Version: 0.1.25
4
4
  Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: <3.13,>=3.10
@@ -151,12 +151,61 @@ pip install "cua-agent[ui]"
151
151
 
152
152
  # Create a simple launcher script
153
153
  ```python
154
+ # launch_ui.py
154
155
  from agent.ui.gradio.app import create_gradio_ui
155
156
 
156
157
  app = create_gradio_ui()
157
158
  app.launch(share=False)
158
159
  ```
159
160
 
161
+ # Run the launcher
162
+ python launch_ui.py
163
+ ```
164
+
165
+ ### Setting up API Keys
166
+
167
+ For the Gradio UI to show available models, you need to set API keys as environment variables:
168
+
169
+ ```bash
170
+ # For OpenAI models
171
+ export OPENAI_API_KEY=your_openai_key_here
172
+
173
+ # For Anthropic models
174
+ export ANTHROPIC_API_KEY=your_anthropic_key_here
175
+
176
+ # Launch with both keys set
177
+ OPENAI_API_KEY=your_key ANTHROPIC_API_KEY=your_key python launch_ui.py
178
+ ```
179
+
180
+ ### Using Local Models
181
+
182
+ You can use local models with the OMNI loop provider by selecting "Custom model..." from the dropdown. The default provider URL is set to `http://localhost:1234/v1` which works with LM Studio.
183
+
184
+ If you're using a different local model server:
185
+ - vLLM: `http://localhost:8000/v1`
186
+ - LocalAI: `http://localhost:8080/v1`
187
+ - Ollama with OpenAI compat API: `http://localhost:11434/v1`
188
+
189
+ To change the URL, modify the `provider_base_url` in your launcher script:
190
+
191
+ ```python
192
+ # In your launcher script
193
+ from agent.ui.gradio.app import create_gradio_ui
194
+ from agent import LLM, LLMProvider
195
+
196
+ # Create a custom model with a specific URL
197
+ custom_model = LLM(
198
+ provider=LLMProvider.OAICOMPAT,
199
+ name="your-model-name",
200
+ provider_base_url="http://localhost:8000/v1" # Change to your server URL
201
+ )
202
+
203
+ app = create_gradio_ui(custom_model=custom_model)
204
+ app.launch()
205
+ ```
206
+
207
+ Without these environment variables, the UI will show "No models available" for the corresponding providers, but you can still use local models with the OMNI loop provider.
208
+
160
209
  The Gradio UI provides:
161
210
  - Selection of different agent loops (OpenAI, Anthropic, OMNI)
162
211
  - Model selection for each provider
@@ -169,14 +218,8 @@ You can also embed the Gradio UI in your own application:
169
218
  # Import directly in your application
170
219
  from agent.ui.gradio.app import create_gradio_ui
171
220
 
172
- # Create the UI with advanced features
173
- demo = create_gradio_ui()
174
- demo.launch()
175
-
176
- # Or for a simpler interface
177
- from agent.ui.gradio import registry
178
- demo = registry(name='cua:gpt-4o')
179
- demo.launch()
221
+ app = create_gradio_ui()
222
+ app.launch()
180
223
  ```
181
224
 
182
225
  ## Agent Loops
@@ -187,7 +230,7 @@ The `cua-agent` package provides three agent loops variations, based on differen
187
230
  |:-----------|:-----------------|:------------|:-------------|
188
231
  | `AgentLoop.OPENAI` | • `computer_use_preview` | Use OpenAI Operator CUA model | Not Required |
189
232
  | `AgentLoop.ANTHROPIC` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219` | Use Anthropic Computer-Use | Not Required |
190
- | `AgentLoop.OMNI` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `phi4`<br>• `phi4-mini`<br>• `gemma3`<br>• `...`<br>• `Any Ollama-compatible model` | Use OmniParser for element pixel-detection (SoM) and any VLMs for UI Grounding and Reasoning | OmniParser |
233
+ | `AgentLoop.OMNI` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `phi4`<br>• `phi4-mini`<br>• `gemma3`<br>• `...`<br>• `Any Ollama or OpenAI-compatible model` | Use OmniParser for element pixel-detection (SoM) and any VLMs for UI Grounding and Reasoning | OmniParser |
191
234
 
192
235
  ## AgentResponse
193
236
  The `AgentResponse` class represents the structured output returned after each agent turn. It contains the agent's response, reasoning, tool usage, and other metadata. The response format aligns with the new [OpenAI Agent SDK specification](https://platform.openai.com/docs/api-reference/responses) for better consistency across different agent loops.
@@ -16,7 +16,7 @@ agent/core/tools/collection.py,sha256=NuwTn6dXSyznxWodfmFDQwUlxxaGb4oBPym4AEJABS
16
16
  agent/core/tools/computer.py,sha256=lT_aW3huoYpcM8kffuokELupSz_WZG_qkaW1gITRC58,3892
17
17
  agent/core/tools/edit.py,sha256=kv4jTKCM0VXrnoNErf7mT-xlr81-7T8v49_VA9y_L4Y,2005
18
18
  agent/core/tools/manager.py,sha256=IRsCXjGc076nncQuyIjODoafnHTDhrf9sP5B4q5Pcdo,1742
19
- agent/core/types.py,sha256=4XnjuCkZAeyOidqixHp3pWVVf3pxc2l-0hNoYlB3Mrk,2914
19
+ agent/core/types.py,sha256=2RKDVzBd6O6woeH7A0oisbdpD_nx67B8ITnkMGu-g2E,2375
20
20
  agent/core/visualization.py,sha256=1DuFF5sSeSf5BRSevBMDxml9-ajl7BQLFm5KBUwMbI8,6573
21
21
  agent/providers/__init__.py,sha256=b4tIBAaIB1V7p8V0BWipHVnMhfHH_OuVgP4OWGSHdD8,194
22
22
  agent/providers/anthropic/__init__.py,sha256=Mj11IZnVshZ2iHkvg4Z5-jrQIaD1WvzDz2Zk_pMwqIA,149
@@ -47,7 +47,7 @@ agent/providers/omni/clients/ollama.py,sha256=PmR5EhU9Mi43_o5mZN36XcpiGKp5HbQwlX
47
47
  agent/providers/omni/clients/openai.py,sha256=iTSYWEJEM8INFPGJMiUVs8rFn0781XF_ofRkd7NT3gk,5920
48
48
  agent/providers/omni/clients/utils.py,sha256=Ani9CVVBm_J2Dl51WG6p1GVuoI6cq8scISrG0pmQ37o,688
49
49
  agent/providers/omni/image_utils.py,sha256=wejhWb36yqedsPnLFTFwk2wth8a6txfVWSg4EaNrRdA,908
50
- agent/providers/omni/loop.py,sha256=h9c-Ie4MA84H3XKYiAKA6J4Tec3_ACYxmU--eRuiS8A,39591
50
+ agent/providers/omni/loop.py,sha256=-eKNHYpNUZ683FNI5ZNcW0ywrAaS27o46Iqt2DR5ZBU,40416
51
51
  agent/providers/omni/parser.py,sha256=REpQwlwvY1z_N8wbMj6GhOeTiiWVWHhVja_LOxgzbks,11734
52
52
  agent/providers/omni/prompts.py,sha256=Mupjy0bUwBjcAeLXpE1r1jisYPSlhwsp-IXJKEKrEtw,3779
53
53
  agent/providers/omni/tools/__init__.py,sha256=IC1cMEDoR2ljGcNNthzBRF_VtnDbRL5qvHJWErtNp98,774
@@ -69,8 +69,8 @@ agent/providers/openai/utils.py,sha256=YeCZWIqOFSeugWoqAS0rhxOKAfL-9uN9nrYSBGBgP
69
69
  agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
70
70
  agent/ui/__init__.py,sha256=ohhxJLBin6k1hl5sKcmBST8mgh23WXgAXz3pN4f470E,45
71
71
  agent/ui/gradio/__init__.py,sha256=ANKZhv1HqsLheWbLVBlyRQ7Q5qGeXuPi5jDs8vu-ZMo,579
72
- agent/ui/gradio/app.py,sha256=6n0c_3HBb6ZeN213izyurL8oML1peet1cI8fx82DLZg,33980
73
- cua_agent-0.1.23.dist-info/METADATA,sha256=88aLbVo6etPVlHUPYmxmOpCTRfmeIJ1axKfsrznGG10,9238
74
- cua_agent-0.1.23.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
75
- cua_agent-0.1.23.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
76
- cua_agent-0.1.23.dist-info/RECORD,,
72
+ agent/ui/gradio/app.py,sha256=TzFOo40Fv6mC12UOXacu8JjYMzXAf0llBWD0VjH7bPA,34253
73
+ cua_agent-0.1.25.dist-info/METADATA,sha256=rA7ZoOCmIrWiHWf2MeH03USJ7fvSXGCCznp113ItBio,10570
74
+ cua_agent-0.1.25.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
75
+ cua_agent-0.1.25.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
76
+ cua_agent-0.1.25.dist-info/RECORD,,