cua-agent 0.1.23__py3-none-any.whl → 0.1.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for reference purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Review the details below before upgrading.

agent/core/types.py CHANGED
@@ -54,23 +54,6 @@ LLMModel = LLM
54
54
  Model = LLM
55
55
 
56
56
 
57
- # Default models for each provider
58
- PROVIDER_TO_DEFAULT_MODEL: Dict[LLMProvider, str] = {
59
- LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
60
- LLMProvider.OPENAI: "gpt-4o",
61
- LLMProvider.OLLAMA: "gemma3:4b-it-q4_K_M",
62
- LLMProvider.OAICOMPAT: "Qwen2.5-VL-7B-Instruct",
63
- }
64
-
65
- # Environment variable names for each provider
66
- PROVIDER_TO_ENV_VAR: Dict[LLMProvider, str] = {
67
- LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
68
- LLMProvider.OPENAI: "OPENAI_API_KEY",
69
- LLMProvider.OLLAMA: "none",
70
- LLMProvider.OAICOMPAT: "none",
71
- }
72
-
73
-
74
57
  class AgentResponse(TypedDict, total=False):
75
58
  """Agent response format."""
76
59
 
@@ -443,6 +443,8 @@ class OmniLoop(BaseLoop):
443
443
  except (json.JSONDecodeError, IndexError):
444
444
  try:
445
445
  # Look for JSON object pattern
446
+ import re # Local import to ensure availability
447
+
446
448
  json_pattern = r"\{[^}]+\}"
447
449
  json_match = re.search(json_pattern, raw_text)
448
450
  if json_match:
@@ -453,8 +455,20 @@ class OmniLoop(BaseLoop):
453
455
  logger.error(f"No JSON found in content")
454
456
  return True, action_screenshot_saved
455
457
  except json.JSONDecodeError as e:
456
- logger.error(f"Failed to parse JSON from text: {str(e)}")
457
- return True, action_screenshot_saved
458
+ # Try to sanitize the JSON string and retry
459
+ try:
460
+ # Remove or replace invalid control characters
461
+ import re # Local import to ensure availability
462
+
463
+ sanitized_text = re.sub(r"[\x00-\x1F\x7F]", "", raw_text)
464
+ # Try parsing again with sanitized text
465
+ parsed_content = json.loads(sanitized_text)
466
+ logger.info(
467
+ "Successfully parsed JSON after sanitizing control characters"
468
+ )
469
+ except json.JSONDecodeError:
470
+ logger.error(f"Failed to parse JSON from text: {str(e)}")
471
+ return True, action_screenshot_saved
458
472
 
459
473
  # Step 4: Process the parsed content if available
460
474
  if parsed_content:
agent/ui/gradio/app.py CHANGED
@@ -271,16 +271,19 @@ def create_agent(
271
271
  api_key = os.environ.get("ANTHROPIC_API_KEY", "")
272
272
 
273
273
  # Create LLM model object with appropriate parameters
274
- provider_base_url = "http://localhost:8000/v1" if use_oaicompat else None
274
+ provider_base_url = "http://localhost:1234/v1" if use_oaicompat else None
275
275
 
276
276
  if use_oaicompat:
277
- # Special handling for OAICOMPAT - use OPENAI provider with custom base URL
278
- print(f"DEBUG - Creating OAICOMPAT agent with model: {model_name}")
277
+ # Special handling for OAICOMPAT - use OAICOMPAT provider with custom base URL
278
+ print(
279
+ f"DEBUG - Creating OAICOMPAT agent with model: {model_name}, URL: {provider_base_url}"
280
+ )
279
281
  llm = LLM(
280
- provider=provider, # Already set to OPENAI
282
+ provider=LLMProvider.OAICOMPAT, # Set to OAICOMPAT instead of using original provider
281
283
  name=model_name,
282
284
  provider_base_url=provider_base_url,
283
285
  )
286
+ print(f"DEBUG - LLM provider is now: {llm.provider}, base URL: {llm.provider_base_url}")
284
287
  # Note: Don't pass use_oaicompat to the agent, as it doesn't accept this parameter
285
288
  elif provider == LLMProvider.OAICOMPAT:
286
289
  # This path is unlikely to be taken with our current approach
@@ -461,8 +464,10 @@ def respond(
461
464
  # Special handling for OAICOMPAT to bypass provider-specific errors
462
465
  # Creates the agent with OPENAI provider but using custom model name and provider base URL
463
466
  is_oaicompat = str(provider) == "oaicompat"
464
- if is_oaicompat:
465
- provider = LLMProvider.OPENAI
467
+
468
+ # Don't override the provider for OAICOMPAT - instead pass it through
469
+ # if is_oaicompat:
470
+ # provider = LLMProvider.OPENAI
466
471
 
467
472
  # Get API key based on provider
468
473
  if provider == LLMProvider.OPENAI:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-agent
3
- Version: 0.1.23
3
+ Version: 0.1.25
4
4
  Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: <3.13,>=3.10
@@ -151,12 +151,61 @@ pip install "cua-agent[ui]"
151
151
 
152
152
  # Create a simple launcher script
153
153
  ```python
154
+ # launch_ui.py
154
155
  from agent.ui.gradio.app import create_gradio_ui
155
156
 
156
157
  app = create_gradio_ui()
157
158
  app.launch(share=False)
158
159
  ```
159
160
 
161
+ # Run the launcher
162
+ python launch_ui.py
163
+ ```
164
+
165
+ ### Setting up API Keys
166
+
167
+ For the Gradio UI to show available models, you need to set API keys as environment variables:
168
+
169
+ ```bash
170
+ # For OpenAI models
171
+ export OPENAI_API_KEY=your_openai_key_here
172
+
173
+ # For Anthropic models
174
+ export ANTHROPIC_API_KEY=your_anthropic_key_here
175
+
176
+ # Launch with both keys set
177
+ OPENAI_API_KEY=your_key ANTHROPIC_API_KEY=your_key python launch_ui.py
178
+ ```
179
+
180
+ ### Using Local Models
181
+
182
+ You can use local models with the OMNI loop provider by selecting "Custom model..." from the dropdown. The default provider URL is set to `http://localhost:1234/v1` which works with LM Studio.
183
+
184
+ If you're using a different local model server:
185
+ - vLLM: `http://localhost:8000/v1`
186
+ - LocalAI: `http://localhost:8080/v1`
187
+ - Ollama with OpenAI compat API: `http://localhost:11434/v1`
188
+
189
+ To change the URL, modify the `provider_base_url` in your launcher script:
190
+
191
+ ```python
192
+ # In your launcher script
193
+ from agent.ui.gradio.app import create_gradio_ui
194
+ from agent import LLM, LLMProvider
195
+
196
+ # Create a custom model with a specific URL
197
+ custom_model = LLM(
198
+ provider=LLMProvider.OAICOMPAT,
199
+ name="your-model-name",
200
+ provider_base_url="http://localhost:8000/v1" # Change to your server URL
201
+ )
202
+
203
+ app = create_gradio_ui(custom_model=custom_model)
204
+ app.launch()
205
+ ```
206
+
207
+ Without these environment variables, the UI will show "No models available" for the corresponding providers, but you can still use local models with the OMNI loop provider.
208
+
160
209
  The Gradio UI provides:
161
210
  - Selection of different agent loops (OpenAI, Anthropic, OMNI)
162
211
  - Model selection for each provider
@@ -169,14 +218,8 @@ You can also embed the Gradio UI in your own application:
169
218
  # Import directly in your application
170
219
  from agent.ui.gradio.app import create_gradio_ui
171
220
 
172
- # Create the UI with advanced features
173
- demo = create_gradio_ui()
174
- demo.launch()
175
-
176
- # Or for a simpler interface
177
- from agent.ui.gradio import registry
178
- demo = registry(name='cua:gpt-4o')
179
- demo.launch()
221
+ app = create_gradio_ui()
222
+ app.launch()
180
223
  ```
181
224
 
182
225
  ## Agent Loops
@@ -187,7 +230,7 @@ The `cua-agent` package provides three agent loops variations, based on differen
187
230
  |:-----------|:-----------------|:------------|:-------------|
188
231
  | `AgentLoop.OPENAI` | • `computer_use_preview` | Use OpenAI Operator CUA model | Not Required |
189
232
  | `AgentLoop.ANTHROPIC` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219` | Use Anthropic Computer-Use | Not Required |
190
- | `AgentLoop.OMNI` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `phi4`<br>• `phi4-mini`<br>• `gemma3`<br>• `...`<br>• `Any Ollama-compatible model` | Use OmniParser for element pixel-detection (SoM) and any VLMs for UI Grounding and Reasoning | OmniParser |
233
+ | `AgentLoop.OMNI` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `phi4`<br>• `phi4-mini`<br>• `gemma3`<br>• `...`<br>• `Any Ollama or OpenAI-compatible model` | Use OmniParser for element pixel-detection (SoM) and any VLMs for UI Grounding and Reasoning | OmniParser |
191
234
 
192
235
  ## AgentResponse
193
236
  The `AgentResponse` class represents the structured output returned after each agent turn. It contains the agent's response, reasoning, tool usage, and other metadata. The response format aligns with the new [OpenAI Agent SDK specification](https://platform.openai.com/docs/api-reference/responses) for better consistency across different agent loops.
@@ -16,7 +16,7 @@ agent/core/tools/collection.py,sha256=NuwTn6dXSyznxWodfmFDQwUlxxaGb4oBPym4AEJABS
16
16
  agent/core/tools/computer.py,sha256=lT_aW3huoYpcM8kffuokELupSz_WZG_qkaW1gITRC58,3892
17
17
  agent/core/tools/edit.py,sha256=kv4jTKCM0VXrnoNErf7mT-xlr81-7T8v49_VA9y_L4Y,2005
18
18
  agent/core/tools/manager.py,sha256=IRsCXjGc076nncQuyIjODoafnHTDhrf9sP5B4q5Pcdo,1742
19
- agent/core/types.py,sha256=4XnjuCkZAeyOidqixHp3pWVVf3pxc2l-0hNoYlB3Mrk,2914
19
+ agent/core/types.py,sha256=2RKDVzBd6O6woeH7A0oisbdpD_nx67B8ITnkMGu-g2E,2375
20
20
  agent/core/visualization.py,sha256=1DuFF5sSeSf5BRSevBMDxml9-ajl7BQLFm5KBUwMbI8,6573
21
21
  agent/providers/__init__.py,sha256=b4tIBAaIB1V7p8V0BWipHVnMhfHH_OuVgP4OWGSHdD8,194
22
22
  agent/providers/anthropic/__init__.py,sha256=Mj11IZnVshZ2iHkvg4Z5-jrQIaD1WvzDz2Zk_pMwqIA,149
@@ -47,7 +47,7 @@ agent/providers/omni/clients/ollama.py,sha256=PmR5EhU9Mi43_o5mZN36XcpiGKp5HbQwlX
47
47
  agent/providers/omni/clients/openai.py,sha256=iTSYWEJEM8INFPGJMiUVs8rFn0781XF_ofRkd7NT3gk,5920
48
48
  agent/providers/omni/clients/utils.py,sha256=Ani9CVVBm_J2Dl51WG6p1GVuoI6cq8scISrG0pmQ37o,688
49
49
  agent/providers/omni/image_utils.py,sha256=wejhWb36yqedsPnLFTFwk2wth8a6txfVWSg4EaNrRdA,908
50
- agent/providers/omni/loop.py,sha256=h9c-Ie4MA84H3XKYiAKA6J4Tec3_ACYxmU--eRuiS8A,39591
50
+ agent/providers/omni/loop.py,sha256=-eKNHYpNUZ683FNI5ZNcW0ywrAaS27o46Iqt2DR5ZBU,40416
51
51
  agent/providers/omni/parser.py,sha256=REpQwlwvY1z_N8wbMj6GhOeTiiWVWHhVja_LOxgzbks,11734
52
52
  agent/providers/omni/prompts.py,sha256=Mupjy0bUwBjcAeLXpE1r1jisYPSlhwsp-IXJKEKrEtw,3779
53
53
  agent/providers/omni/tools/__init__.py,sha256=IC1cMEDoR2ljGcNNthzBRF_VtnDbRL5qvHJWErtNp98,774
@@ -69,8 +69,8 @@ agent/providers/openai/utils.py,sha256=YeCZWIqOFSeugWoqAS0rhxOKAfL-9uN9nrYSBGBgP
69
69
  agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
70
70
  agent/ui/__init__.py,sha256=ohhxJLBin6k1hl5sKcmBST8mgh23WXgAXz3pN4f470E,45
71
71
  agent/ui/gradio/__init__.py,sha256=ANKZhv1HqsLheWbLVBlyRQ7Q5qGeXuPi5jDs8vu-ZMo,579
72
- agent/ui/gradio/app.py,sha256=6n0c_3HBb6ZeN213izyurL8oML1peet1cI8fx82DLZg,33980
73
- cua_agent-0.1.23.dist-info/METADATA,sha256=88aLbVo6etPVlHUPYmxmOpCTRfmeIJ1axKfsrznGG10,9238
74
- cua_agent-0.1.23.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
75
- cua_agent-0.1.23.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
76
- cua_agent-0.1.23.dist-info/RECORD,,
72
+ agent/ui/gradio/app.py,sha256=TzFOo40Fv6mC12UOXacu8JjYMzXAf0llBWD0VjH7bPA,34253
73
+ cua_agent-0.1.25.dist-info/METADATA,sha256=rA7ZoOCmIrWiHWf2MeH03USJ7fvSXGCCznp113ItBio,10570
74
+ cua_agent-0.1.25.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
75
+ cua_agent-0.1.25.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
76
+ cua_agent-0.1.25.dist-info/RECORD,,