cua-agent 0.1.23__py3-none-any.whl → 0.1.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/core/types.py +0 -17
- agent/providers/omni/loop.py +16 -2
- agent/ui/gradio/app.py +11 -6
- {cua_agent-0.1.23.dist-info → cua_agent-0.1.25.dist-info}/METADATA +53 -10
- {cua_agent-0.1.23.dist-info → cua_agent-0.1.25.dist-info}/RECORD +7 -7
- {cua_agent-0.1.23.dist-info → cua_agent-0.1.25.dist-info}/WHEEL +0 -0
- {cua_agent-0.1.23.dist-info → cua_agent-0.1.25.dist-info}/entry_points.txt +0 -0
agent/core/types.py
CHANGED
|
@@ -54,23 +54,6 @@ LLMModel = LLM
|
|
|
54
54
|
Model = LLM
|
|
55
55
|
|
|
56
56
|
|
|
57
|
-
# Default models for each provider
|
|
58
|
-
PROVIDER_TO_DEFAULT_MODEL: Dict[LLMProvider, str] = {
|
|
59
|
-
LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
|
|
60
|
-
LLMProvider.OPENAI: "gpt-4o",
|
|
61
|
-
LLMProvider.OLLAMA: "gemma3:4b-it-q4_K_M",
|
|
62
|
-
LLMProvider.OAICOMPAT: "Qwen2.5-VL-7B-Instruct",
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
# Environment variable names for each provider
|
|
66
|
-
PROVIDER_TO_ENV_VAR: Dict[LLMProvider, str] = {
|
|
67
|
-
LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
|
|
68
|
-
LLMProvider.OPENAI: "OPENAI_API_KEY",
|
|
69
|
-
LLMProvider.OLLAMA: "none",
|
|
70
|
-
LLMProvider.OAICOMPAT: "none",
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
|
|
74
57
|
class AgentResponse(TypedDict, total=False):
|
|
75
58
|
"""Agent response format."""
|
|
76
59
|
|
agent/providers/omni/loop.py
CHANGED
|
@@ -443,6 +443,8 @@ class OmniLoop(BaseLoop):
|
|
|
443
443
|
except (json.JSONDecodeError, IndexError):
|
|
444
444
|
try:
|
|
445
445
|
# Look for JSON object pattern
|
|
446
|
+
import re # Local import to ensure availability
|
|
447
|
+
|
|
446
448
|
json_pattern = r"\{[^}]+\}"
|
|
447
449
|
json_match = re.search(json_pattern, raw_text)
|
|
448
450
|
if json_match:
|
|
@@ -453,8 +455,20 @@ class OmniLoop(BaseLoop):
|
|
|
453
455
|
logger.error(f"No JSON found in content")
|
|
454
456
|
return True, action_screenshot_saved
|
|
455
457
|
except json.JSONDecodeError as e:
|
|
456
|
-
|
|
457
|
-
|
|
458
|
+
# Try to sanitize the JSON string and retry
|
|
459
|
+
try:
|
|
460
|
+
# Remove or replace invalid control characters
|
|
461
|
+
import re # Local import to ensure availability
|
|
462
|
+
|
|
463
|
+
sanitized_text = re.sub(r"[\x00-\x1F\x7F]", "", raw_text)
|
|
464
|
+
# Try parsing again with sanitized text
|
|
465
|
+
parsed_content = json.loads(sanitized_text)
|
|
466
|
+
logger.info(
|
|
467
|
+
"Successfully parsed JSON after sanitizing control characters"
|
|
468
|
+
)
|
|
469
|
+
except json.JSONDecodeError:
|
|
470
|
+
logger.error(f"Failed to parse JSON from text: {str(e)}")
|
|
471
|
+
return True, action_screenshot_saved
|
|
458
472
|
|
|
459
473
|
# Step 4: Process the parsed content if available
|
|
460
474
|
if parsed_content:
|
agent/ui/gradio/app.py
CHANGED
|
@@ -271,16 +271,19 @@ def create_agent(
|
|
|
271
271
|
api_key = os.environ.get("ANTHROPIC_API_KEY", "")
|
|
272
272
|
|
|
273
273
|
# Create LLM model object with appropriate parameters
|
|
274
|
-
provider_base_url = "http://localhost:
|
|
274
|
+
provider_base_url = "http://localhost:1234/v1" if use_oaicompat else None
|
|
275
275
|
|
|
276
276
|
if use_oaicompat:
|
|
277
|
-
# Special handling for OAICOMPAT - use
|
|
278
|
-
print(
|
|
277
|
+
# Special handling for OAICOMPAT - use OAICOMPAT provider with custom base URL
|
|
278
|
+
print(
|
|
279
|
+
f"DEBUG - Creating OAICOMPAT agent with model: {model_name}, URL: {provider_base_url}"
|
|
280
|
+
)
|
|
279
281
|
llm = LLM(
|
|
280
|
-
provider=
|
|
282
|
+
provider=LLMProvider.OAICOMPAT, # Set to OAICOMPAT instead of using original provider
|
|
281
283
|
name=model_name,
|
|
282
284
|
provider_base_url=provider_base_url,
|
|
283
285
|
)
|
|
286
|
+
print(f"DEBUG - LLM provider is now: {llm.provider}, base URL: {llm.provider_base_url}")
|
|
284
287
|
# Note: Don't pass use_oaicompat to the agent, as it doesn't accept this parameter
|
|
285
288
|
elif provider == LLMProvider.OAICOMPAT:
|
|
286
289
|
# This path is unlikely to be taken with our current approach
|
|
@@ -461,8 +464,10 @@ def respond(
|
|
|
461
464
|
# Special handling for OAICOMPAT to bypass provider-specific errors
|
|
462
465
|
# Creates the agent with OPENAI provider but using custom model name and provider base URL
|
|
463
466
|
is_oaicompat = str(provider) == "oaicompat"
|
|
464
|
-
|
|
465
|
-
|
|
467
|
+
|
|
468
|
+
# Don't override the provider for OAICOMPAT - instead pass it through
|
|
469
|
+
# if is_oaicompat:
|
|
470
|
+
# provider = LLMProvider.OPENAI
|
|
466
471
|
|
|
467
472
|
# Get API key based on provider
|
|
468
473
|
if provider == LLMProvider.OPENAI:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cua-agent
|
|
3
|
-
Version: 0.1.23
|
|
3
|
+
Version: 0.1.25
|
|
4
4
|
Summary: CUA (Computer Use) Agent for AI-driven computer interaction
|
|
5
5
|
Author-Email: TryCua <gh@trycua.com>
|
|
6
6
|
Requires-Python: <3.13,>=3.10
|
|
@@ -151,12 +151,61 @@ pip install "cua-agent[ui]"
|
|
|
151
151
|
|
|
152
152
|
# Create a simple launcher script
|
|
153
153
|
```python
|
|
154
|
+
# launch_ui.py
|
|
154
155
|
from agent.ui.gradio.app import create_gradio_ui
|
|
155
156
|
|
|
156
157
|
app = create_gradio_ui()
|
|
157
158
|
app.launch(share=False)
|
|
158
159
|
```
|
|
159
160
|
|
|
161
|
+
# Run the launcher
|
|
162
|
+
python launch_ui.py
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### Setting up API Keys
|
|
166
|
+
|
|
167
|
+
For the Gradio UI to show available models, you need to set API keys as environment variables:
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
# For OpenAI models
|
|
171
|
+
export OPENAI_API_KEY=your_openai_key_here
|
|
172
|
+
|
|
173
|
+
# For Anthropic models
|
|
174
|
+
export ANTHROPIC_API_KEY=your_anthropic_key_here
|
|
175
|
+
|
|
176
|
+
# Launch with both keys set
|
|
177
|
+
OPENAI_API_KEY=your_key ANTHROPIC_API_KEY=your_key python launch_ui.py
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### Using Local Models
|
|
181
|
+
|
|
182
|
+
You can use local models with the OMNI loop provider by selecting "Custom model..." from the dropdown. The default provider URL is set to `http://localhost:1234/v1` which works with LM Studio.
|
|
183
|
+
|
|
184
|
+
If you're using a different local model server:
|
|
185
|
+
- vLLM: `http://localhost:8000/v1`
|
|
186
|
+
- LocalAI: `http://localhost:8080/v1`
|
|
187
|
+
- Ollama with OpenAI compat API: `http://localhost:11434/v1`
|
|
188
|
+
|
|
189
|
+
To change the URL, modify the `provider_base_url` in your launcher script:
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
# In your launcher script
|
|
193
|
+
from agent.ui.gradio.app import create_gradio_ui
|
|
194
|
+
from agent import LLM, LLMProvider
|
|
195
|
+
|
|
196
|
+
# Create a custom model with a specific URL
|
|
197
|
+
custom_model = LLM(
|
|
198
|
+
provider=LLMProvider.OAICOMPAT,
|
|
199
|
+
name="your-model-name",
|
|
200
|
+
provider_base_url="http://localhost:8000/v1" # Change to your server URL
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
app = create_gradio_ui(custom_model=custom_model)
|
|
204
|
+
app.launch()
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
Without these environment variables, the UI will show "No models available" for the corresponding providers, but you can still use local models with the OMNI loop provider.
|
|
208
|
+
|
|
160
209
|
The Gradio UI provides:
|
|
161
210
|
- Selection of different agent loops (OpenAI, Anthropic, OMNI)
|
|
162
211
|
- Model selection for each provider
|
|
@@ -169,14 +218,8 @@ You can also embed the Gradio UI in your own application:
|
|
|
169
218
|
# Import directly in your application
|
|
170
219
|
from agent.ui.gradio.app import create_gradio_ui
|
|
171
220
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
demo.launch()
|
|
175
|
-
|
|
176
|
-
# Or for a simpler interface
|
|
177
|
-
from agent.ui.gradio import registry
|
|
178
|
-
demo = registry(name='cua:gpt-4o')
|
|
179
|
-
demo.launch()
|
|
221
|
+
app = create_gradio_ui()
|
|
222
|
+
app.launch()
|
|
180
223
|
```
|
|
181
224
|
|
|
182
225
|
## Agent Loops
|
|
@@ -187,7 +230,7 @@ The `cua-agent` package provides three agent loops variations, based on differen
|
|
|
187
230
|
|:-----------|:-----------------|:------------|:-------------|
|
|
188
231
|
| `AgentLoop.OPENAI` | • `computer_use_preview` | Use OpenAI Operator CUA model | Not Required |
|
|
189
232
|
| `AgentLoop.ANTHROPIC` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219` | Use Anthropic Computer-Use | Not Required |
|
|
190
|
-
| `AgentLoop.OMNI` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `phi4`<br>• `phi4-mini`<br>• `gemma3`<br>• `...`<br>• `Any Ollama-compatible model` | Use OmniParser for element pixel-detection (SoM) and any VLMs for UI Grounding and Reasoning | OmniParser |
|
|
233
|
+
| `AgentLoop.OMNI` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `phi4`<br>• `phi4-mini`<br>• `gemma3`<br>• `...`<br>• `Any Ollama or OpenAI-compatible model` | Use OmniParser for element pixel-detection (SoM) and any VLMs for UI Grounding and Reasoning | OmniParser |
|
|
191
234
|
|
|
192
235
|
## AgentResponse
|
|
193
236
|
The `AgentResponse` class represents the structured output returned after each agent turn. It contains the agent's response, reasoning, tool usage, and other metadata. The response format aligns with the new [OpenAI Agent SDK specification](https://platform.openai.com/docs/api-reference/responses) for better consistency across different agent loops.
|
|
@@ -16,7 +16,7 @@ agent/core/tools/collection.py,sha256=NuwTn6dXSyznxWodfmFDQwUlxxaGb4oBPym4AEJABS
|
|
|
16
16
|
agent/core/tools/computer.py,sha256=lT_aW3huoYpcM8kffuokELupSz_WZG_qkaW1gITRC58,3892
|
|
17
17
|
agent/core/tools/edit.py,sha256=kv4jTKCM0VXrnoNErf7mT-xlr81-7T8v49_VA9y_L4Y,2005
|
|
18
18
|
agent/core/tools/manager.py,sha256=IRsCXjGc076nncQuyIjODoafnHTDhrf9sP5B4q5Pcdo,1742
|
|
19
|
-
agent/core/types.py,sha256=
|
|
19
|
+
agent/core/types.py,sha256=2RKDVzBd6O6woeH7A0oisbdpD_nx67B8ITnkMGu-g2E,2375
|
|
20
20
|
agent/core/visualization.py,sha256=1DuFF5sSeSf5BRSevBMDxml9-ajl7BQLFm5KBUwMbI8,6573
|
|
21
21
|
agent/providers/__init__.py,sha256=b4tIBAaIB1V7p8V0BWipHVnMhfHH_OuVgP4OWGSHdD8,194
|
|
22
22
|
agent/providers/anthropic/__init__.py,sha256=Mj11IZnVshZ2iHkvg4Z5-jrQIaD1WvzDz2Zk_pMwqIA,149
|
|
@@ -47,7 +47,7 @@ agent/providers/omni/clients/ollama.py,sha256=PmR5EhU9Mi43_o5mZN36XcpiGKp5HbQwlX
|
|
|
47
47
|
agent/providers/omni/clients/openai.py,sha256=iTSYWEJEM8INFPGJMiUVs8rFn0781XF_ofRkd7NT3gk,5920
|
|
48
48
|
agent/providers/omni/clients/utils.py,sha256=Ani9CVVBm_J2Dl51WG6p1GVuoI6cq8scISrG0pmQ37o,688
|
|
49
49
|
agent/providers/omni/image_utils.py,sha256=wejhWb36yqedsPnLFTFwk2wth8a6txfVWSg4EaNrRdA,908
|
|
50
|
-
agent/providers/omni/loop.py,sha256
|
|
50
|
+
agent/providers/omni/loop.py,sha256=-eKNHYpNUZ683FNI5ZNcW0ywrAaS27o46Iqt2DR5ZBU,40416
|
|
51
51
|
agent/providers/omni/parser.py,sha256=REpQwlwvY1z_N8wbMj6GhOeTiiWVWHhVja_LOxgzbks,11734
|
|
52
52
|
agent/providers/omni/prompts.py,sha256=Mupjy0bUwBjcAeLXpE1r1jisYPSlhwsp-IXJKEKrEtw,3779
|
|
53
53
|
agent/providers/omni/tools/__init__.py,sha256=IC1cMEDoR2ljGcNNthzBRF_VtnDbRL5qvHJWErtNp98,774
|
|
@@ -69,8 +69,8 @@ agent/providers/openai/utils.py,sha256=YeCZWIqOFSeugWoqAS0rhxOKAfL-9uN9nrYSBGBgP
|
|
|
69
69
|
agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
|
|
70
70
|
agent/ui/__init__.py,sha256=ohhxJLBin6k1hl5sKcmBST8mgh23WXgAXz3pN4f470E,45
|
|
71
71
|
agent/ui/gradio/__init__.py,sha256=ANKZhv1HqsLheWbLVBlyRQ7Q5qGeXuPi5jDs8vu-ZMo,579
|
|
72
|
-
agent/ui/gradio/app.py,sha256=
|
|
73
|
-
cua_agent-0.1.
|
|
74
|
-
cua_agent-0.1.
|
|
75
|
-
cua_agent-0.1.
|
|
76
|
-
cua_agent-0.1.
|
|
72
|
+
agent/ui/gradio/app.py,sha256=TzFOo40Fv6mC12UOXacu8JjYMzXAf0llBWD0VjH7bPA,34253
|
|
73
|
+
cua_agent-0.1.25.dist-info/METADATA,sha256=rA7ZoOCmIrWiHWf2MeH03USJ7fvSXGCCznp113ItBio,10570
|
|
74
|
+
cua_agent-0.1.25.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
|
|
75
|
+
cua_agent-0.1.25.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
|
|
76
|
+
cua_agent-0.1.25.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|