cua-agent 0.1.24__py3-none-any.whl → 0.1.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic.
- agent/providers/omni/loop.py +16 -2
- agent/ui/gradio/app.py +11 -6
- {cua_agent-0.1.24.dist-info → cua_agent-0.1.25.dist-info}/METADATA +31 -10
- {cua_agent-0.1.24.dist-info → cua_agent-0.1.25.dist-info}/RECORD +6 -6
- {cua_agent-0.1.24.dist-info → cua_agent-0.1.25.dist-info}/WHEEL +0 -0
- {cua_agent-0.1.24.dist-info → cua_agent-0.1.25.dist-info}/entry_points.txt +0 -0
agent/providers/omni/loop.py
CHANGED
@@ -443,6 +443,8 @@ class OmniLoop(BaseLoop):
         except (json.JSONDecodeError, IndexError):
             try:
                 # Look for JSON object pattern
+                import re  # Local import to ensure availability
+
                 json_pattern = r"\{[^}]+\}"
                 json_match = re.search(json_pattern, raw_text)
                 if json_match:
@@ -453,8 +455,20 @@ class OmniLoop(BaseLoop):
                     logger.error(f"No JSON found in content")
                     return True, action_screenshot_saved
             except json.JSONDecodeError as e:
-
-
+                # Try to sanitize the JSON string and retry
+                try:
+                    # Remove or replace invalid control characters
+                    import re  # Local import to ensure availability
+
+                    sanitized_text = re.sub(r"[\x00-\x1F\x7F]", "", raw_text)
+                    # Try parsing again with sanitized text
+                    parsed_content = json.loads(sanitized_text)
+                    logger.info(
+                        "Successfully parsed JSON after sanitizing control characters"
+                    )
+                except json.JSONDecodeError:
+                    logger.error(f"Failed to parse JSON from text: {str(e)}")
+                    return True, action_screenshot_saved

         # Step 4: Process the parsed content if available
         if parsed_content:
agent/ui/gradio/app.py
CHANGED
@@ -271,16 +271,19 @@ def create_agent(
     api_key = os.environ.get("ANTHROPIC_API_KEY", "")

     # Create LLM model object with appropriate parameters
-    provider_base_url = "http://localhost:
+    provider_base_url = "http://localhost:1234/v1" if use_oaicompat else None

     if use_oaicompat:
-        # Special handling for OAICOMPAT - use
-        print(
+        # Special handling for OAICOMPAT - use OAICOMPAT provider with custom base URL
+        print(
+            f"DEBUG - Creating OAICOMPAT agent with model: {model_name}, URL: {provider_base_url}"
+        )
         llm = LLM(
-            provider=
+            provider=LLMProvider.OAICOMPAT,  # Set to OAICOMPAT instead of using original provider
             name=model_name,
             provider_base_url=provider_base_url,
         )
+        print(f"DEBUG - LLM provider is now: {llm.provider}, base URL: {llm.provider_base_url}")
         # Note: Don't pass use_oaicompat to the agent, as it doesn't accept this parameter
     elif provider == LLMProvider.OAICOMPAT:
         # This path is unlikely to be taken with our current approach
@@ -461,8 +464,10 @@ def respond(
     # Special handling for OAICOMPAT to bypass provider-specific errors
     # Creates the agent with OPENAI provider but using custom model name and provider base URL
     is_oaicompat = str(provider) == "oaicompat"
-
-
+
+    # Don't override the provider for OAICOMPAT - instead pass it through
+    # if is_oaicompat:
+    #     provider = LLMProvider.OPENAI

     # Get API key based on provider
     if provider == LLMProvider.OPENAI:
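The change above keeps custom models on LLMProvider.OAICOMPAT with an explicit base URL instead of remapping them to another provider. A short sketch of the resulting construction, mirroring the README example in this release (the model name and URL are placeholders):

```python
from agent import LLM, LLMProvider

# Point the OAICOMPAT provider at any OpenAI-compatible local server.
# The model name and URL below are placeholder values.
llm = LLM(
    provider=LLMProvider.OAICOMPAT,
    name="your-model-name",
    provider_base_url="http://localhost:1234/v1",  # e.g. LM Studio's default endpoint
)
print(f"LLM provider: {llm.provider}, base URL: {llm.provider_base_url}")
```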
{cua_agent-0.1.24.dist-info → cua_agent-0.1.25.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cua-agent
-Version: 0.1.24
+Version: 0.1.25
 Summary: CUA (Computer Use) Agent for AI-driven computer interaction
 Author-Email: TryCua <gh@trycua.com>
 Requires-Python: <3.13,>=3.10
@@ -177,6 +177,33 @@ export ANTHROPIC_API_KEY=your_anthropic_key_here
 OPENAI_API_KEY=your_key ANTHROPIC_API_KEY=your_key python launch_ui.py
 ```

+### Using Local Models
+
+You can use local models with the OMNI loop provider by selecting "Custom model..." from the dropdown. The default provider URL is set to `http://localhost:1234/v1` which works with LM Studio.
+
+If you're using a different local model server:
+- vLLM: `http://localhost:8000/v1`
+- LocalAI: `http://localhost:8080/v1`
+- Ollama with OpenAI compat API: `http://localhost:11434/v1`
+
+To change the URL, modify the `provider_base_url` in your launcher script:
+
+```python
+# In your launcher script
+from agent.ui.gradio.app import create_gradio_ui
+from agent import LLM, LLMProvider
+
+# Create a custom model with a specific URL
+custom_model = LLM(
+    provider=LLMProvider.OAICOMPAT,
+    name="your-model-name",
+    provider_base_url="http://localhost:8000/v1"  # Change to your server URL
+)
+
+app = create_gradio_ui(custom_model=custom_model)
+app.launch()
+```
+
 Without these environment variables, the UI will show "No models available" for the corresponding providers, but you can still use local models with the OMNI loop provider.

 The Gradio UI provides:
@@ -191,14 +218,8 @@ You can also embed the Gradio UI in your own application:
 # Import directly in your application
 from agent.ui.gradio.app import create_gradio_ui

-
-
-demo.launch()
-
-# Or for a simpler interface
-from agent.ui.gradio import registry
-demo = registry(name='cua:gpt-4o')
-demo.launch()
+app = create_gradio_ui()
+app.launch()
 ```

 ## Agent Loops
@@ -209,7 +230,7 @@ The `cua-agent` package provides three agent loops variations, based on differen
 |:-----------|:-----------------|:------------|:-------------|
 | `AgentLoop.OPENAI` | • `computer_use_preview` | Use OpenAI Operator CUA model | Not Required |
 | `AgentLoop.ANTHROPIC` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219` | Use Anthropic Computer-Use | Not Required |
-| `AgentLoop.OMNI` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `phi4`<br>• `phi4-mini`<br>• `gemma3`<br>• `...`<br>• `Any Ollama-compatible model` | Use OmniParser for element pixel-detection (SoM) and any VLMs for UI Grounding and Reasoning | OmniParser |
+| `AgentLoop.OMNI` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `phi4`<br>• `phi4-mini`<br>• `gemma3`<br>• `...`<br>• `Any Ollama or OpenAI-compatible model` | Use OmniParser for element pixel-detection (SoM) and any VLMs for UI Grounding and Reasoning | OmniParser |

 ## AgentResponse
 The `AgentResponse` class represents the structured output returned after each agent turn. It contains the agent's response, reasoning, tool usage, and other metadata. The response format aligns with the new [OpenAI Agent SDK specification](https://platform.openai.com/docs/api-reference/responses) for better consistency across different agent loops.
{cua_agent-0.1.24.dist-info → cua_agent-0.1.25.dist-info}/RECORD
CHANGED
@@ -47,7 +47,7 @@ agent/providers/omni/clients/ollama.py,sha256=PmR5EhU9Mi43_o5mZN36XcpiGKp5HbQwlX
 agent/providers/omni/clients/openai.py,sha256=iTSYWEJEM8INFPGJMiUVs8rFn0781XF_ofRkd7NT3gk,5920
 agent/providers/omni/clients/utils.py,sha256=Ani9CVVBm_J2Dl51WG6p1GVuoI6cq8scISrG0pmQ37o,688
 agent/providers/omni/image_utils.py,sha256=wejhWb36yqedsPnLFTFwk2wth8a6txfVWSg4EaNrRdA,908
-agent/providers/omni/loop.py,sha256
+agent/providers/omni/loop.py,sha256=-eKNHYpNUZ683FNI5ZNcW0ywrAaS27o46Iqt2DR5ZBU,40416
 agent/providers/omni/parser.py,sha256=REpQwlwvY1z_N8wbMj6GhOeTiiWVWHhVja_LOxgzbks,11734
 agent/providers/omni/prompts.py,sha256=Mupjy0bUwBjcAeLXpE1r1jisYPSlhwsp-IXJKEKrEtw,3779
 agent/providers/omni/tools/__init__.py,sha256=IC1cMEDoR2ljGcNNthzBRF_VtnDbRL5qvHJWErtNp98,774
@@ -69,8 +69,8 @@ agent/providers/openai/utils.py,sha256=YeCZWIqOFSeugWoqAS0rhxOKAfL-9uN9nrYSBGBgP
 agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
 agent/ui/__init__.py,sha256=ohhxJLBin6k1hl5sKcmBST8mgh23WXgAXz3pN4f470E,45
 agent/ui/gradio/__init__.py,sha256=ANKZhv1HqsLheWbLVBlyRQ7Q5qGeXuPi5jDs8vu-ZMo,579
-agent/ui/gradio/app.py,sha256=
-cua_agent-0.1.
-cua_agent-0.1.
-cua_agent-0.1.
-cua_agent-0.1.
+agent/ui/gradio/app.py,sha256=TzFOo40Fv6mC12UOXacu8JjYMzXAf0llBWD0VjH7bPA,34253
+cua_agent-0.1.25.dist-info/METADATA,sha256=rA7ZoOCmIrWiHWf2MeH03USJ7fvSXGCCznp113ItBio,10570
+cua_agent-0.1.25.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
+cua_agent-0.1.25.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+cua_agent-0.1.25.dist-info/RECORD,,
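RECORD entries follow the standard wheel format: path, then "sha256=" plus the urlsafe base64 SHA-256 digest with padding stripped, then the file size in bytes. A small illustrative sketch of how such a line is produced, assuming a local file path:

```python
import base64
import hashlib
from pathlib import Path


def record_entry(path: str) -> str:
    """Build a RECORD-style line: path,sha256=<urlsafe-b64 digest without padding>,<size>."""
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
    return f"{path},sha256={digest},{len(data)}"


# Example (illustrative): record_entry("agent/providers/omni/loop.py") would yield a
# line like "agent/providers/omni/loop.py,sha256=-eKNHYpNUZ...,40416" as shown above.
```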
{cua_agent-0.1.24.dist-info → cua_agent-0.1.25.dist-info}/WHEEL
File without changes

{cua_agent-0.1.24.dist-info → cua_agent-0.1.25.dist-info}/entry_points.txt
File without changes