cua-agent: cua_agent-0.4.34-py3-none-any.whl → cua_agent-0.4.35-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. See the registry's page for this release for more details.
- agent/__init__.py +4 -10
- agent/__main__.py +2 -1
- agent/adapters/huggingfacelocal_adapter.py +54 -61
- agent/adapters/human_adapter.py +116 -114
- agent/adapters/mlxvlm_adapter.py +110 -99
- agent/adapters/models/__init__.py +14 -6
- agent/adapters/models/generic.py +7 -4
- agent/adapters/models/internvl.py +66 -30
- agent/adapters/models/opencua.py +23 -8
- agent/adapters/models/qwen2_5_vl.py +7 -4
- agent/agent.py +184 -158
- agent/callbacks/__init__.py +4 -4
- agent/callbacks/base.py +45 -31
- agent/callbacks/budget_manager.py +22 -10
- agent/callbacks/image_retention.py +18 -13
- agent/callbacks/logging.py +55 -42
- agent/callbacks/operator_validator.py +3 -1
- agent/callbacks/pii_anonymization.py +19 -16
- agent/callbacks/telemetry.py +67 -61
- agent/callbacks/trajectory_saver.py +90 -70
- agent/cli.py +115 -110
- agent/computers/__init__.py +13 -8
- agent/computers/base.py +26 -17
- agent/computers/cua.py +27 -23
- agent/computers/custom.py +72 -69
- agent/decorators.py +23 -14
- agent/human_tool/__init__.py +2 -7
- agent/human_tool/__main__.py +6 -2
- agent/human_tool/server.py +48 -37
- agent/human_tool/ui.py +235 -185
- agent/integrations/hud/__init__.py +15 -21
- agent/integrations/hud/agent.py +101 -83
- agent/integrations/hud/proxy.py +90 -57
- agent/loops/__init__.py +25 -21
- agent/loops/anthropic.py +537 -483
- agent/loops/base.py +13 -14
- agent/loops/composed_grounded.py +135 -149
- agent/loops/gemini.py +31 -12
- agent/loops/glm45v.py +135 -133
- agent/loops/gta1.py +47 -50
- agent/loops/holo.py +4 -2
- agent/loops/internvl.py +6 -11
- agent/loops/moondream3.py +36 -12
- agent/loops/omniparser.py +212 -209
- agent/loops/openai.py +49 -50
- agent/loops/opencua.py +29 -41
- agent/loops/qwen.py +475 -0
- agent/loops/uitars.py +237 -202
- agent/proxy/examples.py +54 -50
- agent/proxy/handlers.py +27 -34
- agent/responses.py +330 -330
- agent/types.py +11 -5
- agent/ui/__init__.py +1 -1
- agent/ui/__main__.py +1 -1
- agent/ui/gradio/app.py +23 -18
- agent/ui/gradio/ui_components.py +310 -161
- {cua_agent-0.4.34.dist-info → cua_agent-0.4.35.dist-info}/METADATA +18 -10
- cua_agent-0.4.35.dist-info/RECORD +64 -0
- cua_agent-0.4.34.dist-info/RECORD +0 -63
- {cua_agent-0.4.34.dist-info → cua_agent-0.4.35.dist-info}/WHEEL +0 -0
- {cua_agent-0.4.34.dist-info → cua_agent-0.4.35.dist-info}/entry_points.txt +0 -0
agent/types.py
CHANGED
|
@@ -2,37 +2,43 @@
|
|
|
2
2
|
Type definitions for agent
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
from typing import Dict, List, Any, Optional, Callable, Protocol, Literal
|
|
6
|
-
from pydantic import BaseModel
|
|
7
5
|
import re
|
|
8
|
-
from litellm import ResponseInputParam, ResponsesAPIResponse, ToolParam
|
|
9
6
|
from collections.abc import Iterable
|
|
7
|
+
from typing import Any, Callable, Dict, List, Literal, Optional, Protocol
|
|
8
|
+
|
|
9
|
+
from litellm import ResponseInputParam, ResponsesAPIResponse, ToolParam
|
|
10
|
+
from pydantic import BaseModel
|
|
10
11
|
|
|
11
12
|
# Agent input types
|
|
12
13
|
Messages = str | ResponseInputParam | List[Dict[str, Any]]
|
|
13
14
|
Tools = Optional[Iterable[ToolParam]]
|
|
14
15
|
|
|
15
16
|
# Agent output types
|
|
16
|
-
AgentResponse = ResponsesAPIResponse
|
|
17
|
+
AgentResponse = ResponsesAPIResponse
|
|
17
18
|
AgentCapability = Literal["step", "click"]
|
|
18
19
|
|
|
20
|
+
|
|
19
21
|
# Exception types
|
|
20
22
|
class ToolError(RuntimeError):
|
|
21
23
|
"""Base exception for tool-related errors"""
|
|
24
|
+
|
|
22
25
|
pass
|
|
23
26
|
|
|
27
|
+
|
|
24
28
|
class IllegalArgumentError(ToolError):
|
|
25
29
|
"""Exception raised when function arguments are invalid"""
|
|
30
|
+
|
|
26
31
|
pass
|
|
27
32
|
|
|
28
33
|
|
|
29
34
|
# Agent config registration
|
|
30
35
|
class AgentConfigInfo(BaseModel):
|
|
31
36
|
"""Information about a registered agent config"""
|
|
37
|
+
|
|
32
38
|
agent_class: type
|
|
33
39
|
models_regex: str
|
|
34
40
|
priority: int = 0
|
|
35
|
-
|
|
41
|
+
|
|
36
42
|
def matches_model(self, model: str) -> bool:
|
|
37
43
|
"""Check if this agent config matches the given model"""
|
|
38
44
|
return bool(re.match(self.models_regex, model))
|
agent/ui/__init__.py
CHANGED
agent/ui/__main__.py
CHANGED
agent/ui/gradio/app.py
CHANGED
|
@@ -18,21 +18,21 @@ Requirements:
|
|
|
18
18
|
- OpenAI or Anthropic API key
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
|
-
import os
|
|
22
21
|
import asyncio
|
|
23
|
-
import logging
|
|
24
22
|
import json
|
|
23
|
+
import logging
|
|
24
|
+
import os
|
|
25
25
|
import platform
|
|
26
26
|
from pathlib import Path
|
|
27
|
-
from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
|
|
27
|
+
from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union, cast
|
|
28
|
+
|
|
28
29
|
import gradio as gr
|
|
29
|
-
from gradio.components.chatbot import MetadataDict
|
|
30
|
-
from typing import cast
|
|
31
30
|
|
|
32
31
|
# Import from agent package
|
|
33
32
|
from agent import ComputerAgent
|
|
34
|
-
from agent.types import Messages, AgentResponse
|
|
33
|
+
from agent.types import AgentResponse, Messages
|
|
35
34
|
from computer import Computer
|
|
35
|
+
from gradio.components.chatbot import MetadataDict
|
|
36
36
|
|
|
37
37
|
# Global variables
|
|
38
38
|
global_agent = None
|
|
@@ -42,11 +42,13 @@ SETTINGS_FILE = Path(".gradio_settings.json")
|
|
|
42
42
|
logging.basicConfig(level=logging.INFO)
|
|
43
43
|
|
|
44
44
|
import dotenv
|
|
45
|
+
|
|
45
46
|
if dotenv.load_dotenv():
|
|
46
47
|
print(f"DEBUG - Loaded environment variables from {dotenv.find_dotenv()}")
|
|
47
48
|
else:
|
|
48
49
|
print("DEBUG - No .env file found")
|
|
49
50
|
|
|
51
|
+
|
|
50
52
|
# --- Settings Load/Save Functions ---
|
|
51
53
|
def load_settings() -> Dict[str, Any]:
|
|
52
54
|
"""Loads settings from the JSON file."""
|
|
@@ -84,7 +86,7 @@ def save_settings(settings: Dict[str, Any]):
|
|
|
84
86
|
# async def on_screenshot(self, screenshot_base64: str, action_type: str = "") -> None:
|
|
85
87
|
# """Add screenshot to chatbot when a screenshot is taken."""
|
|
86
88
|
# image_markdown = f""
|
|
87
|
-
|
|
89
|
+
|
|
88
90
|
# if self.chatbot_history is not None:
|
|
89
91
|
# self.chatbot_history.append(
|
|
90
92
|
# gr.ChatMessage(
|
|
@@ -141,7 +143,7 @@ def get_model_string(model_name: str, loop_provider: str) -> str:
|
|
|
141
143
|
ollama_model = model_name.split("OMNI: Ollama ", 1)[1]
|
|
142
144
|
return f"omniparser+ollama_chat/{ollama_model}"
|
|
143
145
|
return "omniparser+ollama_chat/llama3"
|
|
144
|
-
|
|
146
|
+
|
|
145
147
|
# Map based on loop provider
|
|
146
148
|
mapping = MODEL_MAPPINGS.get(loop_provider.lower(), MODEL_MAPPINGS["openai"])
|
|
147
149
|
return mapping.get(model_name, mapping["default"])
|
|
@@ -151,6 +153,7 @@ def get_ollama_models() -> List[str]:
|
|
|
151
153
|
"""Get available models from Ollama if installed."""
|
|
152
154
|
try:
|
|
153
155
|
import subprocess
|
|
156
|
+
|
|
154
157
|
result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
|
|
155
158
|
if result.returncode == 0:
|
|
156
159
|
lines = result.stdout.strip().split("\n")
|
|
@@ -174,16 +177,14 @@ def create_computer_instance(
|
|
|
174
177
|
os_type: str = "macos",
|
|
175
178
|
provider_type: str = "lume",
|
|
176
179
|
name: Optional[str] = None,
|
|
177
|
-
api_key: Optional[str] = None
|
|
180
|
+
api_key: Optional[str] = None,
|
|
178
181
|
) -> Computer:
|
|
179
182
|
"""Create or get the global Computer instance."""
|
|
180
183
|
global global_computer
|
|
181
184
|
if global_computer is None:
|
|
182
185
|
if provider_type == "localhost":
|
|
183
186
|
global_computer = Computer(
|
|
184
|
-
verbosity=verbosity,
|
|
185
|
-
os_type=os_type,
|
|
186
|
-
use_host_computer_server=True
|
|
187
|
+
verbosity=verbosity, os_type=os_type, use_host_computer_server=True
|
|
187
188
|
)
|
|
188
189
|
else:
|
|
189
190
|
global_computer = Computer(
|
|
@@ -191,7 +192,7 @@ def create_computer_instance(
|
|
|
191
192
|
os_type=os_type,
|
|
192
193
|
provider_type=provider_type,
|
|
193
194
|
name=name if name else "",
|
|
194
|
-
api_key=api_key
|
|
195
|
+
api_key=api_key,
|
|
195
196
|
)
|
|
196
197
|
return global_computer
|
|
197
198
|
|
|
@@ -217,7 +218,7 @@ def create_agent(
|
|
|
217
218
|
os_type=computer_os,
|
|
218
219
|
provider_type=computer_provider,
|
|
219
220
|
name=computer_name,
|
|
220
|
-
api_key=computer_api_key
|
|
221
|
+
api_key=computer_api_key,
|
|
221
222
|
)
|
|
222
223
|
|
|
223
224
|
# Handle custom models
|
|
@@ -233,12 +234,15 @@ def create_agent(
|
|
|
233
234
|
"only_n_most_recent_images": only_n_most_recent_images,
|
|
234
235
|
"verbosity": verbosity,
|
|
235
236
|
}
|
|
236
|
-
|
|
237
|
+
|
|
237
238
|
if save_trajectory:
|
|
238
239
|
agent_kwargs["trajectory_dir"] = "trajectories"
|
|
239
|
-
|
|
240
|
+
|
|
240
241
|
if max_trajectory_budget:
|
|
241
|
-
agent_kwargs["max_trajectory_budget"] = {"max_budget": max_trajectory_budget, "raise_error": True}
|
|
242
|
+
agent_kwargs["max_trajectory_budget"] = {
|
|
243
|
+
"max_budget": max_trajectory_budget,
|
|
244
|
+
"raise_error": True,
|
|
245
|
+
}
|
|
242
246
|
|
|
243
247
|
global_agent = ComputerAgent(**agent_kwargs)
|
|
244
248
|
return global_agent
|
|
@@ -247,7 +251,8 @@ def create_agent(
|
|
|
247
251
|
def launch_ui():
|
|
248
252
|
"""Standalone function to launch the Gradio app."""
|
|
249
253
|
from agent.ui.gradio.ui_components import create_gradio_ui
|
|
250
|
-
|
|
254
|
+
|
|
255
|
+
print("Starting Gradio app for CUA Agent...")
|
|
251
256
|
demo = create_gradio_ui()
|
|
252
257
|
demo.launch(share=False, inbrowser=True)
|
|
253
258
|
|