cua-agent 0.4.14__py3-none-any.whl → 0.7.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/__init__.py +4 -19
- agent/__main__.py +2 -1
- agent/adapters/__init__.py +6 -0
- agent/adapters/azure_ml_adapter.py +283 -0
- agent/adapters/cua_adapter.py +161 -0
- agent/adapters/huggingfacelocal_adapter.py +67 -125
- agent/adapters/human_adapter.py +116 -114
- agent/adapters/mlxvlm_adapter.py +370 -0
- agent/adapters/models/__init__.py +41 -0
- agent/adapters/models/generic.py +78 -0
- agent/adapters/models/internvl.py +290 -0
- agent/adapters/models/opencua.py +115 -0
- agent/adapters/models/qwen2_5_vl.py +78 -0
- agent/agent.py +431 -241
- agent/callbacks/__init__.py +10 -3
- agent/callbacks/base.py +45 -31
- agent/callbacks/budget_manager.py +22 -10
- agent/callbacks/image_retention.py +54 -98
- agent/callbacks/logging.py +55 -42
- agent/callbacks/operator_validator.py +140 -0
- agent/callbacks/otel.py +291 -0
- agent/callbacks/pii_anonymization.py +19 -16
- agent/callbacks/prompt_instructions.py +47 -0
- agent/callbacks/telemetry.py +106 -69
- agent/callbacks/trajectory_saver.py +178 -70
- agent/cli.py +269 -119
- agent/computers/__init__.py +14 -9
- agent/computers/base.py +32 -19
- agent/computers/cua.py +52 -25
- agent/computers/custom.py +78 -71
- agent/decorators.py +23 -14
- agent/human_tool/__init__.py +2 -7
- agent/human_tool/__main__.py +6 -2
- agent/human_tool/server.py +48 -37
- agent/human_tool/ui.py +359 -235
- agent/integrations/hud/__init__.py +164 -74
- agent/integrations/hud/agent.py +338 -342
- agent/integrations/hud/proxy.py +297 -0
- agent/loops/__init__.py +44 -14
- agent/loops/anthropic.py +590 -492
- agent/loops/base.py +19 -15
- agent/loops/composed_grounded.py +142 -144
- agent/loops/fara/__init__.py +8 -0
- agent/loops/fara/config.py +506 -0
- agent/loops/fara/helpers.py +357 -0
- agent/loops/fara/schema.py +143 -0
- agent/loops/gelato.py +183 -0
- agent/loops/gemini.py +935 -0
- agent/loops/generic_vlm.py +601 -0
- agent/loops/glm45v.py +140 -135
- agent/loops/gta1.py +48 -51
- agent/loops/holo.py +218 -0
- agent/loops/internvl.py +180 -0
- agent/loops/moondream3.py +493 -0
- agent/loops/omniparser.py +326 -226
- agent/loops/openai.py +63 -56
- agent/loops/opencua.py +134 -0
- agent/loops/uiins.py +175 -0
- agent/loops/uitars.py +262 -212
- agent/loops/uitars2.py +951 -0
- agent/playground/__init__.py +5 -0
- agent/playground/server.py +301 -0
- agent/proxy/examples.py +196 -0
- agent/proxy/handlers.py +255 -0
- agent/responses.py +486 -339
- agent/tools/__init__.py +24 -0
- agent/tools/base.py +253 -0
- agent/tools/browser_tool.py +423 -0
- agent/types.py +20 -5
- agent/ui/__init__.py +1 -1
- agent/ui/__main__.py +1 -1
- agent/ui/gradio/app.py +25 -22
- agent/ui/gradio/ui_components.py +314 -167
- cua_agent-0.7.16.dist-info/METADATA +85 -0
- cua_agent-0.7.16.dist-info/RECORD +79 -0
- {cua_agent-0.4.14.dist-info → cua_agent-0.7.16.dist-info}/WHEEL +1 -1
- agent/integrations/hud/adapter.py +0 -121
- agent/integrations/hud/computer_handler.py +0 -187
- agent/telemetry.py +0 -142
- cua_agent-0.4.14.dist-info/METADATA +0 -436
- cua_agent-0.4.14.dist-info/RECORD +0 -50
- {cua_agent-0.4.14.dist-info → cua_agent-0.7.16.dist-info}/entry_points.txt +0 -0
|
@@ -1,77 +1,167 @@
|
|
|
1
|
-
"""HUD integration
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
#
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
1
|
+
"""HUD integration: dataset runners and MCP-based computer agent export.
|
|
2
|
+
|
|
3
|
+
This module exposes helpers to evaluate HUD-compatible datasets and exports
|
|
4
|
+
the MCP-compatible computer agent implementation.
|
|
5
|
+
|
|
6
|
+
Exports:
|
|
7
|
+
- run_single_task(dataset, ...)
|
|
8
|
+
- run_full_dataset(dataset, ...)
|
|
9
|
+
- MCPComputerAgent
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import time
|
|
13
|
+
from typing import Any, Optional
|
|
14
|
+
|
|
15
|
+
from agent.computers import is_agent_computer
|
|
16
|
+
from datasets import Dataset, load_dataset
|
|
17
|
+
from hud import trace
|
|
18
|
+
from hud.datasets import Task, run_dataset
|
|
19
|
+
|
|
20
|
+
from .agent import MCPComputerAgent
|
|
21
|
+
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
# Single-task runner
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
async def run_single_task(
    dataset: str | Dataset | list[dict[str, Any]],
    *,
    task_id: int = 0,
    model: str | None = None,
    allowed_tools: list[str] | None = None,
    # === ComputerAgent kwargs ===
    tools: list[Any] | None = None,
    custom_loop: Any | None = None,
    only_n_most_recent_images: int | None = None,
    callbacks: list[Any] | None = None,
    instructions: str | None = None,
    verbosity: int | None = None,
    trajectory_dir: str | dict | None = None,
    max_retries: int | None = 3,
    screenshot_delay: float | int = 0.5,
    use_prompt_caching: bool | None = False,
    max_trajectory_budget: float | dict | None = None,
    telemetry_enabled: bool | None = True,
    max_steps: int = 10,
) -> None:
    """Load one task from the dataset and execute it with MCPComputerAgent.

    Args:
        dataset: A dataset name (loaded with ``load_dataset(..., split="train")``),
            an already-loaded ``Dataset``, a list of task dicts, or any other
            object indexable by split name (its ``"train"`` entry is used).
        task_id: Index of the task to pick from the resolved dataset.
        model: Model name; falls back to ``"computer-use-preview"`` when falsy.
        allowed_tools: Tool names passed to the agent; falls back to
            ``["openai_computer"]`` when falsy.
        tools, custom_loop, only_n_most_recent_images, callbacks, instructions,
        verbosity, trajectory_dir, max_retries, screenshot_delay,
        use_prompt_caching, max_trajectory_budget, telemetry_enabled:
            Passed through unchanged to ``MCPComputerAgent`` (except ``tools``,
            which is filtered first, see below).
        max_steps: Step limit forwarded to ``agent.run``; defaults to 10.

    The task prompt and the resulting reward are printed; nothing is returned.
    """

    # Resolve the input into something indexable by task_id.
    if isinstance(dataset, str):
        dataset = load_dataset(dataset, split="train")  # type: ignore[arg-type]
    elif not isinstance(dataset, (list, Dataset)):
        # Not a list and not a single Dataset: treat it as a mapping of splits.
        # A plain Dataset must NOT be indexed with "train" here, because a
        # string key on a Dataset selects a column rather than a split.
        dataset = dataset["train"]

    sample_task = dataset[task_id]  # type: ignore[index]
    task_prompt = sample_task.get("prompt", f"Task {sample_task.get('id', 0)}")  # type: ignore[attr-defined]

    # Filter any existing Computer tools
    # The eval framework will add its own Computer tool per task
    if tools:
        tools = [tool for tool in tools if not is_agent_computer(tool)]

    with trace(name=task_prompt):
        task = Task(**sample_task)  # type: ignore[arg-type]

        agent = MCPComputerAgent(
            model=model or "computer-use-preview",
            allowed_tools=allowed_tools or ["openai_computer"],
            # === ComputerAgent kwargs passthrough ===
            tools=tools,
            custom_loop=custom_loop,
            only_n_most_recent_images=only_n_most_recent_images,
            callbacks=callbacks,
            instructions=instructions,
            verbosity=verbosity,
            trajectory_dir=trajectory_dir,
            max_retries=max_retries,
            screenshot_delay=screenshot_delay,
            use_prompt_caching=use_prompt_caching,
            max_trajectory_budget=max_trajectory_budget,
            telemetry_enabled=telemetry_enabled,
        )
        print(f"Running: {task_prompt}")
        result = await agent.run(task, max_steps=max_steps)
        print(f"✅ Reward: {result.reward}")
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# ---------------------------------------------------------------------------
|
|
91
|
+
# Full-dataset runner
|
|
92
|
+
# ---------------------------------------------------------------------------
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
async def run_full_dataset(
    dataset: str | Dataset | list[dict[str, Any]],
    *,
    job_name: Optional[str] = None,
    model: str | None = None,
    allowed_tools: list[str] | None = None,
    max_concurrent: int = 30,
    max_steps: int = 50,
    split: str = "train",
    trajectory_dir: str | dict | None = None,
    # === ComputerAgent kwargs ===
    tools: list[Any] | None = None,
    custom_loop: Any | None = None,
    only_n_most_recent_images: int | None = 5,
    callbacks: list[Any] | None = None,
    instructions: str | None = None,
    verbosity: int | None = None,
    max_retries: int | None = 3,
    screenshot_delay: float | int = 0.5,
    use_prompt_caching: bool | None = False,
    max_trajectory_budget: float | dict | None = None,
    telemetry_enabled: bool | None = True,
) -> list[Any]:
    """Evaluate every task of a dataset through ``hud.datasets.run_dataset``.

    Args:
        dataset: A dataset name (loaded with ``load_dataset`` using ``split``)
            or an already-materialised dataset / list of task dicts, which is
            forwarded as-is.
        job_name: Name of the evaluation job. When falsy, a default is
            derived from the dataset name (string input) or from the current
            local time (any other input).
        model, allowed_tools, trajectory_dir, tools, custom_loop,
        only_n_most_recent_images, callbacks, instructions, verbosity,
        max_retries, screenshot_delay, use_prompt_caching,
        max_trajectory_budget, telemetry_enabled:
            Collected into the ``agent_config`` dict handed to ``run_dataset``
            (``tools`` is filtered first, see below).
        max_concurrent: Forwarded to ``run_dataset``.
        max_steps: Forwarded to ``run_dataset``.
        split: Split requested from ``load_dataset``; only used when
            ``dataset`` is a string.

    Returns:
        Whatever ``run_dataset`` returns.
    """

    # Work out the dataset label, the default job name and the dataset object.
    if isinstance(dataset, str):
        # Last path component of e.g. "org/name" is used as the label.
        dataset_name = dataset.rsplit("/", 1)[-1]
        if not job_name:
            job_name = f"Evaluation {dataset_name}"
        dataset = load_dataset(dataset, split=split)  # type: ignore[arg-type]
    else:
        dataset_name = "custom"
        if not job_name:
            job_name = f"Evaluation {time.strftime('%H:%M %Y-%m-%d')}"

    # Drop any Computer tools supplied by the caller: the eval framework
    # will add its own Computer tool per task.
    if tools:
        retained_tools = []
        for candidate in tools:
            if not is_agent_computer(candidate):
                retained_tools.append(candidate)
        tools = retained_tools

    # Constructor settings for the MCP-based agent class.
    agent_settings = {
        "model": model,
        "allowed_tools": allowed_tools,
        "trajectory_dir": trajectory_dir,
        # === ComputerAgent kwargs passthrough ===
        "tools": tools,
        "custom_loop": custom_loop,
        "only_n_most_recent_images": only_n_most_recent_images,
        "callbacks": callbacks,
        "instructions": instructions,
        "verbosity": verbosity,
        "max_retries": max_retries,
        "screenshot_delay": screenshot_delay,
        "use_prompt_caching": use_prompt_caching,
        "max_trajectory_budget": max_trajectory_budget,
        "telemetry_enabled": telemetry_enabled,
    }
    job_metadata = {"dataset": dataset_name}

    # Execute evaluation
    results = await run_dataset(
        name=job_name,
        dataset=dataset,
        agent_class=MCPComputerAgent,
        agent_config=agent_settings,
        max_concurrent=max_concurrent,
        metadata=job_metadata,
        max_steps=max_steps,
        auto_respond=True,
    )
    return results
|
|
75
161
|
|
|
76
162
|
|
|
77
|
-
__all__ = [
|
|
163
|
+
# Public API of the HUD integration package.
__all__ = ["run_single_task", "run_full_dataset", "MCPComputerAgent"]
|