cua-agent 0.4.18__tar.gz → 0.4.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- {cua_agent-0.4.18 → cua_agent-0.4.20}/PKG-INFO +1 -1
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/adapters/mlxvlm_adapter.py +3 -2
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/agent.py +10 -2
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/callbacks/trajectory_saver.py +83 -5
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/integrations/hud/__init__.py +3 -3
- {cua_agent-0.4.18 → cua_agent-0.4.20}/pyproject.toml +1 -1
- {cua_agent-0.4.18 → cua_agent-0.4.20}/README.md +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/__init__.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/__main__.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/adapters/__init__.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/adapters/huggingfacelocal_adapter.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/adapters/human_adapter.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/callbacks/__init__.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/callbacks/base.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/callbacks/budget_manager.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/callbacks/image_retention.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/callbacks/logging.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/callbacks/operator_validator.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/callbacks/pii_anonymization.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/callbacks/telemetry.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/cli.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/computers/__init__.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/computers/base.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/computers/cua.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/computers/custom.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/decorators.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/human_tool/__init__.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/human_tool/__main__.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/human_tool/server.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/human_tool/ui.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/integrations/hud/proxy.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/loops/__init__.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/loops/anthropic.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/loops/base.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/loops/composed_grounded.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/loops/glm45v.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/loops/gta1.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/loops/model_types.csv +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/loops/omniparser.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/loops/openai.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/loops/uitars.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/proxy/examples.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/proxy/handlers.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/responses.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/types.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/ui/__init__.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/ui/__main__.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/ui/gradio/__init__.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/ui/gradio/app.py +0 -0
- {cua_agent-0.4.18 → cua_agent-0.4.20}/agent/ui/gradio/ui_components.py +0 -0
|
@@ -78,8 +78,6 @@ class MLXVLMAdapter(CustomLLM):
|
|
|
78
78
|
**kwargs: Additional arguments
|
|
79
79
|
"""
|
|
80
80
|
super().__init__()
|
|
81
|
-
if not MLX_AVAILABLE:
|
|
82
|
-
raise ImportError("MLX VLM dependencies not available. Please install mlx-vlm.")
|
|
83
81
|
|
|
84
82
|
self.models = {} # Cache for loaded models
|
|
85
83
|
self.processors = {} # Cache for loaded processors
|
|
@@ -95,6 +93,9 @@ class MLXVLMAdapter(CustomLLM):
|
|
|
95
93
|
Returns:
|
|
96
94
|
Tuple of (model, processor, config)
|
|
97
95
|
"""
|
|
96
|
+
if not MLX_AVAILABLE:
|
|
97
|
+
raise ImportError("MLX VLM dependencies not available. Please install mlx-vlm.")
|
|
98
|
+
|
|
98
99
|
if model_name not in self.models:
|
|
99
100
|
# Load model and processor
|
|
100
101
|
model_obj, processor = load(
|
|
@@ -3,6 +3,7 @@ ComputerAgent - Main agent class that selects and runs agent loops
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
import asyncio
|
|
6
|
+
from pathlib import Path
|
|
6
7
|
from typing import Dict, List, Any, Optional, AsyncGenerator, Union, cast, Callable, Set, Tuple
|
|
7
8
|
|
|
8
9
|
from litellm.responses.utils import Usage
|
|
@@ -162,7 +163,7 @@ class ComputerAgent:
|
|
|
162
163
|
only_n_most_recent_images: Optional[int] = None,
|
|
163
164
|
callbacks: Optional[List[Any]] = None,
|
|
164
165
|
verbosity: Optional[int] = None,
|
|
165
|
-
trajectory_dir: Optional[str] = None,
|
|
166
|
+
trajectory_dir: Optional[str | Path | dict] = None,
|
|
166
167
|
max_retries: Optional[int] = 3,
|
|
167
168
|
screenshot_delay: Optional[float | int] = 0.5,
|
|
168
169
|
use_prompt_caching: Optional[bool] = False,
|
|
@@ -223,7 +224,10 @@ class ComputerAgent:
|
|
|
223
224
|
|
|
224
225
|
# Add trajectory saver callback if trajectory_dir is set
|
|
225
226
|
if self.trajectory_dir:
|
|
226
|
-
|
|
227
|
+
if isinstance(self.trajectory_dir, dict):
|
|
228
|
+
self.callbacks.append(TrajectorySaverCallback(**self.trajectory_dir))
|
|
229
|
+
elif isinstance(self.trajectory_dir, (str, Path)):
|
|
230
|
+
self.callbacks.append(TrajectorySaverCallback(str(self.trajectory_dir)))
|
|
227
231
|
|
|
228
232
|
# Add budget manager if max_trajectory_budget is set
|
|
229
233
|
if max_trajectory_budget:
|
|
@@ -249,6 +253,10 @@ class ComputerAgent:
|
|
|
249
253
|
|
|
250
254
|
# == Initialize computer agent ==
|
|
251
255
|
|
|
256
|
+
# If the loop is "human/human", we need to prefix a grounding model fallback
|
|
257
|
+
if self.agent_loop in ["human/human", "human"]:
|
|
258
|
+
self.agent_loop = "openai/computer-use-preview+human/human"
|
|
259
|
+
|
|
252
260
|
# Find the appropriate agent loop
|
|
253
261
|
if custom_loop:
|
|
254
262
|
self.agent_loop = custom_loop
|
|
@@ -11,6 +11,8 @@ from pathlib import Path
|
|
|
11
11
|
from typing import List, Dict, Any, Optional, Union, override
|
|
12
12
|
from PIL import Image, ImageDraw
|
|
13
13
|
import io
|
|
14
|
+
from copy import deepcopy
|
|
15
|
+
|
|
14
16
|
from .base import AsyncCallbackHandler
|
|
15
17
|
|
|
16
18
|
def sanitize_image_urls(data: Any) -> Any:
|
|
@@ -43,6 +45,64 @@ def sanitize_image_urls(data: Any) -> Any:
|
|
|
43
45
|
return data
|
|
44
46
|
|
|
45
47
|
|
|
48
|
+
def extract_computer_call_outputs(items: List[Dict[str, Any]], screenshot_dir: Optional[Path]) -> List[Dict[str, Any]]:
|
|
49
|
+
"""
|
|
50
|
+
Save any base64-encoded screenshots from computer_call_output entries to files and
|
|
51
|
+
replace their image_url with the saved file path when a call_id is present.
|
|
52
|
+
|
|
53
|
+
Only operates if screenshot_dir is provided and exists; otherwise returns items unchanged.
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
items: List of message/result dicts potentially containing computer_call_output entries
|
|
57
|
+
screenshot_dir: Directory to write screenshots into
|
|
58
|
+
|
|
59
|
+
Returns:
|
|
60
|
+
A new list with updated image_url fields when applicable.
|
|
61
|
+
"""
|
|
62
|
+
if not items:
|
|
63
|
+
return items
|
|
64
|
+
if not screenshot_dir or not screenshot_dir.exists():
|
|
65
|
+
return items
|
|
66
|
+
|
|
67
|
+
updated: List[Dict[str, Any]] = []
|
|
68
|
+
for item in items:
|
|
69
|
+
# work on a shallow copy; deep copy nested 'output' if we modify it
|
|
70
|
+
msg = dict(item)
|
|
71
|
+
try:
|
|
72
|
+
if msg.get("type") == "computer_call_output":
|
|
73
|
+
call_id = msg.get("call_id")
|
|
74
|
+
output = msg.get("output", {})
|
|
75
|
+
image_url = output.get("image_url")
|
|
76
|
+
if call_id and isinstance(image_url, str) and image_url.startswith("data:"):
|
|
77
|
+
# derive extension from MIME type e.g. data:image/png;base64,
|
|
78
|
+
try:
|
|
79
|
+
ext = image_url.split(";", 1)[0].split("/")[-1]
|
|
80
|
+
if not ext:
|
|
81
|
+
ext = "png"
|
|
82
|
+
except Exception:
|
|
83
|
+
ext = "png"
|
|
84
|
+
out_path = screenshot_dir / f"{call_id}.{ext}"
|
|
85
|
+
# write file if it doesn't exist
|
|
86
|
+
if not out_path.exists():
|
|
87
|
+
try:
|
|
88
|
+
b64_payload = image_url.split(",", 1)[1]
|
|
89
|
+
img_bytes = base64.b64decode(b64_payload)
|
|
90
|
+
out_path.parent.mkdir(parents=True, exist_ok=True)
|
|
91
|
+
with open(out_path, "wb") as f:
|
|
92
|
+
f.write(img_bytes)
|
|
93
|
+
except Exception:
|
|
94
|
+
# if anything fails, skip modifying this message
|
|
95
|
+
pass
|
|
96
|
+
# update image_url to file path
|
|
97
|
+
new_output = dict(output)
|
|
98
|
+
new_output["image_url"] = str(out_path)
|
|
99
|
+
msg["output"] = new_output
|
|
100
|
+
except Exception:
|
|
101
|
+
# do not block on malformed entries; keep original
|
|
102
|
+
pass
|
|
103
|
+
updated.append(msg)
|
|
104
|
+
return updated
|
|
105
|
+
|
|
46
106
|
class TrajectorySaverCallback(AsyncCallbackHandler):
|
|
47
107
|
"""
|
|
48
108
|
Callback handler that saves agent trajectories to disk.
|
|
@@ -51,7 +111,7 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
|
|
|
51
111
|
within the trajectory gets its own folder with screenshots and responses.
|
|
52
112
|
"""
|
|
53
113
|
|
|
54
|
-
def __init__(self, trajectory_dir: str, reset_on_run: bool = True):
|
|
114
|
+
def __init__(self, trajectory_dir: str, reset_on_run: bool = True, screenshot_dir: Optional[str] = None):
|
|
55
115
|
"""
|
|
56
116
|
Initialize trajectory saver.
|
|
57
117
|
|
|
@@ -67,10 +127,12 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
|
|
|
67
127
|
self.model: Optional[str] = None
|
|
68
128
|
self.total_usage: Dict[str, Any] = {}
|
|
69
129
|
self.reset_on_run = reset_on_run
|
|
130
|
+
# Optional directory to store extracted screenshots from metadata/new_items
|
|
131
|
+
self.screenshot_dir: Optional[Path] = Path(screenshot_dir) if screenshot_dir else None
|
|
70
132
|
|
|
71
133
|
# Ensure trajectory directory exists
|
|
72
134
|
self.trajectory_dir.mkdir(parents=True, exist_ok=True)
|
|
73
|
-
|
|
135
|
+
|
|
74
136
|
def _get_turn_dir(self) -> Path:
|
|
75
137
|
"""Get the directory for the current turn."""
|
|
76
138
|
if not self.trajectory_id:
|
|
@@ -139,12 +201,21 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
|
|
|
139
201
|
trajectory_path = self.trajectory_dir / self.trajectory_id
|
|
140
202
|
trajectory_path.mkdir(parents=True, exist_ok=True)
|
|
141
203
|
|
|
142
|
-
# Save trajectory metadata
|
|
204
|
+
# Save trajectory metadata (optionally extract screenshots to screenshot_dir)
|
|
205
|
+
kwargs_to_save = kwargs.copy()
|
|
206
|
+
try:
|
|
207
|
+
if "messages" in kwargs_to_save:
|
|
208
|
+
kwargs_to_save["messages"] = extract_computer_call_outputs(
|
|
209
|
+
kwargs_to_save["messages"], self.screenshot_dir
|
|
210
|
+
)
|
|
211
|
+
except Exception:
|
|
212
|
+
# If extraction fails, fall back to original messages
|
|
213
|
+
pass
|
|
143
214
|
metadata = {
|
|
144
215
|
"trajectory_id": self.trajectory_id,
|
|
145
216
|
"created_at": str(uuid.uuid1().time),
|
|
146
217
|
"status": "running",
|
|
147
|
-
"kwargs": kwargs,
|
|
218
|
+
"kwargs": kwargs_to_save,
|
|
148
219
|
}
|
|
149
220
|
|
|
150
221
|
with open(trajectory_path / "metadata.json", "w") as f:
|
|
@@ -171,11 +242,18 @@ class TrajectorySaverCallback(AsyncCallbackHandler):
|
|
|
171
242
|
metadata = {}
|
|
172
243
|
|
|
173
244
|
# Update metadata with completion info
|
|
245
|
+
# Optionally extract screenshots from new_items before persisting
|
|
246
|
+
new_items_to_save = new_items
|
|
247
|
+
try:
|
|
248
|
+
new_items_to_save = extract_computer_call_outputs(new_items, self.screenshot_dir)
|
|
249
|
+
except Exception:
|
|
250
|
+
pass
|
|
251
|
+
|
|
174
252
|
metadata.update({
|
|
175
253
|
"status": "completed",
|
|
176
254
|
"completed_at": str(uuid.uuid1().time),
|
|
177
255
|
"total_usage": self.total_usage,
|
|
178
|
-
"new_items": new_items,
|
|
256
|
+
"new_items": new_items_to_save,
|
|
179
257
|
"total_turns": self.current_turn
|
|
180
258
|
})
|
|
181
259
|
|
|
@@ -41,7 +41,7 @@ class ProxyOperatorAgent(OperatorAgent):
|
|
|
41
41
|
*,
|
|
42
42
|
model: str | None = None,
|
|
43
43
|
allowed_tools: list[str] | None = None,
|
|
44
|
-
trajectory_dir: str | None = None,
|
|
44
|
+
trajectory_dir: str | dict | None = None,
|
|
45
45
|
# === ComputerAgent kwargs ===
|
|
46
46
|
tools: list[Any] | None = None,
|
|
47
47
|
custom_loop: Any | None = None,
|
|
@@ -109,7 +109,7 @@ async def run_single_task(
|
|
|
109
109
|
only_n_most_recent_images: int | None = None,
|
|
110
110
|
callbacks: list[Any] | None = None,
|
|
111
111
|
verbosity: int | None = None,
|
|
112
|
-
trajectory_dir: str | None = None,
|
|
112
|
+
trajectory_dir: str | dict | None = None,
|
|
113
113
|
max_retries: int | None = 3,
|
|
114
114
|
screenshot_delay: float | int = 0.5,
|
|
115
115
|
use_prompt_caching: bool | None = False,
|
|
@@ -167,7 +167,7 @@ async def run_full_dataset(
|
|
|
167
167
|
max_concurrent: int = 30,
|
|
168
168
|
max_steps: int = 50,
|
|
169
169
|
split: str = "train",
|
|
170
|
-
trajectory_dir: str | None = None,
|
|
170
|
+
trajectory_dir: str | dict | None = None,
|
|
171
171
|
# === ComputerAgent kwargs ===
|
|
172
172
|
tools: list[Any] | None = None,
|
|
173
173
|
custom_loop: Any | None = None,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|