cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/__init__.py +4 -10
- agent/__main__.py +2 -1
- agent/adapters/__init__.py +4 -0
- agent/adapters/azure_ml_adapter.py +283 -0
- agent/adapters/cua_adapter.py +161 -0
- agent/adapters/huggingfacelocal_adapter.py +67 -125
- agent/adapters/human_adapter.py +116 -114
- agent/adapters/mlxvlm_adapter.py +110 -99
- agent/adapters/models/__init__.py +41 -0
- agent/adapters/models/generic.py +78 -0
- agent/adapters/models/internvl.py +290 -0
- agent/adapters/models/opencua.py +115 -0
- agent/adapters/models/qwen2_5_vl.py +78 -0
- agent/agent.py +337 -185
- agent/callbacks/__init__.py +9 -4
- agent/callbacks/base.py +45 -31
- agent/callbacks/budget_manager.py +22 -10
- agent/callbacks/image_retention.py +54 -98
- agent/callbacks/logging.py +55 -42
- agent/callbacks/operator_validator.py +35 -33
- agent/callbacks/otel.py +291 -0
- agent/callbacks/pii_anonymization.py +19 -16
- agent/callbacks/prompt_instructions.py +47 -0
- agent/callbacks/telemetry.py +99 -61
- agent/callbacks/trajectory_saver.py +95 -69
- agent/cli.py +269 -119
- agent/computers/__init__.py +14 -9
- agent/computers/base.py +32 -19
- agent/computers/cua.py +52 -25
- agent/computers/custom.py +78 -71
- agent/decorators.py +23 -14
- agent/human_tool/__init__.py +2 -7
- agent/human_tool/__main__.py +6 -2
- agent/human_tool/server.py +48 -37
- agent/human_tool/ui.py +359 -235
- agent/integrations/hud/__init__.py +38 -99
- agent/integrations/hud/agent.py +369 -0
- agent/integrations/hud/proxy.py +166 -52
- agent/loops/__init__.py +44 -14
- agent/loops/anthropic.py +579 -492
- agent/loops/base.py +19 -15
- agent/loops/composed_grounded.py +136 -150
- agent/loops/fara/__init__.py +8 -0
- agent/loops/fara/config.py +506 -0
- agent/loops/fara/helpers.py +357 -0
- agent/loops/fara/schema.py +143 -0
- agent/loops/gelato.py +183 -0
- agent/loops/gemini.py +935 -0
- agent/loops/generic_vlm.py +601 -0
- agent/loops/glm45v.py +140 -135
- agent/loops/gta1.py +48 -51
- agent/loops/holo.py +218 -0
- agent/loops/internvl.py +180 -0
- agent/loops/moondream3.py +493 -0
- agent/loops/omniparser.py +326 -226
- agent/loops/openai.py +50 -51
- agent/loops/opencua.py +134 -0
- agent/loops/uiins.py +175 -0
- agent/loops/uitars.py +247 -206
- agent/loops/uitars2.py +951 -0
- agent/playground/__init__.py +5 -0
- agent/playground/server.py +301 -0
- agent/proxy/examples.py +61 -57
- agent/proxy/handlers.py +46 -39
- agent/responses.py +447 -347
- agent/tools/__init__.py +24 -0
- agent/tools/base.py +253 -0
- agent/tools/browser_tool.py +423 -0
- agent/types.py +11 -5
- agent/ui/__init__.py +1 -1
- agent/ui/__main__.py +1 -1
- agent/ui/gradio/app.py +25 -22
- agent/ui/gradio/ui_components.py +314 -167
- cua_agent-0.7.16.dist-info/METADATA +85 -0
- cua_agent-0.7.16.dist-info/RECORD +79 -0
- {cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/WHEEL +1 -1
- cua_agent-0.4.22.dist-info/METADATA +0 -436
- cua_agent-0.4.22.dist-info/RECORD +0 -51
- {cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/entry_points.txt +0 -0
agent/proxy/handlers.py
CHANGED
|
@@ -2,29 +2,34 @@
|
|
|
2
2
|
Request handlers for the proxy endpoints.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
import asyncio
|
|
6
5
|
import json
|
|
7
6
|
import logging
|
|
8
7
|
import os
|
|
8
|
+
import re
|
|
9
9
|
from contextlib import contextmanager
|
|
10
|
-
from typing import
|
|
10
|
+
from typing import Any, Dict, List, Optional, Union
|
|
11
11
|
|
|
12
|
-
from ..agent import ComputerAgent
|
|
13
12
|
from computer import Computer
|
|
14
13
|
|
|
14
|
+
from ..agent import ComputerAgent
|
|
15
|
+
from ..tools.browser_tool import BrowserTool
|
|
16
|
+
|
|
15
17
|
logger = logging.getLogger(__name__)
|
|
16
18
|
|
|
19
|
+
# Pattern to detect FARA models (case-insensitive)
|
|
20
|
+
FARA_MODEL_PATTERN = re.compile(r"(?i).*fara.*")
|
|
21
|
+
|
|
17
22
|
|
|
18
23
|
class ResponsesHandler:
|
|
19
24
|
"""Handler for /responses endpoint that processes agent requests."""
|
|
20
|
-
|
|
25
|
+
|
|
21
26
|
def __init__(self):
|
|
22
27
|
self.computer = None
|
|
23
28
|
self.agent = None
|
|
24
29
|
# Simple in-memory caches
|
|
25
30
|
self._computer_cache: Dict[str, Any] = {}
|
|
26
31
|
self._agent_cache: Dict[str, Any] = {}
|
|
27
|
-
|
|
32
|
+
|
|
28
33
|
async def setup_computer_agent(
|
|
29
34
|
self,
|
|
30
35
|
model: str,
|
|
@@ -75,7 +80,9 @@ class ResponsesHandler:
|
|
|
75
80
|
computer = Computer(**default_c_config)
|
|
76
81
|
await computer.__aenter__()
|
|
77
82
|
self._computer_cache[comp_key] = computer
|
|
78
|
-
logger.info(
|
|
83
|
+
logger.info(
|
|
84
|
+
f"Computer created and cached with key={comp_key} config={default_c_config}"
|
|
85
|
+
)
|
|
79
86
|
else:
|
|
80
87
|
logger.info(f"Reusing cached computer for key={comp_key}")
|
|
81
88
|
|
|
@@ -88,41 +95,51 @@ class ResponsesHandler:
|
|
|
88
95
|
agent_key_payload = {"model": model, **agent_kwargs_for_key}
|
|
89
96
|
agent_key = _stable_key(agent_key_payload)
|
|
90
97
|
|
|
98
|
+
# Determine the appropriate tool based on model type
|
|
99
|
+
# FARA models require BrowserTool instead of Computer for browser-specific actions
|
|
100
|
+
# (visit_url, web_search, terminate, history_back, etc.)
|
|
101
|
+
is_fara_model = bool(FARA_MODEL_PATTERN.match(model))
|
|
102
|
+
if is_fara_model and computer is not None:
|
|
103
|
+
tool = BrowserTool(interface=computer.interface)
|
|
104
|
+
logger.info(f"Using BrowserTool for FARA model: {model}")
|
|
105
|
+
else:
|
|
106
|
+
tool = computer
|
|
107
|
+
|
|
91
108
|
agent = self._agent_cache.get(agent_key)
|
|
92
109
|
if agent is None:
|
|
93
110
|
# Default agent configuration
|
|
94
111
|
default_a_config: Dict[str, Any] = {"model": model}
|
|
95
112
|
if not has_custom_tools:
|
|
96
|
-
default_a_config["tools"] = [
|
|
113
|
+
default_a_config["tools"] = [tool]
|
|
97
114
|
# Apply user overrides, but keep tools unless user explicitly sets
|
|
98
115
|
if agent_kwargs:
|
|
99
116
|
if not has_custom_tools:
|
|
100
|
-
agent_kwargs.setdefault("tools", [
|
|
117
|
+
agent_kwargs.setdefault("tools", [tool])
|
|
101
118
|
default_a_config.update(agent_kwargs)
|
|
102
119
|
# JSON-derived kwargs may have loose types; ignore static arg typing here
|
|
103
120
|
agent = ComputerAgent(**default_a_config) # type: ignore[arg-type]
|
|
104
121
|
self._agent_cache[agent_key] = agent
|
|
105
122
|
logger.info(f"Agent created and cached with key={agent_key} model={model}")
|
|
106
123
|
else:
|
|
107
|
-
# Ensure cached agent uses the current
|
|
124
|
+
# Ensure cached agent uses the current tool (in case object differs)
|
|
108
125
|
# Only update if tools not explicitly provided in agent_kwargs
|
|
109
126
|
if not has_custom_tools:
|
|
110
127
|
try:
|
|
111
|
-
agent.tools = [
|
|
128
|
+
agent.tools = [tool]
|
|
112
129
|
except Exception:
|
|
113
130
|
pass
|
|
114
131
|
logger.info(f"Reusing cached agent for key={agent_key}")
|
|
115
132
|
|
|
116
133
|
# Bind current agent reference
|
|
117
134
|
self.agent = agent
|
|
118
|
-
|
|
135
|
+
|
|
119
136
|
async def process_request(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
120
137
|
"""
|
|
121
138
|
Process a /responses request and return the result.
|
|
122
|
-
|
|
139
|
+
|
|
123
140
|
Args:
|
|
124
141
|
request_data: Dictionary containing model, input, and optional kwargs
|
|
125
|
-
|
|
142
|
+
|
|
126
143
|
Returns:
|
|
127
144
|
Dictionary with the agent's response
|
|
128
145
|
"""
|
|
@@ -133,12 +150,12 @@ class ResponsesHandler:
|
|
|
133
150
|
agent_kwargs = request_data.get("agent_kwargs", {})
|
|
134
151
|
computer_kwargs = request_data.get("computer_kwargs", {})
|
|
135
152
|
env_overrides = request_data.get("env", {}) or {}
|
|
136
|
-
|
|
153
|
+
|
|
137
154
|
if not model:
|
|
138
155
|
raise ValueError("Model is required")
|
|
139
156
|
if not input_data:
|
|
140
157
|
raise ValueError("Input is required")
|
|
141
|
-
|
|
158
|
+
|
|
142
159
|
# Apply env overrides for the duration of this request
|
|
143
160
|
with self._env_overrides(env_overrides):
|
|
144
161
|
# Set up (and possibly reuse) computer and agent via caches
|
|
@@ -155,28 +172,22 @@ class ResponsesHandler:
|
|
|
155
172
|
# Run agent and get first result
|
|
156
173
|
async for result in agent.run(messages):
|
|
157
174
|
# Return the first result and break
|
|
158
|
-
return {
|
|
159
|
-
|
|
160
|
-
"result": result,
|
|
161
|
-
"model": model
|
|
162
|
-
}
|
|
163
|
-
|
|
175
|
+
return {"success": True, "result": result, "model": model}
|
|
176
|
+
|
|
164
177
|
# If no results were yielded
|
|
165
|
-
return {
|
|
166
|
-
|
|
167
|
-
"error": "No results from agent",
|
|
168
|
-
"model": model
|
|
169
|
-
}
|
|
170
|
-
|
|
178
|
+
return {"success": False, "error": "No results from agent", "model": model}
|
|
179
|
+
|
|
171
180
|
except Exception as e:
|
|
172
181
|
logger.error(f"Error processing request: {e}")
|
|
173
182
|
return {
|
|
174
183
|
"success": False,
|
|
175
184
|
"error": str(e),
|
|
176
|
-
"model": request_data.get("model", "unknown")
|
|
185
|
+
"model": request_data.get("model", "unknown"),
|
|
177
186
|
}
|
|
178
|
-
|
|
179
|
-
def _convert_input_to_messages(
|
|
187
|
+
|
|
188
|
+
def _convert_input_to_messages(
|
|
189
|
+
self, input_data: Union[str, List[Dict[str, Any]]]
|
|
190
|
+
) -> List[Dict[str, Any]]:
|
|
180
191
|
"""Convert input data to messages format."""
|
|
181
192
|
if isinstance(input_data, str):
|
|
182
193
|
# Simple string input
|
|
@@ -192,22 +203,18 @@ class ResponsesHandler:
|
|
|
192
203
|
if part.get("type") == "input_text":
|
|
193
204
|
content_parts.append({"type": "text", "text": part["text"]})
|
|
194
205
|
elif part.get("type") == "input_image":
|
|
195
|
-
content_parts.append(
|
|
196
|
-
"type": "image_url",
|
|
197
|
-
|
|
198
|
-
})
|
|
206
|
+
content_parts.append(
|
|
207
|
+
{"type": "image_url", "image_url": {"url": part["image_url"]}}
|
|
208
|
+
)
|
|
199
209
|
else:
|
|
200
210
|
content_parts.append(part)
|
|
201
|
-
messages.append({
|
|
202
|
-
"role": msg["role"],
|
|
203
|
-
"content": content_parts
|
|
204
|
-
})
|
|
211
|
+
messages.append({"role": msg["role"], "content": content_parts})
|
|
205
212
|
else:
|
|
206
213
|
messages.append(msg)
|
|
207
214
|
return messages
|
|
208
215
|
else:
|
|
209
216
|
raise ValueError("Input must be string or list of messages")
|
|
210
|
-
|
|
217
|
+
|
|
211
218
|
async def cleanup(self):
|
|
212
219
|
"""Clean up resources."""
|
|
213
220
|
if self.computer:
|