cua-agent 0.4.14__py3-none-any.whl → 0.7.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (82) hide show
  1. agent/__init__.py +4 -19
  2. agent/__main__.py +2 -1
  3. agent/adapters/__init__.py +6 -0
  4. agent/adapters/azure_ml_adapter.py +283 -0
  5. agent/adapters/cua_adapter.py +161 -0
  6. agent/adapters/huggingfacelocal_adapter.py +67 -125
  7. agent/adapters/human_adapter.py +116 -114
  8. agent/adapters/mlxvlm_adapter.py +370 -0
  9. agent/adapters/models/__init__.py +41 -0
  10. agent/adapters/models/generic.py +78 -0
  11. agent/adapters/models/internvl.py +290 -0
  12. agent/adapters/models/opencua.py +115 -0
  13. agent/adapters/models/qwen2_5_vl.py +78 -0
  14. agent/agent.py +431 -241
  15. agent/callbacks/__init__.py +10 -3
  16. agent/callbacks/base.py +45 -31
  17. agent/callbacks/budget_manager.py +22 -10
  18. agent/callbacks/image_retention.py +54 -98
  19. agent/callbacks/logging.py +55 -42
  20. agent/callbacks/operator_validator.py +140 -0
  21. agent/callbacks/otel.py +291 -0
  22. agent/callbacks/pii_anonymization.py +19 -16
  23. agent/callbacks/prompt_instructions.py +47 -0
  24. agent/callbacks/telemetry.py +106 -69
  25. agent/callbacks/trajectory_saver.py +178 -70
  26. agent/cli.py +269 -119
  27. agent/computers/__init__.py +14 -9
  28. agent/computers/base.py +32 -19
  29. agent/computers/cua.py +52 -25
  30. agent/computers/custom.py +78 -71
  31. agent/decorators.py +23 -14
  32. agent/human_tool/__init__.py +2 -7
  33. agent/human_tool/__main__.py +6 -2
  34. agent/human_tool/server.py +48 -37
  35. agent/human_tool/ui.py +359 -235
  36. agent/integrations/hud/__init__.py +164 -74
  37. agent/integrations/hud/agent.py +338 -342
  38. agent/integrations/hud/proxy.py +297 -0
  39. agent/loops/__init__.py +44 -14
  40. agent/loops/anthropic.py +590 -492
  41. agent/loops/base.py +19 -15
  42. agent/loops/composed_grounded.py +142 -144
  43. agent/loops/fara/__init__.py +8 -0
  44. agent/loops/fara/config.py +506 -0
  45. agent/loops/fara/helpers.py +357 -0
  46. agent/loops/fara/schema.py +143 -0
  47. agent/loops/gelato.py +183 -0
  48. agent/loops/gemini.py +935 -0
  49. agent/loops/generic_vlm.py +601 -0
  50. agent/loops/glm45v.py +140 -135
  51. agent/loops/gta1.py +48 -51
  52. agent/loops/holo.py +218 -0
  53. agent/loops/internvl.py +180 -0
  54. agent/loops/moondream3.py +493 -0
  55. agent/loops/omniparser.py +326 -226
  56. agent/loops/openai.py +63 -56
  57. agent/loops/opencua.py +134 -0
  58. agent/loops/uiins.py +175 -0
  59. agent/loops/uitars.py +262 -212
  60. agent/loops/uitars2.py +951 -0
  61. agent/playground/__init__.py +5 -0
  62. agent/playground/server.py +301 -0
  63. agent/proxy/examples.py +196 -0
  64. agent/proxy/handlers.py +255 -0
  65. agent/responses.py +486 -339
  66. agent/tools/__init__.py +24 -0
  67. agent/tools/base.py +253 -0
  68. agent/tools/browser_tool.py +423 -0
  69. agent/types.py +20 -5
  70. agent/ui/__init__.py +1 -1
  71. agent/ui/__main__.py +1 -1
  72. agent/ui/gradio/app.py +25 -22
  73. agent/ui/gradio/ui_components.py +314 -167
  74. cua_agent-0.7.16.dist-info/METADATA +85 -0
  75. cua_agent-0.7.16.dist-info/RECORD +79 -0
  76. {cua_agent-0.4.14.dist-info → cua_agent-0.7.16.dist-info}/WHEEL +1 -1
  77. agent/integrations/hud/adapter.py +0 -121
  78. agent/integrations/hud/computer_handler.py +0 -187
  79. agent/telemetry.py +0 -142
  80. cua_agent-0.4.14.dist-info/METADATA +0 -436
  81. cua_agent-0.4.14.dist-info/RECORD +0 -50
  82. {cua_agent-0.4.14.dist-info → cua_agent-0.7.16.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,255 @@
1
+ """
2
+ Request handlers for the proxy endpoints.
3
+ """
4
+
5
+ import json
6
+ import logging
7
+ import os
8
+ import re
9
+ from contextlib import contextmanager
10
+ from typing import Any, Dict, List, Optional, Union
11
+
12
+ from computer import Computer
13
+
14
+ from ..agent import ComputerAgent
15
+ from ..tools.browser_tool import BrowserTool
16
+
17
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Pattern to detect FARA models (case-insensitive).
# The inline (?i) flag makes the match case-insensitive, and the surrounding
# ".*" lets `FARA_MODEL_PATTERN.match(...)` succeed when "fara" appears anywhere
# in a single-line model string.
FARA_MODEL_PATTERN = re.compile(r"(?i).*fara.*")
21
+
22
+
23
class ResponsesHandler:
    """Handler for /responses endpoint that processes agent requests.

    The handler keeps two in-memory caches so that repeated requests with the
    same configuration reuse the same objects:

    - ``_computer_cache``: computers keyed by the serialized ``computer_kwargs``.
    - ``_agent_cache``: agents keyed by the serialized ``{"model": ..., **agent_kwargs}``.

    ``self.computer`` and ``self.agent`` always point at the instances selected
    by the most recent call to :meth:`setup_computer_agent`.
    """

    # Substrings that mark a configuration key as sensitive. Values stored
    # under such keys are masked before a configuration is written to the log.
    _SENSITIVE_MARKERS = ("key", "token", "secret", "password")

    def __init__(self):
        self.computer = None
        self.agent = None
        # Simple in-memory caches
        self._computer_cache: Dict[str, Any] = {}
        self._agent_cache: Dict[str, Any] = {}

    @staticmethod
    def _stable_key(obj: Dict[str, Any]) -> str:
        """Serialize *obj* into a deterministic string usable as a cache key.

        Keys are sorted so that dictionaries with the same content produce the
        same key regardless of insertion order. Top-level values that are not
        JSON-serializable are replaced by their ``str()`` representation.
        """
        try:
            return json.dumps(obj, sort_keys=True, separators=(",", ":"))
        except (TypeError, ValueError):
            # Fallback: stringify non-serializable values
            safe_obj: Dict[str, Any] = {}
            for name, value in obj.items():
                try:
                    json.dumps(value)
                except (TypeError, ValueError):
                    safe_obj[name] = str(value)
                else:
                    safe_obj[name] = value
            return json.dumps(safe_obj, sort_keys=True, separators=(",", ":"))

    @staticmethod
    def _redact(config: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of *config* that is safe to log.

        Non-``None`` values whose key contains one of ``_SENSITIVE_MARKERS``
        (case-insensitive) are replaced by ``"***"``.
        """
        markers = ResponsesHandler._SENSITIVE_MARKERS
        redacted: Dict[str, Any] = {}
        for name, value in config.items():
            lowered = str(name).lower()
            is_sensitive = any(marker in lowered for marker in markers)
            redacted[name] = "***" if is_sensitive and value is not None else value
        return redacted

    async def _get_or_create_computer(self, computer_kwargs: Dict[str, Any]) -> Any:
        """Return the cached computer for *computer_kwargs*, creating it if needed.

        A new computer is built from the default configuration overlaid with
        *computer_kwargs*, entered via ``__aenter__`` and stored in the cache.
        """
        comp_key = self._stable_key(computer_kwargs)

        cached = self._computer_cache.get(comp_key)
        if cached is not None:
            # Never log the raw cache key: it embeds computer_kwargs, which may hold secrets.
            logger.info(
                f"Reusing cached computer for config={self._redact(computer_kwargs)}"
            )
            return cached

        # Default computer configuration
        config: Dict[str, Any] = {
            "os_type": "linux",
            "provider_type": "cloud",
            "name": os.getenv("CUA_CONTAINER_NAME"),
            "api_key": os.getenv("CUA_API_KEY"),
        }
        config.update(computer_kwargs)

        computer = Computer(**config)
        await computer.__aenter__()
        self._computer_cache[comp_key] = computer
        # The configuration contains the API key, so only a redacted copy is logged.
        logger.info(f"Computer created and cached with config={self._redact(config)}")
        return computer

    async def setup_computer_agent(
        self,
        model: str,
        agent_kwargs: Optional[Dict[str, Any]] = None,
        computer_kwargs: Optional[Dict[str, Any]] = None,
    ):
        """Set up (and cache) computer and agent instances.

        Caching keys:
        - Computer cache key: computer_kwargs
        - Agent cache key: {"model": model, **agent_kwargs}

        Args:
            model: Model identifier passed to the agent.
            agent_kwargs: Extra keyword arguments for ``ComputerAgent``. A truthy
                ``"tools"`` entry means custom tools are supplied and no
                computer is created.
            computer_kwargs: Overrides for the default computer configuration.

        The selected instances are bound to ``self.computer`` (``None`` when
        custom tools are supplied) and ``self.agent``. The caller's dictionaries
        are not modified.
        """
        # Work on copies so the caller's dicts (e.g. the parsed request body) are never mutated.
        agent_kwargs = dict(agent_kwargs or {})
        computer_kwargs = dict(computer_kwargs or {})

        # Determine if custom tools are supplied; if so, skip computer setup entirely
        has_custom_tools = bool(agent_kwargs.get("tools"))

        # ---------- Computer setup (with cache) ----------
        computer = None
        if not has_custom_tools:
            computer = await self._get_or_create_computer(computer_kwargs)

        # Bind current computer reference (None if custom tools supplied)
        self.computer = computer

        # ---------- Agent setup (with cache) ----------
        agent_key = self._stable_key({"model": model, **agent_kwargs})

        # Determine the appropriate tool based on model type
        # FARA models require BrowserTool instead of Computer for browser-specific actions
        # (visit_url, web_search, terminate, history_back, etc.)
        is_fara_model = bool(FARA_MODEL_PATTERN.match(model))
        if is_fara_model and computer is not None:
            tool = BrowserTool(interface=computer.interface)
            logger.info(f"Using BrowserTool for FARA model: {model}")
        else:
            tool = computer

        agent = self._agent_cache.get(agent_key)
        if agent is None:
            # Default agent configuration; an explicit "tools" entry in
            # agent_kwargs (even a falsy one) still overrides the default tool.
            agent_config: Dict[str, Any] = {"model": model}
            if not has_custom_tools:
                agent_config["tools"] = [tool]
            agent_config.update(agent_kwargs)
            # JSON-derived kwargs may have loose types; ignore static arg typing here
            agent = ComputerAgent(**agent_config)  # type: ignore[arg-type]
            self._agent_cache[agent_key] = agent
            logger.info(
                f"Agent created and cached for model={model} "
                f"kwargs={self._redact(agent_kwargs)}"
            )
        else:
            # Ensure cached agent uses the current tool (in case object differs)
            # Only update if tools not explicitly provided in agent_kwargs
            if not has_custom_tools:
                try:
                    agent.tools = [tool]
                except Exception as e:
                    # Best effort: keep serving with the cached agent, but make the failure visible.
                    logger.warning(f"Could not rebind tools on cached agent: {e}")
            logger.info(f"Reusing cached agent for model={model}")

        # Bind current agent reference
        self.agent = agent

    async def process_request(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a /responses request and return the result.

        Args:
            request_data: Dictionary containing model, input, and optional kwargs
                ("agent_kwargs", "computer_kwargs", "env")

        Returns:
            Dictionary with the agent's response. On success it holds
            ``success=True``, the first result yielded by the agent and the
            model; on any failure it holds ``success=False`` and an ``error``
            string instead of raising.
        """
        try:
            # Extract request parameters (explicit nulls are treated like missing values)
            model = request_data.get("model")
            input_data = request_data.get("input")
            agent_kwargs = request_data.get("agent_kwargs") or {}
            computer_kwargs = request_data.get("computer_kwargs") or {}
            env_overrides = request_data.get("env") or {}

            if not model:
                raise ValueError("Model is required")
            if not input_data:
                raise ValueError("Input is required")

            # Apply env overrides for the duration of this request.
            # NOTE: os.environ is process-wide and the overrides stay applied
            # across the awaits below, so concurrent requests with different
            # overrides can observe each other's values.
            with self._env_overrides(env_overrides):
                # Set up (and possibly reuse) computer and agent via caches
                await self.setup_computer_agent(model, agent_kwargs, computer_kwargs)

                # Defensive: ensure agent is initialized for type checkers
                agent = self.agent
                if agent is None:
                    raise RuntimeError("Agent failed to initialize")

                # Convert input to messages format
                messages = self._convert_input_to_messages(input_data)

                # Run agent and return the first result only
                async for result in agent.run(messages):
                    return {"success": True, "result": result, "model": model}

                # If no results were yielded
                return {"success": False, "error": "No results from agent", "model": model}

        except Exception as e:
            # Top-level boundary: report the failure to the client, keep the traceback in the log.
            logger.exception(f"Error processing request: {e}")
            return {
                "success": False,
                "error": str(e),
                "model": request_data.get("model", "unknown"),
            }

    def _convert_input_to_messages(
        self, input_data: Union[str, List[Dict[str, Any]]]
    ) -> List[Dict[str, Any]]:
        """Convert input data to messages format.

        A string becomes a single user message. In a list of messages, content
        given as a list of parts is rewritten: ``input_text`` parts become
        ``text`` parts and ``input_image`` parts become ``image_url`` parts.
        Other parts, and messages whose content is not a list, are passed
        through unchanged.

        Raises:
            ValueError: If the input is neither a string nor a list, or if a
                list entry is not a dictionary.
        """
        if isinstance(input_data, str):
            # Simple string input
            return [{"role": "user", "content": input_data}]
        if not isinstance(input_data, list):
            raise ValueError("Input must be string or list of messages")

        messages: List[Dict[str, Any]] = []
        for msg in input_data:
            if not isinstance(msg, dict):
                raise ValueError("Each message in the input list must be a dictionary")

            content = msg.get("content")
            if not isinstance(content, list):
                # Already in messages format
                messages.append(msg)
                continue

            # Convert content array format
            content_parts: List[Any] = []
            for part in content:
                # Parts that are not dictionaries (e.g. plain strings) carry no type tag.
                part_type = part.get("type") if isinstance(part, dict) else None
                if part_type == "input_text":
                    content_parts.append({"type": "text", "text": part["text"]})
                elif part_type == "input_image":
                    content_parts.append(
                        {"type": "image_url", "image_url": {"url": part["image_url"]}}
                    )
                else:
                    content_parts.append(part)
            messages.append({"role": msg["role"], "content": content_parts})
        return messages

    async def cleanup(self):
        """Clean up resources.

        Exits every cached computer (plus the currently bound one if it is not
        in the cache), then clears both caches and the bound references.
        Errors raised while exiting a computer are logged and do not stop the
        cleanup of the remaining ones.
        """
        computers = list(self._computer_cache.values())
        current = self.computer
        if current is not None and all(current is not cached for cached in computers):
            computers.append(current)

        try:
            for computer in computers:
                try:
                    await computer.__aexit__(None, None, None)
                except Exception as e:
                    logger.error(f"Error cleaning up computer: {e}")
        finally:
            # Drop every reference so exited computers and their agents are never reused.
            self._computer_cache.clear()
            self._agent_cache.clear()
            self.computer = None
            self.agent = None

    @staticmethod
    @contextmanager
    def _env_overrides(env: Dict[str, str]):
        """Temporarily apply environment variable overrides for the current process.
        Restores previous values after the context exits.

        Args:
            env: Mapping of env var names to override for this request.
                Values are converted with ``str()`` before being set.
        """
        if not env:
            # No-op context
            yield
            return

        # Previous value per overridden variable; None means "was not set before".
        previous: Dict[str, Optional[str]] = {}
        try:
            for name, value in env.items():
                previous[name] = os.environ.get(name)
                os.environ[name] = str(value)
            yield
        finally:
            for name, old_value in previous.items():
                if old_value is None:
                    os.environ.pop(name, None)
                else:
                    os.environ[name] = old_value