cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (79)
  1. agent/__init__.py +4 -10
  2. agent/__main__.py +2 -1
  3. agent/adapters/__init__.py +4 -0
  4. agent/adapters/azure_ml_adapter.py +283 -0
  5. agent/adapters/cua_adapter.py +161 -0
  6. agent/adapters/huggingfacelocal_adapter.py +67 -125
  7. agent/adapters/human_adapter.py +116 -114
  8. agent/adapters/mlxvlm_adapter.py +110 -99
  9. agent/adapters/models/__init__.py +41 -0
  10. agent/adapters/models/generic.py +78 -0
  11. agent/adapters/models/internvl.py +290 -0
  12. agent/adapters/models/opencua.py +115 -0
  13. agent/adapters/models/qwen2_5_vl.py +78 -0
  14. agent/agent.py +337 -185
  15. agent/callbacks/__init__.py +9 -4
  16. agent/callbacks/base.py +45 -31
  17. agent/callbacks/budget_manager.py +22 -10
  18. agent/callbacks/image_retention.py +54 -98
  19. agent/callbacks/logging.py +55 -42
  20. agent/callbacks/operator_validator.py +35 -33
  21. agent/callbacks/otel.py +291 -0
  22. agent/callbacks/pii_anonymization.py +19 -16
  23. agent/callbacks/prompt_instructions.py +47 -0
  24. agent/callbacks/telemetry.py +99 -61
  25. agent/callbacks/trajectory_saver.py +95 -69
  26. agent/cli.py +269 -119
  27. agent/computers/__init__.py +14 -9
  28. agent/computers/base.py +32 -19
  29. agent/computers/cua.py +52 -25
  30. agent/computers/custom.py +78 -71
  31. agent/decorators.py +23 -14
  32. agent/human_tool/__init__.py +2 -7
  33. agent/human_tool/__main__.py +6 -2
  34. agent/human_tool/server.py +48 -37
  35. agent/human_tool/ui.py +359 -235
  36. agent/integrations/hud/__init__.py +38 -99
  37. agent/integrations/hud/agent.py +369 -0
  38. agent/integrations/hud/proxy.py +166 -52
  39. agent/loops/__init__.py +44 -14
  40. agent/loops/anthropic.py +579 -492
  41. agent/loops/base.py +19 -15
  42. agent/loops/composed_grounded.py +136 -150
  43. agent/loops/fara/__init__.py +8 -0
  44. agent/loops/fara/config.py +506 -0
  45. agent/loops/fara/helpers.py +357 -0
  46. agent/loops/fara/schema.py +143 -0
  47. agent/loops/gelato.py +183 -0
  48. agent/loops/gemini.py +935 -0
  49. agent/loops/generic_vlm.py +601 -0
  50. agent/loops/glm45v.py +140 -135
  51. agent/loops/gta1.py +48 -51
  52. agent/loops/holo.py +218 -0
  53. agent/loops/internvl.py +180 -0
  54. agent/loops/moondream3.py +493 -0
  55. agent/loops/omniparser.py +326 -226
  56. agent/loops/openai.py +50 -51
  57. agent/loops/opencua.py +134 -0
  58. agent/loops/uiins.py +175 -0
  59. agent/loops/uitars.py +247 -206
  60. agent/loops/uitars2.py +951 -0
  61. agent/playground/__init__.py +5 -0
  62. agent/playground/server.py +301 -0
  63. agent/proxy/examples.py +61 -57
  64. agent/proxy/handlers.py +46 -39
  65. agent/responses.py +447 -347
  66. agent/tools/__init__.py +24 -0
  67. agent/tools/base.py +253 -0
  68. agent/tools/browser_tool.py +423 -0
  69. agent/types.py +11 -5
  70. agent/ui/__init__.py +1 -1
  71. agent/ui/__main__.py +1 -1
  72. agent/ui/gradio/app.py +25 -22
  73. agent/ui/gradio/ui_components.py +314 -167
  74. cua_agent-0.7.16.dist-info/METADATA +85 -0
  75. cua_agent-0.7.16.dist-info/RECORD +79 -0
  76. {cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/WHEEL +1 -1
  77. cua_agent-0.4.22.dist-info/METADATA +0 -436
  78. cua_agent-0.4.22.dist-info/RECORD +0 -51
  79. {cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/entry_points.txt +0 -0
agent/proxy/handlers.py CHANGED
@@ -2,29 +2,34 @@
2
2
  Request handlers for the proxy endpoints.
3
3
  """
4
4
 
5
- import asyncio
6
5
  import json
7
6
  import logging
8
7
  import os
8
+ import re
9
9
  from contextlib import contextmanager
10
- from typing import Dict, Any, List, Union, Optional
10
+ from typing import Any, Dict, List, Optional, Union
11
11
 
12
- from ..agent import ComputerAgent
13
12
  from computer import Computer
14
13
 
14
+ from ..agent import ComputerAgent
15
+ from ..tools.browser_tool import BrowserTool
16
+
15
17
  logger = logging.getLogger(__name__)
16
18
 
19
+ # Pattern to detect FARA models (case-insensitive)
20
+ FARA_MODEL_PATTERN = re.compile(r"(?i).*fara.*")
21
+
17
22
 
18
23
  class ResponsesHandler:
19
24
  """Handler for /responses endpoint that processes agent requests."""
20
-
25
+
21
26
  def __init__(self):
22
27
  self.computer = None
23
28
  self.agent = None
24
29
  # Simple in-memory caches
25
30
  self._computer_cache: Dict[str, Any] = {}
26
31
  self._agent_cache: Dict[str, Any] = {}
27
-
32
+
28
33
  async def setup_computer_agent(
29
34
  self,
30
35
  model: str,
@@ -75,7 +80,9 @@ class ResponsesHandler:
75
80
  computer = Computer(**default_c_config)
76
81
  await computer.__aenter__()
77
82
  self._computer_cache[comp_key] = computer
78
- logger.info(f"Computer created and cached with key={comp_key} config={default_c_config}")
83
+ logger.info(
84
+ f"Computer created and cached with key={comp_key} config={default_c_config}"
85
+ )
79
86
  else:
80
87
  logger.info(f"Reusing cached computer for key={comp_key}")
81
88
 
@@ -88,41 +95,51 @@ class ResponsesHandler:
88
95
  agent_key_payload = {"model": model, **agent_kwargs_for_key}
89
96
  agent_key = _stable_key(agent_key_payload)
90
97
 
98
+ # Determine the appropriate tool based on model type
99
+ # FARA models require BrowserTool instead of Computer for browser-specific actions
100
+ # (visit_url, web_search, terminate, history_back, etc.)
101
+ is_fara_model = bool(FARA_MODEL_PATTERN.match(model))
102
+ if is_fara_model and computer is not None:
103
+ tool = BrowserTool(interface=computer.interface)
104
+ logger.info(f"Using BrowserTool for FARA model: {model}")
105
+ else:
106
+ tool = computer
107
+
91
108
  agent = self._agent_cache.get(agent_key)
92
109
  if agent is None:
93
110
  # Default agent configuration
94
111
  default_a_config: Dict[str, Any] = {"model": model}
95
112
  if not has_custom_tools:
96
- default_a_config["tools"] = [computer]
113
+ default_a_config["tools"] = [tool]
97
114
  # Apply user overrides, but keep tools unless user explicitly sets
98
115
  if agent_kwargs:
99
116
  if not has_custom_tools:
100
- agent_kwargs.setdefault("tools", [computer])
117
+ agent_kwargs.setdefault("tools", [tool])
101
118
  default_a_config.update(agent_kwargs)
102
119
  # JSON-derived kwargs may have loose types; ignore static arg typing here
103
120
  agent = ComputerAgent(**default_a_config) # type: ignore[arg-type]
104
121
  self._agent_cache[agent_key] = agent
105
122
  logger.info(f"Agent created and cached with key={agent_key} model={model}")
106
123
  else:
107
- # Ensure cached agent uses the current computer tool (in case object differs)
124
+ # Ensure cached agent uses the current tool (in case object differs)
108
125
  # Only update if tools not explicitly provided in agent_kwargs
109
126
  if not has_custom_tools:
110
127
  try:
111
- agent.tools = [computer]
128
+ agent.tools = [tool]
112
129
  except Exception:
113
130
  pass
114
131
  logger.info(f"Reusing cached agent for key={agent_key}")
115
132
 
116
133
  # Bind current agent reference
117
134
  self.agent = agent
118
-
135
+
119
136
  async def process_request(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
120
137
  """
121
138
  Process a /responses request and return the result.
122
-
139
+
123
140
  Args:
124
141
  request_data: Dictionary containing model, input, and optional kwargs
125
-
142
+
126
143
  Returns:
127
144
  Dictionary with the agent's response
128
145
  """
@@ -133,12 +150,12 @@ class ResponsesHandler:
133
150
  agent_kwargs = request_data.get("agent_kwargs", {})
134
151
  computer_kwargs = request_data.get("computer_kwargs", {})
135
152
  env_overrides = request_data.get("env", {}) or {}
136
-
153
+
137
154
  if not model:
138
155
  raise ValueError("Model is required")
139
156
  if not input_data:
140
157
  raise ValueError("Input is required")
141
-
158
+
142
159
  # Apply env overrides for the duration of this request
143
160
  with self._env_overrides(env_overrides):
144
161
  # Set up (and possibly reuse) computer and agent via caches
@@ -155,28 +172,22 @@ class ResponsesHandler:
155
172
  # Run agent and get first result
156
173
  async for result in agent.run(messages):
157
174
  # Return the first result and break
158
- return {
159
- "success": True,
160
- "result": result,
161
- "model": model
162
- }
163
-
175
+ return {"success": True, "result": result, "model": model}
176
+
164
177
  # If no results were yielded
165
- return {
166
- "success": False,
167
- "error": "No results from agent",
168
- "model": model
169
- }
170
-
178
+ return {"success": False, "error": "No results from agent", "model": model}
179
+
171
180
  except Exception as e:
172
181
  logger.error(f"Error processing request: {e}")
173
182
  return {
174
183
  "success": False,
175
184
  "error": str(e),
176
- "model": request_data.get("model", "unknown")
185
+ "model": request_data.get("model", "unknown"),
177
186
  }
178
-
179
- def _convert_input_to_messages(self, input_data: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
187
+
188
+ def _convert_input_to_messages(
189
+ self, input_data: Union[str, List[Dict[str, Any]]]
190
+ ) -> List[Dict[str, Any]]:
180
191
  """Convert input data to messages format."""
181
192
  if isinstance(input_data, str):
182
193
  # Simple string input
@@ -192,22 +203,18 @@ class ResponsesHandler:
192
203
  if part.get("type") == "input_text":
193
204
  content_parts.append({"type": "text", "text": part["text"]})
194
205
  elif part.get("type") == "input_image":
195
- content_parts.append({
196
- "type": "image_url",
197
- "image_url": {"url": part["image_url"]}
198
- })
206
+ content_parts.append(
207
+ {"type": "image_url", "image_url": {"url": part["image_url"]}}
208
+ )
199
209
  else:
200
210
  content_parts.append(part)
201
- messages.append({
202
- "role": msg["role"],
203
- "content": content_parts
204
- })
211
+ messages.append({"role": msg["role"], "content": content_parts})
205
212
  else:
206
213
  messages.append(msg)
207
214
  return messages
208
215
  else:
209
216
  raise ValueError("Input must be string or list of messages")
210
-
217
+
211
218
  async def cleanup(self):
212
219
  """Clean up resources."""
213
220
  if self.computer: