cua-agent: cua_agent-0.4.34-py3-none-any.whl → cua_agent-0.4.36-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (61):
  1. agent/__init__.py +4 -10
  2. agent/__main__.py +2 -1
  3. agent/adapters/huggingfacelocal_adapter.py +54 -61
  4. agent/adapters/human_adapter.py +116 -114
  5. agent/adapters/mlxvlm_adapter.py +110 -99
  6. agent/adapters/models/__init__.py +14 -6
  7. agent/adapters/models/generic.py +7 -4
  8. agent/adapters/models/internvl.py +66 -30
  9. agent/adapters/models/opencua.py +23 -8
  10. agent/adapters/models/qwen2_5_vl.py +7 -4
  11. agent/agent.py +184 -158
  12. agent/callbacks/__init__.py +4 -4
  13. agent/callbacks/base.py +45 -31
  14. agent/callbacks/budget_manager.py +22 -10
  15. agent/callbacks/image_retention.py +18 -13
  16. agent/callbacks/logging.py +55 -42
  17. agent/callbacks/operator_validator.py +3 -1
  18. agent/callbacks/pii_anonymization.py +19 -16
  19. agent/callbacks/telemetry.py +67 -61
  20. agent/callbacks/trajectory_saver.py +90 -70
  21. agent/cli.py +115 -110
  22. agent/computers/__init__.py +13 -8
  23. agent/computers/base.py +32 -19
  24. agent/computers/cua.py +33 -25
  25. agent/computers/custom.py +78 -71
  26. agent/decorators.py +23 -14
  27. agent/human_tool/__init__.py +2 -7
  28. agent/human_tool/__main__.py +6 -2
  29. agent/human_tool/server.py +48 -37
  30. agent/human_tool/ui.py +235 -185
  31. agent/integrations/hud/__init__.py +15 -21
  32. agent/integrations/hud/agent.py +101 -83
  33. agent/integrations/hud/proxy.py +90 -57
  34. agent/loops/__init__.py +25 -21
  35. agent/loops/anthropic.py +537 -483
  36. agent/loops/base.py +13 -14
  37. agent/loops/composed_grounded.py +135 -149
  38. agent/loops/gemini.py +31 -12
  39. agent/loops/glm45v.py +135 -133
  40. agent/loops/gta1.py +47 -50
  41. agent/loops/holo.py +4 -2
  42. agent/loops/internvl.py +6 -11
  43. agent/loops/moondream3.py +36 -12
  44. agent/loops/omniparser.py +215 -210
  45. agent/loops/openai.py +49 -50
  46. agent/loops/opencua.py +29 -41
  47. agent/loops/qwen.py +510 -0
  48. agent/loops/uitars.py +237 -202
  49. agent/proxy/examples.py +54 -50
  50. agent/proxy/handlers.py +27 -34
  51. agent/responses.py +330 -330
  52. agent/types.py +11 -5
  53. agent/ui/__init__.py +1 -1
  54. agent/ui/__main__.py +1 -1
  55. agent/ui/gradio/app.py +23 -18
  56. agent/ui/gradio/ui_components.py +310 -161
  57. {cua_agent-0.4.34.dist-info → cua_agent-0.4.36.dist-info}/METADATA +18 -10
  58. cua_agent-0.4.36.dist-info/RECORD +64 -0
  59. cua_agent-0.4.34.dist-info/RECORD +0 -63
  60. {cua_agent-0.4.34.dist-info → cua_agent-0.4.36.dist-info}/WHEEL +0 -0
  61. {cua_agent-0.4.34.dist-info → cua_agent-0.4.36.dist-info}/entry_points.txt +0 -0
agent/types.py CHANGED
@@ -2,37 +2,43 @@
2
2
  Type definitions for agent
3
3
  """
4
4
 
5
- from typing import Dict, List, Any, Optional, Callable, Protocol, Literal
6
- from pydantic import BaseModel
7
5
  import re
8
- from litellm import ResponseInputParam, ResponsesAPIResponse, ToolParam
9
6
  from collections.abc import Iterable
7
+ from typing import Any, Callable, Dict, List, Literal, Optional, Protocol
8
+
9
+ from litellm import ResponseInputParam, ResponsesAPIResponse, ToolParam
10
+ from pydantic import BaseModel
10
11
 
11
12
  # Agent input types
12
13
  Messages = str | ResponseInputParam | List[Dict[str, Any]]
13
14
  Tools = Optional[Iterable[ToolParam]]
14
15
 
15
16
  # Agent output types
16
- AgentResponse = ResponsesAPIResponse
17
+ AgentResponse = ResponsesAPIResponse
17
18
  AgentCapability = Literal["step", "click"]
18
19
 
20
+
19
21
  # Exception types
20
22
  class ToolError(RuntimeError):
21
23
  """Base exception for tool-related errors"""
24
+
22
25
  pass
23
26
 
27
+
24
28
  class IllegalArgumentError(ToolError):
25
29
  """Exception raised when function arguments are invalid"""
30
+
26
31
  pass
27
32
 
28
33
 
29
34
  # Agent config registration
30
35
  class AgentConfigInfo(BaseModel):
31
36
  """Information about a registered agent config"""
37
+
32
38
  agent_class: type
33
39
  models_regex: str
34
40
  priority: int = 0
35
-
41
+
36
42
  def matches_model(self, model: str) -> bool:
37
43
  """Check if this agent config matches the given model"""
38
44
  return bool(re.match(self.models_regex, model))
agent/ui/__init__.py CHANGED
@@ -2,6 +2,6 @@
2
2
  UI components for agent
3
3
  """
4
4
 
5
- from .gradio import launch_ui, create_gradio_ui
5
+ from .gradio import create_gradio_ui, launch_ui
6
6
 
7
7
  __all__ = ["launch_ui", "create_gradio_ui"]
agent/ui/__main__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  from .gradio import launch_ui
2
2
 
3
3
  if __name__ == "__main__":
4
- launch_ui()
4
+ launch_ui()
agent/ui/gradio/app.py CHANGED
@@ -18,21 +18,21 @@ Requirements:
18
18
  - OpenAI or Anthropic API key
19
19
  """
20
20
 
21
- import os
22
21
  import asyncio
23
- import logging
24
22
  import json
23
+ import logging
24
+ import os
25
25
  import platform
26
26
  from pathlib import Path
27
- from typing import Dict, List, Optional, AsyncGenerator, Any, Tuple, Union
27
+ from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union, cast
28
+
28
29
  import gradio as gr
29
- from gradio.components.chatbot import MetadataDict
30
- from typing import cast
31
30
 
32
31
  # Import from agent package
33
32
  from agent import ComputerAgent
34
- from agent.types import Messages, AgentResponse
33
+ from agent.types import AgentResponse, Messages
35
34
  from computer import Computer
35
+ from gradio.components.chatbot import MetadataDict
36
36
 
37
37
  # Global variables
38
38
  global_agent = None
@@ -42,11 +42,13 @@ SETTINGS_FILE = Path(".gradio_settings.json")
42
42
  logging.basicConfig(level=logging.INFO)
43
43
 
44
44
  import dotenv
45
+
45
46
  if dotenv.load_dotenv():
46
47
  print(f"DEBUG - Loaded environment variables from {dotenv.find_dotenv()}")
47
48
  else:
48
49
  print("DEBUG - No .env file found")
49
50
 
51
+
50
52
  # --- Settings Load/Save Functions ---
51
53
  def load_settings() -> Dict[str, Any]:
52
54
  """Loads settings from the JSON file."""
@@ -84,7 +86,7 @@ def save_settings(settings: Dict[str, Any]):
84
86
  # async def on_screenshot(self, screenshot_base64: str, action_type: str = "") -> None:
85
87
  # """Add screenshot to chatbot when a screenshot is taken."""
86
88
  # image_markdown = f"![Screenshot after {action_type}](data:image/png;base64,{screenshot_base64})"
87
-
89
+
88
90
  # if self.chatbot_history is not None:
89
91
  # self.chatbot_history.append(
90
92
  # gr.ChatMessage(
@@ -141,7 +143,7 @@ def get_model_string(model_name: str, loop_provider: str) -> str:
141
143
  ollama_model = model_name.split("OMNI: Ollama ", 1)[1]
142
144
  return f"omniparser+ollama_chat/{ollama_model}"
143
145
  return "omniparser+ollama_chat/llama3"
144
-
146
+
145
147
  # Map based on loop provider
146
148
  mapping = MODEL_MAPPINGS.get(loop_provider.lower(), MODEL_MAPPINGS["openai"])
147
149
  return mapping.get(model_name, mapping["default"])
@@ -151,6 +153,7 @@ def get_ollama_models() -> List[str]:
151
153
  """Get available models from Ollama if installed."""
152
154
  try:
153
155
  import subprocess
156
+
154
157
  result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
155
158
  if result.returncode == 0:
156
159
  lines = result.stdout.strip().split("\n")
@@ -174,16 +177,14 @@ def create_computer_instance(
174
177
  os_type: str = "macos",
175
178
  provider_type: str = "lume",
176
179
  name: Optional[str] = None,
177
- api_key: Optional[str] = None
180
+ api_key: Optional[str] = None,
178
181
  ) -> Computer:
179
182
  """Create or get the global Computer instance."""
180
183
  global global_computer
181
184
  if global_computer is None:
182
185
  if provider_type == "localhost":
183
186
  global_computer = Computer(
184
- verbosity=verbosity,
185
- os_type=os_type,
186
- use_host_computer_server=True
187
+ verbosity=verbosity, os_type=os_type, use_host_computer_server=True
187
188
  )
188
189
  else:
189
190
  global_computer = Computer(
@@ -191,7 +192,7 @@ def create_computer_instance(
191
192
  os_type=os_type,
192
193
  provider_type=provider_type,
193
194
  name=name if name else "",
194
- api_key=api_key
195
+ api_key=api_key,
195
196
  )
196
197
  return global_computer
197
198
 
@@ -217,7 +218,7 @@ def create_agent(
217
218
  os_type=computer_os,
218
219
  provider_type=computer_provider,
219
220
  name=computer_name,
220
- api_key=computer_api_key
221
+ api_key=computer_api_key,
221
222
  )
222
223
 
223
224
  # Handle custom models
@@ -233,12 +234,15 @@ def create_agent(
233
234
  "only_n_most_recent_images": only_n_most_recent_images,
234
235
  "verbosity": verbosity,
235
236
  }
236
-
237
+
237
238
  if save_trajectory:
238
239
  agent_kwargs["trajectory_dir"] = "trajectories"
239
-
240
+
240
241
  if max_trajectory_budget:
241
- agent_kwargs["max_trajectory_budget"] = {"max_budget": max_trajectory_budget, "raise_error": True}
242
+ agent_kwargs["max_trajectory_budget"] = {
243
+ "max_budget": max_trajectory_budget,
244
+ "raise_error": True,
245
+ }
242
246
 
243
247
  global_agent = ComputerAgent(**agent_kwargs)
244
248
  return global_agent
@@ -247,7 +251,8 @@ def create_agent(
247
251
  def launch_ui():
248
252
  """Standalone function to launch the Gradio app."""
249
253
  from agent.ui.gradio.ui_components import create_gradio_ui
250
- print(f"Starting Gradio app for CUA Agent...")
254
+
255
+ print("Starting Gradio app for CUA Agent...")
251
256
  demo = create_gradio_ui()
252
257
  demo.launch(share=False, inbrowser=True)
253
258