cua-agent 0.4.14 (py3-none-any.whl) → 0.7.16 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (82)
  1. agent/__init__.py +4 -19
  2. agent/__main__.py +2 -1
  3. agent/adapters/__init__.py +6 -0
  4. agent/adapters/azure_ml_adapter.py +283 -0
  5. agent/adapters/cua_adapter.py +161 -0
  6. agent/adapters/huggingfacelocal_adapter.py +67 -125
  7. agent/adapters/human_adapter.py +116 -114
  8. agent/adapters/mlxvlm_adapter.py +370 -0
  9. agent/adapters/models/__init__.py +41 -0
  10. agent/adapters/models/generic.py +78 -0
  11. agent/adapters/models/internvl.py +290 -0
  12. agent/adapters/models/opencua.py +115 -0
  13. agent/adapters/models/qwen2_5_vl.py +78 -0
  14. agent/agent.py +431 -241
  15. agent/callbacks/__init__.py +10 -3
  16. agent/callbacks/base.py +45 -31
  17. agent/callbacks/budget_manager.py +22 -10
  18. agent/callbacks/image_retention.py +54 -98
  19. agent/callbacks/logging.py +55 -42
  20. agent/callbacks/operator_validator.py +140 -0
  21. agent/callbacks/otel.py +291 -0
  22. agent/callbacks/pii_anonymization.py +19 -16
  23. agent/callbacks/prompt_instructions.py +47 -0
  24. agent/callbacks/telemetry.py +106 -69
  25. agent/callbacks/trajectory_saver.py +178 -70
  26. agent/cli.py +269 -119
  27. agent/computers/__init__.py +14 -9
  28. agent/computers/base.py +32 -19
  29. agent/computers/cua.py +52 -25
  30. agent/computers/custom.py +78 -71
  31. agent/decorators.py +23 -14
  32. agent/human_tool/__init__.py +2 -7
  33. agent/human_tool/__main__.py +6 -2
  34. agent/human_tool/server.py +48 -37
  35. agent/human_tool/ui.py +359 -235
  36. agent/integrations/hud/__init__.py +164 -74
  37. agent/integrations/hud/agent.py +338 -342
  38. agent/integrations/hud/proxy.py +297 -0
  39. agent/loops/__init__.py +44 -14
  40. agent/loops/anthropic.py +590 -492
  41. agent/loops/base.py +19 -15
  42. agent/loops/composed_grounded.py +142 -144
  43. agent/loops/fara/__init__.py +8 -0
  44. agent/loops/fara/config.py +506 -0
  45. agent/loops/fara/helpers.py +357 -0
  46. agent/loops/fara/schema.py +143 -0
  47. agent/loops/gelato.py +183 -0
  48. agent/loops/gemini.py +935 -0
  49. agent/loops/generic_vlm.py +601 -0
  50. agent/loops/glm45v.py +140 -135
  51. agent/loops/gta1.py +48 -51
  52. agent/loops/holo.py +218 -0
  53. agent/loops/internvl.py +180 -0
  54. agent/loops/moondream3.py +493 -0
  55. agent/loops/omniparser.py +326 -226
  56. agent/loops/openai.py +63 -56
  57. agent/loops/opencua.py +134 -0
  58. agent/loops/uiins.py +175 -0
  59. agent/loops/uitars.py +262 -212
  60. agent/loops/uitars2.py +951 -0
  61. agent/playground/__init__.py +5 -0
  62. agent/playground/server.py +301 -0
  63. agent/proxy/examples.py +196 -0
  64. agent/proxy/handlers.py +255 -0
  65. agent/responses.py +486 -339
  66. agent/tools/__init__.py +24 -0
  67. agent/tools/base.py +253 -0
  68. agent/tools/browser_tool.py +423 -0
  69. agent/types.py +20 -5
  70. agent/ui/__init__.py +1 -1
  71. agent/ui/__main__.py +1 -1
  72. agent/ui/gradio/app.py +25 -22
  73. agent/ui/gradio/ui_components.py +314 -167
  74. cua_agent-0.7.16.dist-info/METADATA +85 -0
  75. cua_agent-0.7.16.dist-info/RECORD +79 -0
  76. {cua_agent-0.4.14.dist-info → cua_agent-0.7.16.dist-info}/WHEEL +1 -1
  77. agent/integrations/hud/adapter.py +0 -121
  78. agent/integrations/hud/computer_handler.py +0 -187
  79. agent/telemetry.py +0 -142
  80. cua_agent-0.4.14.dist-info/METADATA +0 -436
  81. cua_agent-0.4.14.dist-info/RECORD +0 -50
  82. {cua_agent-0.4.14.dist-info → cua_agent-0.7.16.dist-info}/entry_points.txt +0 -0
@@ -1,77 +1,167 @@
1
- """HUD integration for ComputerAgent."""
2
-
3
- import logging
4
- from typing import Any, Optional, Dict
5
- from hud import run_job as hud_run_job
6
-
7
- from .agent import ComputerAgent
8
- from .adapter import ComputerAgentAdapter
9
- from .computer_handler import HUDComputerHandler
10
-
11
-
12
- async def run_job(
13
- model: str,
14
- task_or_taskset: Any,
15
- job_name: str,
16
- # Job kwargs
17
- auto_reply_question: bool = False,
18
- adapter_cls: Any = None,
19
- adapter_kwargs: Optional[Dict[str, Any]] = None,
20
- max_steps_per_task: int = 20,
21
- run_parallel: bool = True,
22
- job_metadata: Optional[Dict[str, Any]] = None,
23
- show_progress: bool = True,
24
- max_concurrent_env_creations: Optional[int] = 30, # Limits gym.make calls
25
- max_concurrent_agent_predictions: Optional[int] = None, # No limit on LLM calls
26
- max_concurrent_tasks: Optional[int] = 30, # Limits overall task concurrency
27
- **agent_kwargs: Any
28
- ) -> Any:
29
- """
30
- Run a job using ComputerAgent with the specified model.
31
-
32
- Args:
33
- model: Model string for ComputerAgent (e.g., "anthropic/claude-3-5-sonnet-20241022")
34
- task_or_taskset: Task or TaskSet to run
35
- job_name: Name for the job
36
- auto_reply_question: Whether to auto-reply to questions
37
- adapter_cls: Custom adapter class (defaults to ComputerAgentAdapter)
38
- adapter_kwargs: Additional kwargs for the adapter
39
- max_steps_per_task: Maximum steps per task
40
- run_parallel: Whether to run tasks in parallel
41
- job_metadata: Additional metadata for the job
42
- show_progress: Whether to show progress
43
- max_concurrent_env_creations: Max concurrent environment creations
44
- max_concurrent_agent_predictions: Max concurrent agent predictions
45
- max_concurrent_tasks: Max concurrent tasks
46
- **agent_kwargs: Additional kwargs to pass to ComputerAgent
47
-
48
- Returns:
49
- Job instance from HUD
50
- """
51
- # combine verbose and verbosity kwargs
52
- if "verbose" in agent_kwargs:
53
- agent_kwargs["verbosity"] = logging.INFO
54
- del agent_kwargs["verbose"]
55
- verbose = True if agent_kwargs.get("verbosity", logging.WARNING) > logging.INFO else False
56
-
57
- # run job
58
- return await hud_run_job(
59
- agent_cls=ComputerAgent,
60
- agent_kwargs={"model": model, **agent_kwargs},
61
- task_or_taskset=task_or_taskset,
62
- job_name=job_name,
63
- auto_reply_question=auto_reply_question,
64
- adapter_cls=adapter_cls,
65
- adapter_kwargs=adapter_kwargs,
66
- max_steps_per_task=max_steps_per_task,
67
- run_parallel=run_parallel,
68
- job_metadata=job_metadata,
69
- show_progress=show_progress,
70
- verbose=verbose,
71
- max_concurrent_env_creations=max_concurrent_env_creations,
72
- max_concurrent_agent_predictions=max_concurrent_agent_predictions,
73
- max_concurrent_tasks=max_concurrent_tasks
1
+ """HUD integration: dataset runners and MCP-based computer agent export.
2
+
3
+ This module exposes helpers to evaluate HUD-compatible datasets and exports
4
+ the MCP-compatible computer agent implementation.
5
+
6
+ Exports:
7
+ - run_single_task(dataset, ...)
8
+ - run_full_dataset(dataset, ...)
9
+ - MCPComputerAgent
10
+ """
11
+
12
+ import time
13
+ from typing import Any, Optional
14
+
15
+ from agent.computers import is_agent_computer
16
+ from datasets import Dataset, load_dataset
17
+ from hud import trace
18
+ from hud.datasets import Task, run_dataset
19
+
20
+ from .agent import MCPComputerAgent
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Single-task runner
24
+ # ---------------------------------------------------------------------------
25
+
26
+
27
+ async def run_single_task(
28
+ dataset: str | Dataset | list[dict[str, Any]],
29
+ *,
30
+ task_id: int = 0,
31
+ model: str | None = None,
32
+ allowed_tools: list[str] | None = None,
33
+ # === ComputerAgent kwargs ===
34
+ tools: list[Any] | None = None,
35
+ custom_loop: Any | None = None,
36
+ only_n_most_recent_images: int | None = None,
37
+ callbacks: list[Any] | None = None,
38
+ instructions: str | None = None,
39
+ verbosity: int | None = None,
40
+ trajectory_dir: str | dict | None = None,
41
+ max_retries: int | None = 3,
42
+ screenshot_delay: float | int = 0.5,
43
+ use_prompt_caching: bool | None = False,
44
+ max_trajectory_budget: float | dict | None = None,
45
+ telemetry_enabled: bool | None = True,
46
+ ) -> None:
47
+ """Load one task from the dataset and execute it with MCPComputerAgent."""
48
+
49
+ # Load dataset and pick a sample
50
+ if isinstance(dataset, str):
51
+ dataset = load_dataset(dataset, split="train") # type: ignore[arg-type]
52
+ elif isinstance(dataset, list):
53
+ dataset = dataset
54
+ else:
55
+ dataset = dataset["train"]
56
+
57
+ sample_task = dataset[task_id] # type: ignore[index]
58
+ task_prompt = sample_task.get("prompt", f"Task {sample_task.get('id', 0)}") # type: ignore[attr-defined]
59
+
60
+ # Filter any existing Computer tools
61
+ # The eval framework will add its own Computer tool per task
62
+ if tools:
63
+ tools = [tool for tool in tools if not is_agent_computer(tool)]
64
+
65
+ with trace(name=task_prompt):
66
+ task = Task(**sample_task) # type: ignore[arg-type]
67
+
68
+ agent = MCPComputerAgent(
69
+ model=model or "computer-use-preview",
70
+ allowed_tools=allowed_tools or ["openai_computer"],
71
+ # === ComputerAgent kwargs passthrough ===
72
+ tools=tools,
73
+ custom_loop=custom_loop,
74
+ only_n_most_recent_images=only_n_most_recent_images,
75
+ callbacks=callbacks,
76
+ instructions=instructions,
77
+ verbosity=verbosity,
78
+ trajectory_dir=trajectory_dir,
79
+ max_retries=max_retries,
80
+ screenshot_delay=screenshot_delay,
81
+ use_prompt_caching=use_prompt_caching,
82
+ max_trajectory_budget=max_trajectory_budget,
83
+ telemetry_enabled=telemetry_enabled,
84
+ )
85
+ print(f"Running: {task_prompt}")
86
+ result = await agent.run(task, max_steps=10)
87
+ print(f"✅ Reward: {result.reward}")
88
+
89
+
90
+ # ---------------------------------------------------------------------------
91
+ # Full-dataset runner
92
+ # ---------------------------------------------------------------------------
93
+
94
+
95
+ async def run_full_dataset(
96
+ dataset: str | Dataset | list[dict[str, Any]],
97
+ *,
98
+ job_name: Optional[str] = None,
99
+ model: str | None = None,
100
+ allowed_tools: list[str] | None = None,
101
+ max_concurrent: int = 30,
102
+ max_steps: int = 50,
103
+ split: str = "train",
104
+ trajectory_dir: str | dict | None = None,
105
+ # === ComputerAgent kwargs ===
106
+ tools: list[Any] | None = None,
107
+ custom_loop: Any | None = None,
108
+ only_n_most_recent_images: int | None = 5,
109
+ callbacks: list[Any] | None = None,
110
+ instructions: str | None = None,
111
+ verbosity: int | None = None,
112
+ max_retries: int | None = 3,
113
+ screenshot_delay: float | int = 0.5,
114
+ use_prompt_caching: bool | None = False,
115
+ max_trajectory_budget: float | dict | None = None,
116
+ telemetry_enabled: bool | None = True,
117
+ ) -> list[Any]:
118
+ """Run evaluation across the entire dataset using hud.datasets.run_dataset."""
119
+
120
+ # Run with our MCP-based agent class.
121
+ if isinstance(dataset, str):
122
+ dataset_name = dataset.split("/")[-1]
123
+ job_name = job_name or f"Evaluation {dataset_name}"
124
+ dataset = load_dataset(dataset, split=split) # type: ignore[arg-type]
125
+ else:
126
+ dataset_name = "custom"
127
+ job_name = job_name or f"Evaluation {time.strftime('%H:%M %Y-%m-%d')}"
128
+
129
+ # Filter any existing Computer tools
130
+ # The eval framework will add its own Computer tool per task
131
+ if tools:
132
+ tools = [tool for tool in tools if not is_agent_computer(tool)]
133
+
134
+ # Execute evaluation
135
+ return await run_dataset(
136
+ name=job_name,
137
+ dataset=dataset,
138
+ agent_class=MCPComputerAgent,
139
+ agent_config={
140
+ "model": model,
141
+ "allowed_tools": allowed_tools,
142
+ "trajectory_dir": trajectory_dir,
143
+ # === ComputerAgent kwargs passthrough ===
144
+ "tools": tools,
145
+ "custom_loop": custom_loop,
146
+ "only_n_most_recent_images": only_n_most_recent_images,
147
+ "callbacks": callbacks,
148
+ "instructions": instructions,
149
+ "verbosity": verbosity,
150
+ "max_retries": max_retries,
151
+ "screenshot_delay": screenshot_delay,
152
+ "use_prompt_caching": use_prompt_caching,
153
+ "max_trajectory_budget": max_trajectory_budget,
154
+ "telemetry_enabled": telemetry_enabled,
155
+ },
156
+ max_concurrent=max_concurrent,
157
+ metadata={"dataset": dataset_name},
158
+ max_steps=max_steps,
159
+ auto_respond=True,
74
160
  )
75
161
 
76
162
 
77
- __all__ = ["ComputerAgent", "ComputerAgentAdapter", "HUDComputerHandler", "run_job"]
163
+ __all__ = [
164
+ "run_single_task",
165
+ "run_full_dataset",
166
+ "MCPComputerAgent",
167
+ ]