hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282) hide show
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +70 -5
  3. hud/agents/base.py +238 -500
  4. hud/agents/claude.py +236 -247
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +264 -0
  7. hud/agents/gemini_cua.py +324 -0
  8. hud/agents/grounded_openai.py +98 -100
  9. hud/agents/misc/integration_test_agent.py +51 -20
  10. hud/agents/misc/response_agent.py +48 -36
  11. hud/agents/openai.py +282 -296
  12. hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
  13. hud/agents/operator.py +199 -0
  14. hud/agents/resolver.py +70 -0
  15. hud/agents/tests/conftest.py +133 -0
  16. hud/agents/tests/test_base.py +300 -622
  17. hud/agents/tests/test_base_runtime.py +233 -0
  18. hud/agents/tests/test_claude.py +381 -214
  19. hud/agents/tests/test_client.py +9 -10
  20. hud/agents/tests/test_gemini.py +369 -0
  21. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  22. hud/agents/tests/test_openai.py +377 -140
  23. hud/agents/tests/test_operator.py +362 -0
  24. hud/agents/tests/test_resolver.py +192 -0
  25. hud/agents/tests/test_run_eval.py +179 -0
  26. hud/agents/types.py +148 -0
  27. hud/cli/__init__.py +493 -546
  28. hud/cli/analyze.py +43 -5
  29. hud/cli/build.py +699 -113
  30. hud/cli/debug.py +8 -5
  31. hud/cli/dev.py +889 -732
  32. hud/cli/eval.py +793 -667
  33. hud/cli/flows/dev.py +167 -0
  34. hud/cli/flows/init.py +191 -0
  35. hud/cli/flows/tasks.py +153 -56
  36. hud/cli/flows/templates.py +151 -0
  37. hud/cli/flows/tests/__init__.py +1 -0
  38. hud/cli/flows/tests/test_dev.py +126 -0
  39. hud/cli/init.py +60 -58
  40. hud/cli/pull.py +1 -1
  41. hud/cli/push.py +38 -13
  42. hud/cli/rft.py +311 -0
  43. hud/cli/rft_status.py +145 -0
  44. hud/cli/tests/test_analyze.py +5 -5
  45. hud/cli/tests/test_analyze_metadata.py +3 -2
  46. hud/cli/tests/test_analyze_module.py +120 -0
  47. hud/cli/tests/test_build.py +110 -8
  48. hud/cli/tests/test_build_failure.py +41 -0
  49. hud/cli/tests/test_build_module.py +50 -0
  50. hud/cli/tests/test_cli_init.py +6 -1
  51. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  52. hud/cli/tests/test_cli_root.py +140 -0
  53. hud/cli/tests/test_convert.py +361 -0
  54. hud/cli/tests/test_debug.py +12 -10
  55. hud/cli/tests/test_dev.py +197 -0
  56. hud/cli/tests/test_eval.py +251 -0
  57. hud/cli/tests/test_eval_bedrock.py +51 -0
  58. hud/cli/tests/test_init.py +124 -0
  59. hud/cli/tests/test_main_module.py +11 -5
  60. hud/cli/tests/test_mcp_server.py +12 -100
  61. hud/cli/tests/test_push.py +1 -1
  62. hud/cli/tests/test_push_happy.py +74 -0
  63. hud/cli/tests/test_push_wrapper.py +23 -0
  64. hud/cli/tests/test_registry.py +1 -1
  65. hud/cli/tests/test_utils.py +1 -1
  66. hud/cli/{rl → utils}/celebrate.py +14 -12
  67. hud/cli/utils/config.py +18 -1
  68. hud/cli/utils/docker.py +130 -4
  69. hud/cli/utils/env_check.py +9 -9
  70. hud/cli/utils/git.py +136 -0
  71. hud/cli/utils/interactive.py +39 -5
  72. hud/cli/utils/metadata.py +70 -1
  73. hud/cli/utils/runner.py +1 -1
  74. hud/cli/utils/server.py +2 -2
  75. hud/cli/utils/source_hash.py +3 -3
  76. hud/cli/utils/tasks.py +4 -1
  77. hud/cli/utils/tests/__init__.py +0 -0
  78. hud/cli/utils/tests/test_config.py +58 -0
  79. hud/cli/utils/tests/test_docker.py +93 -0
  80. hud/cli/utils/tests/test_docker_hints.py +71 -0
  81. hud/cli/utils/tests/test_env_check.py +74 -0
  82. hud/cli/utils/tests/test_environment.py +42 -0
  83. hud/cli/utils/tests/test_git.py +142 -0
  84. hud/cli/utils/tests/test_interactive_module.py +60 -0
  85. hud/cli/utils/tests/test_local_runner.py +50 -0
  86. hud/cli/utils/tests/test_logging_utils.py +23 -0
  87. hud/cli/utils/tests/test_metadata.py +49 -0
  88. hud/cli/utils/tests/test_package_runner.py +35 -0
  89. hud/cli/utils/tests/test_registry_utils.py +49 -0
  90. hud/cli/utils/tests/test_remote_runner.py +25 -0
  91. hud/cli/utils/tests/test_runner_modules.py +52 -0
  92. hud/cli/utils/tests/test_source_hash.py +36 -0
  93. hud/cli/utils/tests/test_tasks.py +80 -0
  94. hud/cli/utils/version_check.py +258 -0
  95. hud/cli/{rl → utils}/viewer.py +2 -2
  96. hud/clients/README.md +12 -11
  97. hud/clients/__init__.py +4 -3
  98. hud/clients/base.py +166 -26
  99. hud/clients/environment.py +51 -0
  100. hud/clients/fastmcp.py +13 -6
  101. hud/clients/mcp_use.py +45 -15
  102. hud/clients/tests/test_analyze_scenarios.py +206 -0
  103. hud/clients/tests/test_protocol.py +9 -3
  104. hud/datasets/__init__.py +23 -20
  105. hud/datasets/loader.py +326 -0
  106. hud/datasets/runner.py +198 -105
  107. hud/datasets/tests/__init__.py +0 -0
  108. hud/datasets/tests/test_loader.py +221 -0
  109. hud/datasets/tests/test_utils.py +315 -0
  110. hud/datasets/utils.py +270 -90
  111. hud/environment/__init__.py +52 -0
  112. hud/environment/connection.py +258 -0
  113. hud/environment/connectors/__init__.py +33 -0
  114. hud/environment/connectors/base.py +68 -0
  115. hud/environment/connectors/local.py +177 -0
  116. hud/environment/connectors/mcp_config.py +137 -0
  117. hud/environment/connectors/openai.py +101 -0
  118. hud/environment/connectors/remote.py +172 -0
  119. hud/environment/environment.py +835 -0
  120. hud/environment/integrations/__init__.py +45 -0
  121. hud/environment/integrations/adk.py +67 -0
  122. hud/environment/integrations/anthropic.py +196 -0
  123. hud/environment/integrations/gemini.py +92 -0
  124. hud/environment/integrations/langchain.py +82 -0
  125. hud/environment/integrations/llamaindex.py +68 -0
  126. hud/environment/integrations/openai.py +238 -0
  127. hud/environment/mock.py +306 -0
  128. hud/environment/router.py +263 -0
  129. hud/environment/scenarios.py +620 -0
  130. hud/environment/tests/__init__.py +1 -0
  131. hud/environment/tests/test_connection.py +317 -0
  132. hud/environment/tests/test_connectors.py +205 -0
  133. hud/environment/tests/test_environment.py +593 -0
  134. hud/environment/tests/test_integrations.py +257 -0
  135. hud/environment/tests/test_local_connectors.py +242 -0
  136. hud/environment/tests/test_scenarios.py +1086 -0
  137. hud/environment/tests/test_tools.py +208 -0
  138. hud/environment/types.py +23 -0
  139. hud/environment/utils/__init__.py +35 -0
  140. hud/environment/utils/formats.py +215 -0
  141. hud/environment/utils/schema.py +171 -0
  142. hud/environment/utils/tool_wrappers.py +113 -0
  143. hud/eval/__init__.py +67 -0
  144. hud/eval/context.py +727 -0
  145. hud/eval/display.py +299 -0
  146. hud/eval/instrument.py +187 -0
  147. hud/eval/manager.py +533 -0
  148. hud/eval/parallel.py +268 -0
  149. hud/eval/task.py +372 -0
  150. hud/eval/tests/__init__.py +1 -0
  151. hud/eval/tests/test_context.py +178 -0
  152. hud/eval/tests/test_eval.py +210 -0
  153. hud/eval/tests/test_manager.py +152 -0
  154. hud/eval/tests/test_parallel.py +168 -0
  155. hud/eval/tests/test_task.py +291 -0
  156. hud/eval/types.py +65 -0
  157. hud/eval/utils.py +194 -0
  158. hud/patches/__init__.py +19 -0
  159. hud/patches/mcp_patches.py +308 -0
  160. hud/patches/warnings.py +54 -0
  161. hud/samples/browser.py +4 -4
  162. hud/server/__init__.py +2 -1
  163. hud/server/low_level.py +2 -1
  164. hud/server/router.py +164 -0
  165. hud/server/server.py +567 -80
  166. hud/server/tests/test_mcp_server_integration.py +11 -11
  167. hud/server/tests/test_mcp_server_more.py +1 -1
  168. hud/server/tests/test_server_extra.py +2 -0
  169. hud/settings.py +45 -3
  170. hud/shared/exceptions.py +36 -10
  171. hud/shared/hints.py +26 -1
  172. hud/shared/requests.py +15 -3
  173. hud/shared/tests/test_exceptions.py +40 -31
  174. hud/shared/tests/test_hints.py +167 -0
  175. hud/telemetry/__init__.py +20 -19
  176. hud/telemetry/exporter.py +201 -0
  177. hud/telemetry/instrument.py +165 -253
  178. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  179. hud/telemetry/tests/test_exporter.py +258 -0
  180. hud/telemetry/tests/test_instrument.py +401 -0
  181. hud/tools/__init__.py +18 -2
  182. hud/tools/agent.py +223 -0
  183. hud/tools/apply_patch.py +639 -0
  184. hud/tools/base.py +54 -4
  185. hud/tools/bash.py +2 -2
  186. hud/tools/computer/__init__.py +36 -3
  187. hud/tools/computer/anthropic.py +2 -2
  188. hud/tools/computer/gemini.py +385 -0
  189. hud/tools/computer/hud.py +23 -6
  190. hud/tools/computer/openai.py +20 -21
  191. hud/tools/computer/qwen.py +434 -0
  192. hud/tools/computer/settings.py +37 -0
  193. hud/tools/edit.py +3 -7
  194. hud/tools/executors/base.py +4 -2
  195. hud/tools/executors/pyautogui.py +1 -1
  196. hud/tools/grounding/grounded_tool.py +13 -18
  197. hud/tools/grounding/grounder.py +10 -31
  198. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  199. hud/tools/jupyter.py +330 -0
  200. hud/tools/playwright.py +18 -3
  201. hud/tools/shell.py +308 -0
  202. hud/tools/tests/test_agent_tool.py +355 -0
  203. hud/tools/tests/test_apply_patch.py +718 -0
  204. hud/tools/tests/test_computer.py +4 -9
  205. hud/tools/tests/test_computer_actions.py +24 -2
  206. hud/tools/tests/test_jupyter_tool.py +181 -0
  207. hud/tools/tests/test_shell.py +596 -0
  208. hud/tools/tests/test_submit.py +85 -0
  209. hud/tools/tests/test_types.py +193 -0
  210. hud/tools/types.py +21 -1
  211. hud/types.py +194 -56
  212. hud/utils/__init__.py +2 -0
  213. hud/utils/env.py +67 -0
  214. hud/utils/hud_console.py +89 -18
  215. hud/utils/mcp.py +15 -58
  216. hud/utils/strict_schema.py +162 -0
  217. hud/utils/tests/test_init.py +1 -2
  218. hud/utils/tests/test_mcp.py +1 -28
  219. hud/utils/tests/test_pretty_errors.py +186 -0
  220. hud/utils/tests/test_tool_shorthand.py +154 -0
  221. hud/utils/tests/test_version.py +1 -1
  222. hud/utils/types.py +20 -0
  223. hud/version.py +1 -1
  224. hud_python-0.5.13.dist-info/METADATA +264 -0
  225. hud_python-0.5.13.dist-info/RECORD +305 -0
  226. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
  227. hud/agents/langchain.py +0 -261
  228. hud/agents/lite_llm.py +0 -72
  229. hud/cli/rl/__init__.py +0 -180
  230. hud/cli/rl/config.py +0 -101
  231. hud/cli/rl/display.py +0 -133
  232. hud/cli/rl/gpu.py +0 -63
  233. hud/cli/rl/gpu_utils.py +0 -321
  234. hud/cli/rl/local_runner.py +0 -595
  235. hud/cli/rl/presets.py +0 -96
  236. hud/cli/rl/remote_runner.py +0 -463
  237. hud/cli/rl/rl_api.py +0 -150
  238. hud/cli/rl/vllm.py +0 -177
  239. hud/cli/rl/wait_utils.py +0 -89
  240. hud/datasets/parallel.py +0 -687
  241. hud/misc/__init__.py +0 -1
  242. hud/misc/claude_plays_pokemon.py +0 -292
  243. hud/otel/__init__.py +0 -35
  244. hud/otel/collector.py +0 -142
  245. hud/otel/config.py +0 -181
  246. hud/otel/context.py +0 -570
  247. hud/otel/exporters.py +0 -369
  248. hud/otel/instrumentation.py +0 -135
  249. hud/otel/processors.py +0 -121
  250. hud/otel/tests/__init__.py +0 -1
  251. hud/otel/tests/test_processors.py +0 -197
  252. hud/rl/README.md +0 -30
  253. hud/rl/__init__.py +0 -1
  254. hud/rl/actor.py +0 -176
  255. hud/rl/buffer.py +0 -405
  256. hud/rl/chat_template.jinja +0 -101
  257. hud/rl/config.py +0 -192
  258. hud/rl/distributed.py +0 -132
  259. hud/rl/learner.py +0 -637
  260. hud/rl/tests/__init__.py +0 -1
  261. hud/rl/tests/test_learner.py +0 -186
  262. hud/rl/train.py +0 -382
  263. hud/rl/types.py +0 -101
  264. hud/rl/utils/start_vllm_server.sh +0 -30
  265. hud/rl/utils.py +0 -524
  266. hud/rl/vllm_adapter.py +0 -143
  267. hud/telemetry/job.py +0 -352
  268. hud/telemetry/replay.py +0 -74
  269. hud/telemetry/tests/test_replay.py +0 -40
  270. hud/telemetry/tests/test_trace.py +0 -63
  271. hud/telemetry/trace.py +0 -158
  272. hud/utils/agent_factories.py +0 -86
  273. hud/utils/async_utils.py +0 -65
  274. hud/utils/group_eval.py +0 -223
  275. hud/utils/progress.py +0 -149
  276. hud/utils/tasks.py +0 -127
  277. hud/utils/tests/test_async_utils.py +0 -173
  278. hud/utils/tests/test_progress.py +0 -261
  279. hud_python-0.4.45.dist-info/METADATA +0 -552
  280. hud_python-0.4.45.dist-info/RECORD +0 -228
  281. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  282. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,193 @@
1
+ from __future__ import annotations
2
+
3
+ import pytest
4
+ from mcp.types import ImageContent, TextContent
5
+
6
+ from hud.tools.types import ContentResult, EvaluationResult, ToolError
7
+
8
+
9
+ def test_evaluation_result_defaults():
10
+ """Test EvaluationResult with default values."""
11
+ result = EvaluationResult()
12
+
13
+ assert result.reward == 0.0
14
+ assert result.done is False
15
+ assert result.content is None
16
+ assert result.info == {}
17
+ assert result.isError is False
18
+
19
+
20
+ def test_evaluation_result_with_values():
21
+ """Test EvaluationResult with custom values."""
22
+ result = EvaluationResult(
23
+ reward=0.95,
24
+ done=True,
25
+ content="Task completed successfully",
26
+ info={"steps": 5},
27
+ isError=False,
28
+ )
29
+
30
+ assert result.reward == 0.95
31
+ assert result.done is True
32
+ assert result.content == "Task completed successfully"
33
+ assert result.info == {"steps": 5}
34
+ assert result.isError is False
35
+
36
+
37
+ def test_content_result_defaults():
38
+ """Test ContentResult with default values."""
39
+ result = ContentResult()
40
+
41
+ assert result.output is None
42
+ assert result.error is None
43
+ assert result.base64_image is None
44
+ assert result.system is None
45
+
46
+
47
+ def test_content_result_with_values():
48
+ """Test ContentResult with custom values."""
49
+ result = ContentResult(
50
+ output="Command executed",
51
+ error="No errors",
52
+ base64_image="base64data",
53
+ system="System message",
54
+ )
55
+
56
+ assert result.output == "Command executed"
57
+ assert result.error == "No errors"
58
+ assert result.base64_image == "base64data"
59
+ assert result.system == "System message"
60
+
61
+
62
+ def test_content_result_add_both_output():
63
+ """Test adding two ContentResults with output."""
64
+ result1 = ContentResult(output="Part 1")
65
+ result2 = ContentResult(output=" Part 2")
66
+
67
+ combined = result1 + result2
68
+
69
+ assert combined.output == "Part 1 Part 2"
70
+ assert combined.error is None
71
+ assert combined.base64_image is None
72
+
73
+
74
+ def test_content_result_add_both_error():
75
+ """Test adding two ContentResults with errors."""
76
+ result1 = ContentResult(error="Error 1")
77
+ result2 = ContentResult(error=" Error 2")
78
+
79
+ combined = result1 + result2
80
+
81
+ assert combined.error == "Error 1 Error 2"
82
+ assert combined.output is None
83
+
84
+
85
+ def test_content_result_add_both_system():
86
+ """Test adding two ContentResults with system messages."""
87
+ result1 = ContentResult(system="System 1")
88
+ result2 = ContentResult(system=" System 2")
89
+
90
+ combined = result1 + result2
91
+
92
+ assert combined.system == "System 1 System 2"
93
+
94
+
95
+ def test_content_result_add_one_sided():
96
+ """Test adding ContentResults where only one has values."""
97
+ result1 = ContentResult(output="Output")
98
+ result2 = ContentResult(error="Error")
99
+
100
+ combined = result1 + result2
101
+
102
+ assert combined.output == "Output"
103
+ assert combined.error == "Error"
104
+
105
+
106
+ def test_content_result_add_images_raises_error():
107
+ """Test that combining two results with images raises an error."""
108
+ result1 = ContentResult(base64_image="image1")
109
+ result2 = ContentResult(base64_image="image2")
110
+
111
+ with pytest.raises(ValueError, match="Cannot combine tool results"):
112
+ _ = result1 + result2
113
+
114
+
115
+ def test_content_result_add_one_image():
116
+ """Test adding ContentResults where only one has an image."""
117
+ result1 = ContentResult(base64_image="image1")
118
+ result2 = ContentResult(output="Output")
119
+
120
+ combined = result1 + result2
121
+
122
+ assert combined.base64_image == "image1"
123
+ assert combined.output == "Output"
124
+
125
+
126
+ def test_content_result_to_content_blocks_output():
127
+ """Test converting ContentResult with output to content blocks."""
128
+ result = ContentResult(output="Test output")
129
+
130
+ blocks = result.to_content_blocks()
131
+
132
+ assert len(blocks) == 1
133
+ assert isinstance(blocks[0], TextContent)
134
+ assert blocks[0].text == "Test output"
135
+
136
+
137
+ def test_content_result_to_content_blocks_error():
138
+ """Test converting ContentResult with error to content blocks."""
139
+ result = ContentResult(error="Test error")
140
+
141
+ blocks = result.to_content_blocks()
142
+
143
+ assert len(blocks) == 1
144
+ assert isinstance(blocks[0], TextContent)
145
+ assert blocks[0].text == "Test error"
146
+
147
+
148
+ def test_content_result_to_content_blocks_image():
149
+ """Test converting ContentResult with image to content blocks."""
150
+ result = ContentResult(base64_image="base64data")
151
+
152
+ blocks = result.to_content_blocks()
153
+
154
+ assert len(blocks) == 1
155
+ assert isinstance(blocks[0], ImageContent)
156
+ assert blocks[0].data == "base64data"
157
+ assert blocks[0].mimeType == "image/png"
158
+
159
+
160
+ def test_content_result_to_content_blocks_all():
161
+ """Test converting ContentResult with all fields to content blocks."""
162
+ result = ContentResult(
163
+ output="Output",
164
+ error="Error",
165
+ base64_image="image",
166
+ )
167
+
168
+ blocks = result.to_content_blocks()
169
+
170
+ assert len(blocks) == 3
171
+ assert isinstance(blocks[0], TextContent)
172
+ assert blocks[0].text == "Output"
173
+ assert isinstance(blocks[1], TextContent)
174
+ assert blocks[1].text == "Error"
175
+ assert isinstance(blocks[2], ImageContent)
176
+ assert blocks[2].data == "image"
177
+
178
+
179
+ def test_content_result_to_content_blocks_empty():
180
+ """Test converting empty ContentResult to content blocks."""
181
+ result = ContentResult()
182
+
183
+ blocks = result.to_content_blocks()
184
+
185
+ assert len(blocks) == 0
186
+
187
+
188
+ def test_tool_error():
189
+ """Test ToolError exception."""
190
+ error = ToolError("Test error message")
191
+
192
+ assert isinstance(error, Exception)
193
+ assert str(error) == "Test error message"
hud/tools/types.py CHANGED
@@ -6,6 +6,18 @@ from mcp.types import ContentBlock, ImageContent, TextContent
6
6
  from pydantic import BaseModel, ConfigDict, Field
7
7
 
8
8
 
9
+ class Coordinate(BaseModel):
10
+ """A coordinate point with x and y values.
11
+
12
+ Used for path-based actions like drag operations.
13
+ """
14
+
15
+ model_config = ConfigDict(extra="forbid")
16
+
17
+ x: int = Field(..., description="X coordinate")
18
+ y: int = Field(..., description="Y coordinate")
19
+
20
+
9
21
  class EvaluationResult(BaseModel):
10
22
  """Standard evaluation result format."""
11
23
 
@@ -28,6 +40,7 @@ class ContentResult(BaseModel):
28
40
  error: str | None = Field(default=None, description="Error message")
29
41
  base64_image: str | None = Field(default=None, description="Base64-encoded image")
30
42
  system: str | None = Field(default=None, description="System message")
43
+ url: str | None = Field(default=None, description="Current page URL (for browser automation)")
31
44
 
32
45
  def __add__(self, other: ContentResult) -> ContentResult:
33
46
  def combine_fields(
@@ -44,6 +57,7 @@ class ContentResult(BaseModel):
44
57
  error=combine_fields(self.error, other.error),
45
58
  base64_image=combine_fields(self.base64_image, other.base64_image, False),
46
59
  system=combine_fields(self.system, other.system),
60
+ url=combine_fields(self.url, other.url, False),
47
61
  )
48
62
 
49
63
  def to_content_blocks(self) -> list[ContentBlock]:
@@ -55,7 +69,7 @@ class ContentResult(BaseModel):
55
69
  result: ContentResult to convert
56
70
 
57
71
  Returns:
58
- List of ContentBlock
72
+ List of ContentBlock with URL embedded as metadata if available
59
73
  """
60
74
  blocks: list[ContentBlock] = []
61
75
 
@@ -65,6 +79,12 @@ class ContentResult(BaseModel):
65
79
  blocks.append(TextContent(text=self.error, type="text"))
66
80
  if self.base64_image:
67
81
  blocks.append(ImageContent(data=self.base64_image, mimeType="image/png", type="image"))
82
+
83
+ # Add URL as a special metadata text block (for Gemini Computer Use)
84
+ # Always include URL if set, even if it's a placeholder like "about:blank"
85
+ if self.url:
86
+ blocks.append(TextContent(text=f"__URL__:{self.url}", type="text"))
87
+
68
88
  return blocks
69
89
 
70
90
 
hud/types.py CHANGED
@@ -1,11 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
- import contextlib
4
3
  import json
5
4
  import logging
6
5
  import uuid
7
- from collections import defaultdict
8
- from string import Template
6
+ from enum import Enum
9
7
  from typing import Any, Literal
10
8
 
11
9
  import mcp.types as types
@@ -13,22 +11,134 @@ from mcp.types import CallToolRequestParams, CallToolResult
13
11
  from pydantic import BaseModel, ConfigDict, Field, field_validator
14
12
 
15
13
  from hud.settings import settings
14
+ from hud.utils.env import resolve_env_vars as _resolve_env_vars
16
15
  from hud.utils.tool_shorthand import normalize_to_tool_call_dict
17
16
 
18
17
  logger = logging.getLogger(__name__)
19
18
 
19
+ # Guard to ensure we only log missing HUD_API_KEY once
20
+ _missing_api_key_error_logged: bool = False
20
21
 
21
- class Task(BaseModel):
22
+
23
+ class AgentType(str, Enum):
24
+ CLAUDE = "claude"
25
+ OPENAI = "openai"
26
+ OPERATOR = "operator"
27
+ GEMINI = "gemini"
28
+ GEMINI_CUA = "gemini_cua"
29
+ OPENAI_COMPATIBLE = "openai_compatible"
30
+ INTEGRATION_TEST = "integration_test"
31
+
32
+ @property
33
+ def cls(self) -> type:
34
+ if self == AgentType.CLAUDE:
35
+ from hud.agents.claude import ClaudeAgent
36
+
37
+ return ClaudeAgent
38
+ elif self == AgentType.OPENAI:
39
+ from hud.agents import OpenAIAgent
40
+
41
+ return OpenAIAgent
42
+ elif self == AgentType.OPERATOR:
43
+ from hud.agents import OperatorAgent
44
+
45
+ return OperatorAgent
46
+ elif self == AgentType.GEMINI:
47
+ from hud.agents.gemini import GeminiAgent
48
+
49
+ return GeminiAgent
50
+ elif self == AgentType.GEMINI_CUA:
51
+ from hud.agents.gemini_cua import GeminiCUAAgent
52
+
53
+ return GeminiCUAAgent
54
+ elif self == AgentType.OPENAI_COMPATIBLE:
55
+ from hud.agents.openai_chat import OpenAIChatAgent
56
+
57
+ return OpenAIChatAgent
58
+ elif self == AgentType.INTEGRATION_TEST:
59
+ from hud.agents.misc.integration_test_agent import IntegrationTestRunner
60
+
61
+ return IntegrationTestRunner
62
+ else:
63
+ raise ValueError(f"Unsupported agent type: {self}")
64
+
65
+ @property
66
+ def config_cls(self) -> type:
67
+ """Get config class without importing agent (avoids SDK dependency)."""
68
+ from hud.agents.types import (
69
+ ClaudeConfig,
70
+ GeminiConfig,
71
+ GeminiCUAConfig,
72
+ OpenAIChatConfig,
73
+ OpenAIConfig,
74
+ OperatorConfig,
75
+ )
76
+
77
+ mapping: dict[AgentType, type] = {
78
+ AgentType.CLAUDE: ClaudeConfig,
79
+ AgentType.OPENAI: OpenAIConfig,
80
+ AgentType.OPERATOR: OperatorConfig,
81
+ AgentType.GEMINI: GeminiConfig,
82
+ AgentType.GEMINI_CUA: GeminiCUAConfig,
83
+ AgentType.OPENAI_COMPATIBLE: OpenAIChatConfig,
84
+ AgentType.INTEGRATION_TEST: BaseAgentConfig,
85
+ }
86
+ if self not in mapping:
87
+ raise ValueError(f"Unsupported agent type for config: {self}")
88
+ return mapping[self]
89
+
90
+
91
+ class BaseAgentConfig(BaseModel):
92
+ """Agent configuration for LLM-specific settings.
93
+
94
+ Note: allowed_tools, disallowed_tools, response_tool_name, append_setup_output,
95
+ and initial_screenshot are kept for backwards compatibility with v4 task configs
96
+ but are no longer applied at the agent level. These should be configured on the
97
+ Environment/Task instead.
98
+ """
99
+
100
+ model_config = ConfigDict(arbitrary_types_allowed=True, extra="forbid", populate_by_name=True)
101
+
102
+ # LLM-specific setting
103
+ system_prompt: str | None = None
104
+
105
+ # Deprecated: kept for backwards compat with v4 task configs
106
+ # allowed_tools/disallowed_tools are applied at Environment level
107
+ # append_setup_output is applied by EvalContext -> agent
108
+ # response_tool_name and initial_screenshot are parsed but NOT implemented
109
+ allowed_tools: list[str] | None = None
110
+ disallowed_tools: list[str] | None = None
111
+ response_tool_name: str | None = None # Not implemented
112
+ append_setup_output: bool = False
113
+ append_setup_tool: bool = False # Alias for append_setup_output
114
+ initial_screenshot: bool = False # Not implemented
115
+
116
+
117
+ class LegacyTask(BaseModel):
22
118
  """
119
+ DEPRECATED: Use Task from env() instead.
120
+
23
121
  A task configuration that can be used to create a task.
24
122
 
25
123
  The mcp_config field supports environment variable substitution using
26
124
  template placeholders in the format ${VAR_NAME} or ${VAR_NAME:default_value}.
27
125
 
28
- Example:
126
+ .. deprecated:: 0.5.0
127
+ LegacyTask is deprecated in v0.5.0 and will be removed in v0.6.0
128
+ (no earlier than March 1st, 2026).
129
+
130
+ Use one of these migration paths:
131
+
132
+ 1. Quick conversion: ``Task.from_v4(legacy_task)`` converts LegacyTask to Task
133
+ 2. Full migration: Use ``@env.scenario()`` with setup code before first yield
134
+ and evaluate code after first yield
135
+
136
+ See https://docs.hud.ai/migration for the full migration guide.
137
+
138
+ Example (deprecated):
29
139
  mcp_config: {
30
140
  "hud": {
31
- "url": "${HUD_MCP_URL:https://mcp.hud.so/v3/mcp}",
141
+ "url": "${HUD_MCP_URL:https://mcp.hud.ai/v3/mcp}",
32
142
  "headers": {
33
143
  "Authorization": "Bearer ${HUD_API_KEY}",
34
144
  "Mcp-Image": "your-mcp-image"
@@ -43,10 +153,23 @@ class Task(BaseModel):
43
153
  setup_tool: MCPToolCall | list[MCPToolCall] | None = None
44
154
  evaluate_tool: MCPToolCall | list[MCPToolCall] | None = None
45
155
  integration_test_tool: MCPToolCall | list[MCPToolCall] | None = None
46
- agent_tools: list[str] | None = None
47
- system_prompt: str | None = None
156
+ agent_config: BaseAgentConfig | None = None
48
157
  metadata: dict[str, Any] = Field(default_factory=dict)
49
158
 
159
+ def __init__(self, **data: Any) -> None:
160
+ """Initialize LegacyTask with deprecation warning."""
161
+ import warnings
162
+
163
+ warnings.warn(
164
+ "LegacyTask is deprecated in v0.5.0 and will be removed in v0.6.0 "
165
+ "(no earlier than March 1st, 2026). "
166
+ "Use Task.from_v4() for quick conversion, or migrate to @env.scenario(). "
167
+ "See https://docs.hud.ai/migration for details.",
168
+ DeprecationWarning,
169
+ stacklevel=2,
170
+ )
171
+ super().__init__(**data)
172
+
50
173
  @field_validator("mcp_config", "metadata", mode="before")
51
174
  @classmethod
52
175
  def parse_json_strings(cls, v: Any) -> Any:
@@ -60,6 +183,25 @@ class Task(BaseModel):
60
183
  raise HudConfigError(f"Invalid JSON string: {e}") from e
61
184
  return v
62
185
 
186
+ @field_validator("agent_config", mode="before")
187
+ @classmethod
188
+ def parse_agent_config(cls, v: Any) -> BaseAgentConfig | None:
189
+ """Parse agent_config into BaseAgentConfig."""
190
+ if v is None:
191
+ return None
192
+ if isinstance(v, BaseAgentConfig):
193
+ return v
194
+ if isinstance(v, str):
195
+ try:
196
+ v = json.loads(v)
197
+ except json.JSONDecodeError as e:
198
+ from hud.shared.exceptions import HudConfigError
199
+
200
+ raise HudConfigError(f"Invalid JSON string for agent_config: {e}") from e
201
+ if isinstance(v, dict):
202
+ return BaseAgentConfig(**v)
203
+ return v
204
+
63
205
  @field_validator("setup_tool", "evaluate_tool", "integration_test_tool", mode="before")
64
206
  @classmethod
65
207
  def convert_dict_to_tool_call(cls, v: Any, info: Any) -> Any:
@@ -98,44 +240,21 @@ class Task(BaseModel):
98
240
  @classmethod
99
241
  def resolve_env_vars(cls, v: dict[str, Any]) -> dict[str, Any]:
100
242
  """
101
- Automatically resolve environment variables in mcp_config using Template.
243
+ Automatically resolve environment variables in mcp_config.
102
244
 
103
245
  Supports ${VAR_NAME} syntax with variable substitution from
104
- System environment variables (including HUD_API_KEY, etc.)
246
+ system environment variables and settings (including HUD_API_KEY, etc.)
105
247
 
106
248
  Missing variables resolve to empty strings.
107
249
  """
108
- import os
109
-
110
- # Start with current environment variables
111
- mapping = dict(os.environ)
112
- # Include settings (from process env, project .env, and user .env)
113
- settings_dict = settings.model_dump()
114
- mapping.update(settings_dict)
115
- # Add UPPERCASE aliases for settings keys
116
- for _key, _val in settings_dict.items():
117
- with contextlib.suppress(Exception):
118
- mapping[_key.upper()] = _val
119
-
120
- if settings.api_key:
121
- mapping["HUD_API_KEY"] = settings.api_key
122
- else:
123
- logger.error("HUD_API_KEY is not set, tracing and remote training will not work")
250
+ # Warn once if HUD_API_KEY is not set
251
+ if not settings.api_key:
252
+ global _missing_api_key_error_logged
253
+ if not _missing_api_key_error_logged:
254
+ logger.error("HUD_API_KEY is not set, tracing and remote training will not work")
255
+ _missing_api_key_error_logged = True
124
256
 
125
- def substitute_in_value(obj: Any) -> Any:
126
- """Recursively substitute variables in nested structures."""
127
- if isinstance(obj, str):
128
- # Use Template's substitute with defaultdict - missing vars become empty strings
129
- safe_mapping = defaultdict(str, mapping)
130
- return Template(obj).substitute(safe_mapping)
131
- elif isinstance(obj, dict):
132
- return {k: substitute_in_value(v) for k, v in obj.items()}
133
- elif isinstance(obj, list):
134
- return [substitute_in_value(item) for item in obj]
135
- else:
136
- return obj
137
-
138
- return substitute_in_value(v)
257
+ return _resolve_env_vars(v)
139
258
 
140
259
 
141
260
  class MCPToolCall(CallToolRequestParams):
@@ -164,7 +283,9 @@ class MCPToolCall(CallToolRequestParams):
164
283
 
165
284
 
166
285
  class MCPToolResult(CallToolResult):
167
- """A tool result."""
286
+ """A tool result with optional call_id for correlation."""
287
+
288
+ call_id: str | None = None # For correlating with provider-specific tool call IDs
168
289
 
169
290
  def _get_content_summary(self) -> str:
170
291
  """Extract a summary of the content."""
@@ -216,7 +337,7 @@ class AgentResponse(BaseModel):
216
337
  tool_calls: list[MCPToolCall] = Field(default_factory=list)
217
338
  done: bool = Field(default=False)
218
339
 
219
- # --- TELEMETRY [hud.so] ---
340
+ # --- TELEMETRY [hud.ai] ---
220
341
  # Responses
221
342
  content: str | None = Field(default=None)
222
343
  reasoning: str | None = Field(default=None)
@@ -267,6 +388,27 @@ class TraceStep(BaseModel):
267
388
  model_config = ConfigDict(populate_by_name=True, extra="allow")
268
389
 
269
390
 
391
+ class HudSpan(BaseModel):
392
+ """A telemetry span ready for export to HUD API."""
393
+
394
+ name: str
395
+ trace_id: str = Field(pattern=r"^[0-9a-fA-F]{32}$")
396
+ span_id: str = Field(pattern=r"^[0-9a-fA-F]{16}$")
397
+ parent_span_id: str | None = Field(default=None, pattern=r"^[0-9a-fA-F]{16}$")
398
+
399
+ start_time: str # ISO format
400
+ end_time: str # ISO format
401
+
402
+ status_code: str # "UNSET", "OK", "ERROR"
403
+ status_message: str | None = None
404
+
405
+ attributes: TraceStep
406
+ exceptions: list[dict[str, Any]] | None = None
407
+ internal_type: str | None = None
408
+
409
+ model_config = ConfigDict(extra="forbid")
410
+
411
+
270
412
  class Trace(BaseModel):
271
413
  """Unified result from agent execution (task or prompt).
272
414
 
@@ -286,7 +428,7 @@ class Trace(BaseModel):
286
428
  isError: bool = Field(default=False)
287
429
 
288
430
  # Metadata
289
- task: Task | None = Field(default=None)
431
+ task: LegacyTask | None = Field(default=None)
290
432
 
291
433
  # Trace
292
434
  trace: list[TraceStep] = Field(default_factory=list)
@@ -302,26 +444,22 @@ class Trace(BaseModel):
302
444
  def append(self, step: TraceStep) -> None:
303
445
  self.trace.append(step)
304
446
 
305
- def populate_from_context(self) -> None:
306
- """Populate trace steps from the current trace context if available.
307
-
308
- This checks if we're executing within a hud.trace() context and
309
- automatically populates the trace field with collected steps.
310
- """
311
- from hud.otel.context import get_current_task_run_id
312
- from hud.telemetry.replay import get_trace
313
447
 
314
- task_run_id = get_current_task_run_id()
315
- if task_run_id:
316
- collected_trace = get_trace(task_run_id)
317
- if collected_trace:
318
- self.trace = collected_trace.trace
448
+ # Re-export Task for backwards compatibility (after module defs to avoid circular import)
449
+ from hud.eval.task import Task # noqa: E402
319
450
 
451
+ # Type alias for functions that accept v5 Task, v4 LegacyTask, or raw dicts
452
+ TaskInput = Task | LegacyTask | dict[str, Any]
320
453
 
321
454
  __all__ = [
322
455
  "AgentResponse",
456
+ "AgentType",
457
+ "HudSpan",
458
+ "LegacyTask",
323
459
  "MCPToolCall",
324
460
  "MCPToolResult",
461
+ "Task",
462
+ "TaskInput",
325
463
  "Trace",
326
464
  "TraceStep",
327
465
  ]
hud/utils/__init__.py CHANGED
@@ -2,9 +2,11 @@ from __future__ import annotations
2
2
 
3
3
  from .hud_console import HUDConsole, hud_console
4
4
  from .telemetry import stream
5
+ from .types import with_signature
5
6
 
6
7
  __all__ = [
7
8
  "HUDConsole",
8
9
  "hud_console",
9
10
  "stream",
11
+ "with_signature",
10
12
  ]
hud/utils/env.py ADDED
@@ -0,0 +1,67 @@
1
+ """Environment variable resolution utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import os
7
+ from collections import defaultdict
8
+ from string import Template
9
+ from typing import TYPE_CHECKING, Any
10
+
11
+ from hud.settings import settings
12
+
13
+ if TYPE_CHECKING:
14
+ from collections.abc import Mapping
15
+
16
+
17
def resolve_env_vars(obj: Any, extra_mapping: Mapping[str, Any] | None = None) -> Any:
    """Recursively resolve ``${VAR_NAME}`` placeholders in strings.

    Uses Python's ``string.Template`` for substitution, so the bare form
    ``$VAR_NAME`` is resolved as well and ``$$`` yields a literal ``$``.
    Values are sourced from the following, with later sources overriding
    earlier ones on key collisions:

    1. ``os.environ``
    2. ``hud.settings`` (loads from project .env and ~/.hud/.env)
    3. Optional ``extra_mapping`` parameter

    Uppercase aliases are automatically added for settings keys,
    so both ``${api_key}`` and ``${API_KEY}`` work.

    Missing variables resolve to empty strings. A ``$`` that does not
    introduce a valid placeholder (for example ``"under $5"`` or a trailing
    ``$``) is left untouched instead of raising ``ValueError``.

    Args:
        obj: The object to resolve (string, dict, list, or other). Dict
            values and list items are resolved recursively; dict keys and
            any other types are returned unchanged.
        extra_mapping: Optional additional key-value pairs to include.

    Returns:
        A new object with all placeholders in strings resolved.

    Example:
        >>> resolve_env_vars({"key": "${MY_VAR}"}, {"MY_VAR": "resolved_value"})
        {'key': 'resolved_value'}
    """
    # Build the lookup table; each update() below overrides what came before.
    mapping: dict[str, Any] = dict(os.environ)
    settings_dict = settings.model_dump()
    mapping.update(settings_dict)

    # Add UPPERCASE aliases for settings keys.
    # NOTE(review): Template renders values with str(), so a setting whose
    # value is None substitutes as the text "None" - confirm this is intended.
    for key, val in settings_dict.items():
        # Only a non-string key (no .upper()) can fail here; skip it quietly.
        with contextlib.suppress(AttributeError):
            mapping[key.upper()] = val

    if settings.api_key:
        mapping["HUD_API_KEY"] = settings.api_key

    if extra_mapping:
        mapping.update(extra_mapping)

    # Built once per call: unknown names fall back to "" via the default factory.
    lookup = defaultdict(str, mapping)

    def substitute(value: Any) -> Any:
        if isinstance(value, str):
            # safe_substitute leaves malformed "$" sequences as-is, whereas
            # substitute() would raise ValueError on text such as "$5".
            return Template(value).safe_substitute(lookup)
        if isinstance(value, dict):
            return {k: substitute(v) for k, v in value.items()}
        if isinstance(value, list):
            return [substitute(item) for item in value]
        return value

    return substitute(obj)