hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282) hide show
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +70 -5
  3. hud/agents/base.py +238 -500
  4. hud/agents/claude.py +236 -247
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +264 -0
  7. hud/agents/gemini_cua.py +324 -0
  8. hud/agents/grounded_openai.py +98 -100
  9. hud/agents/misc/integration_test_agent.py +51 -20
  10. hud/agents/misc/response_agent.py +48 -36
  11. hud/agents/openai.py +282 -296
  12. hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
  13. hud/agents/operator.py +199 -0
  14. hud/agents/resolver.py +70 -0
  15. hud/agents/tests/conftest.py +133 -0
  16. hud/agents/tests/test_base.py +300 -622
  17. hud/agents/tests/test_base_runtime.py +233 -0
  18. hud/agents/tests/test_claude.py +381 -214
  19. hud/agents/tests/test_client.py +9 -10
  20. hud/agents/tests/test_gemini.py +369 -0
  21. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  22. hud/agents/tests/test_openai.py +377 -140
  23. hud/agents/tests/test_operator.py +362 -0
  24. hud/agents/tests/test_resolver.py +192 -0
  25. hud/agents/tests/test_run_eval.py +179 -0
  26. hud/agents/types.py +148 -0
  27. hud/cli/__init__.py +493 -546
  28. hud/cli/analyze.py +43 -5
  29. hud/cli/build.py +699 -113
  30. hud/cli/debug.py +8 -5
  31. hud/cli/dev.py +889 -732
  32. hud/cli/eval.py +793 -667
  33. hud/cli/flows/dev.py +167 -0
  34. hud/cli/flows/init.py +191 -0
  35. hud/cli/flows/tasks.py +153 -56
  36. hud/cli/flows/templates.py +151 -0
  37. hud/cli/flows/tests/__init__.py +1 -0
  38. hud/cli/flows/tests/test_dev.py +126 -0
  39. hud/cli/init.py +60 -58
  40. hud/cli/pull.py +1 -1
  41. hud/cli/push.py +38 -13
  42. hud/cli/rft.py +311 -0
  43. hud/cli/rft_status.py +145 -0
  44. hud/cli/tests/test_analyze.py +5 -5
  45. hud/cli/tests/test_analyze_metadata.py +3 -2
  46. hud/cli/tests/test_analyze_module.py +120 -0
  47. hud/cli/tests/test_build.py +110 -8
  48. hud/cli/tests/test_build_failure.py +41 -0
  49. hud/cli/tests/test_build_module.py +50 -0
  50. hud/cli/tests/test_cli_init.py +6 -1
  51. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  52. hud/cli/tests/test_cli_root.py +140 -0
  53. hud/cli/tests/test_convert.py +361 -0
  54. hud/cli/tests/test_debug.py +12 -10
  55. hud/cli/tests/test_dev.py +197 -0
  56. hud/cli/tests/test_eval.py +251 -0
  57. hud/cli/tests/test_eval_bedrock.py +51 -0
  58. hud/cli/tests/test_init.py +124 -0
  59. hud/cli/tests/test_main_module.py +11 -5
  60. hud/cli/tests/test_mcp_server.py +12 -100
  61. hud/cli/tests/test_push.py +1 -1
  62. hud/cli/tests/test_push_happy.py +74 -0
  63. hud/cli/tests/test_push_wrapper.py +23 -0
  64. hud/cli/tests/test_registry.py +1 -1
  65. hud/cli/tests/test_utils.py +1 -1
  66. hud/cli/{rl → utils}/celebrate.py +14 -12
  67. hud/cli/utils/config.py +18 -1
  68. hud/cli/utils/docker.py +130 -4
  69. hud/cli/utils/env_check.py +9 -9
  70. hud/cli/utils/git.py +136 -0
  71. hud/cli/utils/interactive.py +39 -5
  72. hud/cli/utils/metadata.py +70 -1
  73. hud/cli/utils/runner.py +1 -1
  74. hud/cli/utils/server.py +2 -2
  75. hud/cli/utils/source_hash.py +3 -3
  76. hud/cli/utils/tasks.py +4 -1
  77. hud/cli/utils/tests/__init__.py +0 -0
  78. hud/cli/utils/tests/test_config.py +58 -0
  79. hud/cli/utils/tests/test_docker.py +93 -0
  80. hud/cli/utils/tests/test_docker_hints.py +71 -0
  81. hud/cli/utils/tests/test_env_check.py +74 -0
  82. hud/cli/utils/tests/test_environment.py +42 -0
  83. hud/cli/utils/tests/test_git.py +142 -0
  84. hud/cli/utils/tests/test_interactive_module.py +60 -0
  85. hud/cli/utils/tests/test_local_runner.py +50 -0
  86. hud/cli/utils/tests/test_logging_utils.py +23 -0
  87. hud/cli/utils/tests/test_metadata.py +49 -0
  88. hud/cli/utils/tests/test_package_runner.py +35 -0
  89. hud/cli/utils/tests/test_registry_utils.py +49 -0
  90. hud/cli/utils/tests/test_remote_runner.py +25 -0
  91. hud/cli/utils/tests/test_runner_modules.py +52 -0
  92. hud/cli/utils/tests/test_source_hash.py +36 -0
  93. hud/cli/utils/tests/test_tasks.py +80 -0
  94. hud/cli/utils/version_check.py +258 -0
  95. hud/cli/{rl → utils}/viewer.py +2 -2
  96. hud/clients/README.md +12 -11
  97. hud/clients/__init__.py +4 -3
  98. hud/clients/base.py +166 -26
  99. hud/clients/environment.py +51 -0
  100. hud/clients/fastmcp.py +13 -6
  101. hud/clients/mcp_use.py +45 -15
  102. hud/clients/tests/test_analyze_scenarios.py +206 -0
  103. hud/clients/tests/test_protocol.py +9 -3
  104. hud/datasets/__init__.py +23 -20
  105. hud/datasets/loader.py +326 -0
  106. hud/datasets/runner.py +198 -105
  107. hud/datasets/tests/__init__.py +0 -0
  108. hud/datasets/tests/test_loader.py +221 -0
  109. hud/datasets/tests/test_utils.py +315 -0
  110. hud/datasets/utils.py +270 -90
  111. hud/environment/__init__.py +52 -0
  112. hud/environment/connection.py +258 -0
  113. hud/environment/connectors/__init__.py +33 -0
  114. hud/environment/connectors/base.py +68 -0
  115. hud/environment/connectors/local.py +177 -0
  116. hud/environment/connectors/mcp_config.py +137 -0
  117. hud/environment/connectors/openai.py +101 -0
  118. hud/environment/connectors/remote.py +172 -0
  119. hud/environment/environment.py +835 -0
  120. hud/environment/integrations/__init__.py +45 -0
  121. hud/environment/integrations/adk.py +67 -0
  122. hud/environment/integrations/anthropic.py +196 -0
  123. hud/environment/integrations/gemini.py +92 -0
  124. hud/environment/integrations/langchain.py +82 -0
  125. hud/environment/integrations/llamaindex.py +68 -0
  126. hud/environment/integrations/openai.py +238 -0
  127. hud/environment/mock.py +306 -0
  128. hud/environment/router.py +263 -0
  129. hud/environment/scenarios.py +620 -0
  130. hud/environment/tests/__init__.py +1 -0
  131. hud/environment/tests/test_connection.py +317 -0
  132. hud/environment/tests/test_connectors.py +205 -0
  133. hud/environment/tests/test_environment.py +593 -0
  134. hud/environment/tests/test_integrations.py +257 -0
  135. hud/environment/tests/test_local_connectors.py +242 -0
  136. hud/environment/tests/test_scenarios.py +1086 -0
  137. hud/environment/tests/test_tools.py +208 -0
  138. hud/environment/types.py +23 -0
  139. hud/environment/utils/__init__.py +35 -0
  140. hud/environment/utils/formats.py +215 -0
  141. hud/environment/utils/schema.py +171 -0
  142. hud/environment/utils/tool_wrappers.py +113 -0
  143. hud/eval/__init__.py +67 -0
  144. hud/eval/context.py +727 -0
  145. hud/eval/display.py +299 -0
  146. hud/eval/instrument.py +187 -0
  147. hud/eval/manager.py +533 -0
  148. hud/eval/parallel.py +268 -0
  149. hud/eval/task.py +372 -0
  150. hud/eval/tests/__init__.py +1 -0
  151. hud/eval/tests/test_context.py +178 -0
  152. hud/eval/tests/test_eval.py +210 -0
  153. hud/eval/tests/test_manager.py +152 -0
  154. hud/eval/tests/test_parallel.py +168 -0
  155. hud/eval/tests/test_task.py +291 -0
  156. hud/eval/types.py +65 -0
  157. hud/eval/utils.py +194 -0
  158. hud/patches/__init__.py +19 -0
  159. hud/patches/mcp_patches.py +308 -0
  160. hud/patches/warnings.py +54 -0
  161. hud/samples/browser.py +4 -4
  162. hud/server/__init__.py +2 -1
  163. hud/server/low_level.py +2 -1
  164. hud/server/router.py +164 -0
  165. hud/server/server.py +567 -80
  166. hud/server/tests/test_mcp_server_integration.py +11 -11
  167. hud/server/tests/test_mcp_server_more.py +1 -1
  168. hud/server/tests/test_server_extra.py +2 -0
  169. hud/settings.py +45 -3
  170. hud/shared/exceptions.py +36 -10
  171. hud/shared/hints.py +26 -1
  172. hud/shared/requests.py +15 -3
  173. hud/shared/tests/test_exceptions.py +40 -31
  174. hud/shared/tests/test_hints.py +167 -0
  175. hud/telemetry/__init__.py +20 -19
  176. hud/telemetry/exporter.py +201 -0
  177. hud/telemetry/instrument.py +165 -253
  178. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  179. hud/telemetry/tests/test_exporter.py +258 -0
  180. hud/telemetry/tests/test_instrument.py +401 -0
  181. hud/tools/__init__.py +18 -2
  182. hud/tools/agent.py +223 -0
  183. hud/tools/apply_patch.py +639 -0
  184. hud/tools/base.py +54 -4
  185. hud/tools/bash.py +2 -2
  186. hud/tools/computer/__init__.py +36 -3
  187. hud/tools/computer/anthropic.py +2 -2
  188. hud/tools/computer/gemini.py +385 -0
  189. hud/tools/computer/hud.py +23 -6
  190. hud/tools/computer/openai.py +20 -21
  191. hud/tools/computer/qwen.py +434 -0
  192. hud/tools/computer/settings.py +37 -0
  193. hud/tools/edit.py +3 -7
  194. hud/tools/executors/base.py +4 -2
  195. hud/tools/executors/pyautogui.py +1 -1
  196. hud/tools/grounding/grounded_tool.py +13 -18
  197. hud/tools/grounding/grounder.py +10 -31
  198. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  199. hud/tools/jupyter.py +330 -0
  200. hud/tools/playwright.py +18 -3
  201. hud/tools/shell.py +308 -0
  202. hud/tools/tests/test_agent_tool.py +355 -0
  203. hud/tools/tests/test_apply_patch.py +718 -0
  204. hud/tools/tests/test_computer.py +4 -9
  205. hud/tools/tests/test_computer_actions.py +24 -2
  206. hud/tools/tests/test_jupyter_tool.py +181 -0
  207. hud/tools/tests/test_shell.py +596 -0
  208. hud/tools/tests/test_submit.py +85 -0
  209. hud/tools/tests/test_types.py +193 -0
  210. hud/tools/types.py +21 -1
  211. hud/types.py +194 -56
  212. hud/utils/__init__.py +2 -0
  213. hud/utils/env.py +67 -0
  214. hud/utils/hud_console.py +89 -18
  215. hud/utils/mcp.py +15 -58
  216. hud/utils/strict_schema.py +162 -0
  217. hud/utils/tests/test_init.py +1 -2
  218. hud/utils/tests/test_mcp.py +1 -28
  219. hud/utils/tests/test_pretty_errors.py +186 -0
  220. hud/utils/tests/test_tool_shorthand.py +154 -0
  221. hud/utils/tests/test_version.py +1 -1
  222. hud/utils/types.py +20 -0
  223. hud/version.py +1 -1
  224. hud_python-0.5.13.dist-info/METADATA +264 -0
  225. hud_python-0.5.13.dist-info/RECORD +305 -0
  226. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
  227. hud/agents/langchain.py +0 -261
  228. hud/agents/lite_llm.py +0 -72
  229. hud/cli/rl/__init__.py +0 -180
  230. hud/cli/rl/config.py +0 -101
  231. hud/cli/rl/display.py +0 -133
  232. hud/cli/rl/gpu.py +0 -63
  233. hud/cli/rl/gpu_utils.py +0 -321
  234. hud/cli/rl/local_runner.py +0 -595
  235. hud/cli/rl/presets.py +0 -96
  236. hud/cli/rl/remote_runner.py +0 -463
  237. hud/cli/rl/rl_api.py +0 -150
  238. hud/cli/rl/vllm.py +0 -177
  239. hud/cli/rl/wait_utils.py +0 -89
  240. hud/datasets/parallel.py +0 -687
  241. hud/misc/__init__.py +0 -1
  242. hud/misc/claude_plays_pokemon.py +0 -292
  243. hud/otel/__init__.py +0 -35
  244. hud/otel/collector.py +0 -142
  245. hud/otel/config.py +0 -181
  246. hud/otel/context.py +0 -570
  247. hud/otel/exporters.py +0 -369
  248. hud/otel/instrumentation.py +0 -135
  249. hud/otel/processors.py +0 -121
  250. hud/otel/tests/__init__.py +0 -1
  251. hud/otel/tests/test_processors.py +0 -197
  252. hud/rl/README.md +0 -30
  253. hud/rl/__init__.py +0 -1
  254. hud/rl/actor.py +0 -176
  255. hud/rl/buffer.py +0 -405
  256. hud/rl/chat_template.jinja +0 -101
  257. hud/rl/config.py +0 -192
  258. hud/rl/distributed.py +0 -132
  259. hud/rl/learner.py +0 -637
  260. hud/rl/tests/__init__.py +0 -1
  261. hud/rl/tests/test_learner.py +0 -186
  262. hud/rl/train.py +0 -382
  263. hud/rl/types.py +0 -101
  264. hud/rl/utils/start_vllm_server.sh +0 -30
  265. hud/rl/utils.py +0 -524
  266. hud/rl/vllm_adapter.py +0 -143
  267. hud/telemetry/job.py +0 -352
  268. hud/telemetry/replay.py +0 -74
  269. hud/telemetry/tests/test_replay.py +0 -40
  270. hud/telemetry/tests/test_trace.py +0 -63
  271. hud/telemetry/trace.py +0 -158
  272. hud/utils/agent_factories.py +0 -86
  273. hud/utils/async_utils.py +0 -65
  274. hud/utils/group_eval.py +0 -223
  275. hud/utils/progress.py +0 -149
  276. hud/utils/tasks.py +0 -127
  277. hud/utils/tests/test_async_utils.py +0 -173
  278. hud/utils/tests/test_progress.py +0 -261
  279. hud_python-0.4.45.dist-info/METADATA +0 -552
  280. hud_python-0.4.45.dist-info/RECORD +0 -228
  281. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  282. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,186 @@
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+ from unittest.mock import MagicMock, patch
5
+
6
+ from hud.utils.pretty_errors import (
7
+ _async_exception_handler,
8
+ _render_and_fallback,
9
+ install_pretty_errors,
10
+ )
11
+
12
+
13
+ def test_render_and_fallback_hud_exception():
14
+ """Test _render_and_fallback with HudException."""
15
+ from hud.shared.exceptions import HudException
16
+
17
+ exc = HudException("Test error")
18
+
19
+ with (
20
+ patch("sys.__excepthook__") as mock_excepthook,
21
+ patch("hud.utils.pretty_errors.hud_console") as mock_console,
22
+ patch("sys.stderr.flush"),
23
+ ):
24
+ _render_and_fallback(HudException, exc, None)
25
+
26
+ mock_excepthook.assert_called_once()
27
+ mock_console.render_exception.assert_called_once_with(exc)
28
+
29
+
30
+ def test_render_and_fallback_non_hud_exception():
31
+ """Test _render_and_fallback with non-HudException."""
32
+ exc = ValueError("Test error")
33
+
34
+ with (
35
+ patch("sys.__excepthook__") as mock_excepthook,
36
+ patch("hud.utils.pretty_errors.hud_console") as mock_console,
37
+ ):
38
+ _render_and_fallback(ValueError, exc, None)
39
+
40
+ mock_excepthook.assert_called_once()
41
+ # Should not render for non-HudException
42
+ mock_console.render_exception.assert_not_called()
43
+
44
+
45
+ def test_render_and_fallback_rendering_error():
46
+ """Test _render_and_fallback handles rendering errors gracefully."""
47
+ from hud.shared.exceptions import HudException
48
+
49
+ exc = HudException("Test error")
50
+
51
+ with (
52
+ patch("sys.__excepthook__") as mock_excepthook,
53
+ patch("hud.utils.pretty_errors.hud_console") as mock_console,
54
+ ):
55
+ mock_console.render_exception.side_effect = Exception("Render failed")
56
+
57
+ # Should not raise
58
+ _render_and_fallback(HudException, exc, None)
59
+
60
+ mock_excepthook.assert_called_once()
61
+
62
+
63
+ def test_async_exception_handler_with_exception():
64
+ """Test _async_exception_handler with exception in context."""
65
+ mock_loop = MagicMock()
66
+ context = {"exception": ValueError("Test error")}
67
+
68
+ with patch("hud.utils.pretty_errors.hud_console") as mock_console:
69
+ _async_exception_handler(mock_loop, context)
70
+
71
+ mock_console.render_exception.assert_called_once()
72
+ mock_loop.default_exception_handler.assert_called_once_with(context)
73
+
74
+
75
+ def test_async_exception_handler_with_message():
76
+ """Test _async_exception_handler with message only."""
77
+ mock_loop = MagicMock()
78
+ context = {"message": "Error message"}
79
+
80
+ with patch("hud.utils.pretty_errors.hud_console") as mock_console:
81
+ _async_exception_handler(mock_loop, context)
82
+
83
+ mock_console.error.assert_called_once_with("Error message")
84
+ mock_console.render_support_hint.assert_called_once()
85
+ mock_loop.default_exception_handler.assert_called_once()
86
+
87
+
88
+ def test_async_exception_handler_rendering_error():
89
+ """Test _async_exception_handler handles rendering errors."""
90
+ mock_loop = MagicMock()
91
+ context = {"exception": ValueError("Test")}
92
+
93
+ with patch("hud.utils.pretty_errors.hud_console") as mock_console:
94
+ mock_console.render_exception.side_effect = Exception("Render failed")
95
+
96
+ # Should not raise, should call default handler
97
+ _async_exception_handler(mock_loop, context)
98
+
99
+ mock_loop.default_exception_handler.assert_called_once()
100
+
101
+
102
+ def test_install_pretty_errors_with_running_loop():
103
+ """Test install_pretty_errors with a running event loop."""
104
+ mock_loop = MagicMock()
105
+
106
+ with patch("asyncio.get_running_loop", return_value=mock_loop):
107
+ install_pretty_errors()
108
+
109
+ assert sys.excepthook == _render_and_fallback
110
+ mock_loop.set_exception_handler.assert_called_once_with(_async_exception_handler)
111
+
112
+
113
+ def test_install_pretty_errors_no_running_loop():
114
+ """Test install_pretty_errors without a running loop."""
115
+ with (
116
+ patch("asyncio.get_running_loop", side_effect=RuntimeError("No running loop")),
117
+ patch("asyncio.new_event_loop") as mock_new_loop,
118
+ ):
119
+ mock_loop = MagicMock()
120
+ mock_new_loop.return_value = mock_loop
121
+
122
+ install_pretty_errors()
123
+
124
+ assert sys.excepthook == _render_and_fallback
125
+ mock_loop.set_exception_handler.assert_called_once()
126
+
127
+
128
+ def test_install_pretty_errors_new_loop_fails():
129
+ """Test install_pretty_errors when creating new loop fails."""
130
+ with (
131
+ patch("asyncio.get_running_loop", side_effect=RuntimeError("No running loop")),
132
+ patch("asyncio.new_event_loop", side_effect=Exception("Can't create loop")),
133
+ ):
134
+ # Should not raise
135
+ install_pretty_errors()
136
+
137
+ assert sys.excepthook == _render_and_fallback
138
+
139
+
140
+ def test_install_pretty_errors_set_handler_fails():
141
+ """Test install_pretty_errors when set_exception_handler fails."""
142
+ mock_loop = MagicMock()
143
+ mock_loop.set_exception_handler.side_effect = Exception("Can't set handler")
144
+
145
+ with patch("asyncio.get_running_loop", return_value=mock_loop):
146
+ # Should not raise
147
+ install_pretty_errors()
148
+
149
+ assert sys.excepthook == _render_and_fallback
150
+
151
+
152
+ def test_async_exception_handler_no_exception_or_message():
153
+ """Test _async_exception_handler with empty context."""
154
+ mock_loop = MagicMock()
155
+ context = {}
156
+
157
+ with patch("hud.utils.pretty_errors.hud_console") as mock_console:
158
+ _async_exception_handler(mock_loop, context)
159
+
160
+ mock_console.render_exception.assert_not_called()
161
+ mock_console.error.assert_not_called()
162
+ mock_loop.default_exception_handler.assert_called_once()
163
+
164
+
165
+ def test_render_and_fallback_with_traceback():
166
+ """Test _render_and_fallback includes traceback."""
167
+ from hud.shared.exceptions import HudException
168
+
169
+ exc = HudException("Test error")
170
+
171
+ # Create a fake traceback
172
+ try:
173
+ raise exc
174
+ except HudException as e:
175
+ tb = e.__traceback__
176
+
177
+ with (
178
+ patch("sys.__excepthook__") as mock_excepthook,
179
+ patch("hud.utils.pretty_errors.hud_console"),
180
+ patch("sys.stderr.flush"),
181
+ ):
182
+ _render_and_fallback(HudException, exc, tb)
183
+
184
+ # Should call excepthook with traceback
185
+ call_args = mock_excepthook.call_args[0]
186
+ assert call_args[2] == tb
@@ -0,0 +1,154 @@
1
+ from __future__ import annotations
2
+
3
+ from hud.utils.tool_shorthand import (
4
+ _is_call_like,
5
+ _to_call_dict,
6
+ normalize_to_tool_call_dict,
7
+ )
8
+
9
+
10
+ def test_is_call_like_with_name_and_arguments():
11
+ """Test _is_call_like with name and arguments keys."""
12
+ obj = {"name": "test_tool", "arguments": {"key": "value"}}
13
+ assert _is_call_like(obj) is True
14
+
15
+
16
+ def test_is_call_like_with_single_key_dict_value():
17
+ """Test _is_call_like with single key dict containing dict value."""
18
+ obj = {"tool": {"name": "test"}}
19
+ assert _is_call_like(obj) is True
20
+
21
+
22
+ def test_is_call_like_with_nested_single_key():
23
+ """Test _is_call_like with nested single key dict."""
24
+ obj = {"tool": {"inner": {"key": "value"}}}
25
+ assert _is_call_like(obj) is True
26
+
27
+
28
+ def test_is_call_like_not_dict():
29
+ """Test _is_call_like returns False for non-dict."""
30
+ assert _is_call_like("string") is False
31
+ assert _is_call_like(123) is False
32
+ assert _is_call_like(None) is False
33
+ assert _is_call_like([]) is False
34
+
35
+
36
+ def test_is_call_like_empty_dict():
37
+ """Test _is_call_like returns False for empty dict."""
38
+ assert _is_call_like({}) is False
39
+
40
+
41
+ def test_is_call_like_multi_key_dict():
42
+ """Test _is_call_like returns False for multi-key dict without name/arguments."""
43
+ obj = {"key1": "value1", "key2": "value2"}
44
+ assert _is_call_like(obj) is False
45
+
46
+
47
+ def test_to_call_dict_with_name_arguments():
48
+ """Test _to_call_dict preserves name and arguments."""
49
+ obj = {"name": "test_tool", "arguments": {"param": "value"}}
50
+ result = _to_call_dict(obj)
51
+ assert result == {"name": "test_tool", "arguments": {"param": "value"}}
52
+
53
+
54
+ def test_to_call_dict_with_nested_call():
55
+ """Test _to_call_dict with nested call-like arguments."""
56
+ obj = {"name": "outer", "arguments": {"name": "inner", "arguments": {"x": 1}}}
57
+ result = _to_call_dict(obj)
58
+ assert result == {"name": "outer", "arguments": {"name": "inner", "arguments": {"x": 1}}}
59
+
60
+
61
+ def test_to_call_dict_shorthand_single_key():
62
+ """Test _to_call_dict converts shorthand single-key dict."""
63
+ obj = {"tool_name": {"name": "inner", "arguments": {}}}
64
+ result = _to_call_dict(obj)
65
+ assert result == {"name": "tool_name", "arguments": {"name": "inner", "arguments": {}}}
66
+
67
+
68
+ def test_to_call_dict_non_call_arguments():
69
+ """Test _to_call_dict with non-call-like arguments."""
70
+ obj = {"name": "test", "arguments": {"simple": "value"}}
71
+ result = _to_call_dict(obj)
72
+ assert result == {"name": "test", "arguments": {"simple": "value"}}
73
+
74
+
75
+ def test_to_call_dict_non_dict():
76
+ """Test _to_call_dict returns non-dict unchanged."""
77
+ assert _to_call_dict("string") == "string"
78
+ assert _to_call_dict(123) == 123
79
+ assert _to_call_dict(None) is None
80
+
81
+
82
+ def test_to_call_dict_single_key_non_call():
83
+ """Test _to_call_dict with single key but non-call value."""
84
+ obj = {"key": "simple_value"}
85
+ result = _to_call_dict(obj)
86
+ assert result == {"key": "simple_value"}
87
+
88
+
89
+ def test_normalize_to_tool_call_dict_none():
90
+ """Test normalize_to_tool_call_dict with None."""
91
+ assert normalize_to_tool_call_dict(None) is None
92
+
93
+
94
+ def test_normalize_to_tool_call_dict_simple_dict():
95
+ """Test normalize_to_tool_call_dict with simple dict."""
96
+ obj = {"name": "tool", "arguments": {"x": 1}}
97
+ result = normalize_to_tool_call_dict(obj)
98
+ assert result == {"name": "tool", "arguments": {"x": 1}}
99
+
100
+
101
+ def test_normalize_to_tool_call_dict_shorthand():
102
+ """Test normalize_to_tool_call_dict with shorthand notation."""
103
+ obj = {"tool_name": {"name": "inner", "arguments": {}}}
104
+ result = normalize_to_tool_call_dict(obj)
105
+ assert result == {"name": "tool_name", "arguments": {"name": "inner", "arguments": {}}}
106
+
107
+
108
+ def test_normalize_to_tool_call_dict_list():
109
+ """Test normalize_to_tool_call_dict with list of dicts."""
110
+ obj = [
111
+ {"name": "tool1", "arguments": {"a": 1}},
112
+ {"name": "tool2", "arguments": {"b": 2}},
113
+ ]
114
+ result = normalize_to_tool_call_dict(obj)
115
+ assert len(result) == 2
116
+ assert result[0] == {"name": "tool1", "arguments": {"a": 1}}
117
+ assert result[1] == {"name": "tool2", "arguments": {"b": 2}}
118
+
119
+
120
+ def test_normalize_to_tool_call_dict_list_shorthand():
121
+ """Test normalize_to_tool_call_dict with list of shorthand dicts."""
122
+ obj = [
123
+ {"tool1": {"name": "inner1", "arguments": {}}},
124
+ {"tool2": {"name": "inner2", "arguments": {}}},
125
+ ]
126
+ result = normalize_to_tool_call_dict(obj)
127
+ assert len(result) == 2
128
+ assert result[0]["name"] == "tool1"
129
+ assert result[1]["name"] == "tool2"
130
+
131
+
132
+ def test_normalize_to_tool_call_dict_non_dict_non_list():
133
+ """Test normalize_to_tool_call_dict with non-dict, non-list value."""
134
+ assert normalize_to_tool_call_dict("string") == "string"
135
+ assert normalize_to_tool_call_dict(123) == 123
136
+
137
+
138
+ def test_normalize_to_tool_call_dict_empty_list():
139
+ """Test normalize_to_tool_call_dict with empty list."""
140
+ assert normalize_to_tool_call_dict([]) == []
141
+
142
+
143
+ def test_normalize_to_tool_call_dict_complex_nested():
144
+ """Test normalize_to_tool_call_dict with complex nested structure."""
145
+ obj = {
146
+ "outer_tool": {
147
+ "name": "middle_tool",
148
+ "arguments": {"name": "inner_tool", "arguments": {"x": 1}},
149
+ }
150
+ }
151
+ result = normalize_to_tool_call_dict(obj)
152
+ assert result["name"] == "outer_tool"
153
+ assert result["arguments"]["name"] == "middle_tool"
154
+ assert result["arguments"]["arguments"]["name"] == "inner_tool"
@@ -5,4 +5,4 @@ def test_import():
5
5
  """Test that the package can be imported."""
6
6
  import hud
7
7
 
8
- assert hud.__version__ == "0.4.45"
8
+ assert hud.__version__ == "0.5.13"
hud/utils/types.py ADDED
@@ -0,0 +1,20 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any, ParamSpec, TypeVar
4
+
5
+ if TYPE_CHECKING:
6
+ from collections.abc import Callable
7
+
8
+ P = ParamSpec("P")
9
+ R = TypeVar("R")
10
+
11
+
12
+ def with_signature(
13
+ params_cls: Callable[P, Any],
14
+ ) -> Callable[[Callable[..., R]], Callable[P, R]]:
15
+ """Decorator that gives a method the signature of a Pydantic model."""
16
+
17
+ def decorator(method: Callable[..., R]) -> Callable[P, R]:
18
+ return method # type: ignore[return-value]
19
+
20
+ return decorator
hud/version.py CHANGED
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.4.45"
7
+ __version__ = "0.5.13"
@@ -0,0 +1,264 @@
1
+ Metadata-Version: 2.4
2
+ Name: hud-python
3
+ Version: 0.5.13
4
+ Summary: SDK for the HUD platform.
5
+ Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
+ Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
7
+ Project-URL: Documentation, https://docs.hud.ai
8
+ Author-email: HUD <founders@hud.ai>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2025 Human Union Data, Inc
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Classifier: Development Status :: 4 - Beta
32
+ Classifier: Intended Audience :: Developers
33
+ Classifier: Programming Language :: Python :: 3
34
+ Classifier: Programming Language :: Python :: 3.11
35
+ Classifier: Programming Language :: Python :: 3.12
36
+ Classifier: Programming Language :: Python :: 3.13
37
+ Requires-Python: <3.13,>=3.11
38
+ Requires-Dist: blessed>=1.20.0
39
+ Requires-Dist: fastmcp==2.13.3
40
+ Requires-Dist: httpx<1,>=0.23.0
41
+ Requires-Dist: mcp<1.23,>1.21.1
42
+ Requires-Dist: openai>=2.8.1
43
+ Requires-Dist: packaging>=21.0
44
+ Requires-Dist: prompt-toolkit==3.0.51
45
+ Requires-Dist: pydantic-settings<3,>=2.2
46
+ Requires-Dist: pydantic<3,>=2.6
47
+ Requires-Dist: questionary==2.1.0
48
+ Requires-Dist: rich>=13.0.0
49
+ Requires-Dist: scarf-sdk>=0.1.0
50
+ Requires-Dist: toml>=0.10.2
51
+ Requires-Dist: typer>=0.9.0
52
+ Requires-Dist: watchfiles>=0.21.0
53
+ Provides-Extra: agent
54
+ Requires-Dist: anthropic>=0.75; extra == 'agent'
55
+ Requires-Dist: datasets>=2.14.0; extra == 'agent'
56
+ Requires-Dist: google-genai; extra == 'agent'
57
+ Requires-Dist: langchain>=1.1.0; extra == 'agent'
58
+ Requires-Dist: mcp-use==1.5.0; extra == 'agent'
59
+ Requires-Dist: openai-agents; extra == 'agent'
60
+ Requires-Dist: pillow>=11.1.0; extra == 'agent'
61
+ Requires-Dist: tornado>=6.5.2; extra == 'agent'
62
+ Provides-Extra: agents
63
+ Requires-Dist: anthropic>=0.75; extra == 'agents'
64
+ Requires-Dist: datasets>=2.14.0; extra == 'agents'
65
+ Requires-Dist: google-genai; extra == 'agents'
66
+ Requires-Dist: langchain>=1.1.0; extra == 'agents'
67
+ Requires-Dist: mcp-use==1.5.0; extra == 'agents'
68
+ Requires-Dist: openai-agents; extra == 'agents'
69
+ Requires-Dist: pillow>=11.1.0; extra == 'agents'
70
+ Requires-Dist: tornado>=6.5.2; extra == 'agents'
71
+ Provides-Extra: bedrock
72
+ Requires-Dist: anthropic[bedrock]>=0.75; extra == 'bedrock'
73
+ Provides-Extra: dev
74
+ Requires-Dist: anthropic>=0.75; extra == 'dev'
75
+ Requires-Dist: datasets>=2.14.0; extra == 'dev'
76
+ Requires-Dist: dotenv>=0.9.9; extra == 'dev'
77
+ Requires-Dist: google-adk; extra == 'dev'
78
+ Requires-Dist: google-genai; extra == 'dev'
79
+ Requires-Dist: ipykernel; extra == 'dev'
80
+ Requires-Dist: ipython<9; extra == 'dev'
81
+ Requires-Dist: jupyter-client; extra == 'dev'
82
+ Requires-Dist: jupyter-core; extra == 'dev'
83
+ Requires-Dist: langchain>=1.1.0; extra == 'dev'
84
+ Requires-Dist: llama-index-core; extra == 'dev'
85
+ Requires-Dist: mcp-use==1.5.0; extra == 'dev'
86
+ Requires-Dist: openai-agents; extra == 'dev'
87
+ Requires-Dist: pillow>=11.1.0; extra == 'dev'
88
+ Requires-Dist: playwright; extra == 'dev'
89
+ Requires-Dist: pyautogui>=0.9.54; extra == 'dev'
90
+ Requires-Dist: pyright==1.1.407; extra == 'dev'
91
+ Requires-Dist: pytest-asyncio; extra == 'dev'
92
+ Requires-Dist: pytest-cov; extra == 'dev'
93
+ Requires-Dist: pytest-mock; extra == 'dev'
94
+ Requires-Dist: pytest>=8.1.1; extra == 'dev'
95
+ Requires-Dist: ruff>=0.11.8; extra == 'dev'
96
+ Requires-Dist: tornado>=6.5.2; extra == 'dev'
97
+ Description-Content-Type: text/markdown
98
+
99
+ <div align="left">
100
+ <picture>
101
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo_dark.svg">
102
+ <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg">
103
+ <img src="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg" alt="HUD" width="150" style="margin-bottom: 24px;"/>
104
+ </picture>
105
+ </div>
106
+
107
+ The HUD SDK is an open-source Python toolkit for building, evaluating, and training AI agents. Use a unified API for any model provider, wrap your code as MCP environments, run A/B evals at scale, and train with reinforcement learning.
108
+
109
+ To learn more, check out our [Documentation](https://docs.hud.ai) and [API Reference](https://docs.hud.ai/reference).
110
+
111
+ [![PyPI](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
112
+ [![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
113
+ [![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
114
+ [![Discord](https://img.shields.io/discord/1327447144772407390?label=Discord&logo=discord&style=flat-square)](https://discord.gg/wkjtmHYYjm)
115
+ [![X Follow](https://img.shields.io/twitter/follow/hud_evals?style=social)](https://x.com/intent/user?screen_name=hud_evals)
116
+ [![Shop](https://img.shields.io/badge/_-white.svg?label=shop&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAJCAYAAAAywQxIAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAACxMAAAsTAQCanBgAAAF6SURBVChTlZA9ixNhFIWf8yaTpFHRRMXCKpAZhCAYFvwoLHZhwUKw9A9YCJb+Bq0sxGbBQrTxX1j41dvIRAjGZbdwRUUGIzPMeyw2swS3WZ/ynHvP5VylafoAWAd+5Xm+wX+SpukmcMf29RDCZrD9BViz3f53+CjYngKZpD5A2/Y7SQBMJpOkKIprdV1vdzqdHzHGblmW9Ww2+5pl2TmAxWKxmM/nP8fj8cmqqtZijJ9sb0u6ABBWjh0riuIt8CqE8LGu66e2d5MkeQ8QY3xme7fb7T4ZjUbrZVl+jjFuSXoEXGxCDgIl9WzfAO5LSmzvNB771R6vzG4Bx0MIt/M8vwV8aLyDQNt70+n0G1AspaTxVln+aghQluVsKbvxVysflT9NQK/XO7R/SGiQ9Nt2aftElmWXJd1kv0kbeANQVdWl4XB4XtJouXaqNRgMHkrqS+r0+/3XwD1JXdungRfAVWBi+6WkK8D3EMJz22cl3W21WgNgx3YAzvwFd0Chdq03gKUAAAAASUVORK5CYII=&style=social)](https://shop.hud.ai)
117
+ [![Scarf](https://static.scarf.sh/a.png?x-pxid=6530ff33-4945-452b-81f9-626872593933)](https://scarf.sh)
118
+ [![Docs](https://img.shields.io/badge/docs-hud.ai-blue?style=flat-square)](https://docs.hud.ai)
119
+
120
+ ## Install
121
+
122
+ ```bash
123
+ pip install hud-python
124
+ ```
125
+
126
+ Get your API key at [hud.ai](https://hud.ai) and set it:
127
+
128
+ ```bash
129
+ export HUD_API_KEY=your-key-here
130
+ ```
131
+
132
+ > For CLI tools (`hud init`, `hud dev`, etc.): `uv tool install hud-python --python 3.12`
133
+
134
+ ![Agent running on SheetBench](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/trace_sheet.gif)
135
+
136
+ ## Usage
137
+
138
+ ### Unified Model API
139
+
140
+ Use Claude, GPT, Gemini, or Grok through one OpenAI-compatible endpoint:
141
+
142
+ ```python
143
+ from openai import AsyncOpenAI
144
+ import os
145
+
146
+ client = AsyncOpenAI(
147
+ base_url="https://inference.hud.ai",
148
+ api_key=os.environ["HUD_API_KEY"]
149
+ )
150
+
151
+ response = await client.chat.completions.create(
152
+ model="claude-sonnet-4-5", # or gpt-4o, gemini-2.5-pro (https://hud.ai/models)
153
+ messages=[{"role": "user", "content": "Hello!"}]
154
+ )
155
+ ```
156
+
157
+ Every call is traced at [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/gateway)
158
+
159
+ ### Environments
160
+
161
+ Turn your code into tools agents can call. Define how to evaluate them:
162
+
163
+ ```python
164
+ from hud import Environment
165
+
166
+ env = Environment("my-env")
167
+
168
+ @env.tool()
169
+ def add(a: int, b: int) -> int:
170
+ """Add two numbers."""
171
+ return a + b
172
+
173
+ @env.scenario("solve-math")
174
+ async def solve_math(problem: str, answer: int):
175
+ response = yield problem # Prompt
176
+ yield 1.0 if str(answer) in response else 0.0 # Reward
177
+
178
+ async with env("solve-math", problem="What is 2+2?", answer=4) as ctx:
179
+ # Your agent logic here - call tools, get response
180
+ result = await ctx.call_tool("add", a=2, b=2)
181
+ await ctx.submit(f"The answer is {result}")
182
+
183
+ print(ctx.reward) # 1.0
184
+ ```
185
+
186
+ The agent runs between the yields: the first yield sends the prompt, and the second yield scores the result. → [Docs](https://docs.hud.ai/quick-links/environments) · [Templates](https://hud.ai/environments)
187
+
188
+ ### A/B Evals
189
+
190
+ Test different models. Repeat runs to see the distribution:
191
+
192
+ ```python
193
+ from openai import AsyncOpenAI
194
+ import os
195
+
196
+ client = AsyncOpenAI(
197
+ base_url="https://inference.hud.ai",
198
+ api_key=os.environ["HUD_API_KEY"]
199
+ )
200
+
201
+ # Using the env from above
202
+ async with env("solve-math", problem="What is 2+2?", answer=4, variants={"model": ["gpt-4o", "claude-sonnet-4-5"]}, group=5) as ctx:
203
+ response = await client.chat.completions.create(
204
+ model=ctx.variants["model"],
205
+ messages=[{"role": "user", "content": ctx.prompt}],
206
+ tools=ctx.tools # Environment tools available to the model
207
+ )
208
+ await ctx.submit(response.choices[0].message.content)
209
+ ```
210
+
211
+ **Variants** test different configurations. **Groups** repeat each run so you can see the distribution of results. Results stream to [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/ab-testing)
212
+
213
+ ### Deploy & Train
214
+
215
+ Push to GitHub, connect on hud.ai, run at scale:
216
+
217
+ ```bash
218
+ hud init # Scaffold environment
219
+ git push # Push to GitHub
220
+ # Connect on hud.ai → New → Environment
221
+ hud eval my-eval --model gpt-4o --group-size 100
222
+ # Or create and run tasks on the platform
223
+ ```
224
+
225
+ Every run generates training data. Use it to fine-tune or run RL. → [Docs](https://docs.hud.ai/quick-links/deploy)
226
+
227
+ ## Links
228
+
229
+ - 📖 [Documentation](https://docs.hud.ai)
230
+ - ⌨️ [CLI Reference](https://docs.hud.ai/reference/cli/overview)
231
+ - 🏆 [Leaderboards](https://hud.ai/leaderboards)
232
+ - 🌐 [Environment Templates](https://hud.ai/environments)
233
+ - 🤖 [Supported Models](https://hud.ai/models)
234
+ - 💬 [Discord](https://discord.gg/wkjtmHYYjm)
235
+
236
+ ## Enterprise
237
+
238
+ Building agents at scale? We work with teams on custom environments, benchmarks, and training.
239
+
240
+ [📅 Book a call](https://cal.com/jay-hud) · [📧 founders@hud.ai](mailto:founders@hud.ai)
241
+
242
+ ## Contributing
243
+
244
+ We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md).
245
+
246
+ Key areas: [Agents](hud/agents/) · [Tools](hud/tools/) · [Environments](https://hud.ai/environments)
247
+
248
+ <a href="https://github.com/hud-evals/hud-python/graphs/contributors">
249
+ <img src="https://contrib.rocks/image?repo=hud-evals/hud-python&max=50" />
250
+ </a>
251
+
252
+ ## Citation
253
+
254
+ ```bibtex
255
+ @software{hud2025agentevalplatform,
256
+ author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
257
+ title = {HUD: An Evaluation and RL Environments Platform for Agents},
258
+ date = {2025-04},
259
+ url = {https://github.com/hud-evals/hud-python},
260
+ langid = {en}
261
+ }
262
+ ```
263
+
264
+ MIT License · [LICENSE](LICENSE)