hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274)
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +11 -5
  3. hud/agents/base.py +220 -500
  4. hud/agents/claude.py +200 -240
  5. hud/agents/gemini.py +275 -0
  6. hud/agents/gemini_cua.py +335 -0
  7. hud/agents/grounded_openai.py +98 -100
  8. hud/agents/misc/integration_test_agent.py +51 -20
  9. hud/agents/misc/response_agent.py +41 -36
  10. hud/agents/openai.py +291 -292
  11. hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
  12. hud/agents/operator.py +211 -0
  13. hud/agents/tests/conftest.py +133 -0
  14. hud/agents/tests/test_base.py +300 -622
  15. hud/agents/tests/test_base_runtime.py +233 -0
  16. hud/agents/tests/test_claude.py +379 -210
  17. hud/agents/tests/test_client.py +9 -10
  18. hud/agents/tests/test_gemini.py +369 -0
  19. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  20. hud/agents/tests/test_openai.py +376 -140
  21. hud/agents/tests/test_operator.py +362 -0
  22. hud/agents/tests/test_run_eval.py +179 -0
  23. hud/cli/__init__.py +461 -545
  24. hud/cli/analyze.py +43 -5
  25. hud/cli/build.py +664 -110
  26. hud/cli/debug.py +8 -5
  27. hud/cli/dev.py +882 -734
  28. hud/cli/eval.py +782 -668
  29. hud/cli/flows/dev.py +167 -0
  30. hud/cli/flows/init.py +191 -0
  31. hud/cli/flows/tasks.py +153 -56
  32. hud/cli/flows/templates.py +151 -0
  33. hud/cli/flows/tests/__init__.py +1 -0
  34. hud/cli/flows/tests/test_dev.py +126 -0
  35. hud/cli/init.py +60 -58
  36. hud/cli/push.py +29 -11
  37. hud/cli/rft.py +311 -0
  38. hud/cli/rft_status.py +145 -0
  39. hud/cli/tests/test_analyze.py +5 -5
  40. hud/cli/tests/test_analyze_metadata.py +3 -2
  41. hud/cli/tests/test_analyze_module.py +120 -0
  42. hud/cli/tests/test_build.py +108 -6
  43. hud/cli/tests/test_build_failure.py +41 -0
  44. hud/cli/tests/test_build_module.py +50 -0
  45. hud/cli/tests/test_cli_init.py +6 -1
  46. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  47. hud/cli/tests/test_cli_root.py +140 -0
  48. hud/cli/tests/test_convert.py +361 -0
  49. hud/cli/tests/test_debug.py +12 -10
  50. hud/cli/tests/test_dev.py +197 -0
  51. hud/cli/tests/test_eval.py +251 -0
  52. hud/cli/tests/test_eval_bedrock.py +51 -0
  53. hud/cli/tests/test_init.py +124 -0
  54. hud/cli/tests/test_main_module.py +11 -5
  55. hud/cli/tests/test_mcp_server.py +12 -100
  56. hud/cli/tests/test_push_happy.py +74 -0
  57. hud/cli/tests/test_push_wrapper.py +23 -0
  58. hud/cli/tests/test_registry.py +1 -1
  59. hud/cli/tests/test_utils.py +1 -1
  60. hud/cli/{rl → utils}/celebrate.py +14 -12
  61. hud/cli/utils/config.py +18 -1
  62. hud/cli/utils/docker.py +130 -4
  63. hud/cli/utils/env_check.py +9 -9
  64. hud/cli/utils/git.py +136 -0
  65. hud/cli/utils/interactive.py +39 -5
  66. hud/cli/utils/metadata.py +69 -0
  67. hud/cli/utils/runner.py +1 -1
  68. hud/cli/utils/server.py +2 -2
  69. hud/cli/utils/source_hash.py +3 -3
  70. hud/cli/utils/tasks.py +4 -1
  71. hud/cli/utils/tests/__init__.py +0 -0
  72. hud/cli/utils/tests/test_config.py +58 -0
  73. hud/cli/utils/tests/test_docker.py +93 -0
  74. hud/cli/utils/tests/test_docker_hints.py +71 -0
  75. hud/cli/utils/tests/test_env_check.py +74 -0
  76. hud/cli/utils/tests/test_environment.py +42 -0
  77. hud/cli/utils/tests/test_git.py +142 -0
  78. hud/cli/utils/tests/test_interactive_module.py +60 -0
  79. hud/cli/utils/tests/test_local_runner.py +50 -0
  80. hud/cli/utils/tests/test_logging_utils.py +23 -0
  81. hud/cli/utils/tests/test_metadata.py +49 -0
  82. hud/cli/utils/tests/test_package_runner.py +35 -0
  83. hud/cli/utils/tests/test_registry_utils.py +49 -0
  84. hud/cli/utils/tests/test_remote_runner.py +25 -0
  85. hud/cli/utils/tests/test_runner_modules.py +52 -0
  86. hud/cli/utils/tests/test_source_hash.py +36 -0
  87. hud/cli/utils/tests/test_tasks.py +80 -0
  88. hud/cli/utils/version_check.py +258 -0
  89. hud/cli/{rl → utils}/viewer.py +2 -2
  90. hud/clients/README.md +12 -11
  91. hud/clients/__init__.py +4 -3
  92. hud/clients/base.py +166 -26
  93. hud/clients/environment.py +51 -0
  94. hud/clients/fastmcp.py +13 -6
  95. hud/clients/mcp_use.py +40 -15
  96. hud/clients/tests/test_analyze_scenarios.py +206 -0
  97. hud/clients/tests/test_protocol.py +9 -3
  98. hud/datasets/__init__.py +23 -20
  99. hud/datasets/loader.py +327 -0
  100. hud/datasets/runner.py +192 -105
  101. hud/datasets/tests/__init__.py +0 -0
  102. hud/datasets/tests/test_loader.py +221 -0
  103. hud/datasets/tests/test_utils.py +315 -0
  104. hud/datasets/utils.py +270 -90
  105. hud/environment/__init__.py +50 -0
  106. hud/environment/connection.py +206 -0
  107. hud/environment/connectors/__init__.py +33 -0
  108. hud/environment/connectors/base.py +68 -0
  109. hud/environment/connectors/local.py +177 -0
  110. hud/environment/connectors/mcp_config.py +109 -0
  111. hud/environment/connectors/openai.py +101 -0
  112. hud/environment/connectors/remote.py +172 -0
  113. hud/environment/environment.py +694 -0
  114. hud/environment/integrations/__init__.py +45 -0
  115. hud/environment/integrations/adk.py +67 -0
  116. hud/environment/integrations/anthropic.py +196 -0
  117. hud/environment/integrations/gemini.py +92 -0
  118. hud/environment/integrations/langchain.py +82 -0
  119. hud/environment/integrations/llamaindex.py +68 -0
  120. hud/environment/integrations/openai.py +238 -0
  121. hud/environment/mock.py +306 -0
  122. hud/environment/router.py +112 -0
  123. hud/environment/scenarios.py +493 -0
  124. hud/environment/tests/__init__.py +1 -0
  125. hud/environment/tests/test_connection.py +317 -0
  126. hud/environment/tests/test_connectors.py +218 -0
  127. hud/environment/tests/test_environment.py +161 -0
  128. hud/environment/tests/test_integrations.py +257 -0
  129. hud/environment/tests/test_local_connectors.py +201 -0
  130. hud/environment/tests/test_scenarios.py +280 -0
  131. hud/environment/tests/test_tools.py +208 -0
  132. hud/environment/types.py +23 -0
  133. hud/environment/utils/__init__.py +35 -0
  134. hud/environment/utils/formats.py +215 -0
  135. hud/environment/utils/schema.py +171 -0
  136. hud/environment/utils/tool_wrappers.py +113 -0
  137. hud/eval/__init__.py +67 -0
  138. hud/eval/context.py +674 -0
  139. hud/eval/display.py +299 -0
  140. hud/eval/instrument.py +185 -0
  141. hud/eval/manager.py +466 -0
  142. hud/eval/parallel.py +268 -0
  143. hud/eval/task.py +340 -0
  144. hud/eval/tests/__init__.py +1 -0
  145. hud/eval/tests/test_context.py +178 -0
  146. hud/eval/tests/test_eval.py +210 -0
  147. hud/eval/tests/test_manager.py +152 -0
  148. hud/eval/tests/test_parallel.py +168 -0
  149. hud/eval/tests/test_task.py +145 -0
  150. hud/eval/types.py +63 -0
  151. hud/eval/utils.py +183 -0
  152. hud/patches/__init__.py +19 -0
  153. hud/patches/mcp_patches.py +151 -0
  154. hud/patches/warnings.py +54 -0
  155. hud/samples/browser.py +4 -4
  156. hud/server/__init__.py +2 -1
  157. hud/server/low_level.py +2 -1
  158. hud/server/router.py +164 -0
  159. hud/server/server.py +567 -80
  160. hud/server/tests/test_mcp_server_integration.py +11 -11
  161. hud/server/tests/test_mcp_server_more.py +1 -1
  162. hud/server/tests/test_server_extra.py +2 -0
  163. hud/settings.py +45 -3
  164. hud/shared/exceptions.py +36 -10
  165. hud/shared/hints.py +26 -1
  166. hud/shared/requests.py +15 -3
  167. hud/shared/tests/test_exceptions.py +40 -31
  168. hud/shared/tests/test_hints.py +167 -0
  169. hud/telemetry/__init__.py +20 -19
  170. hud/telemetry/exporter.py +201 -0
  171. hud/telemetry/instrument.py +158 -253
  172. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  173. hud/telemetry/tests/test_exporter.py +258 -0
  174. hud/telemetry/tests/test_instrument.py +401 -0
  175. hud/tools/__init__.py +16 -2
  176. hud/tools/apply_patch.py +639 -0
  177. hud/tools/base.py +54 -4
  178. hud/tools/bash.py +2 -2
  179. hud/tools/computer/__init__.py +4 -0
  180. hud/tools/computer/anthropic.py +2 -2
  181. hud/tools/computer/gemini.py +385 -0
  182. hud/tools/computer/hud.py +23 -6
  183. hud/tools/computer/openai.py +20 -21
  184. hud/tools/computer/qwen.py +434 -0
  185. hud/tools/computer/settings.py +37 -0
  186. hud/tools/edit.py +3 -7
  187. hud/tools/executors/base.py +4 -2
  188. hud/tools/executors/pyautogui.py +1 -1
  189. hud/tools/grounding/grounded_tool.py +13 -18
  190. hud/tools/grounding/grounder.py +10 -31
  191. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  192. hud/tools/jupyter.py +330 -0
  193. hud/tools/playwright.py +18 -3
  194. hud/tools/shell.py +308 -0
  195. hud/tools/tests/test_apply_patch.py +718 -0
  196. hud/tools/tests/test_computer.py +4 -9
  197. hud/tools/tests/test_computer_actions.py +24 -2
  198. hud/tools/tests/test_jupyter_tool.py +181 -0
  199. hud/tools/tests/test_shell.py +596 -0
  200. hud/tools/tests/test_submit.py +85 -0
  201. hud/tools/tests/test_types.py +193 -0
  202. hud/tools/types.py +21 -1
  203. hud/types.py +167 -57
  204. hud/utils/__init__.py +2 -0
  205. hud/utils/env.py +67 -0
  206. hud/utils/hud_console.py +61 -3
  207. hud/utils/mcp.py +15 -58
  208. hud/utils/strict_schema.py +162 -0
  209. hud/utils/tests/test_init.py +1 -2
  210. hud/utils/tests/test_mcp.py +1 -28
  211. hud/utils/tests/test_pretty_errors.py +186 -0
  212. hud/utils/tests/test_tool_shorthand.py +154 -0
  213. hud/utils/tests/test_version.py +1 -1
  214. hud/utils/types.py +20 -0
  215. hud/version.py +1 -1
  216. hud_python-0.5.1.dist-info/METADATA +264 -0
  217. hud_python-0.5.1.dist-info/RECORD +299 -0
  218. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
  219. hud/agents/langchain.py +0 -261
  220. hud/agents/lite_llm.py +0 -72
  221. hud/cli/rl/__init__.py +0 -180
  222. hud/cli/rl/config.py +0 -101
  223. hud/cli/rl/display.py +0 -133
  224. hud/cli/rl/gpu.py +0 -63
  225. hud/cli/rl/gpu_utils.py +0 -321
  226. hud/cli/rl/local_runner.py +0 -595
  227. hud/cli/rl/presets.py +0 -96
  228. hud/cli/rl/remote_runner.py +0 -463
  229. hud/cli/rl/rl_api.py +0 -150
  230. hud/cli/rl/vllm.py +0 -177
  231. hud/cli/rl/wait_utils.py +0 -89
  232. hud/datasets/parallel.py +0 -687
  233. hud/misc/__init__.py +0 -1
  234. hud/misc/claude_plays_pokemon.py +0 -292
  235. hud/otel/__init__.py +0 -35
  236. hud/otel/collector.py +0 -142
  237. hud/otel/config.py +0 -181
  238. hud/otel/context.py +0 -570
  239. hud/otel/exporters.py +0 -369
  240. hud/otel/instrumentation.py +0 -135
  241. hud/otel/processors.py +0 -121
  242. hud/otel/tests/__init__.py +0 -1
  243. hud/otel/tests/test_processors.py +0 -197
  244. hud/rl/README.md +0 -30
  245. hud/rl/__init__.py +0 -1
  246. hud/rl/actor.py +0 -176
  247. hud/rl/buffer.py +0 -405
  248. hud/rl/chat_template.jinja +0 -101
  249. hud/rl/config.py +0 -192
  250. hud/rl/distributed.py +0 -132
  251. hud/rl/learner.py +0 -637
  252. hud/rl/tests/__init__.py +0 -1
  253. hud/rl/tests/test_learner.py +0 -186
  254. hud/rl/train.py +0 -382
  255. hud/rl/types.py +0 -101
  256. hud/rl/utils/start_vllm_server.sh +0 -30
  257. hud/rl/utils.py +0 -524
  258. hud/rl/vllm_adapter.py +0 -143
  259. hud/telemetry/job.py +0 -352
  260. hud/telemetry/replay.py +0 -74
  261. hud/telemetry/tests/test_replay.py +0 -40
  262. hud/telemetry/tests/test_trace.py +0 -63
  263. hud/telemetry/trace.py +0 -158
  264. hud/utils/agent_factories.py +0 -86
  265. hud/utils/async_utils.py +0 -65
  266. hud/utils/group_eval.py +0 -223
  267. hud/utils/progress.py +0 -149
  268. hud/utils/tasks.py +0 -127
  269. hud/utils/tests/test_async_utils.py +0 -173
  270. hud/utils/tests/test_progress.py +0 -261
  271. hud_python-0.4.45.dist-info/METADATA +0 -552
  272. hud_python-0.4.45.dist-info/RECORD +0 -228
  273. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
  274. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,280 @@
1
+ """Tests for Environment scenario decorator."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import pytest
6
+
7
+ from hud.environment import Environment
8
+
9
+
10
class TestScenarioDecorator:
    """Registration side effects of the ``@env.scenario`` decorator."""

    def test_scenario_registers_function(self) -> None:
        """Decorating a generator records it under the scenario name."""
        env = Environment("test-env")

        @env.scenario("greet")
        async def greet_scenario(name: str):
            yield f"Hello, {name}!"
            yield 1.0

        assert "greet" in env._scenarios

    def test_scenario_creates_mcp_prompt(self) -> None:
        """The decorator registers a prompt keyed ``test-env:greet``."""
        env = Environment("test-env")

        @env.scenario("greet", description="Greeting scenario")
        async def greet_scenario(name: str):
            yield f"Hello, {name}!"
            yield 1.0

        # The prompt manager holds whatever the decorator registered.
        registered_prompts = env._prompt_manager._prompts.keys()
        assert "test-env:greet" in registered_prompts

    def test_scenario_creates_mcp_resource(self) -> None:
        """The decorator registers a resource keyed ``test-env:greet``."""
        env = Environment("test-env")

        @env.scenario("greet")
        async def greet_scenario(name: str):
            yield f"Hello, {name}!"
            yield 1.0

        # The resource manager holds whatever the decorator registered.
        registered_resources = env._resource_manager._resources.keys()
        assert "test-env:greet" in registered_resources

    def test_scenario_extracts_arguments(self) -> None:
        """Scenario parameters show up as arguments on the prompt."""
        env = Environment("test-env")

        @env.scenario("checkout")
        async def checkout_scenario(user_id: str, amount: int = 100):
            yield f"Checkout for {user_id}: ${amount}"
            yield 1.0

        # Look the prompt up by its registered key.
        prompt = env._prompt_manager._prompts.get("test-env:checkout")
        assert prompt is not None
        assert prompt.arguments is not None

        # Both the required and the defaulted parameter must be listed.
        declared = {argument.name for argument in prompt.arguments}
        assert "user_id" in declared
        assert "amount" in declared
68
+
69
+
70
class TestScenarioExecution:
    """Two-phase execution of a scenario: prompt render, then resource read."""

    @pytest.mark.asyncio
    async def test_scenario_setup_phase(self) -> None:
        """Rendering the prompt runs the scenario up to its first yield."""
        env = Environment("test-env")
        setup_executed = False

        @env.scenario("test")
        async def test_scenario():
            nonlocal setup_executed
            setup_executed = True
            yield "Test prompt"
            yield 1.0

        # Fetch the prompt registered for this scenario.
        prompt = env._prompt_manager._prompts.get("test-env:test")
        assert prompt is not None

        # Rendering the prompt drives the setup phase; no context is passed.
        messages = await prompt.render({})

        assert setup_executed
        # The render result is indexable; its first entry carries the prompt.
        assert len(messages) > 0
        assert "Test prompt" in str(messages[0].content)

    @pytest.mark.asyncio
    async def test_scenario_stores_session(self) -> None:
        """After setup, the scenario is tracked in ``_scenario_latest``."""
        env = Environment("test-env")

        @env.scenario("test")
        async def test_scenario():
            yield "Test prompt"
            yield 1.0

        # Drive the setup phase through the prompt; no context is passed.
        prompt = env._prompt_manager._prompts.get("test-env:test")
        assert prompt is not None
        await prompt.render({})

        # The scenario name must now be recorded for the evaluate phase.
        assert "test" in env._scenario_latest

    @pytest.mark.asyncio
    async def test_scenario_full_flow(self) -> None:
        """Setup runs on prompt render; evaluate runs only on resource read."""
        env = Environment("test-env")
        timeline = []

        @env.scenario("test")
        async def test_scenario():
            timeline.append("setup")
            yield "Test prompt"
            timeline.append("evaluate")
            yield 0.95

        # Setup phase: rendering the prompt must not reach the evaluate code.
        prompt = env._prompt_manager._prompts.get("test-env:test")
        assert prompt is not None
        await prompt.render({})
        assert "setup" in timeline
        assert "evaluate" not in timeline

        # Evaluate phase: reading the resource resumes the generator.
        resource = env._resource_manager._resources.get("test-env:test")
        assert resource is not None
        await resource.read()
        assert "evaluate" in timeline
141
+
142
+
143
class TestScenarioWithArgs:
    """Argument passing from a prompt render into the scenario body."""

    @pytest.mark.asyncio
    async def test_scenario_receives_args(self) -> None:
        """Values given to ``prompt.render`` reach the scenario parameters."""
        env = Environment("test-env")
        captured = {}

        @env.scenario("checkout")
        async def checkout_scenario(user_id: str, amount: int = 100):
            captured.update(user_id=user_id, amount=amount)
            yield f"Checkout {user_id}: ${amount}"
            yield 1.0

        prompt = env._prompt_manager._prompts.get("test-env:checkout")
        assert prompt is not None
        # Render without a context; override the default for ``amount``.
        await prompt.render({"user_id": "alice", "amount": 50})

        assert captured["user_id"] == "alice"
        assert captured["amount"] == 50
166
+
167
+
168
class TestScenarioSubmit:
    """Answer submission and how the scenario consumes the answer."""

    @pytest.mark.asyncio
    async def test_submit_stores_answer(self) -> None:
        """``submit()`` records the answer under the scenario name."""
        env = Environment("test-env")

        @env.scenario("test")
        async def test_scenario():
            yield "What is 2+2?"
            yield 1.0

        # Setup phase.
        prompt = env._prompt_manager._prompts.get("test-env:test")
        assert prompt is not None
        await prompt.render({})

        # Submit through the public API, then inspect the stored value.
        await env.submit("test", "4")

        assert env._scenario_answers.get("test") == "4"

    @pytest.mark.asyncio
    async def test_scenario_receives_answer(self) -> None:
        """The stored answer is delivered as the value of the first yield."""
        env = Environment("test-env")
        received_answer = None

        @env.scenario("qa")
        async def qa_scenario():
            nonlocal received_answer
            received_answer = yield "What is 2+2?"
            yield 1.0 if received_answer == "4" else 0.0

        # Setup phase.
        prompt = env._prompt_manager._prompts.get("test-env:qa")
        assert prompt is not None
        await prompt.render({})

        # Place the answer directly in the answer store.
        env._scenario_answers["qa"] = "4"

        # Evaluate phase.
        resource = env._resource_manager._resources.get("test-env:qa")
        assert resource is not None
        await resource.read()

        assert received_answer == "4"

    @pytest.mark.asyncio
    async def test_scenario_evaluates_answer(self) -> None:
        """Reading the resource returns JSON whose ``reward`` is the score."""
        import json

        env = Environment("test-env")

        @env.scenario("grading")
        async def grading_scenario():
            answer = yield "What is the capital of France?"
            yield 1.0 if "paris" in answer.lower() else 0.0

        # Setup phase.
        prompt = env._prompt_manager._prompts.get("test-env:grading")
        assert prompt is not None
        await prompt.render({})

        # Store a correct answer; the scenario lowercases it before matching.
        env._scenario_answers["grading"] = "Paris"

        # Evaluate phase.
        resource = env._resource_manager._resources.get("test-env:grading")
        assert resource is not None
        payload = await resource.read()

        decoded = json.loads(payload)
        assert decoded["reward"] == 1.0
246
+
247
+
248
class TestScenarioMeta:
    """Scenario source code exposed through ``meta`` on prompt and resource."""

    def test_scenario_captures_source_code(self) -> None:
        """The prompt's ``meta["code"]`` contains the scenario's source."""
        env = Environment("test-env")

        @env.scenario("example")
        async def example_scenario(x: int):
            yield f"Process {x}"
            yield 1.0

        prompt = env._prompt_manager._prompts.get("test-env:example")
        assert prompt is not None
        assert prompt.meta is not None
        assert "code" in prompt.meta
        source = prompt.meta["code"]
        assert "async def example_scenario" in source
        assert "yield" in source

    def test_scenario_meta_on_resource(self) -> None:
        """The resource's ``meta["code"]`` contains the scenario's source."""
        env = Environment("test-env")

        @env.scenario("example")
        async def example_scenario():
            yield "Test"
            yield 1.0

        resource = env._resource_manager._resources.get("test-env:example")
        assert resource is not None
        assert resource.meta is not None
        assert "code" in resource.meta
        source = resource.meta["code"]
        assert "async def example_scenario" in source
@@ -0,0 +1,208 @@
1
+ """Tests for @env.tool() decorator and tool operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import pytest
6
+
7
+ from hud.environment import Environment
8
+
9
+
10
class TestToolDecorator:
    """Registration behaviour of the ``@env.tool()`` decorator."""

    def test_tool_registers_function(self) -> None:
        """A decorated function is registered under its own name."""
        env = Environment("test-env")

        @env.tool()
        def add(a: int, b: int) -> int:
            """Add two numbers."""
            return a + b

        # The tool manager holds whatever the decorator registered.
        registered = env._tool_manager._tools.keys()
        assert "add" in registered

    def test_tool_with_custom_name(self) -> None:
        """``name=`` replaces the function name as the registration key."""
        env = Environment("test-env")

        @env.tool(name="custom_add")
        def add(a: int, b: int) -> int:
            return a + b

        registered = env._tool_manager._tools.keys()
        assert "custom_add" in registered
        assert "add" not in registered

    def test_tool_preserves_docstring(self) -> None:
        """The function docstring appears in the tool description."""
        env = Environment("test-env")

        @env.tool()
        def greet(name: str) -> str:
            """Greet someone by name."""
            return f"Hello, {name}!"

        tool = env._tool_manager._tools.get("greet")
        assert tool is not None
        # ``description`` may be None, so fall back to an empty string.
        description = tool.description or ""
        assert "Greet someone by name" in description

    def test_tool_async_function(self) -> None:
        """Coroutine functions can be registered as tools too."""
        env = Environment("test-env")

        @env.tool()
        async def fetch_data(url: str) -> str:
            """Fetch data from URL."""
            return f"Data from {url}"

        registered = env._tool_manager._tools.keys()
        assert "fetch_data" in registered

    def test_tool_returns_function(self) -> None:
        """The decorated name stays directly callable."""
        env = Environment("test-env")

        @env.tool()
        def add(a: int, b: int) -> int:
            return a + b

        # Call it as a plain function, bypassing the tool machinery.
        assert add(2, 3) == 5
73
+
74
+
75
class TestListTools:
    """Tests for the tool listing returned by ``as_tools``."""

    @pytest.mark.asyncio
    async def test_as_tools_returns_registered_tools(self) -> None:
        """Every registered tool is present in the ``as_tools`` result."""
        env = Environment("test-env")

        @env.tool()
        def tool1() -> str:
            return "1"

        @env.tool()
        def tool2() -> str:
            return "2"

        async with env:
            listed = [tool.name for tool in env.as_tools()]
            assert "tool1" in listed
            assert "tool2" in listed

    @pytest.mark.asyncio
    async def test_as_tools_empty_when_no_tools(self) -> None:
        """With nothing registered, no user-visible tools are listed."""
        env = Environment("test-env")
        async with env:
            # Underscore-prefixed names (e.g. a built-in _hud_submit) are
            # ignored; only user tools count.
            user_tools = [
                tool for tool in env.as_tools() if not tool.name.startswith("_")
            ]
            assert not user_tools
106
+
107
+
108
class TestCallTool:
    """Tests for ``Environment.call_tool``."""

    @pytest.mark.asyncio
    async def test_call_tool_executes_function(self) -> None:
        """Calling a tool by name runs the function with the given kwargs."""
        env = Environment("test-env")
        seen_names = []

        @env.tool()
        def greet(name: str) -> str:
            seen_names.append(name)
            return f"Hello, {name}!"

        async with env:
            outcome = await env.call_tool("greet", name="Alice")

        assert seen_names == ["Alice"]
        assert outcome is not None

    @pytest.mark.asyncio
    async def test_call_tool_async_function(self) -> None:
        """Coroutine tools can be invoked through ``call_tool``."""
        env = Environment("test-env")

        @env.tool()
        async def async_greet(name: str) -> str:
            return f"Hello, {name}!"

        async with env:
            outcome = await env.call_tool("async_greet", name="Bob")

        assert outcome is not None

    @pytest.mark.asyncio
    async def test_call_tool_not_found(self) -> None:
        """An unregistered tool name raises ``ValueError``."""
        env = Environment("test-env")

        async with env:
            with pytest.raises(ValueError, match="Tool not found"):
                await env.call_tool("nonexistent")
150
+
151
+
152
class TestMockMode:
    """Tests for switching mock mode on and off and for mocked tool calls."""

    def test_mock_mode_default_false(self) -> None:
        """A fresh environment is not in mock mode."""
        fresh = Environment("test-env")
        assert fresh._mock_mode is False
        assert fresh.is_mock is False

    def test_mock_enables_mock_mode(self) -> None:
        """``mock()`` turns mock mode on."""
        env = Environment("test-env")
        env.mock()
        assert env._mock_mode is True
        assert env.is_mock is True

    def test_unmock_disables_mock_mode(self) -> None:
        """``unmock()`` turns mock mode back off."""
        env = Environment("test-env")
        env.mock()
        env.unmock()
        assert env._mock_mode is False

    def test_mock_returns_self_for_chaining(self) -> None:
        """``mock()`` returns the environment itself."""
        env = Environment("test-env")
        returned = env.mock()
        assert returned is env

    def test_mock_tool_sets_custom_output(self) -> None:
        """``mock_tool()`` stores the given output under the tool name."""
        env = Environment("test-env")
        env.mock_tool("navigate", "Custom result")
        assert env._mock_outputs["navigate"] == "Custom result"

    @pytest.mark.asyncio
    async def test_mock_mode_returns_mock_response(self) -> None:
        """In mock mode, ``call_tool`` does not run the real function."""
        env = Environment("test-env")
        invocations = 0

        @env.tool()
        def real_tool() -> str:
            nonlocal invocations
            invocations += 1
            return "real result"

        env.mock()
        env.mock_tool("real_tool", "mocked result")

        async with env:
            outcome = await env.call_tool("real_tool")

        # The real implementation must never have run.
        assert invocations == 0
        # A response must still come back from the mocked call.
        assert outcome is not None
@@ -0,0 +1,23 @@
1
+ """Environment types for configuration and tracing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+ __all__ = ["EnvConfig"]
8
+
9
+
10
class EnvConfig(BaseModel):
    """Environment configuration for Tasks.

    Specifies which hub to connect to and optional tool filtering.

    Attributes:
        name: Hub name to connect via connect_hub() (e.g., "browser", "sheets")
        include: Optional whitelist of tool names to include
        exclude: Optional blacklist of tool names to exclude
    """

    # NOTE: the docstring above is kept verbatim because pydantic can surface
    # a model's docstring as its schema description.

    # Required: no default is supplied.
    name: str = Field(..., description="Hub name to connect to")
    # Both filters are optional and default to None.
    include: list[str] | None = Field(None, description="Whitelist of tool names")
    exclude: list[str] | None = Field(None, description="Blacklist of tool names")
@@ -0,0 +1,35 @@
1
+ """Environment utilities."""
2
+
3
+ from hud.environment.utils.formats import (
4
+ ToolFormat,
5
+ format_result,
6
+ parse_tool_call,
7
+ parse_tool_calls,
8
+ result_to_string,
9
+ )
10
+ from hud.environment.utils.schema import (
11
+ ensure_strict_schema,
12
+ json_type_to_python,
13
+ schema_to_pydantic,
14
+ )
15
+ from hud.environment.utils.tool_wrappers import (
16
+ create_async_tool_fn,
17
+ create_sync_tool_fn,
18
+ create_tool_fns,
19
+ stringify_result,
20
+ )
21
+
22
+ __all__ = [
23
+ "ToolFormat",
24
+ "create_async_tool_fn",
25
+ "create_sync_tool_fn",
26
+ "create_tool_fns",
27
+ "ensure_strict_schema",
28
+ "format_result",
29
+ "json_type_to_python",
30
+ "parse_tool_call",
31
+ "parse_tool_calls",
32
+ "result_to_string",
33
+ "schema_to_pydantic",
34
+ "stringify_result",
35
+ ]