hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282):
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +70 -5
  3. hud/agents/base.py +238 -500
  4. hud/agents/claude.py +236 -247
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +264 -0
  7. hud/agents/gemini_cua.py +324 -0
  8. hud/agents/grounded_openai.py +98 -100
  9. hud/agents/misc/integration_test_agent.py +51 -20
  10. hud/agents/misc/response_agent.py +48 -36
  11. hud/agents/openai.py +282 -296
  12. hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
  13. hud/agents/operator.py +199 -0
  14. hud/agents/resolver.py +70 -0
  15. hud/agents/tests/conftest.py +133 -0
  16. hud/agents/tests/test_base.py +300 -622
  17. hud/agents/tests/test_base_runtime.py +233 -0
  18. hud/agents/tests/test_claude.py +381 -214
  19. hud/agents/tests/test_client.py +9 -10
  20. hud/agents/tests/test_gemini.py +369 -0
  21. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  22. hud/agents/tests/test_openai.py +377 -140
  23. hud/agents/tests/test_operator.py +362 -0
  24. hud/agents/tests/test_resolver.py +192 -0
  25. hud/agents/tests/test_run_eval.py +179 -0
  26. hud/agents/types.py +148 -0
  27. hud/cli/__init__.py +493 -546
  28. hud/cli/analyze.py +43 -5
  29. hud/cli/build.py +699 -113
  30. hud/cli/debug.py +8 -5
  31. hud/cli/dev.py +889 -732
  32. hud/cli/eval.py +793 -667
  33. hud/cli/flows/dev.py +167 -0
  34. hud/cli/flows/init.py +191 -0
  35. hud/cli/flows/tasks.py +153 -56
  36. hud/cli/flows/templates.py +151 -0
  37. hud/cli/flows/tests/__init__.py +1 -0
  38. hud/cli/flows/tests/test_dev.py +126 -0
  39. hud/cli/init.py +60 -58
  40. hud/cli/pull.py +1 -1
  41. hud/cli/push.py +38 -13
  42. hud/cli/rft.py +311 -0
  43. hud/cli/rft_status.py +145 -0
  44. hud/cli/tests/test_analyze.py +5 -5
  45. hud/cli/tests/test_analyze_metadata.py +3 -2
  46. hud/cli/tests/test_analyze_module.py +120 -0
  47. hud/cli/tests/test_build.py +110 -8
  48. hud/cli/tests/test_build_failure.py +41 -0
  49. hud/cli/tests/test_build_module.py +50 -0
  50. hud/cli/tests/test_cli_init.py +6 -1
  51. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  52. hud/cli/tests/test_cli_root.py +140 -0
  53. hud/cli/tests/test_convert.py +361 -0
  54. hud/cli/tests/test_debug.py +12 -10
  55. hud/cli/tests/test_dev.py +197 -0
  56. hud/cli/tests/test_eval.py +251 -0
  57. hud/cli/tests/test_eval_bedrock.py +51 -0
  58. hud/cli/tests/test_init.py +124 -0
  59. hud/cli/tests/test_main_module.py +11 -5
  60. hud/cli/tests/test_mcp_server.py +12 -100
  61. hud/cli/tests/test_push.py +1 -1
  62. hud/cli/tests/test_push_happy.py +74 -0
  63. hud/cli/tests/test_push_wrapper.py +23 -0
  64. hud/cli/tests/test_registry.py +1 -1
  65. hud/cli/tests/test_utils.py +1 -1
  66. hud/cli/{rl → utils}/celebrate.py +14 -12
  67. hud/cli/utils/config.py +18 -1
  68. hud/cli/utils/docker.py +130 -4
  69. hud/cli/utils/env_check.py +9 -9
  70. hud/cli/utils/git.py +136 -0
  71. hud/cli/utils/interactive.py +39 -5
  72. hud/cli/utils/metadata.py +70 -1
  73. hud/cli/utils/runner.py +1 -1
  74. hud/cli/utils/server.py +2 -2
  75. hud/cli/utils/source_hash.py +3 -3
  76. hud/cli/utils/tasks.py +4 -1
  77. hud/cli/utils/tests/__init__.py +0 -0
  78. hud/cli/utils/tests/test_config.py +58 -0
  79. hud/cli/utils/tests/test_docker.py +93 -0
  80. hud/cli/utils/tests/test_docker_hints.py +71 -0
  81. hud/cli/utils/tests/test_env_check.py +74 -0
  82. hud/cli/utils/tests/test_environment.py +42 -0
  83. hud/cli/utils/tests/test_git.py +142 -0
  84. hud/cli/utils/tests/test_interactive_module.py +60 -0
  85. hud/cli/utils/tests/test_local_runner.py +50 -0
  86. hud/cli/utils/tests/test_logging_utils.py +23 -0
  87. hud/cli/utils/tests/test_metadata.py +49 -0
  88. hud/cli/utils/tests/test_package_runner.py +35 -0
  89. hud/cli/utils/tests/test_registry_utils.py +49 -0
  90. hud/cli/utils/tests/test_remote_runner.py +25 -0
  91. hud/cli/utils/tests/test_runner_modules.py +52 -0
  92. hud/cli/utils/tests/test_source_hash.py +36 -0
  93. hud/cli/utils/tests/test_tasks.py +80 -0
  94. hud/cli/utils/version_check.py +258 -0
  95. hud/cli/{rl → utils}/viewer.py +2 -2
  96. hud/clients/README.md +12 -11
  97. hud/clients/__init__.py +4 -3
  98. hud/clients/base.py +166 -26
  99. hud/clients/environment.py +51 -0
  100. hud/clients/fastmcp.py +13 -6
  101. hud/clients/mcp_use.py +45 -15
  102. hud/clients/tests/test_analyze_scenarios.py +206 -0
  103. hud/clients/tests/test_protocol.py +9 -3
  104. hud/datasets/__init__.py +23 -20
  105. hud/datasets/loader.py +326 -0
  106. hud/datasets/runner.py +198 -105
  107. hud/datasets/tests/__init__.py +0 -0
  108. hud/datasets/tests/test_loader.py +221 -0
  109. hud/datasets/tests/test_utils.py +315 -0
  110. hud/datasets/utils.py +270 -90
  111. hud/environment/__init__.py +52 -0
  112. hud/environment/connection.py +258 -0
  113. hud/environment/connectors/__init__.py +33 -0
  114. hud/environment/connectors/base.py +68 -0
  115. hud/environment/connectors/local.py +177 -0
  116. hud/environment/connectors/mcp_config.py +137 -0
  117. hud/environment/connectors/openai.py +101 -0
  118. hud/environment/connectors/remote.py +172 -0
  119. hud/environment/environment.py +835 -0
  120. hud/environment/integrations/__init__.py +45 -0
  121. hud/environment/integrations/adk.py +67 -0
  122. hud/environment/integrations/anthropic.py +196 -0
  123. hud/environment/integrations/gemini.py +92 -0
  124. hud/environment/integrations/langchain.py +82 -0
  125. hud/environment/integrations/llamaindex.py +68 -0
  126. hud/environment/integrations/openai.py +238 -0
  127. hud/environment/mock.py +306 -0
  128. hud/environment/router.py +263 -0
  129. hud/environment/scenarios.py +620 -0
  130. hud/environment/tests/__init__.py +1 -0
  131. hud/environment/tests/test_connection.py +317 -0
  132. hud/environment/tests/test_connectors.py +205 -0
  133. hud/environment/tests/test_environment.py +593 -0
  134. hud/environment/tests/test_integrations.py +257 -0
  135. hud/environment/tests/test_local_connectors.py +242 -0
  136. hud/environment/tests/test_scenarios.py +1086 -0
  137. hud/environment/tests/test_tools.py +208 -0
  138. hud/environment/types.py +23 -0
  139. hud/environment/utils/__init__.py +35 -0
  140. hud/environment/utils/formats.py +215 -0
  141. hud/environment/utils/schema.py +171 -0
  142. hud/environment/utils/tool_wrappers.py +113 -0
  143. hud/eval/__init__.py +67 -0
  144. hud/eval/context.py +727 -0
  145. hud/eval/display.py +299 -0
  146. hud/eval/instrument.py +187 -0
  147. hud/eval/manager.py +533 -0
  148. hud/eval/parallel.py +268 -0
  149. hud/eval/task.py +372 -0
  150. hud/eval/tests/__init__.py +1 -0
  151. hud/eval/tests/test_context.py +178 -0
  152. hud/eval/tests/test_eval.py +210 -0
  153. hud/eval/tests/test_manager.py +152 -0
  154. hud/eval/tests/test_parallel.py +168 -0
  155. hud/eval/tests/test_task.py +291 -0
  156. hud/eval/types.py +65 -0
  157. hud/eval/utils.py +194 -0
  158. hud/patches/__init__.py +19 -0
  159. hud/patches/mcp_patches.py +308 -0
  160. hud/patches/warnings.py +54 -0
  161. hud/samples/browser.py +4 -4
  162. hud/server/__init__.py +2 -1
  163. hud/server/low_level.py +2 -1
  164. hud/server/router.py +164 -0
  165. hud/server/server.py +567 -80
  166. hud/server/tests/test_mcp_server_integration.py +11 -11
  167. hud/server/tests/test_mcp_server_more.py +1 -1
  168. hud/server/tests/test_server_extra.py +2 -0
  169. hud/settings.py +45 -3
  170. hud/shared/exceptions.py +36 -10
  171. hud/shared/hints.py +26 -1
  172. hud/shared/requests.py +15 -3
  173. hud/shared/tests/test_exceptions.py +40 -31
  174. hud/shared/tests/test_hints.py +167 -0
  175. hud/telemetry/__init__.py +20 -19
  176. hud/telemetry/exporter.py +201 -0
  177. hud/telemetry/instrument.py +165 -253
  178. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  179. hud/telemetry/tests/test_exporter.py +258 -0
  180. hud/telemetry/tests/test_instrument.py +401 -0
  181. hud/tools/__init__.py +18 -2
  182. hud/tools/agent.py +223 -0
  183. hud/tools/apply_patch.py +639 -0
  184. hud/tools/base.py +54 -4
  185. hud/tools/bash.py +2 -2
  186. hud/tools/computer/__init__.py +36 -3
  187. hud/tools/computer/anthropic.py +2 -2
  188. hud/tools/computer/gemini.py +385 -0
  189. hud/tools/computer/hud.py +23 -6
  190. hud/tools/computer/openai.py +20 -21
  191. hud/tools/computer/qwen.py +434 -0
  192. hud/tools/computer/settings.py +37 -0
  193. hud/tools/edit.py +3 -7
  194. hud/tools/executors/base.py +4 -2
  195. hud/tools/executors/pyautogui.py +1 -1
  196. hud/tools/grounding/grounded_tool.py +13 -18
  197. hud/tools/grounding/grounder.py +10 -31
  198. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  199. hud/tools/jupyter.py +330 -0
  200. hud/tools/playwright.py +18 -3
  201. hud/tools/shell.py +308 -0
  202. hud/tools/tests/test_agent_tool.py +355 -0
  203. hud/tools/tests/test_apply_patch.py +718 -0
  204. hud/tools/tests/test_computer.py +4 -9
  205. hud/tools/tests/test_computer_actions.py +24 -2
  206. hud/tools/tests/test_jupyter_tool.py +181 -0
  207. hud/tools/tests/test_shell.py +596 -0
  208. hud/tools/tests/test_submit.py +85 -0
  209. hud/tools/tests/test_types.py +193 -0
  210. hud/tools/types.py +21 -1
  211. hud/types.py +194 -56
  212. hud/utils/__init__.py +2 -0
  213. hud/utils/env.py +67 -0
  214. hud/utils/hud_console.py +89 -18
  215. hud/utils/mcp.py +15 -58
  216. hud/utils/strict_schema.py +162 -0
  217. hud/utils/tests/test_init.py +1 -2
  218. hud/utils/tests/test_mcp.py +1 -28
  219. hud/utils/tests/test_pretty_errors.py +186 -0
  220. hud/utils/tests/test_tool_shorthand.py +154 -0
  221. hud/utils/tests/test_version.py +1 -1
  222. hud/utils/types.py +20 -0
  223. hud/version.py +1 -1
  224. hud_python-0.5.13.dist-info/METADATA +264 -0
  225. hud_python-0.5.13.dist-info/RECORD +305 -0
  226. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
  227. hud/agents/langchain.py +0 -261
  228. hud/agents/lite_llm.py +0 -72
  229. hud/cli/rl/__init__.py +0 -180
  230. hud/cli/rl/config.py +0 -101
  231. hud/cli/rl/display.py +0 -133
  232. hud/cli/rl/gpu.py +0 -63
  233. hud/cli/rl/gpu_utils.py +0 -321
  234. hud/cli/rl/local_runner.py +0 -595
  235. hud/cli/rl/presets.py +0 -96
  236. hud/cli/rl/remote_runner.py +0 -463
  237. hud/cli/rl/rl_api.py +0 -150
  238. hud/cli/rl/vllm.py +0 -177
  239. hud/cli/rl/wait_utils.py +0 -89
  240. hud/datasets/parallel.py +0 -687
  241. hud/misc/__init__.py +0 -1
  242. hud/misc/claude_plays_pokemon.py +0 -292
  243. hud/otel/__init__.py +0 -35
  244. hud/otel/collector.py +0 -142
  245. hud/otel/config.py +0 -181
  246. hud/otel/context.py +0 -570
  247. hud/otel/exporters.py +0 -369
  248. hud/otel/instrumentation.py +0 -135
  249. hud/otel/processors.py +0 -121
  250. hud/otel/tests/__init__.py +0 -1
  251. hud/otel/tests/test_processors.py +0 -197
  252. hud/rl/README.md +0 -30
  253. hud/rl/__init__.py +0 -1
  254. hud/rl/actor.py +0 -176
  255. hud/rl/buffer.py +0 -405
  256. hud/rl/chat_template.jinja +0 -101
  257. hud/rl/config.py +0 -192
  258. hud/rl/distributed.py +0 -132
  259. hud/rl/learner.py +0 -637
  260. hud/rl/tests/__init__.py +0 -1
  261. hud/rl/tests/test_learner.py +0 -186
  262. hud/rl/train.py +0 -382
  263. hud/rl/types.py +0 -101
  264. hud/rl/utils/start_vllm_server.sh +0 -30
  265. hud/rl/utils.py +0 -524
  266. hud/rl/vllm_adapter.py +0 -143
  267. hud/telemetry/job.py +0 -352
  268. hud/telemetry/replay.py +0 -74
  269. hud/telemetry/tests/test_replay.py +0 -40
  270. hud/telemetry/tests/test_trace.py +0 -63
  271. hud/telemetry/trace.py +0 -158
  272. hud/utils/agent_factories.py +0 -86
  273. hud/utils/async_utils.py +0 -65
  274. hud/utils/group_eval.py +0 -223
  275. hud/utils/progress.py +0 -149
  276. hud/utils/tasks.py +0 -127
  277. hud/utils/tests/test_async_utils.py +0 -173
  278. hud/utils/tests/test_progress.py +0 -261
  279. hud_python-0.4.45.dist-info/METADATA +0 -552
  280. hud_python-0.4.45.dist-info/RECORD +0 -228
  281. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  282. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,593 @@
1
+ """Tests for Environment class - context manager, resources, prompts, prompt feature."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import pytest
6
+
7
+
8
class TestEnvironmentPrompt:
    """Checks for the ``prompt`` attribute exposed by ``Environment``."""

    def test_prompt_defaults_to_none(self) -> None:
        """A freshly constructed Environment reports ``prompt`` as None."""
        from hud.environment import Environment

        assert Environment("test").prompt is None

    def test_prompt_can_be_set(self) -> None:
        """A value assigned to ``prompt`` is returned on the next read."""
        from hud.environment import Environment

        environment = Environment("test")
        environment.prompt = "Navigate to google.com"

        assert environment.prompt == "Navigate to google.com"
25
+
26
+
27
class TestEnvironmentContextManager:
    """Checks for using ``Environment`` as an async context manager."""

    @pytest.mark.asyncio
    async def test_context_manager_sets_in_context_flag(self) -> None:
        """``_in_context`` is True inside the ``async with`` body and False outside it."""
        from hud.environment import Environment

        environment = Environment("test")
        assert environment._in_context is False

        async with environment:
            assert environment._in_context is True

        assert environment._in_context is False

    @pytest.mark.asyncio
    async def test_context_manager_no_connections(self) -> None:
        """Entering and leaving the context works when no connections were added."""
        from hud.environment import Environment

        # No assertion needed: the test passes as long as enter/exit do not raise.
        async with Environment("test"):
            pass
54
+
55
+
56
class TestEnvironmentResources:
    """Checks for the resource listing/reading methods of ``Environment``."""

    @pytest.mark.asyncio
    async def test_list_resources_empty(self) -> None:
        """With nothing registered, ``list_resources`` yields an empty list."""
        from hud.environment import Environment

        environment = Environment("test")
        async with environment:
            listed = await environment.list_resources()

        # Compared after the context has exited, as the value was already captured.
        assert listed == []

    @pytest.mark.asyncio
    async def test_read_resource_not_found(self) -> None:
        """Reading an unknown URI raises ``ValueError`` mentioning 'Resource not found'."""
        from hud.environment import Environment

        environment = Environment("test")
        expected_failure = pytest.raises(ValueError, match="Resource not found")

        async with environment:
            with expected_failure:
                await environment.read_resource("file://nonexistent.txt")
81
+
82
+
83
class TestEnvironmentPrompts:
    """Checks for MCP prompt operations (distinct from the task ``prompt`` attribute)."""

    @pytest.mark.asyncio
    async def test_list_prompts_empty(self) -> None:
        """With nothing registered, ``list_prompts`` yields an empty list."""
        from hud.environment import Environment

        environment = Environment("test")
        async with environment:
            listed = await environment.list_prompts()

        # Compared after the context has exited, as the value was already captured.
        assert listed == []

    @pytest.mark.asyncio
    async def test_get_prompt_not_found(self) -> None:
        """Fetching an unknown prompt raises ``ValueError`` mentioning 'Prompt not found'."""
        from hud.environment import Environment

        environment = Environment("test")
        expected_failure = pytest.raises(ValueError, match="Prompt not found")

        async with environment:
            with expected_failure:
                await environment.get_prompt("nonexistent")
108
+
109
+
110
class TestEnvironmentSetupEvaluate:
    """Checks for the ``setup_tool`` / ``evaluate_tool`` builder methods."""

    def test_setup_tool_with_name_and_kwargs(self) -> None:
        """``setup_tool`` records one (name, kwargs) entry in ``_setup_calls``."""
        from hud.environment import Environment

        environment = Environment("test")
        environment.setup_tool("navigate", url="https://example.com")

        recorded = environment._setup_calls
        assert len(recorded) == 1
        assert recorded[0] == ("navigate", {"url": "https://example.com"})

    def test_setup_tool_returns_self(self) -> None:
        """``setup_tool`` hands back the same Environment so calls can be chained."""
        from hud.environment import Environment

        environment = Environment("test")

        assert environment.setup_tool("navigate", url="https://example.com") is environment

    def test_evaluate_tool_with_name_and_kwargs(self) -> None:
        """``evaluate_tool`` records one (name, kwargs) entry in ``_evaluate_calls``."""
        from hud.environment import Environment

        environment = Environment("test")
        environment.evaluate_tool("check_text", contains="success")

        recorded = environment._evaluate_calls
        assert len(recorded) == 1
        assert recorded[0] == ("check_text", {"contains": "success"})

    def test_evaluate_tool_returns_self(self) -> None:
        """``evaluate_tool`` hands back the same Environment so calls can be chained."""
        from hud.environment import Environment

        environment = Environment("test")

        assert environment.evaluate_tool("check_text", contains="success") is environment

    def test_chaining_multiple_setup_calls(self) -> None:
        """Two chained ``setup_tool`` calls leave two entries in ``_setup_calls``."""
        from hud.environment import Environment

        after_first = Environment("test").setup_tool("navigate", url="https://example.com")
        after_second = after_first.setup_tool("wait", seconds=2)

        assert len(after_second._setup_calls) == 2
162
+
163
+
164
class TestEnvironmentMCPProtocol:
    """Checks for the MCP handler overrides ``_env_list_tools`` and ``_env_call_tool``.

    The tests register either a local tool or a stub connector, build the
    routing table, and then invoke the handlers directly.
    """

    @staticmethod
    def _remote_tool_list() -> list:
        """Return a one-element list holding the ``remote_tool`` definition."""
        import mcp.types as mcp_types

        remote = mcp_types.Tool(
            name="remote_tool",
            description="A remote tool",
            inputSchema={"type": "object"},
        )
        return [remote]

    @staticmethod
    def _make_connector(tools: list, call_tool: object | None = None) -> object:
        """Build a stub connector that serves ``tools`` from its cache.

        ``call_tool`` is attached as a class attribute only when supplied, so a
        connector created without it has no ``call_tool`` attribute at all.
        """

        class FakeConnector:
            is_connected = True
            _tools_cache = tools

            @property
            def cached_tools(self) -> list:
                return self._tools_cache

            @property
            def cached_prompts(self) -> list:
                return []

            @property
            def cached_resources(self) -> list:
                return []

            async def connect(self) -> None:
                pass

            async def disconnect(self) -> None:
                pass

            async def list_tools(self) -> list:
                return self._tools_cache

        if call_tool is not None:
            FakeConnector.call_tool = call_tool  # type: ignore

        return FakeConnector()

    @pytest.mark.asyncio
    async def test_env_list_tools_includes_local_tools(self) -> None:
        """A locally registered tool is listed once routing has been built."""
        from hud.environment import Environment

        environment = Environment("test")

        @environment.tool()
        def my_tool(x: int) -> int:
            """A test tool."""
            return x * 2

        # Routing is built explicitly here instead of entering the context.
        await environment._build_routing()
        listed = await environment._env_list_tools()

        assert len(listed) == 1
        assert listed[0].name == "my_tool"

    @pytest.mark.asyncio
    async def test_env_list_tools_includes_connector_tools(self) -> None:
        """Tools cached on a connector show up in ``_env_list_tools``."""
        from hud.environment import Environment

        environment = Environment("test")
        environment._connections["mock"] = self._make_connector(  # type: ignore
            self._remote_tool_list()
        )

        await environment._build_routing()
        listed = await environment._env_list_tools()

        assert "remote_tool" in [tool.name for tool in listed]

    @pytest.mark.asyncio
    async def test_env_call_tool_routes_to_local(self) -> None:
        """Calling a local tool by name invokes it with the given arguments."""
        from hud.environment import Environment

        environment = Environment("test")
        seen_args: list[int] = []

        @environment.tool()
        def my_tool(x: int) -> str:
            """A test tool."""
            seen_args.append(x)
            return f"result: {x}"

        await environment._build_routing()
        outcome = await environment._env_call_tool("my_tool", {"x": 42})

        assert seen_args == [42]
        assert len(outcome) == 1

    @pytest.mark.asyncio
    async def test_env_call_tool_routes_to_connector(self) -> None:
        """Calling a connector-provided tool is forwarded to that connector."""
        from unittest.mock import AsyncMock

        import mcp.types as mcp_types

        from hud.environment import Environment
        from hud.types import MCPToolResult

        canned_result = MCPToolResult(
            content=[mcp_types.TextContent(type="text", text="remote result")],
            isError=False,
        )
        connector = self._make_connector(
            self._remote_tool_list(),
            call_tool=AsyncMock(return_value=canned_result),
        )

        environment = Environment("test")
        environment._connections["mock"] = connector  # type: ignore

        await environment._build_routing()
        outcome = await environment._env_call_tool("remote_tool", {"arg": "value"})

        # The stub must have received exactly the name and arguments passed in.
        connector.call_tool.assert_called_once_with("remote_tool", {"arg": "value"})  # type: ignore
        assert len(outcome) == 1

    def test_setup_handlers_registers_custom_handlers(self) -> None:
        """Both custom handler attributes exist on the Environment and are callable."""
        from hud.environment import Environment

        environment = Environment("test")

        # Existence checks first, then callability, in the same order for both handlers.
        assert hasattr(environment, "_env_list_tools")
        assert hasattr(environment, "_env_call_tool")
        for handler in (environment._env_list_tools, environment._env_call_tool):
            assert callable(handler)
346
+
347
+
348
class TestEnvironmentToolFiltering:
    """Checks for agent-level include/exclude tool filtering, wildcards included.

    Each test registers local tools, optionally sets ``_agent_include`` and/or
    ``_agent_exclude``, builds routing, and inspects what ``as_tools`` returns
    (v4 backwards compat).
    """

    @staticmethod
    def _register_tool_a_and_b(env) -> None:
        """Register the two plain tools ``tool_a`` and ``tool_b`` on ``env``."""

        @env.tool()
        def tool_a() -> str:
            """Tool A."""
            return "a"

        @env.tool()
        def tool_b() -> str:
            """Tool B."""
            return "b"

    @staticmethod
    async def _exposed_names(env) -> set[str]:
        """Build routing on ``env`` and return the names ``as_tools`` exposes."""
        await env._build_routing()
        return {tool.name for tool in env.as_tools()}

    @pytest.mark.asyncio
    async def test_as_tools_no_filter(self) -> None:
        """Without any filter every registered tool is exposed."""
        from hud.environment import Environment

        environment = Environment("test")
        self._register_tool_a_and_b(environment)

        exposed = await self._exposed_names(environment)

        assert {"tool_a", "tool_b"} <= exposed

    @pytest.mark.asyncio
    async def test_as_tools_exact_include(self) -> None:
        """An exact include list exposes only the named tool."""
        from hud.environment import Environment

        environment = Environment("test")
        self._register_tool_a_and_b(environment)
        environment._agent_include = ["tool_a"]

        exposed = await self._exposed_names(environment)

        assert "tool_a" in exposed
        assert "tool_b" not in exposed

    @pytest.mark.asyncio
    async def test_as_tools_exact_exclude(self) -> None:
        """An exact exclude list hides only the named tool."""
        from hud.environment import Environment

        environment = Environment("test")
        self._register_tool_a_and_b(environment)
        environment._agent_exclude = ["tool_a"]

        exposed = await self._exposed_names(environment)

        assert "tool_a" not in exposed
        assert "tool_b" in exposed

    @pytest.mark.asyncio
    async def test_as_tools_wildcard_exclude_prefix(self) -> None:
        """A prefix wildcard such as 'setup_*' hides every tool starting with it."""
        from hud.environment import Environment

        environment = Environment("test")

        @environment.tool()
        def setup_database() -> str:
            """Setup tool."""
            return "setup"

        @environment.tool()
        def setup_user() -> str:
            """Another setup tool."""
            return "setup"

        @environment.tool()
        def run_query() -> str:
            """Regular tool."""
            return "query"

        environment._agent_exclude = ["setup_*"]
        exposed = await self._exposed_names(environment)

        assert exposed.isdisjoint({"setup_database", "setup_user"})
        assert "run_query" in exposed

    @pytest.mark.asyncio
    async def test_as_tools_wildcard_exclude_contains(self) -> None:
        """A contains wildcard such as '*setup*' hides tools with it anywhere in the name."""
        from hud.environment import Environment

        environment = Environment("test")

        @environment.tool()
        def hud_setup() -> str:
            """Contains setup."""
            return "setup"

        @environment.tool()
        def setup_env() -> str:
            """Starts with setup."""
            return "setup"

        @environment.tool()
        def my_setup_tool() -> str:
            """Contains setup in middle."""
            return "setup"

        @environment.tool()
        def run_query() -> str:
            """No setup in name."""
            return "query"

        environment._agent_exclude = ["*setup*"]
        exposed = await self._exposed_names(environment)

        assert exposed.isdisjoint({"hud_setup", "setup_env", "my_setup_tool"})
        assert "run_query" in exposed

    @pytest.mark.asyncio
    async def test_as_tools_multiple_wildcard_patterns(self) -> None:
        """Several exclude patterns, wildcard and exact, are all applied."""
        from hud.environment import Environment

        environment = Environment("test")

        @environment.tool()
        def setup_db() -> str:
            """Setup tool."""
            return "setup"

        @environment.tool()
        def evaluate_result() -> str:
            """Evaluate tool."""
            return "evaluate"

        @environment.tool()
        def checkout_branch() -> str:
            """Checkout tool."""
            return "checkout"

        @environment.tool()
        def run_query() -> str:
            """Regular tool."""
            return "query"

        environment._agent_exclude = ["*setup*", "*evaluate*", "checkout_branch"]
        exposed = await self._exposed_names(environment)

        assert exposed.isdisjoint({"setup_db", "evaluate_result", "checkout_branch"})
        assert "run_query" in exposed

    @pytest.mark.asyncio
    async def test_as_tools_wildcard_include_all(self) -> None:
        """An include list of ['*'] exposes every registered tool."""
        from hud.environment import Environment

        environment = Environment("test")
        self._register_tool_a_and_b(environment)
        environment._agent_include = ["*"]

        exposed = await self._exposed_names(environment)

        assert {"tool_a", "tool_b"} <= exposed

    @pytest.mark.asyncio
    async def test_as_tools_include_and_exclude_combined(self) -> None:
        """Include and exclude filters are both honoured when set together."""
        from hud.environment import Environment

        environment = Environment("test")

        @environment.tool()
        def browser_navigate() -> str:
            """Browser tool."""
            return "nav"

        @environment.tool()
        def browser_setup() -> str:
            """Browser setup - should be excluded."""
            return "setup"

        @environment.tool()
        def file_read() -> str:
            """File tool - not included."""
            return "read"

        environment._agent_include = ["browser_*"]
        environment._agent_exclude = ["*setup*"]
        exposed = await self._exposed_names(environment)

        assert "browser_navigate" in exposed
        assert "browser_setup" not in exposed  # matched by the *setup* exclude
        assert "file_read" not in exposed  # not matched by the browser_* include