hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282)
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +70 -5
  3. hud/agents/base.py +238 -500
  4. hud/agents/claude.py +236 -247
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +264 -0
  7. hud/agents/gemini_cua.py +324 -0
  8. hud/agents/grounded_openai.py +98 -100
  9. hud/agents/misc/integration_test_agent.py +51 -20
  10. hud/agents/misc/response_agent.py +48 -36
  11. hud/agents/openai.py +282 -296
  12. hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
  13. hud/agents/operator.py +199 -0
  14. hud/agents/resolver.py +70 -0
  15. hud/agents/tests/conftest.py +133 -0
  16. hud/agents/tests/test_base.py +300 -622
  17. hud/agents/tests/test_base_runtime.py +233 -0
  18. hud/agents/tests/test_claude.py +381 -214
  19. hud/agents/tests/test_client.py +9 -10
  20. hud/agents/tests/test_gemini.py +369 -0
  21. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  22. hud/agents/tests/test_openai.py +377 -140
  23. hud/agents/tests/test_operator.py +362 -0
  24. hud/agents/tests/test_resolver.py +192 -0
  25. hud/agents/tests/test_run_eval.py +179 -0
  26. hud/agents/types.py +148 -0
  27. hud/cli/__init__.py +493 -546
  28. hud/cli/analyze.py +43 -5
  29. hud/cli/build.py +699 -113
  30. hud/cli/debug.py +8 -5
  31. hud/cli/dev.py +889 -732
  32. hud/cli/eval.py +793 -667
  33. hud/cli/flows/dev.py +167 -0
  34. hud/cli/flows/init.py +191 -0
  35. hud/cli/flows/tasks.py +153 -56
  36. hud/cli/flows/templates.py +151 -0
  37. hud/cli/flows/tests/__init__.py +1 -0
  38. hud/cli/flows/tests/test_dev.py +126 -0
  39. hud/cli/init.py +60 -58
  40. hud/cli/pull.py +1 -1
  41. hud/cli/push.py +38 -13
  42. hud/cli/rft.py +311 -0
  43. hud/cli/rft_status.py +145 -0
  44. hud/cli/tests/test_analyze.py +5 -5
  45. hud/cli/tests/test_analyze_metadata.py +3 -2
  46. hud/cli/tests/test_analyze_module.py +120 -0
  47. hud/cli/tests/test_build.py +110 -8
  48. hud/cli/tests/test_build_failure.py +41 -0
  49. hud/cli/tests/test_build_module.py +50 -0
  50. hud/cli/tests/test_cli_init.py +6 -1
  51. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  52. hud/cli/tests/test_cli_root.py +140 -0
  53. hud/cli/tests/test_convert.py +361 -0
  54. hud/cli/tests/test_debug.py +12 -10
  55. hud/cli/tests/test_dev.py +197 -0
  56. hud/cli/tests/test_eval.py +251 -0
  57. hud/cli/tests/test_eval_bedrock.py +51 -0
  58. hud/cli/tests/test_init.py +124 -0
  59. hud/cli/tests/test_main_module.py +11 -5
  60. hud/cli/tests/test_mcp_server.py +12 -100
  61. hud/cli/tests/test_push.py +1 -1
  62. hud/cli/tests/test_push_happy.py +74 -0
  63. hud/cli/tests/test_push_wrapper.py +23 -0
  64. hud/cli/tests/test_registry.py +1 -1
  65. hud/cli/tests/test_utils.py +1 -1
  66. hud/cli/{rl → utils}/celebrate.py +14 -12
  67. hud/cli/utils/config.py +18 -1
  68. hud/cli/utils/docker.py +130 -4
  69. hud/cli/utils/env_check.py +9 -9
  70. hud/cli/utils/git.py +136 -0
  71. hud/cli/utils/interactive.py +39 -5
  72. hud/cli/utils/metadata.py +70 -1
  73. hud/cli/utils/runner.py +1 -1
  74. hud/cli/utils/server.py +2 -2
  75. hud/cli/utils/source_hash.py +3 -3
  76. hud/cli/utils/tasks.py +4 -1
  77. hud/cli/utils/tests/__init__.py +0 -0
  78. hud/cli/utils/tests/test_config.py +58 -0
  79. hud/cli/utils/tests/test_docker.py +93 -0
  80. hud/cli/utils/tests/test_docker_hints.py +71 -0
  81. hud/cli/utils/tests/test_env_check.py +74 -0
  82. hud/cli/utils/tests/test_environment.py +42 -0
  83. hud/cli/utils/tests/test_git.py +142 -0
  84. hud/cli/utils/tests/test_interactive_module.py +60 -0
  85. hud/cli/utils/tests/test_local_runner.py +50 -0
  86. hud/cli/utils/tests/test_logging_utils.py +23 -0
  87. hud/cli/utils/tests/test_metadata.py +49 -0
  88. hud/cli/utils/tests/test_package_runner.py +35 -0
  89. hud/cli/utils/tests/test_registry_utils.py +49 -0
  90. hud/cli/utils/tests/test_remote_runner.py +25 -0
  91. hud/cli/utils/tests/test_runner_modules.py +52 -0
  92. hud/cli/utils/tests/test_source_hash.py +36 -0
  93. hud/cli/utils/tests/test_tasks.py +80 -0
  94. hud/cli/utils/version_check.py +258 -0
  95. hud/cli/{rl → utils}/viewer.py +2 -2
  96. hud/clients/README.md +12 -11
  97. hud/clients/__init__.py +4 -3
  98. hud/clients/base.py +166 -26
  99. hud/clients/environment.py +51 -0
  100. hud/clients/fastmcp.py +13 -6
  101. hud/clients/mcp_use.py +45 -15
  102. hud/clients/tests/test_analyze_scenarios.py +206 -0
  103. hud/clients/tests/test_protocol.py +9 -3
  104. hud/datasets/__init__.py +23 -20
  105. hud/datasets/loader.py +326 -0
  106. hud/datasets/runner.py +198 -105
  107. hud/datasets/tests/__init__.py +0 -0
  108. hud/datasets/tests/test_loader.py +221 -0
  109. hud/datasets/tests/test_utils.py +315 -0
  110. hud/datasets/utils.py +270 -90
  111. hud/environment/__init__.py +52 -0
  112. hud/environment/connection.py +258 -0
  113. hud/environment/connectors/__init__.py +33 -0
  114. hud/environment/connectors/base.py +68 -0
  115. hud/environment/connectors/local.py +177 -0
  116. hud/environment/connectors/mcp_config.py +137 -0
  117. hud/environment/connectors/openai.py +101 -0
  118. hud/environment/connectors/remote.py +172 -0
  119. hud/environment/environment.py +835 -0
  120. hud/environment/integrations/__init__.py +45 -0
  121. hud/environment/integrations/adk.py +67 -0
  122. hud/environment/integrations/anthropic.py +196 -0
  123. hud/environment/integrations/gemini.py +92 -0
  124. hud/environment/integrations/langchain.py +82 -0
  125. hud/environment/integrations/llamaindex.py +68 -0
  126. hud/environment/integrations/openai.py +238 -0
  127. hud/environment/mock.py +306 -0
  128. hud/environment/router.py +263 -0
  129. hud/environment/scenarios.py +620 -0
  130. hud/environment/tests/__init__.py +1 -0
  131. hud/environment/tests/test_connection.py +317 -0
  132. hud/environment/tests/test_connectors.py +205 -0
  133. hud/environment/tests/test_environment.py +593 -0
  134. hud/environment/tests/test_integrations.py +257 -0
  135. hud/environment/tests/test_local_connectors.py +242 -0
  136. hud/environment/tests/test_scenarios.py +1086 -0
  137. hud/environment/tests/test_tools.py +208 -0
  138. hud/environment/types.py +23 -0
  139. hud/environment/utils/__init__.py +35 -0
  140. hud/environment/utils/formats.py +215 -0
  141. hud/environment/utils/schema.py +171 -0
  142. hud/environment/utils/tool_wrappers.py +113 -0
  143. hud/eval/__init__.py +67 -0
  144. hud/eval/context.py +727 -0
  145. hud/eval/display.py +299 -0
  146. hud/eval/instrument.py +187 -0
  147. hud/eval/manager.py +533 -0
  148. hud/eval/parallel.py +268 -0
  149. hud/eval/task.py +372 -0
  150. hud/eval/tests/__init__.py +1 -0
  151. hud/eval/tests/test_context.py +178 -0
  152. hud/eval/tests/test_eval.py +210 -0
  153. hud/eval/tests/test_manager.py +152 -0
  154. hud/eval/tests/test_parallel.py +168 -0
  155. hud/eval/tests/test_task.py +291 -0
  156. hud/eval/types.py +65 -0
  157. hud/eval/utils.py +194 -0
  158. hud/patches/__init__.py +19 -0
  159. hud/patches/mcp_patches.py +308 -0
  160. hud/patches/warnings.py +54 -0
  161. hud/samples/browser.py +4 -4
  162. hud/server/__init__.py +2 -1
  163. hud/server/low_level.py +2 -1
  164. hud/server/router.py +164 -0
  165. hud/server/server.py +567 -80
  166. hud/server/tests/test_mcp_server_integration.py +11 -11
  167. hud/server/tests/test_mcp_server_more.py +1 -1
  168. hud/server/tests/test_server_extra.py +2 -0
  169. hud/settings.py +45 -3
  170. hud/shared/exceptions.py +36 -10
  171. hud/shared/hints.py +26 -1
  172. hud/shared/requests.py +15 -3
  173. hud/shared/tests/test_exceptions.py +40 -31
  174. hud/shared/tests/test_hints.py +167 -0
  175. hud/telemetry/__init__.py +20 -19
  176. hud/telemetry/exporter.py +201 -0
  177. hud/telemetry/instrument.py +165 -253
  178. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  179. hud/telemetry/tests/test_exporter.py +258 -0
  180. hud/telemetry/tests/test_instrument.py +401 -0
  181. hud/tools/__init__.py +18 -2
  182. hud/tools/agent.py +223 -0
  183. hud/tools/apply_patch.py +639 -0
  184. hud/tools/base.py +54 -4
  185. hud/tools/bash.py +2 -2
  186. hud/tools/computer/__init__.py +36 -3
  187. hud/tools/computer/anthropic.py +2 -2
  188. hud/tools/computer/gemini.py +385 -0
  189. hud/tools/computer/hud.py +23 -6
  190. hud/tools/computer/openai.py +20 -21
  191. hud/tools/computer/qwen.py +434 -0
  192. hud/tools/computer/settings.py +37 -0
  193. hud/tools/edit.py +3 -7
  194. hud/tools/executors/base.py +4 -2
  195. hud/tools/executors/pyautogui.py +1 -1
  196. hud/tools/grounding/grounded_tool.py +13 -18
  197. hud/tools/grounding/grounder.py +10 -31
  198. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  199. hud/tools/jupyter.py +330 -0
  200. hud/tools/playwright.py +18 -3
  201. hud/tools/shell.py +308 -0
  202. hud/tools/tests/test_agent_tool.py +355 -0
  203. hud/tools/tests/test_apply_patch.py +718 -0
  204. hud/tools/tests/test_computer.py +4 -9
  205. hud/tools/tests/test_computer_actions.py +24 -2
  206. hud/tools/tests/test_jupyter_tool.py +181 -0
  207. hud/tools/tests/test_shell.py +596 -0
  208. hud/tools/tests/test_submit.py +85 -0
  209. hud/tools/tests/test_types.py +193 -0
  210. hud/tools/types.py +21 -1
  211. hud/types.py +194 -56
  212. hud/utils/__init__.py +2 -0
  213. hud/utils/env.py +67 -0
  214. hud/utils/hud_console.py +89 -18
  215. hud/utils/mcp.py +15 -58
  216. hud/utils/strict_schema.py +162 -0
  217. hud/utils/tests/test_init.py +1 -2
  218. hud/utils/tests/test_mcp.py +1 -28
  219. hud/utils/tests/test_pretty_errors.py +186 -0
  220. hud/utils/tests/test_tool_shorthand.py +154 -0
  221. hud/utils/tests/test_version.py +1 -1
  222. hud/utils/types.py +20 -0
  223. hud/version.py +1 -1
  224. hud_python-0.5.13.dist-info/METADATA +264 -0
  225. hud_python-0.5.13.dist-info/RECORD +305 -0
  226. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
  227. hud/agents/langchain.py +0 -261
  228. hud/agents/lite_llm.py +0 -72
  229. hud/cli/rl/__init__.py +0 -180
  230. hud/cli/rl/config.py +0 -101
  231. hud/cli/rl/display.py +0 -133
  232. hud/cli/rl/gpu.py +0 -63
  233. hud/cli/rl/gpu_utils.py +0 -321
  234. hud/cli/rl/local_runner.py +0 -595
  235. hud/cli/rl/presets.py +0 -96
  236. hud/cli/rl/remote_runner.py +0 -463
  237. hud/cli/rl/rl_api.py +0 -150
  238. hud/cli/rl/vllm.py +0 -177
  239. hud/cli/rl/wait_utils.py +0 -89
  240. hud/datasets/parallel.py +0 -687
  241. hud/misc/__init__.py +0 -1
  242. hud/misc/claude_plays_pokemon.py +0 -292
  243. hud/otel/__init__.py +0 -35
  244. hud/otel/collector.py +0 -142
  245. hud/otel/config.py +0 -181
  246. hud/otel/context.py +0 -570
  247. hud/otel/exporters.py +0 -369
  248. hud/otel/instrumentation.py +0 -135
  249. hud/otel/processors.py +0 -121
  250. hud/otel/tests/__init__.py +0 -1
  251. hud/otel/tests/test_processors.py +0 -197
  252. hud/rl/README.md +0 -30
  253. hud/rl/__init__.py +0 -1
  254. hud/rl/actor.py +0 -176
  255. hud/rl/buffer.py +0 -405
  256. hud/rl/chat_template.jinja +0 -101
  257. hud/rl/config.py +0 -192
  258. hud/rl/distributed.py +0 -132
  259. hud/rl/learner.py +0 -637
  260. hud/rl/tests/__init__.py +0 -1
  261. hud/rl/tests/test_learner.py +0 -186
  262. hud/rl/train.py +0 -382
  263. hud/rl/types.py +0 -101
  264. hud/rl/utils/start_vllm_server.sh +0 -30
  265. hud/rl/utils.py +0 -524
  266. hud/rl/vllm_adapter.py +0 -143
  267. hud/telemetry/job.py +0 -352
  268. hud/telemetry/replay.py +0 -74
  269. hud/telemetry/tests/test_replay.py +0 -40
  270. hud/telemetry/tests/test_trace.py +0 -63
  271. hud/telemetry/trace.py +0 -158
  272. hud/utils/agent_factories.py +0 -86
  273. hud/utils/async_utils.py +0 -65
  274. hud/utils/group_eval.py +0 -223
  275. hud/utils/progress.py +0 -149
  276. hud/utils/tasks.py +0 -127
  277. hud/utils/tests/test_async_utils.py +0 -173
  278. hud/utils/tests/test_progress.py +0 -261
  279. hud_python-0.4.45.dist-info/METADATA +0 -552
  280. hud_python-0.4.45.dist-info/RECORD +0 -228
  281. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  282. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,355 @@
1
+ """Tests for AgentTool - scenario-to-agent composition."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import inspect
6
+ from unittest.mock import AsyncMock, MagicMock, patch
7
+
8
+ import pytest
9
+
10
+ from hud.environment import Environment
11
+ from hud.eval.task import Task
12
+ from hud.tools.agent import AgentTool, _is_eval_only
13
+
14
+
15
class TestIsEvalOnly:
    """Checks which parameter shapes ``_is_eval_only`` classifies as eval-only."""

    def test_required_param_not_eval_only(self) -> None:
        """A parameter with no default must not be reported as eval-only."""

        def fn(x: str) -> None:
            pass

        target = inspect.signature(fn).parameters["x"]
        assert not _is_eval_only(target)

    def test_optional_with_value_not_eval_only(self) -> None:
        """A parameter whose default is a real value is not eval-only."""

        def fn(x: str = "default") -> None:
            pass

        target = inspect.signature(fn).parameters["x"]
        assert not _is_eval_only(target)

    def test_optional_none_without_union_not_eval_only(self) -> None:
        """A ``None`` default alone is not enough: the type must also allow None."""

        def fn(x: str = None) -> None:  # type: ignore[assignment]  # noqa: RUF013
            pass

        target = inspect.signature(fn).parameters["x"]
        assert not _is_eval_only(target)

    def test_optional_none_with_union_is_eval_only(self) -> None:
        """The ``X | None = None`` shape is reported as eval-only."""

        def fn(x: str | None = None) -> None:
            pass

        target = inspect.signature(fn).parameters["x"]
        assert _is_eval_only(target)

    def test_optional_int_none_is_eval_only(self) -> None:
        """The same rule holds for ``int | None = None``."""

        def fn(x: int | None = None) -> None:
            pass

        target = inspect.signature(fn).parameters["x"]
        assert _is_eval_only(target)

    def test_string_annotation_with_none_union(self) -> None:
        """A textual annotation ``'str | None'`` with a None default is eval-only."""
        # Build the parameter by hand so the annotation is a plain string.
        target = inspect.Parameter(
            name="x",
            kind=inspect.Parameter.POSITIONAL_OR_KEYWORD,
            default=None,
            annotation="str | None",
        )
        assert _is_eval_only(target)

    def test_string_annotation_without_none(self) -> None:
        """A textual annotation that does not mention None is not eval-only."""
        target = inspect.Parameter(
            name="x",
            kind=inspect.Parameter.POSITIONAL_OR_KEYWORD,
            default=None,
            annotation="str",
        )
        assert not _is_eval_only(target)
88
+
89
+
90
class TestAgentToolInit:
    """Constructor checks for ``AgentTool``: argument validation and naming."""

    def test_requires_model_or_agent(self) -> None:
        """Constructing with neither ``model`` nor ``agent`` raises ValueError."""
        bare_task = Task(args={})

        with pytest.raises(ValueError, match="Must provide either"):
            AgentTool(bare_task)

    def test_cannot_provide_both_model_and_agent(self) -> None:
        """Supplying ``model`` and ``agent`` together raises ValueError."""
        bare_task = Task(args={})
        agent_double = MagicMock()

        with pytest.raises(ValueError, match="Cannot provide both"):
            AgentTool(bare_task, model="claude", agent=agent_double)  # type: ignore[arg-type]

    def test_accepts_model_string(self) -> None:
        """A model string is stored on ``_model`` and ``_agent_cls`` stays None."""
        agent_tool = AgentTool(Task(scenario="test", args={}), model="claude")

        assert agent_tool._model == "claude"
        assert agent_tool._agent_cls is None

    def test_accepts_agent_class(self) -> None:
        """An agent class is stored on ``_agent_cls`` and ``_model`` stays None."""
        scenario_task = Task(scenario="test", args={})
        agent_cls_double = MagicMock()
        agent_tool = AgentTool(scenario_task, agent=agent_cls_double)  # type: ignore[arg-type]

        assert agent_tool._model is None
        assert agent_tool._agent_cls is agent_cls_double

    def test_name_defaults_to_scenario(self) -> None:
        """Without an explicit name the tool is named after the task's scenario."""
        agent_tool = AgentTool(Task(scenario="investigate", args={}), model="claude")

        assert agent_tool.name == "investigate"

    def test_name_can_be_overridden(self) -> None:
        """An explicit ``name`` argument wins over the scenario name."""
        agent_tool = AgentTool(
            Task(scenario="investigate", args={}),
            model="claude",
            name="custom_name",
        )

        assert agent_tool.name == "custom_name"
138
+
139
+
140
class TestAgentToolParamFiltering:
    """Checks which scenario parameters end up in ``AgentTool._visible_params``."""

    def test_filters_eval_only_params(self) -> None:
        """A ``| None = None`` parameter is left out of ``_visible_params``."""
        environment = Environment("test")

        @environment.scenario()
        async def investigate(
            issue_id: str,
            include_traces: bool = True,
            expected_cause: str | None = None,  # Eval only
        ):
            yield {"task": f"Investigate {issue_id}"}

        agent_tool = AgentTool(environment("investigate"), model="claude")

        # Only the two caller-facing parameters should remain visible.
        for shown in ("issue_id", "include_traces"):
            assert shown in agent_tool._visible_params
        assert "expected_cause" not in agent_tool._visible_params

    def test_all_required_params_visible(self) -> None:
        """Every parameter without a default stays visible."""
        environment = Environment("test")

        @environment.scenario()
        async def search(query: str, limit: int):
            yield {"task": f"Search: {query}"}

        agent_tool = AgentTool(environment("search"), model="claude")

        for shown in ("query", "limit"):
            assert shown in agent_tool._visible_params

    def test_optional_with_default_visible(self) -> None:
        """Parameters whose default is a real value (not None) stay visible."""
        environment = Environment("test")

        @environment.scenario()
        async def fetch(url: str, request_timeout: int = 30, retries: int = 3):
            yield {"task": f"Fetch {url}"}

        agent_tool = AgentTool(environment("fetch"), model="claude")

        for shown in ("url", "request_timeout", "retries"):
            assert shown in agent_tool._visible_params
192
+
193
+
194
class TestAgentToolSchema:
    """Checks the JSON schema exposed as ``AgentTool._param_schema``."""

    def test_builds_json_schema(self) -> None:
        """The schema is an object listing visible params; only no-default ones are required."""
        environment = Environment("test")

        @environment.scenario()
        async def investigate(issue_id: str, verbose: bool = False):
            yield {"task": f"Investigate {issue_id}"}

        agent_tool = AgentTool(environment("investigate"), model="claude")
        param_schema = agent_tool._param_schema

        assert param_schema is not None
        assert param_schema["type"] == "object"

        properties = param_schema["properties"]
        assert "issue_id" in properties
        assert "verbose" in properties

        required = param_schema["required"]
        assert "issue_id" in required
        assert "verbose" not in required  # Has default

    def test_schema_excludes_eval_only(self) -> None:
        """A ``| None = None`` parameter does not appear among the schema properties."""
        environment = Environment("test")

        @environment.scenario()
        async def check(
            item_id: str,
            expected_status: str | None = None,  # Eval only
        ):
            yield {"task": f"Check {item_id}"}

        agent_tool = AgentTool(environment("check"), model="claude")
        param_schema = agent_tool._param_schema

        assert param_schema is not None
        properties = param_schema["properties"]
        assert "item_id" in properties
        assert "expected_status" not in properties
234
+
235
+
236
class TestAgentToolMCP:
    """Checks the object exposed through ``AgentTool.mcp``."""

    def test_mcp_property_returns_tool(self) -> None:
        """``AgentTool.mcp`` yields a FastMCP ``FunctionTool`` instance."""
        from fastmcp.tools import FunctionTool

        environment = Environment("test")

        @environment.scenario()
        async def greet(name: str):
            yield {"task": f"Greet {name}"}

        agent_tool = AgentTool(environment("greet"), model="claude")

        assert isinstance(agent_tool.mcp, FunctionTool)

    def test_mcp_has_filtered_parameters(self) -> None:
        """The MCP tool's ``parameters`` schema omits the ``| None = None`` parameter."""
        environment = Environment("test")

        @environment.scenario()
        async def analyze(
            data: str,
            expected_result: str | None = None,  # Eval only
        ):
            yield {"task": f"Analyze {data}"}

        agent_tool = AgentTool(environment("analyze"), model="claude")

        # FunctionTool exposes its input schema under the 'parameters' attribute.
        properties = agent_tool.mcp.parameters["properties"]

        assert "data" in properties
        assert "expected_result" not in properties
274
+
275
+
276
class TestAgentToolCall:
    """Tests for AgentTool.__call__.

    Both tests patch ``hud.eval.manager.run_eval`` and
    ``hud.agents.create_agent`` and then inspect the task object that was
    handed to ``run_eval`` as its first positional argument.
    """

    @staticmethod
    def _wire_mocks(run_eval_mock: MagicMock, create_agent_mock: MagicMock) -> None:
        """Configure the two patched callables shared by every test here.

        Args:
            run_eval_mock: Patched ``run_eval``; made to return an async
                context manager whose ``__aenter__`` yields itself.
            create_agent_mock: Patched ``create_agent``; made to return an
                agent whose awaited ``run`` gives an object with
                ``content == "result"``.
        """
        eval_ctx = AsyncMock()
        eval_ctx.__aenter__ = AsyncMock(return_value=eval_ctx)
        eval_ctx.__aexit__ = AsyncMock(return_value=None)
        run_eval_mock.return_value = eval_ctx

        agent = MagicMock()
        agent.run = AsyncMock(return_value=MagicMock(content="result"))
        create_agent_mock.return_value = agent

    @staticmethod
    def _submitted_task(run_eval_mock: MagicMock):
        """Return the first positional argument of the last ``run_eval`` call.

        Asserts first that ``run_eval`` was called at all, so a missing call
        fails with a readable message instead of an error on ``None``.
        """
        recorded = run_eval_mock.call_args
        assert recorded is not None, "run_eval was never called"
        return recorded.args[0]

    @pytest.mark.asyncio
    async def test_filters_kwargs_to_visible_only(self) -> None:
        """Call filters kwargs to visible params only."""
        # Import modules first so patches work
        import hud.agents
        import hud.eval.manager  # noqa: F401

        env = Environment("test")

        @env.scenario()
        async def process(item: str, expected: str | None = None):
            yield {"task": f"Process {item}"}

        tool = AgentTool(env("process"), model="claude")

        # Mock the eval context and agent
        with (
            patch("hud.eval.manager.run_eval") as mock_run_eval,
            patch("hud.agents.create_agent") as mock_create_agent,
        ):
            self._wire_mocks(mock_run_eval, mock_create_agent)

            # Call with both visible and eval-only params
            await tool(item="test", expected="should_be_filtered")

            # Check that task was created with filtered args
            task_arg = self._submitted_task(mock_run_eval)
            assert "item" in task_arg.args
            assert "expected" not in task_arg.args  # Filtered out

    @pytest.mark.asyncio
    async def test_merges_template_args(self) -> None:
        """Call merges kwargs with template args."""
        # Import modules first so patches work
        import hud.agents
        import hud.eval.manager  # noqa: F401

        env = Environment("test")

        @env.scenario()
        async def search(query: str, limit: int = 10):
            yield {"task": f"Search {query}"}

        # Create template with some args pre-filled
        tool = AgentTool(env("search", limit=5), model="claude")

        with (
            patch("hud.eval.manager.run_eval") as mock_run_eval,
            patch("hud.agents.create_agent") as mock_create_agent,
        ):
            self._wire_mocks(mock_run_eval, mock_create_agent)

            # Call with additional arg
            await tool(query="test query")

            # Check merged args
            task_arg = self._submitted_task(mock_run_eval)
            assert task_arg.args["query"] == "test query"
            assert task_arg.args["limit"] == 5  # From template