hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282):
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +70 -5
  3. hud/agents/base.py +238 -500
  4. hud/agents/claude.py +236 -247
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +264 -0
  7. hud/agents/gemini_cua.py +324 -0
  8. hud/agents/grounded_openai.py +98 -100
  9. hud/agents/misc/integration_test_agent.py +51 -20
  10. hud/agents/misc/response_agent.py +48 -36
  11. hud/agents/openai.py +282 -296
  12. hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
  13. hud/agents/operator.py +199 -0
  14. hud/agents/resolver.py +70 -0
  15. hud/agents/tests/conftest.py +133 -0
  16. hud/agents/tests/test_base.py +300 -622
  17. hud/agents/tests/test_base_runtime.py +233 -0
  18. hud/agents/tests/test_claude.py +381 -214
  19. hud/agents/tests/test_client.py +9 -10
  20. hud/agents/tests/test_gemini.py +369 -0
  21. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  22. hud/agents/tests/test_openai.py +377 -140
  23. hud/agents/tests/test_operator.py +362 -0
  24. hud/agents/tests/test_resolver.py +192 -0
  25. hud/agents/tests/test_run_eval.py +179 -0
  26. hud/agents/types.py +148 -0
  27. hud/cli/__init__.py +493 -546
  28. hud/cli/analyze.py +43 -5
  29. hud/cli/build.py +699 -113
  30. hud/cli/debug.py +8 -5
  31. hud/cli/dev.py +889 -732
  32. hud/cli/eval.py +793 -667
  33. hud/cli/flows/dev.py +167 -0
  34. hud/cli/flows/init.py +191 -0
  35. hud/cli/flows/tasks.py +153 -56
  36. hud/cli/flows/templates.py +151 -0
  37. hud/cli/flows/tests/__init__.py +1 -0
  38. hud/cli/flows/tests/test_dev.py +126 -0
  39. hud/cli/init.py +60 -58
  40. hud/cli/pull.py +1 -1
  41. hud/cli/push.py +38 -13
  42. hud/cli/rft.py +311 -0
  43. hud/cli/rft_status.py +145 -0
  44. hud/cli/tests/test_analyze.py +5 -5
  45. hud/cli/tests/test_analyze_metadata.py +3 -2
  46. hud/cli/tests/test_analyze_module.py +120 -0
  47. hud/cli/tests/test_build.py +110 -8
  48. hud/cli/tests/test_build_failure.py +41 -0
  49. hud/cli/tests/test_build_module.py +50 -0
  50. hud/cli/tests/test_cli_init.py +6 -1
  51. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  52. hud/cli/tests/test_cli_root.py +140 -0
  53. hud/cli/tests/test_convert.py +361 -0
  54. hud/cli/tests/test_debug.py +12 -10
  55. hud/cli/tests/test_dev.py +197 -0
  56. hud/cli/tests/test_eval.py +251 -0
  57. hud/cli/tests/test_eval_bedrock.py +51 -0
  58. hud/cli/tests/test_init.py +124 -0
  59. hud/cli/tests/test_main_module.py +11 -5
  60. hud/cli/tests/test_mcp_server.py +12 -100
  61. hud/cli/tests/test_push.py +1 -1
  62. hud/cli/tests/test_push_happy.py +74 -0
  63. hud/cli/tests/test_push_wrapper.py +23 -0
  64. hud/cli/tests/test_registry.py +1 -1
  65. hud/cli/tests/test_utils.py +1 -1
  66. hud/cli/{rl → utils}/celebrate.py +14 -12
  67. hud/cli/utils/config.py +18 -1
  68. hud/cli/utils/docker.py +130 -4
  69. hud/cli/utils/env_check.py +9 -9
  70. hud/cli/utils/git.py +136 -0
  71. hud/cli/utils/interactive.py +39 -5
  72. hud/cli/utils/metadata.py +70 -1
  73. hud/cli/utils/runner.py +1 -1
  74. hud/cli/utils/server.py +2 -2
  75. hud/cli/utils/source_hash.py +3 -3
  76. hud/cli/utils/tasks.py +4 -1
  77. hud/cli/utils/tests/__init__.py +0 -0
  78. hud/cli/utils/tests/test_config.py +58 -0
  79. hud/cli/utils/tests/test_docker.py +93 -0
  80. hud/cli/utils/tests/test_docker_hints.py +71 -0
  81. hud/cli/utils/tests/test_env_check.py +74 -0
  82. hud/cli/utils/tests/test_environment.py +42 -0
  83. hud/cli/utils/tests/test_git.py +142 -0
  84. hud/cli/utils/tests/test_interactive_module.py +60 -0
  85. hud/cli/utils/tests/test_local_runner.py +50 -0
  86. hud/cli/utils/tests/test_logging_utils.py +23 -0
  87. hud/cli/utils/tests/test_metadata.py +49 -0
  88. hud/cli/utils/tests/test_package_runner.py +35 -0
  89. hud/cli/utils/tests/test_registry_utils.py +49 -0
  90. hud/cli/utils/tests/test_remote_runner.py +25 -0
  91. hud/cli/utils/tests/test_runner_modules.py +52 -0
  92. hud/cli/utils/tests/test_source_hash.py +36 -0
  93. hud/cli/utils/tests/test_tasks.py +80 -0
  94. hud/cli/utils/version_check.py +258 -0
  95. hud/cli/{rl → utils}/viewer.py +2 -2
  96. hud/clients/README.md +12 -11
  97. hud/clients/__init__.py +4 -3
  98. hud/clients/base.py +166 -26
  99. hud/clients/environment.py +51 -0
  100. hud/clients/fastmcp.py +13 -6
  101. hud/clients/mcp_use.py +45 -15
  102. hud/clients/tests/test_analyze_scenarios.py +206 -0
  103. hud/clients/tests/test_protocol.py +9 -3
  104. hud/datasets/__init__.py +23 -20
  105. hud/datasets/loader.py +326 -0
  106. hud/datasets/runner.py +198 -105
  107. hud/datasets/tests/__init__.py +0 -0
  108. hud/datasets/tests/test_loader.py +221 -0
  109. hud/datasets/tests/test_utils.py +315 -0
  110. hud/datasets/utils.py +270 -90
  111. hud/environment/__init__.py +52 -0
  112. hud/environment/connection.py +258 -0
  113. hud/environment/connectors/__init__.py +33 -0
  114. hud/environment/connectors/base.py +68 -0
  115. hud/environment/connectors/local.py +177 -0
  116. hud/environment/connectors/mcp_config.py +137 -0
  117. hud/environment/connectors/openai.py +101 -0
  118. hud/environment/connectors/remote.py +172 -0
  119. hud/environment/environment.py +835 -0
  120. hud/environment/integrations/__init__.py +45 -0
  121. hud/environment/integrations/adk.py +67 -0
  122. hud/environment/integrations/anthropic.py +196 -0
  123. hud/environment/integrations/gemini.py +92 -0
  124. hud/environment/integrations/langchain.py +82 -0
  125. hud/environment/integrations/llamaindex.py +68 -0
  126. hud/environment/integrations/openai.py +238 -0
  127. hud/environment/mock.py +306 -0
  128. hud/environment/router.py +263 -0
  129. hud/environment/scenarios.py +620 -0
  130. hud/environment/tests/__init__.py +1 -0
  131. hud/environment/tests/test_connection.py +317 -0
  132. hud/environment/tests/test_connectors.py +205 -0
  133. hud/environment/tests/test_environment.py +593 -0
  134. hud/environment/tests/test_integrations.py +257 -0
  135. hud/environment/tests/test_local_connectors.py +242 -0
  136. hud/environment/tests/test_scenarios.py +1086 -0
  137. hud/environment/tests/test_tools.py +208 -0
  138. hud/environment/types.py +23 -0
  139. hud/environment/utils/__init__.py +35 -0
  140. hud/environment/utils/formats.py +215 -0
  141. hud/environment/utils/schema.py +171 -0
  142. hud/environment/utils/tool_wrappers.py +113 -0
  143. hud/eval/__init__.py +67 -0
  144. hud/eval/context.py +727 -0
  145. hud/eval/display.py +299 -0
  146. hud/eval/instrument.py +187 -0
  147. hud/eval/manager.py +533 -0
  148. hud/eval/parallel.py +268 -0
  149. hud/eval/task.py +372 -0
  150. hud/eval/tests/__init__.py +1 -0
  151. hud/eval/tests/test_context.py +178 -0
  152. hud/eval/tests/test_eval.py +210 -0
  153. hud/eval/tests/test_manager.py +152 -0
  154. hud/eval/tests/test_parallel.py +168 -0
  155. hud/eval/tests/test_task.py +291 -0
  156. hud/eval/types.py +65 -0
  157. hud/eval/utils.py +194 -0
  158. hud/patches/__init__.py +19 -0
  159. hud/patches/mcp_patches.py +308 -0
  160. hud/patches/warnings.py +54 -0
  161. hud/samples/browser.py +4 -4
  162. hud/server/__init__.py +2 -1
  163. hud/server/low_level.py +2 -1
  164. hud/server/router.py +164 -0
  165. hud/server/server.py +567 -80
  166. hud/server/tests/test_mcp_server_integration.py +11 -11
  167. hud/server/tests/test_mcp_server_more.py +1 -1
  168. hud/server/tests/test_server_extra.py +2 -0
  169. hud/settings.py +45 -3
  170. hud/shared/exceptions.py +36 -10
  171. hud/shared/hints.py +26 -1
  172. hud/shared/requests.py +15 -3
  173. hud/shared/tests/test_exceptions.py +40 -31
  174. hud/shared/tests/test_hints.py +167 -0
  175. hud/telemetry/__init__.py +20 -19
  176. hud/telemetry/exporter.py +201 -0
  177. hud/telemetry/instrument.py +165 -253
  178. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  179. hud/telemetry/tests/test_exporter.py +258 -0
  180. hud/telemetry/tests/test_instrument.py +401 -0
  181. hud/tools/__init__.py +18 -2
  182. hud/tools/agent.py +223 -0
  183. hud/tools/apply_patch.py +639 -0
  184. hud/tools/base.py +54 -4
  185. hud/tools/bash.py +2 -2
  186. hud/tools/computer/__init__.py +36 -3
  187. hud/tools/computer/anthropic.py +2 -2
  188. hud/tools/computer/gemini.py +385 -0
  189. hud/tools/computer/hud.py +23 -6
  190. hud/tools/computer/openai.py +20 -21
  191. hud/tools/computer/qwen.py +434 -0
  192. hud/tools/computer/settings.py +37 -0
  193. hud/tools/edit.py +3 -7
  194. hud/tools/executors/base.py +4 -2
  195. hud/tools/executors/pyautogui.py +1 -1
  196. hud/tools/grounding/grounded_tool.py +13 -18
  197. hud/tools/grounding/grounder.py +10 -31
  198. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  199. hud/tools/jupyter.py +330 -0
  200. hud/tools/playwright.py +18 -3
  201. hud/tools/shell.py +308 -0
  202. hud/tools/tests/test_agent_tool.py +355 -0
  203. hud/tools/tests/test_apply_patch.py +718 -0
  204. hud/tools/tests/test_computer.py +4 -9
  205. hud/tools/tests/test_computer_actions.py +24 -2
  206. hud/tools/tests/test_jupyter_tool.py +181 -0
  207. hud/tools/tests/test_shell.py +596 -0
  208. hud/tools/tests/test_submit.py +85 -0
  209. hud/tools/tests/test_types.py +193 -0
  210. hud/tools/types.py +21 -1
  211. hud/types.py +194 -56
  212. hud/utils/__init__.py +2 -0
  213. hud/utils/env.py +67 -0
  214. hud/utils/hud_console.py +89 -18
  215. hud/utils/mcp.py +15 -58
  216. hud/utils/strict_schema.py +162 -0
  217. hud/utils/tests/test_init.py +1 -2
  218. hud/utils/tests/test_mcp.py +1 -28
  219. hud/utils/tests/test_pretty_errors.py +186 -0
  220. hud/utils/tests/test_tool_shorthand.py +154 -0
  221. hud/utils/tests/test_version.py +1 -1
  222. hud/utils/types.py +20 -0
  223. hud/version.py +1 -1
  224. hud_python-0.5.13.dist-info/METADATA +264 -0
  225. hud_python-0.5.13.dist-info/RECORD +305 -0
  226. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
  227. hud/agents/langchain.py +0 -261
  228. hud/agents/lite_llm.py +0 -72
  229. hud/cli/rl/__init__.py +0 -180
  230. hud/cli/rl/config.py +0 -101
  231. hud/cli/rl/display.py +0 -133
  232. hud/cli/rl/gpu.py +0 -63
  233. hud/cli/rl/gpu_utils.py +0 -321
  234. hud/cli/rl/local_runner.py +0 -595
  235. hud/cli/rl/presets.py +0 -96
  236. hud/cli/rl/remote_runner.py +0 -463
  237. hud/cli/rl/rl_api.py +0 -150
  238. hud/cli/rl/vllm.py +0 -177
  239. hud/cli/rl/wait_utils.py +0 -89
  240. hud/datasets/parallel.py +0 -687
  241. hud/misc/__init__.py +0 -1
  242. hud/misc/claude_plays_pokemon.py +0 -292
  243. hud/otel/__init__.py +0 -35
  244. hud/otel/collector.py +0 -142
  245. hud/otel/config.py +0 -181
  246. hud/otel/context.py +0 -570
  247. hud/otel/exporters.py +0 -369
  248. hud/otel/instrumentation.py +0 -135
  249. hud/otel/processors.py +0 -121
  250. hud/otel/tests/__init__.py +0 -1
  251. hud/otel/tests/test_processors.py +0 -197
  252. hud/rl/README.md +0 -30
  253. hud/rl/__init__.py +0 -1
  254. hud/rl/actor.py +0 -176
  255. hud/rl/buffer.py +0 -405
  256. hud/rl/chat_template.jinja +0 -101
  257. hud/rl/config.py +0 -192
  258. hud/rl/distributed.py +0 -132
  259. hud/rl/learner.py +0 -637
  260. hud/rl/tests/__init__.py +0 -1
  261. hud/rl/tests/test_learner.py +0 -186
  262. hud/rl/train.py +0 -382
  263. hud/rl/types.py +0 -101
  264. hud/rl/utils/start_vllm_server.sh +0 -30
  265. hud/rl/utils.py +0 -524
  266. hud/rl/vllm_adapter.py +0 -143
  267. hud/telemetry/job.py +0 -352
  268. hud/telemetry/replay.py +0 -74
  269. hud/telemetry/tests/test_replay.py +0 -40
  270. hud/telemetry/tests/test_trace.py +0 -63
  271. hud/telemetry/trace.py +0 -158
  272. hud/utils/agent_factories.py +0 -86
  273. hud/utils/async_utils.py +0 -65
  274. hud/utils/group_eval.py +0 -223
  275. hud/utils/progress.py +0 -149
  276. hud/utils/tasks.py +0 -127
  277. hud/utils/tests/test_async_utils.py +0 -173
  278. hud/utils/tests/test_progress.py +0 -261
  279. hud_python-0.4.45.dist-info/METADATA +0 -552
  280. hud_python-0.4.45.dist-info/RECORD +0 -228
  281. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  282. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,233 @@
1
+ """Runtime tests for MCPAgent base class."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ import mcp.types as types
8
+ import pytest
9
+
10
+ from hud.agents.base import BaseCreateParams, MCPAgent, find_content, find_reward, text_to_blocks
11
+ from hud.environment.router import ToolRouter
12
+ from hud.eval.context import EvalContext
13
+ from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
14
+
15
+
16
class DummyConfig(BaseAgentConfig):
    """Agent configuration carrying fixed placeholder model identifiers for tests."""

    # Placeholder identifiers used by the dummy agent defined in this module.
    model_name: str = "DummyAgent"
    model: str = "dummy-model"
19
+
20
+
21
class DummyCreateParams(BaseCreateParams, DummyConfig):
    """Creation parameters for the dummy agent.

    Combines ``BaseCreateParams`` with ``DummyConfig``; adds no fields of its own.
    """
23
+
24
+
25
class MockEvalContext(EvalContext):
    """In-memory ``EvalContext`` substitute for the agent runtime tests.

    The parent initializer is not invoked; every attribute used by these
    tests is assigned directly in ``__init__``. Tool calls are answered
    either by an injected handler or by a canned successful "ok" result.
    """

    def __init__(
        self,
        prompt: str = "Test prompt",
        tools: list[types.Tool] | None = None,
    ) -> None:
        """Populate the context with test defaults.

        Args:
            prompt: Prompt text exposed through ``self.prompt``.
            tools: Tools returned by ``as_tools`` / ``list_tools``; a falsy
                value yields an empty list.
        """
        # --- core state -------------------------------------------------
        self.prompt = prompt
        self._tools = tools or []
        self._submitted: str | None = None
        self.reward: float | None = None
        self._call_tool_handler: Any = None

        # --- environment state -------------------------------------------
        self._router = ToolRouter()
        self._agent_include: list[str] | None = None
        self._agent_exclude: list[str] | None = None

        # --- EvalContext state -------------------------------------------
        self._task = None
        self.trace_id = "test-trace-id"
        self.eval_name = "test-eval"
        self.job_id: str | None = None
        self.group_id: str | None = None
        self.index = 0
        self.variants: dict[str, Any] = {}
        self.answer: str | None = None
        self.system_prompt: str | None = None
        self.error: BaseException | None = None
        self.metadata: dict[str, Any] = {}
        self.results: list[Any] = []
        self._is_summary = False

    def as_tools(self) -> list[types.Tool]:
        """Return the tool list supplied at construction time."""
        return self._tools

    @property
    def has_scenario(self) -> bool:
        """Always ``False``: this mock never carries a scenario."""
        return False

    def set_call_tool_handler(self, handler: Any) -> None:
        """Install the callable that ``call_tool`` delegates to."""
        self._call_tool_handler = handler

    async def list_tools(self) -> list[types.Tool]:
        """Return the tool list supplied at construction time."""
        return self._tools

    async def call_tool(self, call: Any, /, **kwargs: Any) -> MCPToolResult:
        """Answer a tool call via the installed handler, or with a canned "ok".

        Args:
            call: A ``(name, arguments)`` tuple (arguments optional), an
                object with a ``name`` attribute (passed through unchanged),
                or any other value, whose ``str()`` becomes the tool name.
            **kwargs: Used as the arguments only in the "any other value" case.

        Returns:
            Whatever the handler returns, or a successful text result "ok"
            when no handler is installed.
        """
        handler = self._call_tool_handler
        if not handler:
            # No handler installed: every call succeeds with a fixed payload.
            ok_block = types.TextContent(type="text", text="ok")
            return MCPToolResult(content=[ok_block], isError=False)

        # Normalise the accepted call shapes before delegating.
        if isinstance(call, tuple):
            arguments = call[1] if len(call) > 1 else {}
            tool_call = MCPToolCall(name=call[0], arguments=arguments)
        elif hasattr(call, "name"):
            tool_call = call
        else:
            tool_call = MCPToolCall(name=str(call), arguments=kwargs)
        return handler(tool_call)

    async def submit(self, answer: str) -> None:
        """Record the submitted answer on ``self._submitted``."""
        self._submitted = answer
90
+
91
+
92
class DummyAgent(MCPAgent):
    """``MCPAgent`` subclass with canned responses for exercising base-class logic."""

    config_cls = DummyConfig

    def __init__(self, **kwargs: Any) -> None:
        """Build ``DummyCreateParams`` from ``kwargs`` and pass them to the base class."""
        super().__init__(DummyCreateParams(**kwargs))

    async def get_system_messages(self) -> list[types.ContentBlock]:
        """Return a single text block containing "sys"."""
        system_block = types.TextContent(type="text", text="sys")
        return [system_block]

    async def get_response(self, messages: list[Any]) -> AgentResponse:
        """Return a finished response ("ok", no tool calls) regardless of input."""
        response = AgentResponse(content="ok", tool_calls=[], done=True)
        return response

    async def format_blocks(self, blocks: list[Any]) -> list[Any]:
        """Return ``blocks`` unchanged."""
        return blocks

    async def format_tool_results(
        self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
    ) -> list[Any]:
        """Return a single text block "tools", ignoring both arguments."""
        summary_block = types.TextContent(text="tools", type="text")
        return [summary_block]
112
+
113
+
114
def test_find_reward_and_content_extractors() -> None:
    """Test reward and content extraction from tool results."""
    # Reward carried in structuredContent.
    structured_result = MCPToolResult(
        content=text_to_blocks("{}"),
        isError=False,
        structuredContent={"reward": 0.7},
    )
    assert find_reward(structured_result) == 0.7

    # Reward ("score") and content carried as JSON inside the text block.
    json_payload = '{"score": 0.5, "content": "hi"}'
    text_result = MCPToolResult(content=text_to_blocks(json_payload), isError=False)
    assert find_reward(text_result) == 0.5
    assert find_content(text_result) == "hi"
126
+
127
+
128
def test_get_available_tools_before_run_raises() -> None:
    """Test that get_available_tools raises RuntimeError on a fresh agent."""
    fresh_agent = DummyAgent()
    with pytest.raises(RuntimeError):
        fresh_agent.get_available_tools()
133
+
134
+
135
@pytest.mark.asyncio
async def test_format_message_invalid_type_raises() -> None:
    """Test that format_message raises ValueError for an unsupported message type."""
    bad_message = {"oops": 1}
    agent = DummyAgent()
    with pytest.raises(ValueError):
        await agent.format_message(bad_message)  # type: ignore
141
+
142
+
143
def test_text_to_blocks_shapes() -> None:
    """Test text_to_blocks returns a non-empty list whose first item is TextContent."""
    blocks = text_to_blocks("x")
    # One expectation per assert so a failure reports exactly which one broke,
    # instead of a single opaque composite condition.
    assert isinstance(blocks, list)
    assert blocks
    assert isinstance(blocks[0], types.TextContent)
147
+
148
+
149
@pytest.mark.asyncio
async def test_run_with_eval_context() -> None:
    """Test that a single-step run() against a mock context finishes without error."""
    agent = DummyAgent()
    outcome = await agent.run(MockEvalContext(prompt="hello"), max_steps=1)
    assert outcome.done is True
    assert outcome.isError is False
157
+
158
+
159
@pytest.mark.asyncio
async def test_run_requires_eval_context() -> None:
    """Test run() raises TypeError when given something other than an EvalContext."""
    not_a_context = "hello"
    agent = DummyAgent()
    with pytest.raises(TypeError, match="must be EvalContext"):
        await agent.run(not_a_context)  # type: ignore
165
+
166
+
167
@pytest.mark.asyncio
async def test_run_requires_prompt() -> None:
    """Test run() raises ValueError when the context's prompt is empty."""
    agent = DummyAgent()
    empty_prompt_ctx = MockEvalContext(prompt="")
    with pytest.raises(ValueError, match="prompt is not set"):
        await agent.run(empty_prompt_ctx)
174
+
175
+
176
@pytest.mark.asyncio
async def test_call_tools_error_paths() -> None:
    """Test call_tools reports a handler exception as an error result."""
    invocations = 0
    ok_result = MCPToolResult(content=text_to_blocks("ok"), isError=False)

    def handler(tool_call: MCPToolCall) -> MCPToolResult:
        # First invocation succeeds; every later one raises.
        nonlocal invocations
        invocations += 1
        if invocations != 1:
            raise RuntimeError("boom")
        return ok_result

    ctx = MockEvalContext(prompt="test")
    ctx.set_call_tool_handler(handler)
    agent = DummyAgent()

    # Attach the context and initialize the agent from it without a full run().
    agent.ctx = ctx
    await agent._initialize_from_ctx(ctx)

    pending_calls = [
        MCPToolCall(name="a", arguments={}),
        MCPToolCall(name="b", arguments={}),
    ]
    results = await agent.call_tools(pending_calls)
    assert results[0].isError is False
    assert results[1].isError is True
201
+
202
+
203
@pytest.mark.asyncio
async def test_call_tools_timeout_raises() -> None:
    """Test call_tools lets a TimeoutError from the tool call propagate."""

    def handler(tool_call: MCPToolCall) -> MCPToolResult:
        raise TimeoutError("timeout")

    agent = DummyAgent()
    ctx = MockEvalContext(prompt="test")
    ctx.set_call_tool_handler(handler)

    # Attach the context and initialize the agent from it without a full run().
    agent.ctx = ctx
    await agent._initialize_from_ctx(ctx)

    single_call = MCPToolCall(name="x", arguments={})
    with pytest.raises(TimeoutError):
        await agent.call_tools(single_call)
219
+
220
+
221
@pytest.mark.asyncio
async def test_get_available_tools_after_run() -> None:
    """Test that run() leaves the agent flagged as initialized.

    Only the ``_initialized`` flag is asserted; the tool list itself is not
    inspected after the run.
    """
    sample_tool = types.Tool(name="test_tool", description="Test", inputSchema={})
    ctx = MockEvalContext(prompt="hello", tools=[sample_tool])
    agent = DummyAgent()

    # Run initializes the agent
    await agent.run(ctx, max_steps=1)

    # After cleanup, we can't access tools (ctx is cleared)
    # But during run, tools were available
    assert agent._initialized is True