hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274)
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +11 -5
  3. hud/agents/base.py +220 -500
  4. hud/agents/claude.py +200 -240
  5. hud/agents/gemini.py +275 -0
  6. hud/agents/gemini_cua.py +335 -0
  7. hud/agents/grounded_openai.py +98 -100
  8. hud/agents/misc/integration_test_agent.py +51 -20
  9. hud/agents/misc/response_agent.py +41 -36
  10. hud/agents/openai.py +291 -292
  11. hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
  12. hud/agents/operator.py +211 -0
  13. hud/agents/tests/conftest.py +133 -0
  14. hud/agents/tests/test_base.py +300 -622
  15. hud/agents/tests/test_base_runtime.py +233 -0
  16. hud/agents/tests/test_claude.py +379 -210
  17. hud/agents/tests/test_client.py +9 -10
  18. hud/agents/tests/test_gemini.py +369 -0
  19. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  20. hud/agents/tests/test_openai.py +376 -140
  21. hud/agents/tests/test_operator.py +362 -0
  22. hud/agents/tests/test_run_eval.py +179 -0
  23. hud/cli/__init__.py +461 -545
  24. hud/cli/analyze.py +43 -5
  25. hud/cli/build.py +664 -110
  26. hud/cli/debug.py +8 -5
  27. hud/cli/dev.py +882 -734
  28. hud/cli/eval.py +782 -668
  29. hud/cli/flows/dev.py +167 -0
  30. hud/cli/flows/init.py +191 -0
  31. hud/cli/flows/tasks.py +153 -56
  32. hud/cli/flows/templates.py +151 -0
  33. hud/cli/flows/tests/__init__.py +1 -0
  34. hud/cli/flows/tests/test_dev.py +126 -0
  35. hud/cli/init.py +60 -58
  36. hud/cli/push.py +29 -11
  37. hud/cli/rft.py +311 -0
  38. hud/cli/rft_status.py +145 -0
  39. hud/cli/tests/test_analyze.py +5 -5
  40. hud/cli/tests/test_analyze_metadata.py +3 -2
  41. hud/cli/tests/test_analyze_module.py +120 -0
  42. hud/cli/tests/test_build.py +108 -6
  43. hud/cli/tests/test_build_failure.py +41 -0
  44. hud/cli/tests/test_build_module.py +50 -0
  45. hud/cli/tests/test_cli_init.py +6 -1
  46. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  47. hud/cli/tests/test_cli_root.py +140 -0
  48. hud/cli/tests/test_convert.py +361 -0
  49. hud/cli/tests/test_debug.py +12 -10
  50. hud/cli/tests/test_dev.py +197 -0
  51. hud/cli/tests/test_eval.py +251 -0
  52. hud/cli/tests/test_eval_bedrock.py +51 -0
  53. hud/cli/tests/test_init.py +124 -0
  54. hud/cli/tests/test_main_module.py +11 -5
  55. hud/cli/tests/test_mcp_server.py +12 -100
  56. hud/cli/tests/test_push_happy.py +74 -0
  57. hud/cli/tests/test_push_wrapper.py +23 -0
  58. hud/cli/tests/test_registry.py +1 -1
  59. hud/cli/tests/test_utils.py +1 -1
  60. hud/cli/{rl → utils}/celebrate.py +14 -12
  61. hud/cli/utils/config.py +18 -1
  62. hud/cli/utils/docker.py +130 -4
  63. hud/cli/utils/env_check.py +9 -9
  64. hud/cli/utils/git.py +136 -0
  65. hud/cli/utils/interactive.py +39 -5
  66. hud/cli/utils/metadata.py +69 -0
  67. hud/cli/utils/runner.py +1 -1
  68. hud/cli/utils/server.py +2 -2
  69. hud/cli/utils/source_hash.py +3 -3
  70. hud/cli/utils/tasks.py +4 -1
  71. hud/cli/utils/tests/__init__.py +0 -0
  72. hud/cli/utils/tests/test_config.py +58 -0
  73. hud/cli/utils/tests/test_docker.py +93 -0
  74. hud/cli/utils/tests/test_docker_hints.py +71 -0
  75. hud/cli/utils/tests/test_env_check.py +74 -0
  76. hud/cli/utils/tests/test_environment.py +42 -0
  77. hud/cli/utils/tests/test_git.py +142 -0
  78. hud/cli/utils/tests/test_interactive_module.py +60 -0
  79. hud/cli/utils/tests/test_local_runner.py +50 -0
  80. hud/cli/utils/tests/test_logging_utils.py +23 -0
  81. hud/cli/utils/tests/test_metadata.py +49 -0
  82. hud/cli/utils/tests/test_package_runner.py +35 -0
  83. hud/cli/utils/tests/test_registry_utils.py +49 -0
  84. hud/cli/utils/tests/test_remote_runner.py +25 -0
  85. hud/cli/utils/tests/test_runner_modules.py +52 -0
  86. hud/cli/utils/tests/test_source_hash.py +36 -0
  87. hud/cli/utils/tests/test_tasks.py +80 -0
  88. hud/cli/utils/version_check.py +258 -0
  89. hud/cli/{rl → utils}/viewer.py +2 -2
  90. hud/clients/README.md +12 -11
  91. hud/clients/__init__.py +4 -3
  92. hud/clients/base.py +166 -26
  93. hud/clients/environment.py +51 -0
  94. hud/clients/fastmcp.py +13 -6
  95. hud/clients/mcp_use.py +40 -15
  96. hud/clients/tests/test_analyze_scenarios.py +206 -0
  97. hud/clients/tests/test_protocol.py +9 -3
  98. hud/datasets/__init__.py +23 -20
  99. hud/datasets/loader.py +327 -0
  100. hud/datasets/runner.py +192 -105
  101. hud/datasets/tests/__init__.py +0 -0
  102. hud/datasets/tests/test_loader.py +221 -0
  103. hud/datasets/tests/test_utils.py +315 -0
  104. hud/datasets/utils.py +270 -90
  105. hud/environment/__init__.py +50 -0
  106. hud/environment/connection.py +206 -0
  107. hud/environment/connectors/__init__.py +33 -0
  108. hud/environment/connectors/base.py +68 -0
  109. hud/environment/connectors/local.py +177 -0
  110. hud/environment/connectors/mcp_config.py +109 -0
  111. hud/environment/connectors/openai.py +101 -0
  112. hud/environment/connectors/remote.py +172 -0
  113. hud/environment/environment.py +694 -0
  114. hud/environment/integrations/__init__.py +45 -0
  115. hud/environment/integrations/adk.py +67 -0
  116. hud/environment/integrations/anthropic.py +196 -0
  117. hud/environment/integrations/gemini.py +92 -0
  118. hud/environment/integrations/langchain.py +82 -0
  119. hud/environment/integrations/llamaindex.py +68 -0
  120. hud/environment/integrations/openai.py +238 -0
  121. hud/environment/mock.py +306 -0
  122. hud/environment/router.py +112 -0
  123. hud/environment/scenarios.py +493 -0
  124. hud/environment/tests/__init__.py +1 -0
  125. hud/environment/tests/test_connection.py +317 -0
  126. hud/environment/tests/test_connectors.py +218 -0
  127. hud/environment/tests/test_environment.py +161 -0
  128. hud/environment/tests/test_integrations.py +257 -0
  129. hud/environment/tests/test_local_connectors.py +201 -0
  130. hud/environment/tests/test_scenarios.py +280 -0
  131. hud/environment/tests/test_tools.py +208 -0
  132. hud/environment/types.py +23 -0
  133. hud/environment/utils/__init__.py +35 -0
  134. hud/environment/utils/formats.py +215 -0
  135. hud/environment/utils/schema.py +171 -0
  136. hud/environment/utils/tool_wrappers.py +113 -0
  137. hud/eval/__init__.py +67 -0
  138. hud/eval/context.py +674 -0
  139. hud/eval/display.py +299 -0
  140. hud/eval/instrument.py +185 -0
  141. hud/eval/manager.py +466 -0
  142. hud/eval/parallel.py +268 -0
  143. hud/eval/task.py +340 -0
  144. hud/eval/tests/__init__.py +1 -0
  145. hud/eval/tests/test_context.py +178 -0
  146. hud/eval/tests/test_eval.py +210 -0
  147. hud/eval/tests/test_manager.py +152 -0
  148. hud/eval/tests/test_parallel.py +168 -0
  149. hud/eval/tests/test_task.py +145 -0
  150. hud/eval/types.py +63 -0
  151. hud/eval/utils.py +183 -0
  152. hud/patches/__init__.py +19 -0
  153. hud/patches/mcp_patches.py +151 -0
  154. hud/patches/warnings.py +54 -0
  155. hud/samples/browser.py +4 -4
  156. hud/server/__init__.py +2 -1
  157. hud/server/low_level.py +2 -1
  158. hud/server/router.py +164 -0
  159. hud/server/server.py +567 -80
  160. hud/server/tests/test_mcp_server_integration.py +11 -11
  161. hud/server/tests/test_mcp_server_more.py +1 -1
  162. hud/server/tests/test_server_extra.py +2 -0
  163. hud/settings.py +45 -3
  164. hud/shared/exceptions.py +36 -10
  165. hud/shared/hints.py +26 -1
  166. hud/shared/requests.py +15 -3
  167. hud/shared/tests/test_exceptions.py +40 -31
  168. hud/shared/tests/test_hints.py +167 -0
  169. hud/telemetry/__init__.py +20 -19
  170. hud/telemetry/exporter.py +201 -0
  171. hud/telemetry/instrument.py +158 -253
  172. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  173. hud/telemetry/tests/test_exporter.py +258 -0
  174. hud/telemetry/tests/test_instrument.py +401 -0
  175. hud/tools/__init__.py +16 -2
  176. hud/tools/apply_patch.py +639 -0
  177. hud/tools/base.py +54 -4
  178. hud/tools/bash.py +2 -2
  179. hud/tools/computer/__init__.py +4 -0
  180. hud/tools/computer/anthropic.py +2 -2
  181. hud/tools/computer/gemini.py +385 -0
  182. hud/tools/computer/hud.py +23 -6
  183. hud/tools/computer/openai.py +20 -21
  184. hud/tools/computer/qwen.py +434 -0
  185. hud/tools/computer/settings.py +37 -0
  186. hud/tools/edit.py +3 -7
  187. hud/tools/executors/base.py +4 -2
  188. hud/tools/executors/pyautogui.py +1 -1
  189. hud/tools/grounding/grounded_tool.py +13 -18
  190. hud/tools/grounding/grounder.py +10 -31
  191. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  192. hud/tools/jupyter.py +330 -0
  193. hud/tools/playwright.py +18 -3
  194. hud/tools/shell.py +308 -0
  195. hud/tools/tests/test_apply_patch.py +718 -0
  196. hud/tools/tests/test_computer.py +4 -9
  197. hud/tools/tests/test_computer_actions.py +24 -2
  198. hud/tools/tests/test_jupyter_tool.py +181 -0
  199. hud/tools/tests/test_shell.py +596 -0
  200. hud/tools/tests/test_submit.py +85 -0
  201. hud/tools/tests/test_types.py +193 -0
  202. hud/tools/types.py +21 -1
  203. hud/types.py +167 -57
  204. hud/utils/__init__.py +2 -0
  205. hud/utils/env.py +67 -0
  206. hud/utils/hud_console.py +61 -3
  207. hud/utils/mcp.py +15 -58
  208. hud/utils/strict_schema.py +162 -0
  209. hud/utils/tests/test_init.py +1 -2
  210. hud/utils/tests/test_mcp.py +1 -28
  211. hud/utils/tests/test_pretty_errors.py +186 -0
  212. hud/utils/tests/test_tool_shorthand.py +154 -0
  213. hud/utils/tests/test_version.py +1 -1
  214. hud/utils/types.py +20 -0
  215. hud/version.py +1 -1
  216. hud_python-0.5.1.dist-info/METADATA +264 -0
  217. hud_python-0.5.1.dist-info/RECORD +299 -0
  218. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
  219. hud/agents/langchain.py +0 -261
  220. hud/agents/lite_llm.py +0 -72
  221. hud/cli/rl/__init__.py +0 -180
  222. hud/cli/rl/config.py +0 -101
  223. hud/cli/rl/display.py +0 -133
  224. hud/cli/rl/gpu.py +0 -63
  225. hud/cli/rl/gpu_utils.py +0 -321
  226. hud/cli/rl/local_runner.py +0 -595
  227. hud/cli/rl/presets.py +0 -96
  228. hud/cli/rl/remote_runner.py +0 -463
  229. hud/cli/rl/rl_api.py +0 -150
  230. hud/cli/rl/vllm.py +0 -177
  231. hud/cli/rl/wait_utils.py +0 -89
  232. hud/datasets/parallel.py +0 -687
  233. hud/misc/__init__.py +0 -1
  234. hud/misc/claude_plays_pokemon.py +0 -292
  235. hud/otel/__init__.py +0 -35
  236. hud/otel/collector.py +0 -142
  237. hud/otel/config.py +0 -181
  238. hud/otel/context.py +0 -570
  239. hud/otel/exporters.py +0 -369
  240. hud/otel/instrumentation.py +0 -135
  241. hud/otel/processors.py +0 -121
  242. hud/otel/tests/__init__.py +0 -1
  243. hud/otel/tests/test_processors.py +0 -197
  244. hud/rl/README.md +0 -30
  245. hud/rl/__init__.py +0 -1
  246. hud/rl/actor.py +0 -176
  247. hud/rl/buffer.py +0 -405
  248. hud/rl/chat_template.jinja +0 -101
  249. hud/rl/config.py +0 -192
  250. hud/rl/distributed.py +0 -132
  251. hud/rl/learner.py +0 -637
  252. hud/rl/tests/__init__.py +0 -1
  253. hud/rl/tests/test_learner.py +0 -186
  254. hud/rl/train.py +0 -382
  255. hud/rl/types.py +0 -101
  256. hud/rl/utils/start_vllm_server.sh +0 -30
  257. hud/rl/utils.py +0 -524
  258. hud/rl/vllm_adapter.py +0 -143
  259. hud/telemetry/job.py +0 -352
  260. hud/telemetry/replay.py +0 -74
  261. hud/telemetry/tests/test_replay.py +0 -40
  262. hud/telemetry/tests/test_trace.py +0 -63
  263. hud/telemetry/trace.py +0 -158
  264. hud/utils/agent_factories.py +0 -86
  265. hud/utils/async_utils.py +0 -65
  266. hud/utils/group_eval.py +0 -223
  267. hud/utils/progress.py +0 -149
  268. hud/utils/tasks.py +0 -127
  269. hud/utils/tests/test_async_utils.py +0 -173
  270. hud/utils/tests/test_progress.py +0 -261
  271. hud_python-0.4.45.dist-info/METADATA +0 -552
  272. hud_python-0.4.45.dist-info/RECORD +0 -228
  273. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
  274. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,596 @@
1
+ """Tests for shell tool."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from unittest.mock import AsyncMock, MagicMock, patch
6
+
7
+ import pytest
8
+
9
+ from hud.tools.shell import (
10
+ ShellCallOutcome,
11
+ ShellCommandOutput,
12
+ ShellResult,
13
+ ShellTool,
14
+ _BashSession,
15
+ )
16
+ from hud.tools.types import ToolError
17
+
18
+
19
class TestShellCallOutcome:
    """Serialization checks for ``ShellCallOutcome.to_dict``."""

    def test_to_dict_exit(self):
        """An exit outcome with code 0 serializes its type and exit code."""
        expected = {"type": "exit", "exit_code": 0}
        assert ShellCallOutcome(type="exit", exit_code=0).to_dict() == expected

    def test_to_dict_exit_with_error_code(self):
        """A non-zero exit code is carried through to the dict unchanged."""
        serialized = ShellCallOutcome(type="exit", exit_code=1).to_dict()
        assert serialized == {"type": "exit", "exit_code": 1}

    def test_to_dict_timeout(self):
        """A timeout outcome serializes to its type alone."""
        serialized = ShellCallOutcome(type="timeout").to_dict()
        assert serialized == {"type": "timeout"}
36
+
37
+
38
class TestShellCommandOutput:
    """Serialization checks for ``ShellCommandOutput.to_dict``."""

    def test_to_dict(self):
        """stdout, stderr and the nested outcome all appear in the dict."""
        exit_ok = ShellCallOutcome(type="exit", exit_code=0)
        serialized = ShellCommandOutput(
            stdout="hello",
            stderr="",
            outcome=exit_ok,
        ).to_dict()

        assert serialized["stdout"] == "hello"
        assert serialized["stderr"] == ""
        assert serialized["outcome"] == {"type": "exit", "exit_code": 0}
52
+
53
+
54
class TestShellResult:
    """Serialization checks for ``ShellResult.to_dict``."""

    @staticmethod
    def _single_output():
        """Return a one-element output list holding a successful "test" command."""
        return [
            ShellCommandOutput(
                stdout="test",
                stderr="",
                outcome=ShellCallOutcome(type="exit", exit_code=0),
            )
        ]

    def test_to_dict_without_max_output_length(self):
        """The key ``max_output_length`` is absent when it was never supplied."""
        serialized = ShellResult(output=self._single_output()).to_dict()

        assert "output" in serialized
        assert len(serialized["output"]) == 1
        assert "max_output_length" not in serialized

    def test_to_dict_with_max_output_length(self):
        """A supplied ``max_output_length`` is echoed in the dict."""
        shell_result = ShellResult(
            output=self._single_output(),
            max_output_length=1024,
        )

        assert shell_result.to_dict()["max_output_length"] == 1024
87
+
88
+
89
class TestBashSession:
    """Unit tests for the private ``_BashSession`` helper."""

    @staticmethod
    def _started_session(returncode, timed_out=None):
        """Return ``(session, process)`` for a session flagged as started.

        The process is a ``MagicMock`` whose ``returncode`` is set to
        *returncode*. ``_timed_out`` is only assigned when *timed_out* is given,
        so tests that do not care keep whatever the constructor set.
        """
        session = _BashSession()
        session._started = True
        if timed_out is not None:
            session._timed_out = timed_out
        process = MagicMock()
        process.returncode = returncode
        session._process = process
        return session, process

    @staticmethod
    def _process_with_buffers(stdout_text, stderr_text):
        """Return a running mock process whose stream buffers decode to the given text."""
        process = MagicMock()
        process.returncode = None
        process.stdin = MagicMock()
        process.stdin.write = MagicMock()
        process.stdin.drain = AsyncMock()

        for stream_name, text in (("stdout", stdout_text), ("stderr", stderr_text)):
            buffer = MagicMock()
            buffer.decode.return_value = text
            buffer.clear = MagicMock()
            stream = MagicMock()
            stream._buffer = buffer
            setattr(process, stream_name, stream)

        return process

    def test_init(self):
        """A fresh session is neither started nor timed out."""
        fresh = _BashSession()
        assert fresh._started is False
        assert fresh._timed_out is False

    @pytest.mark.asyncio
    async def test_start(self):
        """Starting spawns one subprocess and records it on the session."""
        session = _BashSession()
        fake_process = MagicMock()

        with patch("asyncio.create_subprocess_shell", return_value=fake_process) as spawn:
            await session.start()

            assert session._started is True
            assert session._process == fake_process
            spawn.assert_called_once()

    @pytest.mark.asyncio
    async def test_start_already_started(self):
        """Starting an already-started session does not spawn a subprocess."""
        session = _BashSession()
        session._started = True

        with patch("asyncio.create_subprocess_shell") as spawn:
            await session.start()
            spawn.assert_not_called()

    def test_stop_not_started(self):
        """Stopping a session that never started must not raise."""
        _BashSession().stop()

    def test_stop_already_exited(self):
        """A process that already has a return code is not terminated again."""
        session, process = self._started_session(returncode=0)  # already exited

        session.stop()

        process.terminate.assert_not_called()

    def test_stop_running(self):
        """A process with no return code yet is terminated exactly once."""
        session, process = self._started_session(returncode=None)  # still running

        session.stop()

        process.terminate.assert_called_once()

    def test_is_alive_not_started(self):
        """A session that was never started reports not alive."""
        assert _BashSession().is_alive() is False

    def test_is_alive_running(self):
        """Started, not timed out, process still running: alive."""
        session, _ = self._started_session(returncode=None, timed_out=False)
        assert session.is_alive() is True

    def test_is_alive_timed_out(self):
        """A timed-out session reports not alive even while its process runs."""
        session, _ = self._started_session(returncode=None, timed_out=True)
        assert session.is_alive() is False

    def test_is_alive_process_exited(self):
        """A session whose process has exited reports not alive."""
        session, _ = self._started_session(returncode=0, timed_out=False)
        assert session.is_alive() is False

    @pytest.mark.asyncio
    async def test_run_not_started(self):
        """Running a command before start raises ``ToolError``."""
        session = _BashSession()

        with pytest.raises(ToolError) as raised:
            await session.run("echo test")

        assert "Session has not started" in str(raised.value)

    @pytest.mark.asyncio
    async def test_run_success(self):
        """Output before the exit sentinel becomes stdout; the code after it is parsed."""
        session = _BashSession()
        session._started = True
        session._process = self._process_with_buffers("Hello World\n<<exit>>0\n", "")

        # Replace asyncio.sleep so the test does not actually wait.
        with patch("asyncio.sleep", new_callable=AsyncMock):
            result = await session.run("echo Hello World")

        assert result.stdout == "Hello World"
        assert result.stderr == ""
        assert result.outcome.type == "exit"
        assert result.outcome.exit_code == 0

    @pytest.mark.asyncio
    async def test_run_with_exit_code(self):
        """A non-zero code after the exit sentinel is reported on the outcome."""
        session = _BashSession()
        session._started = True
        session._process = self._process_with_buffers("<<exit>>127\n", "command not found")

        with patch("asyncio.sleep", new_callable=AsyncMock):
            result = await session.run("nonexistent_command")

        assert result.outcome.type == "exit"
        assert result.outcome.exit_code == 127
269
+
270
+
271
class TestShellTool:
    """Tests for ``ShellTool`` with ``_BashSession`` replaced by mocks."""

    @staticmethod
    def _output(stdout="output", stderr="", exit_code=0):
        """Build a ``ShellCommandOutput`` with an "exit" outcome."""
        return ShellCommandOutput(
            stdout=stdout,
            stderr=stderr,
            outcome=ShellCallOutcome(type="exit", exit_code=exit_code),
        )

    @staticmethod
    def _alive_session(run):
        """Return a mock session that reports alive and uses *run* as its ``run``."""
        session = MagicMock()
        session.is_alive.return_value = True
        session.run = run
        session.start = AsyncMock()
        return session

    @staticmethod
    def _dead_session(timed_out, returncode):
        """Return a mock session that reports not alive, with the given state."""
        session = MagicMock()
        session._timed_out = timed_out
        session._process = MagicMock()
        session._process.returncode = returncode
        session.is_alive.return_value = False
        session.stop = MagicMock()
        return session

    def test_init(self):
        """A new tool holds no session."""
        assert ShellTool()._session is None

    @pytest.mark.asyncio
    async def test_call_no_commands(self):
        """Calling with no arguments raises ``ToolError``."""
        tool = ShellTool()

        with pytest.raises(ToolError) as raised:
            await tool()

        assert "No commands provided" in str(raised.value)

    @pytest.mark.asyncio
    async def test_call_empty_commands(self):
        """Calling with an empty command list raises ``ToolError``."""
        tool = ShellTool()

        with pytest.raises(ToolError) as raised:
            await tool(commands=[])

        assert "No commands provided" in str(raised.value)

    @pytest.mark.asyncio
    async def test_call_with_command(self):
        """One command starts a session and runs it with no timeout."""
        tool = ShellTool()
        run = AsyncMock(return_value=self._output(stdout="test output"))
        session = self._alive_session(run)

        with patch("hud.tools.shell._BashSession", return_value=session):
            result = await tool(commands=["echo test"])

        assert isinstance(result, ShellResult)
        assert len(result.output) == 1
        assert result.output[0].stdout == "test output"
        session.start.assert_called_once()
        session.run.assert_called_once_with("echo test", None)

    @pytest.mark.asyncio
    async def test_call_with_timeout(self):
        """``timeout_ms`` is forwarded to the session's ``run``."""
        tool = ShellTool()
        session = self._alive_session(AsyncMock(return_value=self._output()))

        with patch("hud.tools.shell._BashSession", return_value=session):
            result = await tool(commands=["sleep 1"], timeout_ms=5000)

        session.run.assert_called_once_with("sleep 1", 5000)
        assert result.max_output_length is None

    @pytest.mark.asyncio
    async def test_call_with_max_output_length(self):
        """``max_output_length`` is echoed on the result."""
        tool = ShellTool()
        session = self._alive_session(AsyncMock(return_value=self._output()))

        with patch("hud.tools.shell._BashSession", return_value=session):
            result = await tool(commands=["echo test"], max_output_length=2048)

        assert result.max_output_length == 2048

    @pytest.mark.asyncio
    async def test_call_multiple_commands(self):
        """Each command yields its own output entry, in order."""
        tool = ShellTool()
        run = AsyncMock(
            side_effect=[self._output(stdout="first"), self._output(stdout="second")]
        )
        session = self._alive_session(run)

        with patch("hud.tools.shell._BashSession", return_value=session):
            result = await tool(commands=["echo first", "echo second"])

        assert len(result.output) == 2
        assert result.output[0].stdout == "first"
        assert result.output[1].stdout == "second"

    @pytest.mark.asyncio
    async def test_call_reuses_session(self):
        """Two calls on the same tool construct only one session."""
        tool = ShellTool()
        session = self._alive_session(AsyncMock(return_value=self._output()))

        with patch("hud.tools.shell._BashSession", return_value=session) as session_cls:
            for command in ("echo first", "echo second"):
                await tool(commands=[command])

        # The session class should have been instantiated only once.
        assert session_cls.call_count == 1

    @pytest.mark.asyncio
    async def test_auto_restart_on_timeout(self):
        """A timed-out session is stopped, replaced, and the restart is reported."""
        tool = ShellTool()
        stale = self._dead_session(timed_out=True, returncode=None)
        tool._session = stale
        replacement = self._alive_session(AsyncMock(return_value=self._output()))

        with patch("hud.tools.shell._BashSession", return_value=replacement):
            result = await tool(commands=["echo test"])

        # The old session is stopped and the new one started.
        stale.stop.assert_called_once()
        replacement.start.assert_called_once()
        # The restart notice is expected in stderr of the first output.
        restart_stderr = result.output[0].stderr
        assert "timed out" in restart_stderr
        assert "auto-restarted" in restart_stderr

    @pytest.mark.asyncio
    async def test_auto_restart_on_exit(self):
        """After the old process exited, the restart notice names the exit code."""
        tool = ShellTool()
        tool._session = self._dead_session(timed_out=False, returncode=1)
        replacement = self._alive_session(AsyncMock(return_value=self._output()))

        with patch("hud.tools.shell._BashSession", return_value=replacement):
            result = await tool(commands=["echo test"])

        assert "exited with code 1" in result.output[0].stderr

    @pytest.mark.asyncio
    async def test_command_execution_error(self):
        """An exception from ``run`` becomes an output with exit code 1."""
        tool = ShellTool()
        session = self._alive_session(AsyncMock(side_effect=Exception("Test error")))

        with patch("hud.tools.shell._BashSession", return_value=session):
            result = await tool(commands=["failing command"])

        assert len(result.output) == 1
        failure = result.output[0]
        assert "Test error" in failure.stderr
        assert failure.outcome.exit_code == 1

    @pytest.mark.asyncio
    async def test_restart_message_added_to_existing_stderr(self):
        """The restart notice and the command's own stderr are both kept."""
        tool = ShellTool()
        tool._session = self._dead_session(timed_out=True, returncode=None)
        run = AsyncMock(
            return_value=self._output(stderr="original error", exit_code=1)
        )
        replacement = self._alive_session(run)

        with patch("hud.tools.shell._BashSession", return_value=replacement):
            result = await tool(commands=["echo test"])

        combined_stderr = result.output[0].stderr
        assert "timed out" in combined_stderr
        assert "original error" in combined_stderr

    @pytest.mark.asyncio
    async def test_session_dies_mid_execution(self):
        """Both commands still produce output when the session dies between them."""
        tool = ShellTool()

        session = MagicMock()
        # Successive is_alive() answers: alive, dead, alive again.
        session.is_alive.side_effect = [True, False, True]
        session.run = AsyncMock(
            side_effect=[self._output(stdout="first"), self._output(stdout="second")]
        )
        session.start = AsyncMock()
        session._timed_out = True
        session._process = MagicMock()
        session._process.returncode = None
        session.stop = MagicMock()

        with patch("hud.tools.shell._BashSession", return_value=session):
            result = await tool(commands=["echo first", "echo second"])

        assert len(result.output) == 2
@@ -0,0 +1,85 @@
1
+ from __future__ import annotations
2
+
3
+ import pytest
4
+ from mcp.types import TextContent
5
+
6
+ from hud.tools.submit import SubmitTool, get_submission, set_submission
7
+
8
+
9
@pytest.fixture(autouse=True)
def reset_submission():
    """Clear the stored submission before and after every test."""
    # Setup: start each test from an empty submission.
    set_submission(None)
    yield
    # Teardown: clear whatever the test stored.
    set_submission(None)
15
+
16
+
17
def test_set_and_get_submission():
    """``get_submission`` returns whatever ``set_submission`` stored last."""
    assert get_submission() is None

    for value in ("test value", "another value"):
        set_submission(value)
        assert get_submission() == value

    set_submission(None)
    assert get_submission() is None
29
+
30
+
31
@pytest.mark.asyncio
async def test_submit_tool_with_response():
    """A non-empty response is stored and returned as a single text block."""
    submit = SubmitTool()

    blocks = await submit(response="Test response")

    assert get_submission() == "Test response"
    assert len(blocks) == 1
    first_block = blocks[0]
    assert isinstance(first_block, TextContent)
    assert first_block.text == "Test response"
42
+
43
+
44
@pytest.mark.asyncio
async def test_submit_tool_with_none():
    """Submitting ``None`` stores ``None`` and returns no content blocks."""
    submit = SubmitTool()

    blocks = await submit(response=None)

    assert get_submission() is None
    assert len(blocks) == 0
53
+
54
+
55
@pytest.mark.asyncio
async def test_submit_tool_with_empty_string():
    """An empty string is stored as-is but returns no content blocks."""
    submit = SubmitTool()

    blocks = await submit(response="")

    assert get_submission() == ""
    assert len(blocks) == 0
64
+
65
+
66
@pytest.mark.asyncio
async def test_submit_tool_overwrite():
    """A later submission replaces the earlier one."""
    submit = SubmitTool()

    for text in ("First submission", "Second submission"):
        await submit(response=text)
        assert get_submission() == text
76
+
77
+
78
@pytest.mark.asyncio
async def test_submit_tool_properties():
    """The tool exposes the expected name, title and description."""
    submit = SubmitTool()

    assert submit.name == "response"
    assert submit.title == "Submit Tool"
    description = submit.description.lower()
    assert "final response" in description