hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282) hide show
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +70 -5
  3. hud/agents/base.py +238 -500
  4. hud/agents/claude.py +236 -247
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +264 -0
  7. hud/agents/gemini_cua.py +324 -0
  8. hud/agents/grounded_openai.py +98 -100
  9. hud/agents/misc/integration_test_agent.py +51 -20
  10. hud/agents/misc/response_agent.py +48 -36
  11. hud/agents/openai.py +282 -296
  12. hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
  13. hud/agents/operator.py +199 -0
  14. hud/agents/resolver.py +70 -0
  15. hud/agents/tests/conftest.py +133 -0
  16. hud/agents/tests/test_base.py +300 -622
  17. hud/agents/tests/test_base_runtime.py +233 -0
  18. hud/agents/tests/test_claude.py +381 -214
  19. hud/agents/tests/test_client.py +9 -10
  20. hud/agents/tests/test_gemini.py +369 -0
  21. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  22. hud/agents/tests/test_openai.py +377 -140
  23. hud/agents/tests/test_operator.py +362 -0
  24. hud/agents/tests/test_resolver.py +192 -0
  25. hud/agents/tests/test_run_eval.py +179 -0
  26. hud/agents/types.py +148 -0
  27. hud/cli/__init__.py +493 -546
  28. hud/cli/analyze.py +43 -5
  29. hud/cli/build.py +699 -113
  30. hud/cli/debug.py +8 -5
  31. hud/cli/dev.py +889 -732
  32. hud/cli/eval.py +793 -667
  33. hud/cli/flows/dev.py +167 -0
  34. hud/cli/flows/init.py +191 -0
  35. hud/cli/flows/tasks.py +153 -56
  36. hud/cli/flows/templates.py +151 -0
  37. hud/cli/flows/tests/__init__.py +1 -0
  38. hud/cli/flows/tests/test_dev.py +126 -0
  39. hud/cli/init.py +60 -58
  40. hud/cli/pull.py +1 -1
  41. hud/cli/push.py +38 -13
  42. hud/cli/rft.py +311 -0
  43. hud/cli/rft_status.py +145 -0
  44. hud/cli/tests/test_analyze.py +5 -5
  45. hud/cli/tests/test_analyze_metadata.py +3 -2
  46. hud/cli/tests/test_analyze_module.py +120 -0
  47. hud/cli/tests/test_build.py +110 -8
  48. hud/cli/tests/test_build_failure.py +41 -0
  49. hud/cli/tests/test_build_module.py +50 -0
  50. hud/cli/tests/test_cli_init.py +6 -1
  51. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  52. hud/cli/tests/test_cli_root.py +140 -0
  53. hud/cli/tests/test_convert.py +361 -0
  54. hud/cli/tests/test_debug.py +12 -10
  55. hud/cli/tests/test_dev.py +197 -0
  56. hud/cli/tests/test_eval.py +251 -0
  57. hud/cli/tests/test_eval_bedrock.py +51 -0
  58. hud/cli/tests/test_init.py +124 -0
  59. hud/cli/tests/test_main_module.py +11 -5
  60. hud/cli/tests/test_mcp_server.py +12 -100
  61. hud/cli/tests/test_push.py +1 -1
  62. hud/cli/tests/test_push_happy.py +74 -0
  63. hud/cli/tests/test_push_wrapper.py +23 -0
  64. hud/cli/tests/test_registry.py +1 -1
  65. hud/cli/tests/test_utils.py +1 -1
  66. hud/cli/{rl → utils}/celebrate.py +14 -12
  67. hud/cli/utils/config.py +18 -1
  68. hud/cli/utils/docker.py +130 -4
  69. hud/cli/utils/env_check.py +9 -9
  70. hud/cli/utils/git.py +136 -0
  71. hud/cli/utils/interactive.py +39 -5
  72. hud/cli/utils/metadata.py +70 -1
  73. hud/cli/utils/runner.py +1 -1
  74. hud/cli/utils/server.py +2 -2
  75. hud/cli/utils/source_hash.py +3 -3
  76. hud/cli/utils/tasks.py +4 -1
  77. hud/cli/utils/tests/__init__.py +0 -0
  78. hud/cli/utils/tests/test_config.py +58 -0
  79. hud/cli/utils/tests/test_docker.py +93 -0
  80. hud/cli/utils/tests/test_docker_hints.py +71 -0
  81. hud/cli/utils/tests/test_env_check.py +74 -0
  82. hud/cli/utils/tests/test_environment.py +42 -0
  83. hud/cli/utils/tests/test_git.py +142 -0
  84. hud/cli/utils/tests/test_interactive_module.py +60 -0
  85. hud/cli/utils/tests/test_local_runner.py +50 -0
  86. hud/cli/utils/tests/test_logging_utils.py +23 -0
  87. hud/cli/utils/tests/test_metadata.py +49 -0
  88. hud/cli/utils/tests/test_package_runner.py +35 -0
  89. hud/cli/utils/tests/test_registry_utils.py +49 -0
  90. hud/cli/utils/tests/test_remote_runner.py +25 -0
  91. hud/cli/utils/tests/test_runner_modules.py +52 -0
  92. hud/cli/utils/tests/test_source_hash.py +36 -0
  93. hud/cli/utils/tests/test_tasks.py +80 -0
  94. hud/cli/utils/version_check.py +258 -0
  95. hud/cli/{rl → utils}/viewer.py +2 -2
  96. hud/clients/README.md +12 -11
  97. hud/clients/__init__.py +4 -3
  98. hud/clients/base.py +166 -26
  99. hud/clients/environment.py +51 -0
  100. hud/clients/fastmcp.py +13 -6
  101. hud/clients/mcp_use.py +45 -15
  102. hud/clients/tests/test_analyze_scenarios.py +206 -0
  103. hud/clients/tests/test_protocol.py +9 -3
  104. hud/datasets/__init__.py +23 -20
  105. hud/datasets/loader.py +326 -0
  106. hud/datasets/runner.py +198 -105
  107. hud/datasets/tests/__init__.py +0 -0
  108. hud/datasets/tests/test_loader.py +221 -0
  109. hud/datasets/tests/test_utils.py +315 -0
  110. hud/datasets/utils.py +270 -90
  111. hud/environment/__init__.py +52 -0
  112. hud/environment/connection.py +258 -0
  113. hud/environment/connectors/__init__.py +33 -0
  114. hud/environment/connectors/base.py +68 -0
  115. hud/environment/connectors/local.py +177 -0
  116. hud/environment/connectors/mcp_config.py +137 -0
  117. hud/environment/connectors/openai.py +101 -0
  118. hud/environment/connectors/remote.py +172 -0
  119. hud/environment/environment.py +835 -0
  120. hud/environment/integrations/__init__.py +45 -0
  121. hud/environment/integrations/adk.py +67 -0
  122. hud/environment/integrations/anthropic.py +196 -0
  123. hud/environment/integrations/gemini.py +92 -0
  124. hud/environment/integrations/langchain.py +82 -0
  125. hud/environment/integrations/llamaindex.py +68 -0
  126. hud/environment/integrations/openai.py +238 -0
  127. hud/environment/mock.py +306 -0
  128. hud/environment/router.py +263 -0
  129. hud/environment/scenarios.py +620 -0
  130. hud/environment/tests/__init__.py +1 -0
  131. hud/environment/tests/test_connection.py +317 -0
  132. hud/environment/tests/test_connectors.py +205 -0
  133. hud/environment/tests/test_environment.py +593 -0
  134. hud/environment/tests/test_integrations.py +257 -0
  135. hud/environment/tests/test_local_connectors.py +242 -0
  136. hud/environment/tests/test_scenarios.py +1086 -0
  137. hud/environment/tests/test_tools.py +208 -0
  138. hud/environment/types.py +23 -0
  139. hud/environment/utils/__init__.py +35 -0
  140. hud/environment/utils/formats.py +215 -0
  141. hud/environment/utils/schema.py +171 -0
  142. hud/environment/utils/tool_wrappers.py +113 -0
  143. hud/eval/__init__.py +67 -0
  144. hud/eval/context.py +727 -0
  145. hud/eval/display.py +299 -0
  146. hud/eval/instrument.py +187 -0
  147. hud/eval/manager.py +533 -0
  148. hud/eval/parallel.py +268 -0
  149. hud/eval/task.py +372 -0
  150. hud/eval/tests/__init__.py +1 -0
  151. hud/eval/tests/test_context.py +178 -0
  152. hud/eval/tests/test_eval.py +210 -0
  153. hud/eval/tests/test_manager.py +152 -0
  154. hud/eval/tests/test_parallel.py +168 -0
  155. hud/eval/tests/test_task.py +291 -0
  156. hud/eval/types.py +65 -0
  157. hud/eval/utils.py +194 -0
  158. hud/patches/__init__.py +19 -0
  159. hud/patches/mcp_patches.py +308 -0
  160. hud/patches/warnings.py +54 -0
  161. hud/samples/browser.py +4 -4
  162. hud/server/__init__.py +2 -1
  163. hud/server/low_level.py +2 -1
  164. hud/server/router.py +164 -0
  165. hud/server/server.py +567 -80
  166. hud/server/tests/test_mcp_server_integration.py +11 -11
  167. hud/server/tests/test_mcp_server_more.py +1 -1
  168. hud/server/tests/test_server_extra.py +2 -0
  169. hud/settings.py +45 -3
  170. hud/shared/exceptions.py +36 -10
  171. hud/shared/hints.py +26 -1
  172. hud/shared/requests.py +15 -3
  173. hud/shared/tests/test_exceptions.py +40 -31
  174. hud/shared/tests/test_hints.py +167 -0
  175. hud/telemetry/__init__.py +20 -19
  176. hud/telemetry/exporter.py +201 -0
  177. hud/telemetry/instrument.py +165 -253
  178. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  179. hud/telemetry/tests/test_exporter.py +258 -0
  180. hud/telemetry/tests/test_instrument.py +401 -0
  181. hud/tools/__init__.py +18 -2
  182. hud/tools/agent.py +223 -0
  183. hud/tools/apply_patch.py +639 -0
  184. hud/tools/base.py +54 -4
  185. hud/tools/bash.py +2 -2
  186. hud/tools/computer/__init__.py +36 -3
  187. hud/tools/computer/anthropic.py +2 -2
  188. hud/tools/computer/gemini.py +385 -0
  189. hud/tools/computer/hud.py +23 -6
  190. hud/tools/computer/openai.py +20 -21
  191. hud/tools/computer/qwen.py +434 -0
  192. hud/tools/computer/settings.py +37 -0
  193. hud/tools/edit.py +3 -7
  194. hud/tools/executors/base.py +4 -2
  195. hud/tools/executors/pyautogui.py +1 -1
  196. hud/tools/grounding/grounded_tool.py +13 -18
  197. hud/tools/grounding/grounder.py +10 -31
  198. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  199. hud/tools/jupyter.py +330 -0
  200. hud/tools/playwright.py +18 -3
  201. hud/tools/shell.py +308 -0
  202. hud/tools/tests/test_agent_tool.py +355 -0
  203. hud/tools/tests/test_apply_patch.py +718 -0
  204. hud/tools/tests/test_computer.py +4 -9
  205. hud/tools/tests/test_computer_actions.py +24 -2
  206. hud/tools/tests/test_jupyter_tool.py +181 -0
  207. hud/tools/tests/test_shell.py +596 -0
  208. hud/tools/tests/test_submit.py +85 -0
  209. hud/tools/tests/test_types.py +193 -0
  210. hud/tools/types.py +21 -1
  211. hud/types.py +194 -56
  212. hud/utils/__init__.py +2 -0
  213. hud/utils/env.py +67 -0
  214. hud/utils/hud_console.py +89 -18
  215. hud/utils/mcp.py +15 -58
  216. hud/utils/strict_schema.py +162 -0
  217. hud/utils/tests/test_init.py +1 -2
  218. hud/utils/tests/test_mcp.py +1 -28
  219. hud/utils/tests/test_pretty_errors.py +186 -0
  220. hud/utils/tests/test_tool_shorthand.py +154 -0
  221. hud/utils/tests/test_version.py +1 -1
  222. hud/utils/types.py +20 -0
  223. hud/version.py +1 -1
  224. hud_python-0.5.13.dist-info/METADATA +264 -0
  225. hud_python-0.5.13.dist-info/RECORD +305 -0
  226. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
  227. hud/agents/langchain.py +0 -261
  228. hud/agents/lite_llm.py +0 -72
  229. hud/cli/rl/__init__.py +0 -180
  230. hud/cli/rl/config.py +0 -101
  231. hud/cli/rl/display.py +0 -133
  232. hud/cli/rl/gpu.py +0 -63
  233. hud/cli/rl/gpu_utils.py +0 -321
  234. hud/cli/rl/local_runner.py +0 -595
  235. hud/cli/rl/presets.py +0 -96
  236. hud/cli/rl/remote_runner.py +0 -463
  237. hud/cli/rl/rl_api.py +0 -150
  238. hud/cli/rl/vllm.py +0 -177
  239. hud/cli/rl/wait_utils.py +0 -89
  240. hud/datasets/parallel.py +0 -687
  241. hud/misc/__init__.py +0 -1
  242. hud/misc/claude_plays_pokemon.py +0 -292
  243. hud/otel/__init__.py +0 -35
  244. hud/otel/collector.py +0 -142
  245. hud/otel/config.py +0 -181
  246. hud/otel/context.py +0 -570
  247. hud/otel/exporters.py +0 -369
  248. hud/otel/instrumentation.py +0 -135
  249. hud/otel/processors.py +0 -121
  250. hud/otel/tests/__init__.py +0 -1
  251. hud/otel/tests/test_processors.py +0 -197
  252. hud/rl/README.md +0 -30
  253. hud/rl/__init__.py +0 -1
  254. hud/rl/actor.py +0 -176
  255. hud/rl/buffer.py +0 -405
  256. hud/rl/chat_template.jinja +0 -101
  257. hud/rl/config.py +0 -192
  258. hud/rl/distributed.py +0 -132
  259. hud/rl/learner.py +0 -637
  260. hud/rl/tests/__init__.py +0 -1
  261. hud/rl/tests/test_learner.py +0 -186
  262. hud/rl/train.py +0 -382
  263. hud/rl/types.py +0 -101
  264. hud/rl/utils/start_vllm_server.sh +0 -30
  265. hud/rl/utils.py +0 -524
  266. hud/rl/vllm_adapter.py +0 -143
  267. hud/telemetry/job.py +0 -352
  268. hud/telemetry/replay.py +0 -74
  269. hud/telemetry/tests/test_replay.py +0 -40
  270. hud/telemetry/tests/test_trace.py +0 -63
  271. hud/telemetry/trace.py +0 -158
  272. hud/utils/agent_factories.py +0 -86
  273. hud/utils/async_utils.py +0 -65
  274. hud/utils/group_eval.py +0 -223
  275. hud/utils/progress.py +0 -149
  276. hud/utils/tasks.py +0 -127
  277. hud/utils/tests/test_async_utils.py +0 -173
  278. hud/utils/tests/test_progress.py +0 -261
  279. hud_python-0.4.45.dist-info/METADATA +0 -552
  280. hud_python-0.4.45.dist-info/RECORD +0 -228
  281. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  282. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,596 @@
1
+ """Tests for shell tool."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from unittest.mock import AsyncMock, MagicMock, patch
6
+
7
+ import pytest
8
+
9
+ from hud.tools.shell import (
10
+ ShellCallOutcome,
11
+ ShellCommandOutput,
12
+ ShellResult,
13
+ ShellTool,
14
+ _BashSession,
15
+ )
16
+ from hud.tools.types import ToolError
17
+
18
+
19
class TestShellCallOutcome:
    """Serialization checks for ShellCallOutcome.to_dict()."""

    def test_to_dict_exit(self):
        """An exit outcome with code 0 serializes type and exit_code."""
        serialized = ShellCallOutcome(type="exit", exit_code=0).to_dict()
        assert serialized == {"type": "exit", "exit_code": 0}

    def test_to_dict_exit_with_error_code(self):
        """A non-zero exit code is carried through to the dict unchanged."""
        serialized = ShellCallOutcome(type="exit", exit_code=1).to_dict()
        assert serialized == {"type": "exit", "exit_code": 1}

    def test_to_dict_timeout(self):
        """A timeout outcome serializes to a dict holding only the type."""
        serialized = ShellCallOutcome(type="timeout").to_dict()
        assert serialized == {"type": "timeout"}
36
+
37
+
38
class TestShellCommandOutput:
    """Serialization checks for ShellCommandOutput.to_dict()."""

    def test_to_dict(self):
        """stdout, stderr and the nested outcome all appear in the dict."""
        clean_exit = ShellCallOutcome(type="exit", exit_code=0)
        command_output = ShellCommandOutput(stdout="hello", stderr="", outcome=clean_exit)

        payload = command_output.to_dict()

        assert payload["stdout"] == "hello"
        assert payload["stderr"] == ""
        assert payload["outcome"] == {"type": "exit", "exit_code": 0}
52
+
53
+
54
class TestShellResult:
    """Serialization checks for ShellResult.to_dict()."""

    @staticmethod
    def _single_output():
        """Build the one-element output list shared by the tests below."""
        return [
            ShellCommandOutput(
                stdout="test",
                stderr="",
                outcome=ShellCallOutcome(type="exit", exit_code=0),
            )
        ]

    def test_to_dict_without_max_output_length(self):
        """With no max_output_length given, the key is absent from the dict."""
        serialized = ShellResult(output=self._single_output()).to_dict()

        assert "output" in serialized
        assert len(serialized["output"]) == 1
        assert "max_output_length" not in serialized

    def test_to_dict_with_max_output_length(self):
        """A supplied max_output_length is echoed back in the dict."""
        shell_result = ShellResult(output=self._single_output(), max_output_length=1024)

        serialized = shell_result.to_dict()

        assert serialized["max_output_length"] == 1024
87
+
88
+
89
class TestBashSession:
    """Tests for _BashSession, driven entirely through mocked processes."""

    @staticmethod
    def _started_session(returncode):
        """Return (session, process): a started session backed by a mock process.

        The mock process reports *returncode*; None stands for "still running".
        """
        sess = _BashSession()
        sess._started = True
        proc = MagicMock()
        proc.returncode = returncode
        sess._process = proc
        return sess, proc

    @staticmethod
    def _fake_buffer(text):
        """Return a mock stream buffer whose decode() yields *text*."""
        buf = MagicMock()
        buf.decode.return_value = text
        buf.clear = MagicMock()
        return buf

    @staticmethod
    def _fake_process(stdout_text, stderr_text):
        """Return a running mock process with awaitable stdin and canned buffers."""
        proc = MagicMock()
        proc.returncode = None

        stdin = MagicMock()
        stdin.write = MagicMock()
        stdin.drain = AsyncMock()
        proc.stdin = stdin

        for stream_name, text in (("stdout", stdout_text), ("stderr", stderr_text)):
            stream = MagicMock()
            stream._buffer = TestBashSession._fake_buffer(text)
            setattr(proc, stream_name, stream)
        return proc

    def test_init(self):
        """A fresh session is neither started nor timed out."""
        sess = _BashSession()
        assert sess._started is False
        assert sess._timed_out is False

    @pytest.mark.asyncio
    async def test_start(self):
        """start() spawns one subprocess and records it on the session."""
        sess = _BashSession()
        spawned = MagicMock()

        with patch("asyncio.create_subprocess_shell", return_value=spawned) as spawn:
            await sess.start()

            assert sess._started is True
            assert sess._process == spawned
            spawn.assert_called_once()

    @pytest.mark.asyncio
    async def test_start_already_started(self):
        """start() on an already-started session does not spawn again."""
        sess = _BashSession()
        sess._started = True

        with patch("asyncio.create_subprocess_shell") as spawn:
            await sess.start()
            spawn.assert_not_called()

    def test_stop_not_started(self):
        """stop() on a never-started session must not raise."""
        _BashSession().stop()

    def test_stop_already_exited(self):
        """stop() leaves a process alone once it has a return code."""
        sess, proc = self._started_session(returncode=0)

        sess.stop()

        proc.terminate.assert_not_called()

    def test_stop_running(self):
        """stop() terminates a process that has no return code yet."""
        sess, proc = self._started_session(returncode=None)

        sess.stop()

        proc.terminate.assert_called_once()

    def test_is_alive_not_started(self):
        """A session that was never started is not alive."""
        assert _BashSession().is_alive() is False

    def test_is_alive_running(self):
        """Started, not timed out, process still running -> alive."""
        sess, _ = self._started_session(returncode=None)
        sess._timed_out = False

        assert sess.is_alive() is True

    def test_is_alive_timed_out(self):
        """A timed-out session is not alive even while its process runs."""
        sess, _ = self._started_session(returncode=None)
        sess._timed_out = True

        assert sess.is_alive() is False

    def test_is_alive_process_exited(self):
        """A session whose process has exited is not alive."""
        sess, _ = self._started_session(returncode=0)
        sess._timed_out = False

        assert sess.is_alive() is False

    @pytest.mark.asyncio
    async def test_run_not_started(self):
        """run() before start() raises ToolError."""
        sess = _BashSession()

        with pytest.raises(ToolError) as raised:
            await sess.run("echo test")

        assert "Session has not started" in str(raised.value)

    @pytest.mark.asyncio
    async def test_run_success(self):
        """A zero exit sentinel in stdout yields clean output and exit_code 0."""
        sess = _BashSession()
        sess._started = True
        sess._process = self._fake_process("Hello World\n<<exit>>0\n", "")

        # asyncio.sleep is swapped for an AsyncMock so the test does not wait.
        with patch("asyncio.sleep", new_callable=AsyncMock):
            outcome = await sess.run("echo Hello World")

        assert outcome.stdout == "Hello World"
        assert outcome.stderr == ""
        assert outcome.outcome.type == "exit"
        assert outcome.outcome.exit_code == 0

    @pytest.mark.asyncio
    async def test_run_with_exit_code(self):
        """A non-zero exit sentinel is reported as the outcome's exit_code."""
        sess = _BashSession()
        sess._started = True
        sess._process = self._fake_process("<<exit>>127\n", "command not found")

        with patch("asyncio.sleep", new_callable=AsyncMock):
            outcome = await sess.run("nonexistent_command")

        assert outcome.outcome.type == "exit"
        assert outcome.outcome.exit_code == 127
269
+
270
+
271
class TestShellTool:
    """Tests for ShellTool with _BashSession replaced by mocks."""

    _SESSION_TARGET = "hud.tools.shell._BashSession"

    @staticmethod
    def _output(stdout, stderr="", exit_code=0):
        """Build a ShellCommandOutput with an "exit" outcome."""
        return ShellCommandOutput(
            stdout=stdout,
            stderr=stderr,
            outcome=ShellCallOutcome(type="exit", exit_code=exit_code),
        )

    @staticmethod
    def _live_session(**run_config):
        """Return a mock session reporting alive; *run_config* configures run()."""
        sess = MagicMock()
        sess.is_alive.return_value = True
        sess.run = AsyncMock(**run_config)
        sess.start = AsyncMock()
        return sess

    @staticmethod
    def _dead_session(timed_out, returncode):
        """Return a mock session reporting not-alive with the given state flags."""
        sess = MagicMock()
        sess._timed_out = timed_out
        sess._process = MagicMock()
        sess._process.returncode = returncode
        sess.is_alive.return_value = False
        sess.stop = MagicMock()
        return sess

    def test_init(self):
        """A new tool has no session yet."""
        assert ShellTool()._session is None

    @pytest.mark.asyncio
    async def test_call_no_commands(self):
        """Calling with no arguments raises ToolError."""
        shell = ShellTool()

        with pytest.raises(ToolError) as raised:
            await shell()

        assert "No commands provided" in str(raised.value)

    @pytest.mark.asyncio
    async def test_call_empty_commands(self):
        """Calling with an empty command list raises ToolError."""
        shell = ShellTool()

        with pytest.raises(ToolError) as raised:
            await shell(commands=[])

        assert "No commands provided" in str(raised.value)

    @pytest.mark.asyncio
    async def test_call_with_command(self):
        """One command starts the session once and runs it with no timeout."""
        shell = ShellTool()
        sess = self._live_session(return_value=self._output("test output"))

        with patch(self._SESSION_TARGET, return_value=sess):
            result = await shell(commands=["echo test"])

        assert isinstance(result, ShellResult)
        assert len(result.output) == 1
        assert result.output[0].stdout == "test output"
        sess.start.assert_called_once()
        sess.run.assert_called_once_with("echo test", None)

    @pytest.mark.asyncio
    async def test_call_with_timeout(self):
        """timeout_ms is forwarded to run(); max_output_length stays None."""
        shell = ShellTool()
        sess = self._live_session(return_value=self._output("output"))

        with patch(self._SESSION_TARGET, return_value=sess):
            result = await shell(commands=["sleep 1"], timeout_ms=5000)

        sess.run.assert_called_once_with("sleep 1", 5000)
        assert result.max_output_length is None

    @pytest.mark.asyncio
    async def test_call_with_max_output_length(self):
        """max_output_length passed to the tool is reflected on the result."""
        shell = ShellTool()
        sess = self._live_session(return_value=self._output("output"))

        with patch(self._SESSION_TARGET, return_value=sess):
            result = await shell(commands=["echo test"], max_output_length=2048)

        assert result.max_output_length == 2048

    @pytest.mark.asyncio
    async def test_call_multiple_commands(self):
        """Each command produces its own output entry, in order."""
        shell = ShellTool()
        sess = self._live_session(
            side_effect=[self._output("first"), self._output("second")]
        )

        with patch(self._SESSION_TARGET, return_value=sess):
            result = await shell(commands=["echo first", "echo second"])

        assert len(result.output) == 2
        assert result.output[0].stdout == "first"
        assert result.output[1].stdout == "second"

    @pytest.mark.asyncio
    async def test_call_reuses_session(self):
        """Two consecutive calls construct the session class only once."""
        shell = ShellTool()
        sess = self._live_session(return_value=self._output("output"))

        with patch(self._SESSION_TARGET, return_value=sess) as session_cls:
            for command in ("echo first", "echo second"):
                await shell(commands=[command])

            assert session_cls.call_count == 1

    @pytest.mark.asyncio
    async def test_auto_restart_on_timeout(self):
        """A timed-out session is stopped, replaced, and the restart is reported."""
        shell = ShellTool()
        stale = self._dead_session(timed_out=True, returncode=None)
        shell._session = stale
        fresh = self._live_session(return_value=self._output("output"))

        with patch(self._SESSION_TARGET, return_value=fresh):
            result = await shell(commands=["echo test"])

        # The old session is torn down and the replacement is started.
        stale.stop.assert_called_once()
        fresh.start.assert_called_once()
        # The restart notice is surfaced through stderr of the first output.
        first_stderr = result.output[0].stderr
        assert "timed out" in first_stderr
        assert "auto-restarted" in first_stderr

    @pytest.mark.asyncio
    async def test_auto_restart_on_exit(self):
        """An exited session is replaced and its exit code is reported."""
        shell = ShellTool()
        shell._session = self._dead_session(timed_out=False, returncode=1)
        fresh = self._live_session(return_value=self._output("output"))

        with patch(self._SESSION_TARGET, return_value=fresh):
            result = await shell(commands=["echo test"])

        assert "exited with code 1" in result.output[0].stderr

    @pytest.mark.asyncio
    async def test_command_execution_error(self):
        """An exception from run() becomes an output entry with exit_code 1."""
        shell = ShellTool()
        sess = self._live_session(side_effect=Exception("Test error"))

        with patch(self._SESSION_TARGET, return_value=sess):
            result = await shell(commands=["failing command"])

        assert len(result.output) == 1
        failure = result.output[0]
        assert "Test error" in failure.stderr
        assert failure.outcome.exit_code == 1

    @pytest.mark.asyncio
    async def test_restart_message_added_to_existing_stderr(self):
        """The restart notice and the command's own stderr both survive."""
        shell = ShellTool()
        shell._session = self._dead_session(timed_out=True, returncode=None)
        fresh = self._live_session(
            return_value=self._output("output", stderr="original error", exit_code=1)
        )

        with patch(self._SESSION_TARGET, return_value=fresh):
            result = await shell(commands=["echo test"])

        combined_stderr = result.output[0].stderr
        assert "timed out" in combined_stderr
        assert "original error" in combined_stderr

    @pytest.mark.asyncio
    async def test_session_dies_mid_execution(self):
        """Both commands still yield output when is_alive() flips mid-run."""
        shell = ShellTool()

        sess = MagicMock()
        # is_alive() answers in sequence: alive, dead, alive again.
        sess.is_alive.side_effect = [True, False, True]
        sess.run = AsyncMock(
            side_effect=[self._output("first"), self._output("second")]
        )
        sess.start = AsyncMock()
        sess._timed_out = True
        sess._process = MagicMock()
        sess._process.returncode = None
        sess.stop = MagicMock()

        with patch(self._SESSION_TARGET, return_value=sess):
            result = await shell(commands=["echo first", "echo second"])

        assert len(result.output) == 2
@@ -0,0 +1,85 @@
1
+ from __future__ import annotations
2
+
3
+ import pytest
4
+ from mcp.types import TextContent
5
+
6
+ from hud.tools.submit import SubmitTool, get_submission, set_submission
7
+
8
+
9
@pytest.fixture(autouse=True)
def reset_submission():
    """Clear the stored submission around every test in this module."""
    set_submission(None)  # start each test with no submission
    yield
    set_submission(None)  # and leave none behind afterwards
15
+
16
+
17
def test_set_and_get_submission():
    """get_submission() returns whatever set_submission() stored last."""
    assert get_submission() is None

    for stored in ("test value", "another value"):
        set_submission(stored)
        assert get_submission() == stored

    set_submission(None)
    assert get_submission() is None
29
+
30
+
31
@pytest.mark.asyncio
async def test_submit_tool_with_response():
    """A string response is stored and echoed back as one TextContent block."""
    submit = SubmitTool()

    blocks = await submit(response="Test response")

    assert get_submission() == "Test response"
    assert len(blocks) == 1
    only_block = blocks[0]
    assert isinstance(only_block, TextContent)
    assert only_block.text == "Test response"
42
+
43
+
44
@pytest.mark.asyncio
async def test_submit_tool_with_none():
    """A None response stores None and returns no content blocks."""
    submit = SubmitTool()

    blocks = await submit(response=None)

    assert get_submission() is None
    assert len(blocks) == 0
53
+
54
+
55
@pytest.mark.asyncio
async def test_submit_tool_with_empty_string():
    """An empty-string response is stored as "" and returns no content blocks."""
    submit = SubmitTool()

    blocks = await submit(response="")

    assert get_submission() == ""
    assert len(blocks) == 0
64
+
65
+
66
@pytest.mark.asyncio
async def test_submit_tool_overwrite():
    """Each submit call replaces the previously stored submission."""
    submit = SubmitTool()

    for text in ("First submission", "Second submission"):
        await submit(response=text)
        assert get_submission() == text
76
+
77
+
78
@pytest.mark.asyncio
async def test_submit_tool_properties():
    """SubmitTool exposes the expected name, title and description text."""
    submit = SubmitTool()

    assert submit.name == "response"
    assert submit.title == "Submit Tool"
    description_lower = submit.description.lower()
    assert "final response" in description_lower