hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282)
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +70 -5
  3. hud/agents/base.py +238 -500
  4. hud/agents/claude.py +236 -247
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +264 -0
  7. hud/agents/gemini_cua.py +324 -0
  8. hud/agents/grounded_openai.py +98 -100
  9. hud/agents/misc/integration_test_agent.py +51 -20
  10. hud/agents/misc/response_agent.py +48 -36
  11. hud/agents/openai.py +282 -296
  12. hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
  13. hud/agents/operator.py +199 -0
  14. hud/agents/resolver.py +70 -0
  15. hud/agents/tests/conftest.py +133 -0
  16. hud/agents/tests/test_base.py +300 -622
  17. hud/agents/tests/test_base_runtime.py +233 -0
  18. hud/agents/tests/test_claude.py +381 -214
  19. hud/agents/tests/test_client.py +9 -10
  20. hud/agents/tests/test_gemini.py +369 -0
  21. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  22. hud/agents/tests/test_openai.py +377 -140
  23. hud/agents/tests/test_operator.py +362 -0
  24. hud/agents/tests/test_resolver.py +192 -0
  25. hud/agents/tests/test_run_eval.py +179 -0
  26. hud/agents/types.py +148 -0
  27. hud/cli/__init__.py +493 -546
  28. hud/cli/analyze.py +43 -5
  29. hud/cli/build.py +699 -113
  30. hud/cli/debug.py +8 -5
  31. hud/cli/dev.py +889 -732
  32. hud/cli/eval.py +793 -667
  33. hud/cli/flows/dev.py +167 -0
  34. hud/cli/flows/init.py +191 -0
  35. hud/cli/flows/tasks.py +153 -56
  36. hud/cli/flows/templates.py +151 -0
  37. hud/cli/flows/tests/__init__.py +1 -0
  38. hud/cli/flows/tests/test_dev.py +126 -0
  39. hud/cli/init.py +60 -58
  40. hud/cli/pull.py +1 -1
  41. hud/cli/push.py +38 -13
  42. hud/cli/rft.py +311 -0
  43. hud/cli/rft_status.py +145 -0
  44. hud/cli/tests/test_analyze.py +5 -5
  45. hud/cli/tests/test_analyze_metadata.py +3 -2
  46. hud/cli/tests/test_analyze_module.py +120 -0
  47. hud/cli/tests/test_build.py +110 -8
  48. hud/cli/tests/test_build_failure.py +41 -0
  49. hud/cli/tests/test_build_module.py +50 -0
  50. hud/cli/tests/test_cli_init.py +6 -1
  51. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  52. hud/cli/tests/test_cli_root.py +140 -0
  53. hud/cli/tests/test_convert.py +361 -0
  54. hud/cli/tests/test_debug.py +12 -10
  55. hud/cli/tests/test_dev.py +197 -0
  56. hud/cli/tests/test_eval.py +251 -0
  57. hud/cli/tests/test_eval_bedrock.py +51 -0
  58. hud/cli/tests/test_init.py +124 -0
  59. hud/cli/tests/test_main_module.py +11 -5
  60. hud/cli/tests/test_mcp_server.py +12 -100
  61. hud/cli/tests/test_push.py +1 -1
  62. hud/cli/tests/test_push_happy.py +74 -0
  63. hud/cli/tests/test_push_wrapper.py +23 -0
  64. hud/cli/tests/test_registry.py +1 -1
  65. hud/cli/tests/test_utils.py +1 -1
  66. hud/cli/{rl → utils}/celebrate.py +14 -12
  67. hud/cli/utils/config.py +18 -1
  68. hud/cli/utils/docker.py +130 -4
  69. hud/cli/utils/env_check.py +9 -9
  70. hud/cli/utils/git.py +136 -0
  71. hud/cli/utils/interactive.py +39 -5
  72. hud/cli/utils/metadata.py +70 -1
  73. hud/cli/utils/runner.py +1 -1
  74. hud/cli/utils/server.py +2 -2
  75. hud/cli/utils/source_hash.py +3 -3
  76. hud/cli/utils/tasks.py +4 -1
  77. hud/cli/utils/tests/__init__.py +0 -0
  78. hud/cli/utils/tests/test_config.py +58 -0
  79. hud/cli/utils/tests/test_docker.py +93 -0
  80. hud/cli/utils/tests/test_docker_hints.py +71 -0
  81. hud/cli/utils/tests/test_env_check.py +74 -0
  82. hud/cli/utils/tests/test_environment.py +42 -0
  83. hud/cli/utils/tests/test_git.py +142 -0
  84. hud/cli/utils/tests/test_interactive_module.py +60 -0
  85. hud/cli/utils/tests/test_local_runner.py +50 -0
  86. hud/cli/utils/tests/test_logging_utils.py +23 -0
  87. hud/cli/utils/tests/test_metadata.py +49 -0
  88. hud/cli/utils/tests/test_package_runner.py +35 -0
  89. hud/cli/utils/tests/test_registry_utils.py +49 -0
  90. hud/cli/utils/tests/test_remote_runner.py +25 -0
  91. hud/cli/utils/tests/test_runner_modules.py +52 -0
  92. hud/cli/utils/tests/test_source_hash.py +36 -0
  93. hud/cli/utils/tests/test_tasks.py +80 -0
  94. hud/cli/utils/version_check.py +258 -0
  95. hud/cli/{rl → utils}/viewer.py +2 -2
  96. hud/clients/README.md +12 -11
  97. hud/clients/__init__.py +4 -3
  98. hud/clients/base.py +166 -26
  99. hud/clients/environment.py +51 -0
  100. hud/clients/fastmcp.py +13 -6
  101. hud/clients/mcp_use.py +45 -15
  102. hud/clients/tests/test_analyze_scenarios.py +206 -0
  103. hud/clients/tests/test_protocol.py +9 -3
  104. hud/datasets/__init__.py +23 -20
  105. hud/datasets/loader.py +326 -0
  106. hud/datasets/runner.py +198 -105
  107. hud/datasets/tests/__init__.py +0 -0
  108. hud/datasets/tests/test_loader.py +221 -0
  109. hud/datasets/tests/test_utils.py +315 -0
  110. hud/datasets/utils.py +270 -90
  111. hud/environment/__init__.py +52 -0
  112. hud/environment/connection.py +258 -0
  113. hud/environment/connectors/__init__.py +33 -0
  114. hud/environment/connectors/base.py +68 -0
  115. hud/environment/connectors/local.py +177 -0
  116. hud/environment/connectors/mcp_config.py +137 -0
  117. hud/environment/connectors/openai.py +101 -0
  118. hud/environment/connectors/remote.py +172 -0
  119. hud/environment/environment.py +835 -0
  120. hud/environment/integrations/__init__.py +45 -0
  121. hud/environment/integrations/adk.py +67 -0
  122. hud/environment/integrations/anthropic.py +196 -0
  123. hud/environment/integrations/gemini.py +92 -0
  124. hud/environment/integrations/langchain.py +82 -0
  125. hud/environment/integrations/llamaindex.py +68 -0
  126. hud/environment/integrations/openai.py +238 -0
  127. hud/environment/mock.py +306 -0
  128. hud/environment/router.py +263 -0
  129. hud/environment/scenarios.py +620 -0
  130. hud/environment/tests/__init__.py +1 -0
  131. hud/environment/tests/test_connection.py +317 -0
  132. hud/environment/tests/test_connectors.py +205 -0
  133. hud/environment/tests/test_environment.py +593 -0
  134. hud/environment/tests/test_integrations.py +257 -0
  135. hud/environment/tests/test_local_connectors.py +242 -0
  136. hud/environment/tests/test_scenarios.py +1086 -0
  137. hud/environment/tests/test_tools.py +208 -0
  138. hud/environment/types.py +23 -0
  139. hud/environment/utils/__init__.py +35 -0
  140. hud/environment/utils/formats.py +215 -0
  141. hud/environment/utils/schema.py +171 -0
  142. hud/environment/utils/tool_wrappers.py +113 -0
  143. hud/eval/__init__.py +67 -0
  144. hud/eval/context.py +727 -0
  145. hud/eval/display.py +299 -0
  146. hud/eval/instrument.py +187 -0
  147. hud/eval/manager.py +533 -0
  148. hud/eval/parallel.py +268 -0
  149. hud/eval/task.py +372 -0
  150. hud/eval/tests/__init__.py +1 -0
  151. hud/eval/tests/test_context.py +178 -0
  152. hud/eval/tests/test_eval.py +210 -0
  153. hud/eval/tests/test_manager.py +152 -0
  154. hud/eval/tests/test_parallel.py +168 -0
  155. hud/eval/tests/test_task.py +291 -0
  156. hud/eval/types.py +65 -0
  157. hud/eval/utils.py +194 -0
  158. hud/patches/__init__.py +19 -0
  159. hud/patches/mcp_patches.py +308 -0
  160. hud/patches/warnings.py +54 -0
  161. hud/samples/browser.py +4 -4
  162. hud/server/__init__.py +2 -1
  163. hud/server/low_level.py +2 -1
  164. hud/server/router.py +164 -0
  165. hud/server/server.py +567 -80
  166. hud/server/tests/test_mcp_server_integration.py +11 -11
  167. hud/server/tests/test_mcp_server_more.py +1 -1
  168. hud/server/tests/test_server_extra.py +2 -0
  169. hud/settings.py +45 -3
  170. hud/shared/exceptions.py +36 -10
  171. hud/shared/hints.py +26 -1
  172. hud/shared/requests.py +15 -3
  173. hud/shared/tests/test_exceptions.py +40 -31
  174. hud/shared/tests/test_hints.py +167 -0
  175. hud/telemetry/__init__.py +20 -19
  176. hud/telemetry/exporter.py +201 -0
  177. hud/telemetry/instrument.py +165 -253
  178. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  179. hud/telemetry/tests/test_exporter.py +258 -0
  180. hud/telemetry/tests/test_instrument.py +401 -0
  181. hud/tools/__init__.py +18 -2
  182. hud/tools/agent.py +223 -0
  183. hud/tools/apply_patch.py +639 -0
  184. hud/tools/base.py +54 -4
  185. hud/tools/bash.py +2 -2
  186. hud/tools/computer/__init__.py +36 -3
  187. hud/tools/computer/anthropic.py +2 -2
  188. hud/tools/computer/gemini.py +385 -0
  189. hud/tools/computer/hud.py +23 -6
  190. hud/tools/computer/openai.py +20 -21
  191. hud/tools/computer/qwen.py +434 -0
  192. hud/tools/computer/settings.py +37 -0
  193. hud/tools/edit.py +3 -7
  194. hud/tools/executors/base.py +4 -2
  195. hud/tools/executors/pyautogui.py +1 -1
  196. hud/tools/grounding/grounded_tool.py +13 -18
  197. hud/tools/grounding/grounder.py +10 -31
  198. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  199. hud/tools/jupyter.py +330 -0
  200. hud/tools/playwright.py +18 -3
  201. hud/tools/shell.py +308 -0
  202. hud/tools/tests/test_agent_tool.py +355 -0
  203. hud/tools/tests/test_apply_patch.py +718 -0
  204. hud/tools/tests/test_computer.py +4 -9
  205. hud/tools/tests/test_computer_actions.py +24 -2
  206. hud/tools/tests/test_jupyter_tool.py +181 -0
  207. hud/tools/tests/test_shell.py +596 -0
  208. hud/tools/tests/test_submit.py +85 -0
  209. hud/tools/tests/test_types.py +193 -0
  210. hud/tools/types.py +21 -1
  211. hud/types.py +194 -56
  212. hud/utils/__init__.py +2 -0
  213. hud/utils/env.py +67 -0
  214. hud/utils/hud_console.py +89 -18
  215. hud/utils/mcp.py +15 -58
  216. hud/utils/strict_schema.py +162 -0
  217. hud/utils/tests/test_init.py +1 -2
  218. hud/utils/tests/test_mcp.py +1 -28
  219. hud/utils/tests/test_pretty_errors.py +186 -0
  220. hud/utils/tests/test_tool_shorthand.py +154 -0
  221. hud/utils/tests/test_version.py +1 -1
  222. hud/utils/types.py +20 -0
  223. hud/version.py +1 -1
  224. hud_python-0.5.13.dist-info/METADATA +264 -0
  225. hud_python-0.5.13.dist-info/RECORD +305 -0
  226. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
  227. hud/agents/langchain.py +0 -261
  228. hud/agents/lite_llm.py +0 -72
  229. hud/cli/rl/__init__.py +0 -180
  230. hud/cli/rl/config.py +0 -101
  231. hud/cli/rl/display.py +0 -133
  232. hud/cli/rl/gpu.py +0 -63
  233. hud/cli/rl/gpu_utils.py +0 -321
  234. hud/cli/rl/local_runner.py +0 -595
  235. hud/cli/rl/presets.py +0 -96
  236. hud/cli/rl/remote_runner.py +0 -463
  237. hud/cli/rl/rl_api.py +0 -150
  238. hud/cli/rl/vllm.py +0 -177
  239. hud/cli/rl/wait_utils.py +0 -89
  240. hud/datasets/parallel.py +0 -687
  241. hud/misc/__init__.py +0 -1
  242. hud/misc/claude_plays_pokemon.py +0 -292
  243. hud/otel/__init__.py +0 -35
  244. hud/otel/collector.py +0 -142
  245. hud/otel/config.py +0 -181
  246. hud/otel/context.py +0 -570
  247. hud/otel/exporters.py +0 -369
  248. hud/otel/instrumentation.py +0 -135
  249. hud/otel/processors.py +0 -121
  250. hud/otel/tests/__init__.py +0 -1
  251. hud/otel/tests/test_processors.py +0 -197
  252. hud/rl/README.md +0 -30
  253. hud/rl/__init__.py +0 -1
  254. hud/rl/actor.py +0 -176
  255. hud/rl/buffer.py +0 -405
  256. hud/rl/chat_template.jinja +0 -101
  257. hud/rl/config.py +0 -192
  258. hud/rl/distributed.py +0 -132
  259. hud/rl/learner.py +0 -637
  260. hud/rl/tests/__init__.py +0 -1
  261. hud/rl/tests/test_learner.py +0 -186
  262. hud/rl/train.py +0 -382
  263. hud/rl/types.py +0 -101
  264. hud/rl/utils/start_vllm_server.sh +0 -30
  265. hud/rl/utils.py +0 -524
  266. hud/rl/vllm_adapter.py +0 -143
  267. hud/telemetry/job.py +0 -352
  268. hud/telemetry/replay.py +0 -74
  269. hud/telemetry/tests/test_replay.py +0 -40
  270. hud/telemetry/tests/test_trace.py +0 -63
  271. hud/telemetry/trace.py +0 -158
  272. hud/utils/agent_factories.py +0 -86
  273. hud/utils/async_utils.py +0 -65
  274. hud/utils/group_eval.py +0 -223
  275. hud/utils/progress.py +0 -149
  276. hud/utils/tasks.py +0 -127
  277. hud/utils/tests/test_async_utils.py +0 -173
  278. hud/utils/tests/test_progress.py +0 -261
  279. hud_python-0.4.45.dist-info/METADATA +0 -552
  280. hud_python-0.4.45.dist-info/RECORD +0 -228
  281. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  282. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
@@ -9,6 +9,7 @@ from hud.tools.computer.anthropic import AnthropicComputerTool
9
9
  from hud.tools.computer.hud import HudComputerTool
10
10
  from hud.tools.computer.openai import OpenAIComputerTool
11
11
  from hud.tools.executors.base import BaseExecutor
12
+ from hud.tools.types import Coordinate
12
13
 
13
14
 
14
15
  @pytest.mark.asyncio
@@ -193,7 +194,9 @@ class TestHudComputerToolExtended:
193
194
  async def test_drag_action(self, base_executor):
194
195
  """Test drag action with BaseExecutor."""
195
196
  tool = HudComputerTool(executor=base_executor)
196
- result = await tool(action="drag", path=[(100, 100), (200, 200)])
197
+ result = await tool(
198
+ action="drag", path=[Coordinate(x=100, y=100), Coordinate(x=200, y=200)]
199
+ )
197
200
  assert result
198
201
  assert any("Drag" in content.text for content in result if isinstance(content, TextContent))
199
202
 
@@ -272,14 +275,6 @@ class TestHudComputerToolExtended:
272
275
  result = await tool(action="click", x=100, y=100, pattern=[100])
273
276
  assert result
274
277
 
275
- @pytest.mark.asyncio
276
- async def test_invalid_action(self, base_executor):
277
- """Test invalid action returns error."""
278
- tool = HudComputerTool(executor=base_executor)
279
-
280
- with pytest.raises(Exception): # Will raise McpError
281
- await tool(action="invalid_action")
282
-
283
278
  @pytest.mark.asyncio
284
279
  async def test_screenshot_action(self, base_executor):
285
280
  """Test screenshot action."""
@@ -1,9 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from typing import Literal
4
+
3
5
  import pytest
4
6
  from mcp.types import ImageContent, TextContent
5
7
 
6
8
  from hud.tools.computer.hud import HudComputerTool
9
+ from hud.tools.types import Coordinate
7
10
 
8
11
  # (action, kwargs)
9
12
  CASES = [
@@ -17,7 +20,7 @@ CASES = [
17
20
  # Skip move test - it has Field parameter handling issues when called directly
18
21
  # ("move", {"x": 5, "y": 5}), # x,y are for absolute positioning
19
22
  ("wait", {"time": 5}),
20
- ("drag", {"path": [(0, 0), (10, 10)]}),
23
+ ("drag", {"path": [Coordinate(x=0, y=0), Coordinate(x=10, y=10)]}),
21
24
  ("mouse_down", {}),
22
25
  ("mouse_up", {}),
23
26
  ("hold_key", {"text": "a", "duration": 0.1}),
@@ -26,7 +29,26 @@ CASES = [
26
29
 
27
30
  @pytest.mark.asyncio
28
31
  @pytest.mark.parametrize("action, params", CASES)
29
- async def test_hud_computer_actions(action: str, params: dict):
32
+ async def test_hud_computer_actions(
33
+ action: Literal[
34
+ "click",
35
+ "press",
36
+ "keydown",
37
+ "keyup",
38
+ "write",
39
+ "scroll",
40
+ "move",
41
+ "wait",
42
+ "drag",
43
+ "response",
44
+ "screenshot",
45
+ "position",
46
+ "hold_key",
47
+ "mouse_down",
48
+ "mouse_up",
49
+ ],
50
+ params: dict,
51
+ ):
30
52
  comp = HudComputerTool()
31
53
  blocks = await comp(action=action, **params)
32
54
  # Ensure at least one content block is returned
@@ -0,0 +1,181 @@
1
+ """Test JupyterTool"""
2
+
3
+ from __future__ import annotations
4
+
5
+ from unittest.mock import AsyncMock, MagicMock, patch
6
+
7
+ import pytest
8
+
9
+ # Import tornado modules before tests to avoid forward reference issues with mocking
10
+ import tornado.httpclient
11
+ import tornado.ioloop
12
+ import tornado.websocket # noqa: F401
13
+ from mcp.types import TextContent
14
+
15
+ from hud.tools.jupyter import JupyterTool, strip_ansi
16
+
17
+
18
+ class TestStripAnsi:
19
+ """Test strip_ansi utility function."""
20
+
21
+ def test_strip_ansi(self):
22
+ """Test stripping ANSI color codes."""
23
+ input_text = "\x1b[31mRed text\x1b[0m"
24
+ assert strip_ansi(input_text) == "Red text"
25
+
26
+
27
+ class TestJupyterTool:
28
+ """Test class for JupyterTool"""
29
+
30
+ def test_jupyter_tool_init(self):
31
+ """Test JupyterTool initialization with defaults."""
32
+ tool = JupyterTool()
33
+ assert tool.name == "jupyter"
34
+ assert tool.title == "Jupyter Code Execution"
35
+ assert tool.description == "Execute Python code in a Jupyter kernel"
36
+ assert tool._base_url == "http://localhost:8888"
37
+ assert tool._base_ws_url == "ws://localhost:8888"
38
+ assert tool._kernel_name == "python3"
39
+ assert tool._kernel_id == ""
40
+ assert tool._ws is None
41
+ assert tool._initialized is False
42
+
43
+ def test_shared_kernel(self):
44
+ """Test reregister_shared_kernel and from_shared_kernel."""
45
+ # Succeed on `reregister_shared_kernel` and `from_shared_kernel`
46
+ JupyterTool._kernel_registry.clear()
47
+ JupyterTool.register_shared_kernel("shared_kernel", "kernel-456")
48
+ tool = JupyterTool.from_shared_kernel("shared_kernel", url_suffix="localhost:8888")
49
+
50
+ assert tool._kernel_id == "kernel-456"
51
+ assert tool._base_url == "http://localhost:8888"
52
+
53
+ # Failure on `from_shared_kernel`
54
+ JupyterTool._kernel_registry.clear()
55
+ with pytest.raises(ValueError) as exc_info:
56
+ JupyterTool.from_shared_kernel("nonexistent_kernel")
57
+
58
+ assert "No kernel registered with name 'nonexistent_kernel'" in str(exc_info.value)
59
+
60
+ @pytest.mark.asyncio
61
+ async def test_call(self):
62
+ """Test public API integration with successful execution."""
63
+ tool = JupyterTool()
64
+
65
+ with (
66
+ patch.object(tool, "_ensure_kernel", new_callable=AsyncMock),
67
+ patch.object(tool, "_execute", new_callable=AsyncMock) as mock_execute,
68
+ ):
69
+ mock_execute.return_value = "Hello, World!"
70
+ result = await tool(code="print('Hello, World!')")
71
+ assert isinstance(result[0], TextContent)
72
+ assert result[0].text == "Hello, World!"
73
+
74
+ @pytest.mark.asyncio
75
+ async def test_ensure_kernel(self):
76
+ """Test kernel initialization on first call."""
77
+ tool = JupyterTool()
78
+ with patch.object(tool, "_connect", new_callable=AsyncMock):
79
+ await tool._ensure_kernel()
80
+ assert tool._initialized is True
81
+
82
+ @pytest.mark.asyncio
83
+ async def test_connect_new_kernel(self):
84
+ """Test connecting and starting a new kernel."""
85
+ tool = JupyterTool()
86
+ mock_response = MagicMock(body=b'{"id": "new-kernel-123"}')
87
+ mock_client = MagicMock(fetch=AsyncMock(return_value=mock_response))
88
+
89
+ with (
90
+ patch("tornado.httpclient.AsyncHTTPClient", return_value=mock_client),
91
+ patch("tornado.websocket.websocket_connect", new_callable=AsyncMock),
92
+ patch("tornado.ioloop.PeriodicCallback"),
93
+ ):
94
+ await tool._connect()
95
+ assert tool._kernel_id == "new-kernel-123"
96
+
97
+ @pytest.mark.asyncio
98
+ async def test_connect_existing_kernel(self):
99
+ """Test connecting to an existing kernel."""
100
+ tool = JupyterTool(kernel_id="existing-kernel-456")
101
+ with (
102
+ patch("tornado.httpclient.AsyncHTTPClient"),
103
+ patch("tornado.websocket.websocket_connect", new_callable=AsyncMock),
104
+ patch("tornado.ioloop.PeriodicCallback"),
105
+ ):
106
+ await tool._connect()
107
+ assert tool._kernel_id == "existing-kernel-456"
108
+
109
+ @pytest.mark.asyncio
110
+ async def test_execute_success(self):
111
+ """Test successful code execution via Jupyter protocol."""
112
+ tool = JupyterTool(kernel_id="test-kernel")
113
+ stream_msg = (
114
+ '{"msg_type": "stream", "parent_header": {"msg_id": "test-msg"}, '
115
+ '"content": {"text": "Output"}}'
116
+ )
117
+ reply_msg = (
118
+ '{"msg_type": "execute_reply", "parent_header": {"msg_id": "test-msg"}, "content": {}}'
119
+ )
120
+ tool._ws = MagicMock(read_message=AsyncMock(side_effect=[stream_msg, reply_msg]))
121
+
122
+ with patch("hud.tools.jupyter.uuid4") as mock_uuid:
123
+ mock_uuid.return_value.hex = "test-msg"
124
+ result = await tool._execute("print('Output')")
125
+ assert result == "Output"
126
+
127
+ @pytest.mark.asyncio
128
+ async def test_execute_with_error(self):
129
+ """Test code execution with error via Jupyter protocol."""
130
+ tool = JupyterTool(kernel_id="test-kernel")
131
+ error_msg = (
132
+ '{"msg_type": "error", "parent_header": {"msg_id": "test-msg"}, '
133
+ '"content": {"traceback": ["Traceback", "Error"]}}'
134
+ )
135
+ tool._ws = MagicMock(read_message=AsyncMock(side_effect=[error_msg]))
136
+
137
+ with patch("hud.tools.jupyter.uuid4") as mock_uuid:
138
+ mock_uuid.return_value.hex = "test-msg"
139
+ result = await tool._execute("1/0")
140
+ assert "Traceback" in result and "Error" in result
141
+
142
+ @pytest.mark.asyncio
143
+ async def test_execute_timeout(self):
144
+ """Test code execution timeout with kernel interrupt."""
145
+ import asyncio
146
+
147
+ tool = JupyterTool(kernel_id="test-kernel")
148
+
149
+ # Mock websocket to hang indefinitely
150
+ async def hang_forever():
151
+ await asyncio.sleep(9999)
152
+
153
+ tool._ws = MagicMock(read_message=hang_forever)
154
+ mock_client = MagicMock(fetch=AsyncMock())
155
+
156
+ with (
157
+ patch("hud.tools.jupyter.uuid4") as mock_uuid,
158
+ patch("tornado.httpclient.AsyncHTTPClient", return_value=mock_client),
159
+ ):
160
+ mock_uuid.return_value.hex = "test-msg"
161
+ result = await tool._execute("while True: pass", execution_timeout=1)
162
+ assert "[Execution timed out" in result
163
+
164
+ @pytest.mark.asyncio
165
+ async def test_shutdown(self):
166
+ """Test shutdown cleans up kernel state."""
167
+ tool = JupyterTool(kernel_id="shutdown-kernel")
168
+ tool._initialized = True
169
+ tool._ws = MagicMock()
170
+ tool._heartbeat_callback = MagicMock()
171
+
172
+ with patch("tornado.httpclient.AsyncHTTPClient"):
173
+ await tool.shutdown()
174
+ assert tool._kernel_id == ""
175
+ assert tool._ws is None
176
+ assert not tool._initialized
177
+
178
+ def test_get_kernel_id(self):
179
+ """Test getting kernel ID."""
180
+ tool = JupyterTool(kernel_id="test-kernel-789")
181
+ assert tool.get_kernel_id() == "test-kernel-789"