hud-python 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (192)
  1. hud/__init__.py +22 -89
  2. hud/agents/__init__.py +17 -0
  3. hud/agents/art.py +101 -0
  4. hud/agents/base.py +599 -0
  5. hud/{mcp → agents}/claude.py +373 -321
  6. hud/{mcp → agents}/langchain.py +250 -250
  7. hud/agents/misc/__init__.py +7 -0
  8. hud/{agent → agents}/misc/response_agent.py +80 -80
  9. hud/{mcp → agents}/openai.py +352 -334
  10. hud/agents/openai_chat_generic.py +154 -0
  11. hud/{mcp → agents}/tests/__init__.py +1 -1
  12. hud/agents/tests/test_base.py +742 -0
  13. hud/agents/tests/test_claude.py +324 -0
  14. hud/{mcp → agents}/tests/test_client.py +363 -324
  15. hud/{mcp → agents}/tests/test_openai.py +237 -238
  16. hud/cli/__init__.py +617 -0
  17. hud/cli/__main__.py +8 -0
  18. hud/cli/analyze.py +371 -0
  19. hud/cli/analyze_metadata.py +230 -0
  20. hud/cli/build.py +427 -0
  21. hud/cli/clone.py +185 -0
  22. hud/cli/cursor.py +92 -0
  23. hud/cli/debug.py +392 -0
  24. hud/cli/docker_utils.py +83 -0
  25. hud/cli/init.py +281 -0
  26. hud/cli/interactive.py +353 -0
  27. hud/cli/mcp_server.py +756 -0
  28. hud/cli/pull.py +336 -0
  29. hud/cli/push.py +379 -0
  30. hud/cli/remote_runner.py +311 -0
  31. hud/cli/runner.py +160 -0
  32. hud/cli/tests/__init__.py +3 -0
  33. hud/cli/tests/test_analyze.py +284 -0
  34. hud/cli/tests/test_cli_init.py +265 -0
  35. hud/cli/tests/test_cli_main.py +27 -0
  36. hud/cli/tests/test_clone.py +142 -0
  37. hud/cli/tests/test_cursor.py +253 -0
  38. hud/cli/tests/test_debug.py +453 -0
  39. hud/cli/tests/test_mcp_server.py +139 -0
  40. hud/cli/tests/test_utils.py +388 -0
  41. hud/cli/utils.py +263 -0
  42. hud/clients/README.md +143 -0
  43. hud/clients/__init__.py +16 -0
  44. hud/clients/base.py +354 -0
  45. hud/clients/fastmcp.py +202 -0
  46. hud/clients/mcp_use.py +278 -0
  47. hud/clients/tests/__init__.py +1 -0
  48. hud/clients/tests/test_client_integration.py +111 -0
  49. hud/clients/tests/test_fastmcp.py +342 -0
  50. hud/clients/tests/test_protocol.py +188 -0
  51. hud/clients/utils/__init__.py +1 -0
  52. hud/clients/utils/retry_transport.py +160 -0
  53. hud/datasets.py +322 -192
  54. hud/misc/__init__.py +1 -0
  55. hud/{agent → misc}/claude_plays_pokemon.py +292 -283
  56. hud/otel/__init__.py +35 -0
  57. hud/otel/collector.py +142 -0
  58. hud/otel/config.py +164 -0
  59. hud/otel/context.py +536 -0
  60. hud/otel/exporters.py +366 -0
  61. hud/otel/instrumentation.py +97 -0
  62. hud/otel/processors.py +118 -0
  63. hud/otel/tests/__init__.py +1 -0
  64. hud/otel/tests/test_processors.py +197 -0
  65. hud/server/__init__.py +5 -5
  66. hud/server/context.py +114 -0
  67. hud/server/helper/__init__.py +5 -0
  68. hud/server/low_level.py +132 -0
  69. hud/server/server.py +166 -0
  70. hud/server/tests/__init__.py +3 -0
  71. hud/settings.py +73 -79
  72. hud/shared/__init__.py +5 -0
  73. hud/{exceptions.py → shared/exceptions.py} +180 -180
  74. hud/{server → shared}/requests.py +264 -264
  75. hud/shared/tests/test_exceptions.py +157 -0
  76. hud/{server → shared}/tests/test_requests.py +275 -275
  77. hud/telemetry/__init__.py +25 -30
  78. hud/telemetry/instrument.py +379 -0
  79. hud/telemetry/job.py +309 -141
  80. hud/telemetry/replay.py +74 -0
  81. hud/telemetry/trace.py +83 -0
  82. hud/tools/__init__.py +33 -34
  83. hud/tools/base.py +365 -65
  84. hud/tools/bash.py +161 -137
  85. hud/tools/computer/__init__.py +15 -13
  86. hud/tools/computer/anthropic.py +437 -420
  87. hud/tools/computer/hud.py +376 -334
  88. hud/tools/computer/openai.py +295 -292
  89. hud/tools/computer/settings.py +82 -0
  90. hud/tools/edit.py +314 -290
  91. hud/tools/executors/__init__.py +30 -30
  92. hud/tools/executors/base.py +539 -532
  93. hud/tools/executors/pyautogui.py +621 -619
  94. hud/tools/executors/tests/__init__.py +1 -1
  95. hud/tools/executors/tests/test_base_executor.py +338 -338
  96. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  97. hud/tools/executors/xdo.py +511 -503
  98. hud/tools/{playwright_tool.py → playwright.py} +412 -379
  99. hud/tools/tests/__init__.py +3 -3
  100. hud/tools/tests/test_base.py +282 -0
  101. hud/tools/tests/test_bash.py +158 -152
  102. hud/tools/tests/test_bash_extended.py +197 -0
  103. hud/tools/tests/test_computer.py +425 -52
  104. hud/tools/tests/test_computer_actions.py +34 -34
  105. hud/tools/tests/test_edit.py +259 -240
  106. hud/tools/tests/test_init.py +27 -27
  107. hud/tools/tests/test_playwright_tool.py +183 -183
  108. hud/tools/tests/test_tools.py +145 -157
  109. hud/tools/tests/test_utils.py +156 -156
  110. hud/tools/types.py +72 -0
  111. hud/tools/utils.py +50 -50
  112. hud/types.py +136 -89
  113. hud/utils/__init__.py +10 -16
  114. hud/utils/async_utils.py +65 -0
  115. hud/utils/design.py +168 -0
  116. hud/utils/mcp.py +55 -0
  117. hud/utils/progress.py +149 -149
  118. hud/utils/telemetry.py +66 -66
  119. hud/utils/tests/test_async_utils.py +173 -0
  120. hud/utils/tests/test_init.py +17 -21
  121. hud/utils/tests/test_progress.py +261 -225
  122. hud/utils/tests/test_telemetry.py +82 -37
  123. hud/utils/tests/test_version.py +8 -8
  124. hud/version.py +7 -7
  125. hud_python-0.4.0.dist-info/METADATA +474 -0
  126. hud_python-0.4.0.dist-info/RECORD +132 -0
  127. hud_python-0.4.0.dist-info/entry_points.txt +3 -0
  128. {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
  129. hud/adapters/__init__.py +0 -8
  130. hud/adapters/claude/__init__.py +0 -5
  131. hud/adapters/claude/adapter.py +0 -180
  132. hud/adapters/claude/tests/__init__.py +0 -1
  133. hud/adapters/claude/tests/test_adapter.py +0 -519
  134. hud/adapters/common/__init__.py +0 -6
  135. hud/adapters/common/adapter.py +0 -178
  136. hud/adapters/common/tests/test_adapter.py +0 -289
  137. hud/adapters/common/types.py +0 -446
  138. hud/adapters/operator/__init__.py +0 -5
  139. hud/adapters/operator/adapter.py +0 -108
  140. hud/adapters/operator/tests/__init__.py +0 -1
  141. hud/adapters/operator/tests/test_adapter.py +0 -370
  142. hud/agent/__init__.py +0 -19
  143. hud/agent/base.py +0 -126
  144. hud/agent/claude.py +0 -271
  145. hud/agent/langchain.py +0 -215
  146. hud/agent/misc/__init__.py +0 -3
  147. hud/agent/operator.py +0 -268
  148. hud/agent/tests/__init__.py +0 -1
  149. hud/agent/tests/test_base.py +0 -202
  150. hud/env/__init__.py +0 -11
  151. hud/env/client.py +0 -35
  152. hud/env/docker_client.py +0 -349
  153. hud/env/environment.py +0 -446
  154. hud/env/local_docker_client.py +0 -358
  155. hud/env/remote_client.py +0 -212
  156. hud/env/remote_docker_client.py +0 -292
  157. hud/gym.py +0 -130
  158. hud/job.py +0 -773
  159. hud/mcp/__init__.py +0 -17
  160. hud/mcp/base.py +0 -631
  161. hud/mcp/client.py +0 -312
  162. hud/mcp/tests/test_base.py +0 -512
  163. hud/mcp/tests/test_claude.py +0 -294
  164. hud/task.py +0 -149
  165. hud/taskset.py +0 -237
  166. hud/telemetry/_trace.py +0 -347
  167. hud/telemetry/context.py +0 -230
  168. hud/telemetry/exporter.py +0 -575
  169. hud/telemetry/instrumentation/__init__.py +0 -3
  170. hud/telemetry/instrumentation/mcp.py +0 -259
  171. hud/telemetry/instrumentation/registry.py +0 -59
  172. hud/telemetry/mcp_models.py +0 -270
  173. hud/telemetry/tests/__init__.py +0 -1
  174. hud/telemetry/tests/test_context.py +0 -210
  175. hud/telemetry/tests/test_trace.py +0 -312
  176. hud/tools/helper/README.md +0 -56
  177. hud/tools/helper/__init__.py +0 -9
  178. hud/tools/helper/mcp_server.py +0 -78
  179. hud/tools/helper/server_initialization.py +0 -115
  180. hud/tools/helper/utils.py +0 -58
  181. hud/trajectory.py +0 -94
  182. hud/utils/agent.py +0 -37
  183. hud/utils/common.py +0 -256
  184. hud/utils/config.py +0 -120
  185. hud/utils/deprecation.py +0 -115
  186. hud/utils/misc.py +0 -53
  187. hud/utils/tests/test_common.py +0 -277
  188. hud/utils/tests/test_config.py +0 -129
  189. hud_python-0.3.5.dist-info/METADATA +0 -284
  190. hud_python-0.3.5.dist-info/RECORD +0 -120
  191. hud/{adapters/common → shared}/tests/__init__.py +0 -0
  192. {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,453 @@
1
+ """Tests for hud.cli.debug module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from unittest.mock import AsyncMock, MagicMock, Mock, patch
7
+
8
+ import pytest
9
+
10
+ from hud.cli.debug import debug_mcp_stdio
11
+ from hud.cli.utils import CaptureLogger
12
+
13
+
14
class TestDebugMCPStdio:
    """Phase-by-phase tests for ``debug_mcp_stdio`` with subprocess and MCP client mocked out."""

    @staticmethod
    def _named_tool(name: str) -> Mock:
        """Return a mock tool whose ``name`` attribute equals *name*.

        ``Mock(name=...)`` would configure the mock's own repr name rather than
        the attribute, so the attribute is assigned after construction.
        """
        tool = Mock()
        tool.name = name
        return tool

    @staticmethod
    def _init_line(result: dict) -> str:
        """Serialize a JSON-RPC 2.0 reply with id 1 as a single newline-terminated line."""
        return json.dumps({"jsonrpc": "2.0", "id": 1, "result": result}) + "\n"

    @pytest.mark.asyncio
    async def test_phase_1_command_not_found(self) -> None:
        """A ``FileNotFoundError`` from the probe yields 0 and a 'not found' message."""
        capture = CaptureLogger(print_output=False)

        with patch("subprocess.run", side_effect=FileNotFoundError()):
            completed = await debug_mcp_stdio(["nonexistent"], capture, max_phase=5)
            assert completed == 0
            text = capture.get_output()
            assert "Command not found: nonexistent" in text

    @pytest.mark.asyncio
    async def test_phase_1_command_fails(self) -> None:
        """A non-zero exit with a plain error on stderr yields 0 and echoes the error."""
        capture = CaptureLogger(print_output=False)
        failed_probe = Mock(returncode=1, stderr="Command failed with error")

        with patch("subprocess.run", return_value=failed_probe):
            completed = await debug_mcp_stdio(["test-cmd"], capture, max_phase=5)
            assert completed == 0
            text = capture.get_output()
            for expected in ("Command failed with exit code 1", "Command failed with error"):
                assert expected in text

    @pytest.mark.asyncio
    async def test_phase_1_success(self) -> None:
        """A zero exit code passes phase 1, and ``max_phase=1`` stops the run there."""
        capture = CaptureLogger(print_output=False)
        clean_probe = Mock(returncode=0, stderr="")

        with patch("subprocess.run", return_value=clean_probe):
            completed = await debug_mcp_stdio(["test-cmd"], capture, max_phase=1)
            assert completed == 1
            text = capture.get_output()
            for expected in ("Command executable found", "Stopping at phase 1 as requested"):
                assert expected in text

    @pytest.mark.asyncio
    async def test_phase_1_usage_in_stderr(self) -> None:
        """A non-zero exit still passes phase 1 when stderr carries usage text."""
        capture = CaptureLogger(print_output=False)
        usage_probe = Mock(returncode=1, stderr="usage: test-cmd [options]")

        with patch("subprocess.run", return_value=usage_probe):
            completed = await debug_mcp_stdio(["test-cmd"], capture, max_phase=1)
            assert completed == 1
            text = capture.get_output()
            assert "Command executable found" in text

    @pytest.mark.asyncio
    async def test_phase_2_mcp_initialize_success(self) -> None:
        """A well-formed initialize reply on stdout passes phase 2 and logs server info."""
        capture = CaptureLogger(print_output=False)

        # Phase 1: the probe succeeds.
        probe = Mock(returncode=0)

        # Phase 2: a Popen stand-in whose stdout yields one valid reply and whose
        # stderr iterates as empty.
        server_proc = MagicMock(stdin=MagicMock(), stdout=MagicMock(), stderr=MagicMock())
        server_proc.stdout.readline.return_value = self._init_line(
            {
                "serverInfo": {"name": "TestServer", "version": "1.0"},
                "capabilities": {"tools": {}, "resources": {}},
            }
        )
        server_proc.stderr.__iter__ = lambda _stream: iter([])

        with (
            patch("subprocess.run", return_value=probe),
            patch("subprocess.Popen", return_value=server_proc),
        ):
            completed = await debug_mcp_stdio(["test-cmd"], capture, max_phase=2)
            assert completed == 2
            text = capture.get_output()
            for expected in ("MCP server initialized successfully", "Server: TestServer v1.0"):
                assert expected in text

    @pytest.mark.asyncio
    async def test_phase_2_no_response(self) -> None:
        """An empty stdout read leaves the run at phase 1 with a 'no response' message."""
        capture = CaptureLogger(print_output=False)

        # Phase 1: the probe succeeds.
        probe = Mock(returncode=0)

        # Phase 2: stdout never produces a line; stderr carries a single error line.
        server_proc = MagicMock(stdin=MagicMock(), stdout=MagicMock(), stderr=MagicMock())
        server_proc.stdout.readline.return_value = ""
        server_proc.stderr.__iter__ = lambda _stream: iter(["[ERROR] Server failed to start"])

        with (
            patch("subprocess.run", return_value=probe),
            patch("subprocess.Popen", return_value=server_proc),
            # The mocked clock jumps from 0 to 20, presumably past the wait deadline.
            patch("time.time", side_effect=[0, 0, 20]),
        ):
            completed = await debug_mcp_stdio(["test-cmd"], capture, max_phase=5)
            assert completed == 1
            text = capture.get_output()
            assert "No valid MCP response received" in text

    @pytest.mark.asyncio
    async def test_phase_2_invalid_json_response(self) -> None:
        """A non-JSON stdout line leaves the run at phase 1 with a parse/no-response message."""
        capture = CaptureLogger(print_output=False)

        # Phase 1: the probe succeeds.
        probe = Mock(returncode=0)

        # Phase 2: stdout yields a line that is not JSON.
        server_proc = MagicMock(stdin=MagicMock(), stdout=MagicMock(), stderr=MagicMock())
        server_proc.stdout.readline.return_value = "Invalid JSON\n"
        server_proc.stderr.__iter__ = lambda _stream: iter([])

        # time.time() is consulted repeatedly; hand out increasing values so the
        # code under test sees time passing.
        ticking_clock = list(range(20))

        with (
            patch("subprocess.run", return_value=probe),
            patch("subprocess.Popen", return_value=server_proc),
            patch("time.time", side_effect=ticking_clock),
        ):
            completed = await debug_mcp_stdio(["test-cmd"], capture, max_phase=5)
            assert completed == 1
            text = capture.get_output()
            # The exact wording may vary, but it must indicate no valid response.
            acceptable = ("Failed to parse MCP response", "No valid MCP response received")
            assert any(message in text for message in acceptable)

    @pytest.mark.asyncio
    async def test_phase_3_tool_discovery(self) -> None:
        """Phase 3 counts the discovered tools and reports lifecycle/interaction groupings."""
        capture = CaptureLogger(print_output=False)

        # Phases 1 and 2 succeed.
        probe = Mock(returncode=0)
        server_proc = MagicMock(stdin=MagicMock(), stdout=MagicMock(), stderr=MagicMock())
        server_proc.stdout.readline.return_value = self._init_line(
            {"serverInfo": {"name": "TestServer", "version": "1.0"}}
        )
        server_proc.stderr.__iter__ = lambda _stream: iter([])

        discovered = [
            self._named_tool(tool_name)
            for tool_name in ("setup", "evaluate", "computer", "custom_tool")
        ]

        with (
            patch("subprocess.run", return_value=probe),
            patch("subprocess.Popen", return_value=server_proc),
            patch("hud.cli.debug.MCPClient") as MockClient,
        ):
            MockClient.return_value.configure_mock(
                initialize=AsyncMock(),
                list_tools=AsyncMock(return_value=discovered),
                list_resources=AsyncMock(return_value=[]),
                shutdown=AsyncMock(),
            )

            completed = await debug_mcp_stdio(["test-cmd"], capture, max_phase=3)
            assert completed == 3
            text = capture.get_output()
            for expected in (
                "Found 4 tools",
                "Lifecycle tools: setup=✅, evaluate=✅",
                "Interaction tools: computer",
                "All tools: setup, evaluate, computer, custom_tool",
            ):
                assert expected in text

    @pytest.mark.asyncio
    async def test_phase_3_no_tools(self) -> None:
        """An empty tool list leaves the run at phase 2 and hints at the decorator."""
        capture = CaptureLogger(print_output=False)

        # Phases 1 and 2 succeed.
        probe = Mock(returncode=0)
        server_proc = MagicMock()
        server_proc.stdout.readline.return_value = self._init_line({})
        server_proc.stderr.__iter__ = lambda _stream: iter([])

        with (
            patch("subprocess.run", return_value=probe),
            patch("subprocess.Popen", return_value=server_proc),
            patch("hud.cli.debug.MCPClient") as MockClient,
        ):
            MockClient.return_value.configure_mock(
                initialize=AsyncMock(),
                list_tools=AsyncMock(return_value=[]),
                shutdown=AsyncMock(),
            )

            completed = await debug_mcp_stdio(["test-cmd"], capture, max_phase=5)
            assert completed == 2
            text = capture.get_output()
            for expected in ("No tools found", "@mcp.tool() decorator"):
                assert expected in text

    @pytest.mark.asyncio
    async def test_phase_4_remote_deployment(self) -> None:
        """Phase 4 reports the init time and calls both the setup and evaluate tools."""
        capture = CaptureLogger(print_output=False)

        # Phases 1-3 succeed.
        probe = Mock(returncode=0)
        server_proc = MagicMock()
        server_proc.stdout.readline.return_value = self._init_line({})
        server_proc.stderr.__iter__ = lambda _stream: iter([])

        lifecycle_tools = [self._named_tool(tool_name) for tool_name in ("setup", "evaluate")]

        with (
            patch("subprocess.run", return_value=probe),
            patch("subprocess.Popen", return_value=server_proc),
            patch("hud.cli.debug.MCPClient") as MockClient,
        ):
            client = MockClient.return_value
            client.configure_mock(
                initialize=AsyncMock(),
                list_tools=AsyncMock(return_value=lifecycle_tools),
                list_resources=AsyncMock(return_value=[]),
                call_tool=AsyncMock(),
                shutdown=AsyncMock(),
            )

            # The clock reads 0 first and 5 on every later call.
            with patch("time.time", side_effect=[0, 5, 5, 5, 5]):
                completed = await debug_mcp_stdio(["test-cmd"], capture, max_phase=4)
                assert completed == 4
                text = capture.get_output()
                assert "Total initialization time: 5.00s" in text
                # One call each for the setup and evaluate tools.
                assert client.call_tool.call_count == 2

    @pytest.mark.asyncio
    async def test_phase_4_slow_initialization(self) -> None:
        """A mocked 35 s startup triggers the slow-initialization warning if phase 4 runs."""
        capture = CaptureLogger(print_output=False)

        probe = Mock(returncode=0)
        server_proc = MagicMock()
        server_proc.stdout.readline.return_value = self._init_line({})
        server_proc.stderr.__iter__ = lambda _stream: iter([])

        with (
            patch("subprocess.run", return_value=probe),
            patch("subprocess.Popen", return_value=server_proc),
            patch("hud.cli.debug.MCPClient") as MockClient,
        ):
            MockClient.return_value.configure_mock(
                initialize=AsyncMock(),
                list_tools=AsyncMock(return_value=[self._named_tool("test")]),
                list_resources=AsyncMock(return_value=[]),
                shutdown=AsyncMock(),
            )

            # Simulate a slow init (>30s): the clock reads 0 three times, then 35.
            # Presumably time.time() is sampled at the start and again after phase 3.
            with patch("time.time", side_effect=[0, 0, 0, 35, 35, 35]):
                completed = await debug_mcp_stdio(["test-cmd"], capture, max_phase=5)
                text = capture.get_output()
                # NOTE(review): the warning is only checked when phase 4 was reached,
                # so this test passes vacuously if the run stops earlier.
                if completed >= 4:
                    for expected in (
                        "Initialization took >30s",
                        "Consider optimizing startup time",
                    ):
                        assert expected in text

    @pytest.mark.asyncio
    async def test_phase_5_concurrent_clients(self) -> None:
        """Phase 5 brings up three concurrent clients and every client gets shut down."""
        capture = CaptureLogger(print_output=False)

        probe = Mock(returncode=0)
        server_proc = MagicMock()
        server_proc.stdout.readline.return_value = self._init_line({})
        server_proc.stderr.__iter__ = lambda _stream: iter([])

        def fresh_client() -> MagicMock:
            """Build an independent client mock that exposes one tool named 'test'."""
            return MagicMock(
                initialize=AsyncMock(),
                list_tools=AsyncMock(return_value=[self._named_tool("test")]),
                list_resources=AsyncMock(return_value=[]),
                shutdown=AsyncMock(),
            )

        with (
            patch("subprocess.run", return_value=probe),
            patch("subprocess.Popen", return_value=server_proc),
            patch("hud.cli.debug.MCPClient") as MockClient,
        ):
            # Each MCPClient() call receives its own instance: 1 main + 3 concurrent.
            clients = [fresh_client() for _ in range(4)]
            MockClient.side_effect = clients

            completed = await debug_mcp_stdio(["test-cmd"], capture, max_phase=5)
            assert completed == 5
            text = capture.get_output()
            for expected in (
                "Creating 3 concurrent MCP clients",
                "All concurrent clients connected",
            ):
                assert expected in text

            # Every client, main and concurrent alike, must have been shut down.
            for each_client in clients:
                each_client.shutdown.assert_called()

    @pytest.mark.asyncio
    async def test_phase_5_concurrent_failure(self) -> None:
        """A concurrent client whose initialize raises is reported in the output."""
        capture = CaptureLogger(print_output=False)

        probe = Mock(returncode=0)
        server_proc = MagicMock()
        server_proc.stdout.readline.return_value = self._init_line({})
        server_proc.stderr.__iter__ = lambda _stream: iter([])

        with (
            patch("subprocess.run", return_value=probe),
            patch("subprocess.Popen", return_value=server_proc),
            patch("hud.cli.debug.MCPClient") as MockClient,
        ):
            shared_tool = self._named_tool("test")

            # Client used by phases 1-4.
            main_client = MagicMock(
                initialize=AsyncMock(),
                list_tools=AsyncMock(return_value=[shared_tool]),
                list_resources=AsyncMock(return_value=[]),
                shutdown=AsyncMock(),
            )

            # Phase 5 clients: the first connects, the second fails to initialize.
            healthy_client = MagicMock(
                initialize=AsyncMock(),
                list_tools=AsyncMock(return_value=[shared_tool]),
                list_resources=AsyncMock(return_value=[]),
                shutdown=AsyncMock(),
            )
            broken_client = MagicMock(
                initialize=AsyncMock(side_effect=Exception("Connection failed")),
                shutdown=AsyncMock(),
            )

            MockClient.side_effect = [main_client, healthy_client, broken_client]

            await debug_mcp_stdio(["test-cmd"], capture, max_phase=5)
            text = capture.get_output()
            assert "Concurrent test failed: Connection failed" in text

    @pytest.mark.asyncio
    async def test_docker_command_handling(self) -> None:
        """For a ``docker run ...`` command the probe is invoked with just ``["docker"]``."""
        capture = CaptureLogger(print_output=False)
        probe = Mock(returncode=0)

        with patch("subprocess.run", return_value=probe) as run_spy:
            await debug_mcp_stdio(["docker", "run", "--rm", "image:latest"], capture, max_phase=1)
            # The most recent subprocess.run call must receive only the bare docker
            # executable as its argv, without the run arguments or image.
            probed_argv = run_spy.call_args.args[0]
            assert probed_argv == ["docker"]

    @pytest.mark.asyncio
    async def test_phase_exception_handling(self) -> None:
        """An unexpected exception from the probe yields 0 and a 'Startup test failed' line."""
        capture = CaptureLogger(print_output=False)

        with patch("subprocess.run", side_effect=Exception("Unexpected error")):
            completed = await debug_mcp_stdio(["test-cmd"], capture, max_phase=5)
            assert completed == 0
            text = capture.get_output()
            assert "Startup test failed: Unexpected error" in text
450
+
451
+
452
if __name__ == "__main__":
    # pytest.main() returns the exit code instead of exiting the interpreter.
    # Propagate it so that running this file directly exits non-zero when a
    # test fails, rather than always exiting 0.
    raise SystemExit(pytest.main([__file__]))
@@ -0,0 +1,139 @@
1
+ """Tests for hud.cli.mcp_server module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+ from unittest.mock import MagicMock, patch
7
+
8
+ import pytest
9
+
10
+ from hud.cli.mcp_server import (
11
+ create_proxy_server,
12
+ get_docker_cmd,
13
+ get_image_name,
14
+ inject_supervisor,
15
+ run_mcp_dev_server,
16
+ update_pyproject_toml,
17
+ )
18
+
19
+ if TYPE_CHECKING:
20
+ from pathlib import Path
21
+
22
+
23
class TestCreateMCPServer:
    """Checks on the object returned by ``create_proxy_server``."""

    def test_create_mcp_server(self) -> None:
        """The underlying MCP server's name embeds the image passed to the factory."""
        expected_name = "HUD Dev Proxy - test-image:latest"
        proxy = create_proxy_server(".", "test-image:latest")
        # Only the name is checked: the proxy server doesn't define its own tools,
        # it forwards to Docker containers.
        assert proxy._mcp_server.name == expected_name
31
+
32
+
33
class TestDockerUtils:
    """Tests for the Docker helper functions ``get_docker_cmd`` and ``inject_supervisor``."""

    def test_get_docker_cmd(self) -> None:
        """``get_docker_cmd`` returns ``None`` for the mocked successful inspect result."""
        inspect_result = MagicMock(returncode=0, stdout='["python", "-m", "server"]')

        with patch("subprocess.run") as run_spy:
            run_spy.return_value = inspect_result

            # NOTE(review): the mocked stdout holds a JSON CMD array, yet the expected
            # result is None. Confirm against get_docker_cmd that this is intended.
            resolved = get_docker_cmd("test-image:latest")
            assert resolved is None

    def test_get_docker_cmd_failure(self) -> None:
        """``get_docker_cmd`` returns ``None`` when the subprocess call raises."""
        import subprocess

        # Simulate the CalledProcessError that subprocess.run(check=True) raises
        # on a non-zero exit status.
        inspect_error = subprocess.CalledProcessError(1, "docker inspect")

        with patch("subprocess.run") as run_spy:
            run_spy.side_effect = inspect_error

            resolved = get_docker_cmd("test-image:latest")
            assert resolved is None

    def test_inject_supervisor(self) -> None:
        """The CMD is wrapped in ``sh -c`` with a script naming watchfiles and the command."""
        wrapped = inject_supervisor(["python", "-m", "server"])

        assert wrapped[0] == "sh"
        assert wrapped[1] == "-c"
        shell_script = wrapped[2]
        for fragment in ("watchfiles", "python -m server"):
            assert fragment in shell_script
67
+
68
+
69
class TestImageResolution:
    """Tests for ``get_image_name`` and ``update_pyproject_toml``."""

    def test_get_image_name_override(self) -> None:
        """An explicit image argument is returned unchanged with source ``"override"``."""
        name, source = get_image_name(".", "custom-image:v1")
        assert (name, source) == ("custom-image:v1", "override")

    def test_get_image_name_from_pyproject(self, tmp_path: Path) -> None:
        """An image under ``[tool.hud]`` in pyproject.toml is returned with source ``"cache"``."""
        config_file = tmp_path / "pyproject.toml"
        config_file.write_text('\n[tool.hud]\nimage = "my-project:latest"\n')

        name, source = get_image_name(str(tmp_path))
        assert (name, source) == ("my-project:latest", "cache")

    def test_get_image_name_auto_generate(self, tmp_path: Path) -> None:
        """Without an override or pyproject entry the name derives from the directory name."""
        project_dir = tmp_path / "my_test_project"
        project_dir.mkdir()

        name, source = get_image_name(str(project_dir))
        # Underscores in the directory name appear as hyphens in the generated name.
        assert (name, source) == ("hud-my-test-project:dev", "auto")

    def test_update_pyproject_toml(self, tmp_path: Path) -> None:
        """Updating adds a ``[tool.hud]`` table holding the new image name."""
        config_file = tmp_path / "pyproject.toml"
        config_file.write_text('\n[project]\nname = "test"\n')

        update_pyproject_toml(str(tmp_path), "new-image:v1", silent=True)

        rewritten = config_file.read_text()
        for fragment in ("[tool.hud]", 'image = "new-image:v1"'):
            assert fragment in rewritten
112
+
113
+
114
class TestRunMCPDevServer:
    """Tests for the ``run_mcp_dev_server`` entry point."""

    def test_run_dev_server_image_not_found(self) -> None:
        """A missing image plus a declined confirmation prompt raises ``click.Abort``."""
        import click

        launch_options = {
            "directory": ".",
            "image": "missing:latest",
            "build": False,
            "no_cache": False,
            "transport": "http",
            "port": 8765,
            "no_reload": False,
            "verbose": False,
            "inspector": False,
            "no_logs": False,
            "docker_args": [],
            "interactive": False,
        }

        with (
            # The image lookup reports "absent" and the confirmation prompt answers "no".
            patch("hud.cli.mcp_server.image_exists", return_value=False),
            patch("click.confirm", return_value=False),
            pytest.raises(click.Abort),
        ):
            run_mcp_dev_server(**launch_options)