hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (130)
  1. hud/__init__.py +22 -22
  2. hud/agents/__init__.py +13 -15
  3. hud/agents/base.py +599 -599
  4. hud/agents/claude.py +373 -373
  5. hud/agents/langchain.py +261 -250
  6. hud/agents/misc/__init__.py +7 -7
  7. hud/agents/misc/response_agent.py +82 -80
  8. hud/agents/openai.py +352 -352
  9. hud/agents/openai_chat_generic.py +154 -154
  10. hud/agents/tests/__init__.py +1 -1
  11. hud/agents/tests/test_base.py +742 -742
  12. hud/agents/tests/test_claude.py +324 -324
  13. hud/agents/tests/test_client.py +363 -363
  14. hud/agents/tests/test_openai.py +237 -237
  15. hud/cli/__init__.py +617 -617
  16. hud/cli/__main__.py +8 -8
  17. hud/cli/analyze.py +371 -371
  18. hud/cli/analyze_metadata.py +230 -230
  19. hud/cli/build.py +498 -427
  20. hud/cli/clone.py +185 -185
  21. hud/cli/cursor.py +92 -92
  22. hud/cli/debug.py +392 -392
  23. hud/cli/docker_utils.py +83 -83
  24. hud/cli/init.py +280 -281
  25. hud/cli/interactive.py +353 -353
  26. hud/cli/mcp_server.py +764 -756
  27. hud/cli/pull.py +330 -336
  28. hud/cli/push.py +404 -370
  29. hud/cli/remote_runner.py +311 -311
  30. hud/cli/runner.py +160 -160
  31. hud/cli/tests/__init__.py +3 -3
  32. hud/cli/tests/test_analyze.py +284 -284
  33. hud/cli/tests/test_cli_init.py +265 -265
  34. hud/cli/tests/test_cli_main.py +27 -27
  35. hud/cli/tests/test_clone.py +142 -142
  36. hud/cli/tests/test_cursor.py +253 -253
  37. hud/cli/tests/test_debug.py +453 -453
  38. hud/cli/tests/test_mcp_server.py +139 -139
  39. hud/cli/tests/test_utils.py +388 -388
  40. hud/cli/utils.py +263 -263
  41. hud/clients/README.md +143 -143
  42. hud/clients/__init__.py +16 -16
  43. hud/clients/base.py +378 -379
  44. hud/clients/fastmcp.py +222 -222
  45. hud/clients/mcp_use.py +298 -278
  46. hud/clients/tests/__init__.py +1 -1
  47. hud/clients/tests/test_client_integration.py +111 -111
  48. hud/clients/tests/test_fastmcp.py +342 -342
  49. hud/clients/tests/test_protocol.py +188 -188
  50. hud/clients/utils/__init__.py +1 -1
  51. hud/clients/utils/retry_transport.py +160 -160
  52. hud/datasets.py +327 -322
  53. hud/misc/__init__.py +1 -1
  54. hud/misc/claude_plays_pokemon.py +292 -292
  55. hud/otel/__init__.py +35 -35
  56. hud/otel/collector.py +142 -142
  57. hud/otel/config.py +164 -164
  58. hud/otel/context.py +536 -536
  59. hud/otel/exporters.py +366 -366
  60. hud/otel/instrumentation.py +97 -97
  61. hud/otel/processors.py +118 -118
  62. hud/otel/tests/__init__.py +1 -1
  63. hud/otel/tests/test_processors.py +197 -197
  64. hud/server/__init__.py +5 -5
  65. hud/server/context.py +114 -114
  66. hud/server/helper/__init__.py +5 -5
  67. hud/server/low_level.py +132 -132
  68. hud/server/server.py +170 -166
  69. hud/server/tests/__init__.py +3 -3
  70. hud/settings.py +73 -73
  71. hud/shared/__init__.py +5 -5
  72. hud/shared/exceptions.py +180 -180
  73. hud/shared/requests.py +264 -264
  74. hud/shared/tests/test_exceptions.py +157 -157
  75. hud/shared/tests/test_requests.py +275 -275
  76. hud/telemetry/__init__.py +25 -25
  77. hud/telemetry/instrument.py +379 -379
  78. hud/telemetry/job.py +309 -309
  79. hud/telemetry/replay.py +74 -74
  80. hud/telemetry/trace.py +83 -83
  81. hud/tools/__init__.py +33 -33
  82. hud/tools/base.py +365 -365
  83. hud/tools/bash.py +161 -161
  84. hud/tools/computer/__init__.py +15 -15
  85. hud/tools/computer/anthropic.py +437 -437
  86. hud/tools/computer/hud.py +376 -376
  87. hud/tools/computer/openai.py +295 -295
  88. hud/tools/computer/settings.py +82 -82
  89. hud/tools/edit.py +314 -314
  90. hud/tools/executors/__init__.py +30 -30
  91. hud/tools/executors/base.py +539 -539
  92. hud/tools/executors/pyautogui.py +621 -621
  93. hud/tools/executors/tests/__init__.py +1 -1
  94. hud/tools/executors/tests/test_base_executor.py +338 -338
  95. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  96. hud/tools/executors/xdo.py +511 -511
  97. hud/tools/playwright.py +412 -412
  98. hud/tools/tests/__init__.py +3 -3
  99. hud/tools/tests/test_base.py +282 -282
  100. hud/tools/tests/test_bash.py +158 -158
  101. hud/tools/tests/test_bash_extended.py +197 -197
  102. hud/tools/tests/test_computer.py +425 -425
  103. hud/tools/tests/test_computer_actions.py +34 -34
  104. hud/tools/tests/test_edit.py +259 -259
  105. hud/tools/tests/test_init.py +27 -27
  106. hud/tools/tests/test_playwright_tool.py +183 -183
  107. hud/tools/tests/test_tools.py +145 -145
  108. hud/tools/tests/test_utils.py +156 -156
  109. hud/tools/types.py +72 -72
  110. hud/tools/utils.py +50 -50
  111. hud/types.py +136 -136
  112. hud/utils/__init__.py +10 -10
  113. hud/utils/async_utils.py +65 -65
  114. hud/utils/design.py +236 -168
  115. hud/utils/mcp.py +55 -55
  116. hud/utils/progress.py +149 -149
  117. hud/utils/telemetry.py +66 -66
  118. hud/utils/tests/test_async_utils.py +173 -173
  119. hud/utils/tests/test_init.py +17 -17
  120. hud/utils/tests/test_progress.py +261 -261
  121. hud/utils/tests/test_telemetry.py +82 -82
  122. hud/utils/tests/test_version.py +8 -8
  123. hud/version.py +7 -7
  124. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/METADATA +10 -8
  125. hud_python-0.4.3.dist-info/RECORD +131 -0
  126. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/licenses/LICENSE +21 -21
  127. hud/agents/art.py +0 -101
  128. hud_python-0.4.1.dist-info/RECORD +0 -132
  129. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/WHEEL +0 -0
  130. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/entry_points.txt +0 -0
@@ -1,453 +1,453 @@
1
- """Tests for hud.cli.debug module."""
2
-
3
- from __future__ import annotations
4
-
5
- import json
6
- from unittest.mock import AsyncMock, MagicMock, Mock, patch
7
-
8
- import pytest
9
-
10
- from hud.cli.debug import debug_mcp_stdio
11
- from hud.cli.utils import CaptureLogger
12
-
13
-
14
- class TestDebugMCPStdio:
15
- """Test the debug_mcp_stdio function."""
16
-
17
- @pytest.mark.asyncio
18
- async def test_phase_1_command_not_found(self) -> None:
19
- """Test Phase 1 failure when command not found."""
20
- logger = CaptureLogger(print_output=False)
21
-
22
- with patch("subprocess.run", side_effect=FileNotFoundError()):
23
- phases = await debug_mcp_stdio(["nonexistent"], logger, max_phase=5)
24
- assert phases == 0
25
- output = logger.get_output()
26
- assert "Command not found: nonexistent" in output
27
-
28
- @pytest.mark.asyncio
29
- async def test_phase_1_command_fails(self) -> None:
30
- """Test Phase 1 failure when command returns error."""
31
- logger = CaptureLogger(print_output=False)
32
-
33
- mock_result = Mock()
34
- mock_result.returncode = 1
35
- mock_result.stderr = "Command failed with error"
36
-
37
- with patch("subprocess.run", return_value=mock_result):
38
- phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=5)
39
- assert phases == 0
40
- output = logger.get_output()
41
- assert "Command failed with exit code 1" in output
42
- assert "Command failed with error" in output
43
-
44
- @pytest.mark.asyncio
45
- async def test_phase_1_success(self) -> None:
46
- """Test Phase 1 success."""
47
- logger = CaptureLogger(print_output=False)
48
-
49
- mock_result = Mock()
50
- mock_result.returncode = 0
51
- mock_result.stderr = ""
52
-
53
- with patch("subprocess.run", return_value=mock_result):
54
- phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=1)
55
- assert phases == 1
56
- output = logger.get_output()
57
- assert "Command executable found" in output
58
- assert "Stopping at phase 1 as requested" in output
59
-
60
- @pytest.mark.asyncio
61
- async def test_phase_1_usage_in_stderr(self) -> None:
62
- """Test Phase 1 success when usage info in stderr."""
63
- logger = CaptureLogger(print_output=False)
64
-
65
- mock_result = Mock()
66
- mock_result.returncode = 1
67
- mock_result.stderr = "usage: test-cmd [options]"
68
-
69
- with patch("subprocess.run", return_value=mock_result):
70
- phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=1)
71
- assert phases == 1
72
- output = logger.get_output()
73
- assert "Command executable found" in output
74
-
75
- @pytest.mark.asyncio
76
- async def test_phase_2_mcp_initialize_success(self) -> None:
77
- """Test Phase 2 MCP initialization success."""
78
- logger = CaptureLogger(print_output=False)
79
-
80
- # Mock Phase 1 success
81
- mock_run_result = Mock()
82
- mock_run_result.returncode = 0
83
-
84
- # Mock subprocess.Popen for Phase 2
85
- mock_proc = MagicMock()
86
- mock_proc.stdin = MagicMock()
87
- mock_proc.stdout = MagicMock()
88
- mock_proc.stderr = MagicMock()
89
-
90
- # Mock successful MCP response
91
- init_response = {
92
- "jsonrpc": "2.0",
93
- "id": 1,
94
- "result": {
95
- "serverInfo": {"name": "TestServer", "version": "1.0"},
96
- "capabilities": {"tools": {}, "resources": {}},
97
- },
98
- }
99
-
100
- mock_proc.stdout.readline.return_value = json.dumps(init_response) + "\n"
101
- mock_proc.stderr.__iter__ = lambda x: iter([]) # No stderr output
102
-
103
- with (
104
- patch("subprocess.run", return_value=mock_run_result),
105
- patch("subprocess.Popen", return_value=mock_proc),
106
- ):
107
- phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=2)
108
- assert phases == 2
109
- output = logger.get_output()
110
- assert "MCP server initialized successfully" in output
111
- assert "Server: TestServer v1.0" in output
112
-
113
- @pytest.mark.asyncio
114
- async def test_phase_2_no_response(self) -> None:
115
- """Test Phase 2 failure when no MCP response."""
116
- logger = CaptureLogger(print_output=False)
117
-
118
- # Mock Phase 1 success
119
- mock_run_result = Mock()
120
- mock_run_result.returncode = 0
121
-
122
- # Mock subprocess.Popen for Phase 2
123
- mock_proc = MagicMock()
124
- mock_proc.stdin = MagicMock()
125
- mock_proc.stdout = MagicMock()
126
- mock_proc.stderr = MagicMock()
127
-
128
- # No stdout response
129
- mock_proc.stdout.readline.return_value = ""
130
- mock_proc.stderr.__iter__ = lambda x: iter(["[ERROR] Server failed to start"])
131
-
132
- with (
133
- patch("subprocess.run", return_value=mock_run_result),
134
- patch("subprocess.Popen", return_value=mock_proc),
135
- patch("time.time", side_effect=[0, 0, 20]),
136
- ):
137
- phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=5)
138
- assert phases == 1
139
- output = logger.get_output()
140
- assert "No valid MCP response received" in output
141
-
142
- @pytest.mark.asyncio
143
- async def test_phase_2_invalid_json_response(self) -> None:
144
- """Test Phase 2 handling of invalid JSON response."""
145
- logger = CaptureLogger(print_output=False)
146
-
147
- # Mock Phase 1 success
148
- mock_run_result = Mock()
149
- mock_run_result.returncode = 0
150
-
151
- # Mock subprocess.Popen
152
- mock_proc = MagicMock()
153
- mock_proc.stdin = MagicMock()
154
- mock_proc.stdout = MagicMock()
155
- mock_proc.stderr = MagicMock()
156
-
157
- # Invalid JSON response
158
- mock_proc.stdout.readline.return_value = "Invalid JSON\n"
159
- mock_proc.stderr.__iter__ = lambda x: iter([])
160
-
161
- with (
162
- patch("subprocess.run", return_value=mock_run_result),
163
- patch("subprocess.Popen", return_value=mock_proc),
164
- ):
165
- # Simulate timeout - time.time() is called multiple times in the loop
166
- # Return increasing values to simulate time passing
167
- time_values = list(range(20))
168
- with patch("time.time", side_effect=time_values):
169
- phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=5)
170
- assert phases == 1
171
- output = logger.get_output()
172
- # The error message might vary, but should indicate no valid response
173
- assert (
174
- "Failed to parse MCP response" in output
175
- or "No valid MCP response received" in output
176
- )
177
-
178
- @pytest.mark.asyncio
179
- async def test_phase_3_tool_discovery(self) -> None:
180
- """Test Phase 3 tool discovery."""
181
- logger = CaptureLogger(print_output=False)
182
-
183
- # Mock Phase 1 & 2 success
184
- mock_run_result = Mock()
185
- mock_run_result.returncode = 0
186
-
187
- mock_proc = MagicMock()
188
- mock_proc.stdin = MagicMock()
189
- mock_proc.stdout = MagicMock()
190
- mock_proc.stderr = MagicMock()
191
-
192
- init_response = {
193
- "jsonrpc": "2.0",
194
- "id": 1,
195
- "result": {"serverInfo": {"name": "TestServer", "version": "1.0"}},
196
- }
197
- mock_proc.stdout.readline.return_value = json.dumps(init_response) + "\n"
198
- mock_proc.stderr.__iter__ = lambda x: iter([])
199
-
200
- # Mock tool discovery - create proper mock tools
201
- mock_tools = []
202
- for tool_name in ["setup", "evaluate", "computer", "custom_tool"]:
203
- tool = Mock()
204
- tool.name = tool_name
205
- mock_tools.append(tool)
206
-
207
- with (
208
- patch("subprocess.run", return_value=mock_run_result),
209
- patch("subprocess.Popen", return_value=mock_proc),
210
- patch("hud.cli.debug.MCPClient") as MockClient,
211
- ):
212
- mock_client = MockClient.return_value
213
- mock_client.initialize = AsyncMock()
214
- mock_client.list_tools = AsyncMock(return_value=mock_tools)
215
- mock_client.list_resources = AsyncMock(return_value=[])
216
- mock_client.shutdown = AsyncMock()
217
-
218
- phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=3)
219
- assert phases == 3
220
- output = logger.get_output()
221
- assert "Found 4 tools" in output
222
- assert "Lifecycle tools: setup=✅, evaluate=✅" in output
223
- assert "Interaction tools: computer" in output
224
- assert "All tools: setup, evaluate, computer, custom_tool" in output
225
-
226
- @pytest.mark.asyncio
227
- async def test_phase_3_no_tools(self) -> None:
228
- """Test Phase 3 when no tools found."""
229
- logger = CaptureLogger(print_output=False)
230
-
231
- # Mock Phase 1 & 2 success
232
- mock_run_result = Mock()
233
- mock_run_result.returncode = 0
234
-
235
- mock_proc = MagicMock()
236
- init_response = {"jsonrpc": "2.0", "id": 1, "result": {}}
237
- mock_proc.stdout.readline.return_value = json.dumps(init_response) + "\n"
238
- mock_proc.stderr.__iter__ = lambda x: iter([])
239
-
240
- with (
241
- patch("subprocess.run", return_value=mock_run_result),
242
- patch("subprocess.Popen", return_value=mock_proc),
243
- patch("hud.cli.debug.MCPClient") as MockClient,
244
- ):
245
- mock_client = MockClient.return_value
246
- mock_client.initialize = AsyncMock()
247
- mock_client.list_tools = AsyncMock(return_value=[])
248
- mock_client.shutdown = AsyncMock()
249
-
250
- phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=5)
251
- assert phases == 2
252
- output = logger.get_output()
253
- assert "No tools found" in output
254
- assert "@mcp.tool() decorator" in output
255
-
256
- @pytest.mark.asyncio
257
- async def test_phase_4_remote_deployment(self) -> None:
258
- """Test Phase 4 remote deployment readiness."""
259
- logger = CaptureLogger(print_output=False)
260
-
261
- # Setup mocks for phases 1-3
262
- mock_run_result = Mock()
263
- mock_run_result.returncode = 0
264
-
265
- mock_proc = MagicMock()
266
- init_response = {"jsonrpc": "2.0", "id": 1, "result": {}}
267
- mock_proc.stdout.readline.return_value = json.dumps(init_response) + "\n"
268
- mock_proc.stderr.__iter__ = lambda x: iter([])
269
-
270
- # Create proper mock tools
271
- mock_tools = []
272
- for tool_name in ["setup", "evaluate"]:
273
- tool = Mock()
274
- tool.name = tool_name
275
- mock_tools.append(tool)
276
-
277
- with (
278
- patch("subprocess.run", return_value=mock_run_result),
279
- patch("subprocess.Popen", return_value=mock_proc),
280
- patch("hud.cli.debug.MCPClient") as MockClient,
281
- ):
282
- mock_client = MockClient.return_value
283
- mock_client.initialize = AsyncMock()
284
- mock_client.list_tools = AsyncMock(return_value=mock_tools)
285
- mock_client.list_resources = AsyncMock(return_value=[])
286
- mock_client.call_tool = AsyncMock()
287
- mock_client.shutdown = AsyncMock()
288
-
289
- with patch("time.time", side_effect=[0, 5, 5, 5, 5]): # Start at 0, then 5 for the rest
290
- phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=4)
291
- assert phases == 4
292
- output = logger.get_output()
293
- assert "Total initialization time: 5.00s" in output
294
- # Should have tested setup and evaluate tools
295
- assert mock_client.call_tool.call_count == 2
296
-
297
- @pytest.mark.asyncio
298
- async def test_phase_4_slow_initialization(self) -> None:
299
- """Test Phase 4 with slow initialization warning."""
300
- logger = CaptureLogger(print_output=False)
301
-
302
- # Setup basic mocks
303
- mock_run_result = Mock()
304
- mock_run_result.returncode = 0
305
-
306
- mock_proc = MagicMock()
307
- init_response = {"jsonrpc": "2.0", "id": 1, "result": {}}
308
- mock_proc.stdout.readline.return_value = json.dumps(init_response) + "\n"
309
- mock_proc.stderr.__iter__ = lambda x: iter([])
310
-
311
- with (
312
- patch("subprocess.run", return_value=mock_run_result),
313
- patch("subprocess.Popen", return_value=mock_proc),
314
- patch("hud.cli.debug.MCPClient") as MockClient,
315
- ):
316
- mock_client = MockClient.return_value
317
- mock_client.initialize = AsyncMock()
318
- # Create proper mock tool
319
- test_tool = Mock()
320
- test_tool.name = "test"
321
- mock_client.list_tools = AsyncMock(return_value=[test_tool])
322
- mock_client.list_resources = AsyncMock(return_value=[])
323
- mock_client.shutdown = AsyncMock()
324
-
325
- # Simulate slow init (>30s)
326
- # time.time() is called at start and after phase 3
327
- with patch("time.time", side_effect=[0, 0, 0, 35, 35, 35]):
328
- phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=5)
329
- output = logger.get_output()
330
- # Check if we got to phase 4 where the timing check happens
331
- if phases >= 4:
332
- assert "Initialization took >30s" in output
333
- assert "Consider optimizing startup time" in output
334
-
335
- @pytest.mark.asyncio
336
- async def test_phase_5_concurrent_clients(self) -> None:
337
- """Test Phase 5 concurrent clients."""
338
- logger = CaptureLogger(print_output=False)
339
-
340
- # Setup mocks for all phases
341
- mock_run_result = Mock()
342
- mock_run_result.returncode = 0
343
-
344
- mock_proc = MagicMock()
345
- init_response = {"jsonrpc": "2.0", "id": 1, "result": {}}
346
- mock_proc.stdout.readline.return_value = json.dumps(init_response) + "\n"
347
- mock_proc.stderr.__iter__ = lambda x: iter([])
348
-
349
- with (
350
- patch("subprocess.run", return_value=mock_run_result),
351
- patch("subprocess.Popen", return_value=mock_proc),
352
- patch("hud.cli.debug.MCPClient") as MockClient,
353
- ):
354
- # Create different mock instances for each client
355
- mock_clients = []
356
- for i in range(4): # 1 main + 3 concurrent
357
- mock_client = MagicMock()
358
- mock_client.initialize = AsyncMock()
359
- # Create proper mock tool
360
- test_tool = Mock()
361
- test_tool.name = "test"
362
- mock_client.list_tools = AsyncMock(return_value=[test_tool])
363
- mock_client.list_resources = AsyncMock(return_value=[])
364
- mock_client.shutdown = AsyncMock()
365
- mock_clients.append(mock_client)
366
-
367
- MockClient.side_effect = mock_clients
368
-
369
- phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=5)
370
- assert phases == 5
371
- output = logger.get_output()
372
- assert "Creating 3 concurrent MCP clients" in output
373
- assert "All concurrent clients connected" in output
374
-
375
- # Verify all clients were shut down
376
- for client in mock_clients:
377
- client.shutdown.assert_called()
378
-
379
- @pytest.mark.asyncio
380
- async def test_phase_5_concurrent_failure(self) -> None:
381
- """Test Phase 5 handling concurrent client failures."""
382
- logger = CaptureLogger(print_output=False)
383
-
384
- # Setup basic mocks
385
- mock_run_result = Mock()
386
- mock_run_result.returncode = 0
387
-
388
- mock_proc = MagicMock()
389
- init_response = {"jsonrpc": "2.0", "id": 1, "result": {}}
390
- mock_proc.stdout.readline.return_value = json.dumps(init_response) + "\n"
391
- mock_proc.stderr.__iter__ = lambda x: iter([])
392
-
393
- with (
394
- patch("subprocess.run", return_value=mock_run_result),
395
- patch("subprocess.Popen", return_value=mock_proc),
396
- patch("hud.cli.debug.MCPClient") as MockClient,
397
- ):
398
- # Set up for phase 1-4 success first
399
- test_tool = Mock()
400
- test_tool.name = "test"
401
-
402
- # Phase 1-4 client
403
- phase_client = MagicMock()
404
- phase_client.initialize = AsyncMock()
405
- phase_client.list_tools = AsyncMock(return_value=[test_tool])
406
- phase_client.list_resources = AsyncMock(return_value=[])
407
- phase_client.shutdown = AsyncMock()
408
-
409
- # Phase 5 clients - first succeeds, second fails
410
- mock_client1 = MagicMock()
411
- mock_client1.initialize = AsyncMock()
412
- mock_client1.list_tools = AsyncMock(return_value=[test_tool])
413
- mock_client1.list_resources = AsyncMock(return_value=[])
414
- mock_client1.shutdown = AsyncMock()
415
-
416
- mock_client2 = MagicMock()
417
- mock_client2.initialize = AsyncMock(side_effect=Exception("Connection failed"))
418
- mock_client2.shutdown = AsyncMock()
419
-
420
- MockClient.side_effect = [phase_client, mock_client1, mock_client2]
421
-
422
- await debug_mcp_stdio(["test-cmd"], logger, max_phase=5)
423
- output = logger.get_output()
424
- assert "Concurrent test failed: Connection failed" in output
425
-
426
- @pytest.mark.asyncio
427
- async def test_docker_command_handling(self) -> None:
428
- """Test special handling of Docker commands."""
429
- logger = CaptureLogger(print_output=False)
430
-
431
- mock_result = Mock()
432
- mock_result.returncode = 0
433
-
434
- with patch("subprocess.run", return_value=mock_result) as mock_run:
435
- await debug_mcp_stdio(["docker", "run", "--rm", "image:latest"], logger, max_phase=1)
436
- # Should add echo command for Docker
437
- call_args = mock_run.call_args[0][0]
438
- assert call_args == ["docker"]
439
-
440
- @pytest.mark.asyncio
441
- async def test_phase_exception_handling(self) -> None:
442
- """Test general exception handling in phases."""
443
- logger = CaptureLogger(print_output=False)
444
-
445
- with patch("subprocess.run", side_effect=Exception("Unexpected error")):
446
- phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=5)
447
- assert phases == 0
448
- output = logger.get_output()
449
- assert "Startup test failed: Unexpected error" in output
450
-
451
-
452
- if __name__ == "__main__":
453
- pytest.main([__file__])
1
+ """Tests for hud.cli.debug module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from unittest.mock import AsyncMock, MagicMock, Mock, patch
7
+
8
+ import pytest
9
+
10
+ from hud.cli.debug import debug_mcp_stdio
11
+ from hud.cli.utils import CaptureLogger
12
+
13
+
14
+ class TestDebugMCPStdio:
15
+ """Test the debug_mcp_stdio function."""
16
+
17
+ @pytest.mark.asyncio
18
+ async def test_phase_1_command_not_found(self) -> None:
19
+ """Test Phase 1 failure when command not found."""
20
+ logger = CaptureLogger(print_output=False)
21
+
22
+ with patch("subprocess.run", side_effect=FileNotFoundError()):
23
+ phases = await debug_mcp_stdio(["nonexistent"], logger, max_phase=5)
24
+ assert phases == 0
25
+ output = logger.get_output()
26
+ assert "Command not found: nonexistent" in output
27
+
28
+ @pytest.mark.asyncio
29
+ async def test_phase_1_command_fails(self) -> None:
30
+ """Test Phase 1 failure when command returns error."""
31
+ logger = CaptureLogger(print_output=False)
32
+
33
+ mock_result = Mock()
34
+ mock_result.returncode = 1
35
+ mock_result.stderr = "Command failed with error"
36
+
37
+ with patch("subprocess.run", return_value=mock_result):
38
+ phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=5)
39
+ assert phases == 0
40
+ output = logger.get_output()
41
+ assert "Command failed with exit code 1" in output
42
+ assert "Command failed with error" in output
43
+
44
+ @pytest.mark.asyncio
45
+ async def test_phase_1_success(self) -> None:
46
+ """Test Phase 1 success."""
47
+ logger = CaptureLogger(print_output=False)
48
+
49
+ mock_result = Mock()
50
+ mock_result.returncode = 0
51
+ mock_result.stderr = ""
52
+
53
+ with patch("subprocess.run", return_value=mock_result):
54
+ phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=1)
55
+ assert phases == 1
56
+ output = logger.get_output()
57
+ assert "Command executable found" in output
58
+ assert "Stopping at phase 1 as requested" in output
59
+
60
+ @pytest.mark.asyncio
61
+ async def test_phase_1_usage_in_stderr(self) -> None:
62
+ """Test Phase 1 success when usage info in stderr."""
63
+ logger = CaptureLogger(print_output=False)
64
+
65
+ mock_result = Mock()
66
+ mock_result.returncode = 1
67
+ mock_result.stderr = "usage: test-cmd [options]"
68
+
69
+ with patch("subprocess.run", return_value=mock_result):
70
+ phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=1)
71
+ assert phases == 1
72
+ output = logger.get_output()
73
+ assert "Command executable found" in output
74
+
75
+ @pytest.mark.asyncio
76
+ async def test_phase_2_mcp_initialize_success(self) -> None:
77
+ """Test Phase 2 MCP initialization success."""
78
+ logger = CaptureLogger(print_output=False)
79
+
80
+ # Mock Phase 1 success
81
+ mock_run_result = Mock()
82
+ mock_run_result.returncode = 0
83
+
84
+ # Mock subprocess.Popen for Phase 2
85
+ mock_proc = MagicMock()
86
+ mock_proc.stdin = MagicMock()
87
+ mock_proc.stdout = MagicMock()
88
+ mock_proc.stderr = MagicMock()
89
+
90
+ # Mock successful MCP response
91
+ init_response = {
92
+ "jsonrpc": "2.0",
93
+ "id": 1,
94
+ "result": {
95
+ "serverInfo": {"name": "TestServer", "version": "1.0"},
96
+ "capabilities": {"tools": {}, "resources": {}},
97
+ },
98
+ }
99
+
100
+ mock_proc.stdout.readline.return_value = json.dumps(init_response) + "\n"
101
+ mock_proc.stderr.__iter__ = lambda x: iter([]) # No stderr output
102
+
103
+ with (
104
+ patch("subprocess.run", return_value=mock_run_result),
105
+ patch("subprocess.Popen", return_value=mock_proc),
106
+ ):
107
+ phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=2)
108
+ assert phases == 2
109
+ output = logger.get_output()
110
+ assert "MCP server initialized successfully" in output
111
+ assert "Server: TestServer v1.0" in output
112
+
113
+ @pytest.mark.asyncio
114
+ async def test_phase_2_no_response(self) -> None:
115
+ """Test Phase 2 failure when no MCP response."""
116
+ logger = CaptureLogger(print_output=False)
117
+
118
+ # Mock Phase 1 success
119
+ mock_run_result = Mock()
120
+ mock_run_result.returncode = 0
121
+
122
+ # Mock subprocess.Popen for Phase 2
123
+ mock_proc = MagicMock()
124
+ mock_proc.stdin = MagicMock()
125
+ mock_proc.stdout = MagicMock()
126
+ mock_proc.stderr = MagicMock()
127
+
128
+ # No stdout response
129
+ mock_proc.stdout.readline.return_value = ""
130
+ mock_proc.stderr.__iter__ = lambda x: iter(["[ERROR] Server failed to start"])
131
+
132
+ with (
133
+ patch("subprocess.run", return_value=mock_run_result),
134
+ patch("subprocess.Popen", return_value=mock_proc),
135
+ patch("time.time", side_effect=[0, 0, 20]),
136
+ ):
137
+ phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=5)
138
+ assert phases == 1
139
+ output = logger.get_output()
140
+ assert "No valid MCP response received" in output
141
+
142
+ @pytest.mark.asyncio
143
+ async def test_phase_2_invalid_json_response(self) -> None:
144
+ """Test Phase 2 handling of invalid JSON response."""
145
+ logger = CaptureLogger(print_output=False)
146
+
147
+ # Mock Phase 1 success
148
+ mock_run_result = Mock()
149
+ mock_run_result.returncode = 0
150
+
151
+ # Mock subprocess.Popen
152
+ mock_proc = MagicMock()
153
+ mock_proc.stdin = MagicMock()
154
+ mock_proc.stdout = MagicMock()
155
+ mock_proc.stderr = MagicMock()
156
+
157
+ # Invalid JSON response
158
+ mock_proc.stdout.readline.return_value = "Invalid JSON\n"
159
+ mock_proc.stderr.__iter__ = lambda x: iter([])
160
+
161
+ with (
162
+ patch("subprocess.run", return_value=mock_run_result),
163
+ patch("subprocess.Popen", return_value=mock_proc),
164
+ ):
165
+ # Simulate timeout - time.time() is called multiple times in the loop
166
+ # Return increasing values to simulate time passing
167
+ time_values = list(range(20))
168
+ with patch("time.time", side_effect=time_values):
169
+ phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=5)
170
+ assert phases == 1
171
+ output = logger.get_output()
172
+ # The error message might vary, but should indicate no valid response
173
+ assert (
174
+ "Failed to parse MCP response" in output
175
+ or "No valid MCP response received" in output
176
+ )
177
+
178
+ @pytest.mark.asyncio
179
+ async def test_phase_3_tool_discovery(self) -> None:
180
+ """Test Phase 3 tool discovery."""
181
+ logger = CaptureLogger(print_output=False)
182
+
183
+ # Mock Phase 1 & 2 success
184
+ mock_run_result = Mock()
185
+ mock_run_result.returncode = 0
186
+
187
+ mock_proc = MagicMock()
188
+ mock_proc.stdin = MagicMock()
189
+ mock_proc.stdout = MagicMock()
190
+ mock_proc.stderr = MagicMock()
191
+
192
+ init_response = {
193
+ "jsonrpc": "2.0",
194
+ "id": 1,
195
+ "result": {"serverInfo": {"name": "TestServer", "version": "1.0"}},
196
+ }
197
+ mock_proc.stdout.readline.return_value = json.dumps(init_response) + "\n"
198
+ mock_proc.stderr.__iter__ = lambda x: iter([])
199
+
200
+ # Mock tool discovery - create proper mock tools
201
+ mock_tools = []
202
+ for tool_name in ["setup", "evaluate", "computer", "custom_tool"]:
203
+ tool = Mock()
204
+ tool.name = tool_name
205
+ mock_tools.append(tool)
206
+
207
+ with (
208
+ patch("subprocess.run", return_value=mock_run_result),
209
+ patch("subprocess.Popen", return_value=mock_proc),
210
+ patch("hud.cli.debug.MCPClient") as MockClient,
211
+ ):
212
+ mock_client = MockClient.return_value
213
+ mock_client.initialize = AsyncMock()
214
+ mock_client.list_tools = AsyncMock(return_value=mock_tools)
215
+ mock_client.list_resources = AsyncMock(return_value=[])
216
+ mock_client.shutdown = AsyncMock()
217
+
218
+ phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=3)
219
+ assert phases == 3
220
+ output = logger.get_output()
221
+ assert "Found 4 tools" in output
222
+ assert "Lifecycle tools: setup=✅, evaluate=✅" in output
223
+ assert "Interaction tools: computer" in output
224
+ assert "All tools: setup, evaluate, computer, custom_tool" in output
225
+
226
@pytest.mark.asyncio
async def test_phase_3_no_tools(self) -> None:
    """Verify the debug run stops after phase 2 when no tools are listed.

    The subprocess calls used by the earlier phases are mocked to succeed,
    while the mocked MCP client returns an empty tool list. The run is
    expected to report 2 completed phases and to log guidance about
    registering tools.
    """
    logger = CaptureLogger(print_output=False)

    # Phase 1: the mocked subprocess.run result reports a zero exit status.
    run_ok = Mock(returncode=0)

    # Phase 2: the mocked server process replies with a minimal JSON-RPC
    # result on stdout and yields nothing when stderr is iterated.
    handshake = {"jsonrpc": "2.0", "id": 1, "result": {}}
    server_proc = MagicMock()
    server_proc.stdout.readline.return_value = f"{json.dumps(handshake)}\n"
    server_proc.stderr.__iter__ = lambda _stream: iter([])

    with (
        patch("subprocess.run", return_value=run_ok),
        patch("subprocess.Popen", return_value=server_proc),
        patch("hud.cli.debug.MCPClient") as MockClient,
    ):
        client = MockClient.return_value
        client.initialize = AsyncMock()
        client.shutdown = AsyncMock()
        # Phase 3: the client discovers no tools at all.
        client.list_tools = AsyncMock(return_value=[])

        phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=5)
        assert phases == 2

        output = logger.get_output()
        for expected in ("No tools found", "@mcp.tool() decorator"):
            assert expected in output
255
+
256
@pytest.mark.asyncio
async def test_phase_4_remote_deployment(self) -> None:
    """Test Phase 4 remote deployment readiness.

    Phases 1-3 are satisfied with mocks: a zero exit status from
    ``subprocess.run``, a minimal JSON-RPC reply from the spawned process,
    and an MCP client exposing ``setup`` and ``evaluate`` tools. With the
    clock faked, the run (capped at ``max_phase=4``) must report 4 phases,
    log a 5.00s initialization time and call a tool exactly twice.
    """
    # Local import keeps this test self-contained.
    from itertools import chain, repeat

    logger = CaptureLogger(print_output=False)

    # Setup mocks for phases 1-3
    mock_run_result = Mock()
    mock_run_result.returncode = 0

    mock_proc = MagicMock()
    init_response = {"jsonrpc": "2.0", "id": 1, "result": {}}
    mock_proc.stdout.readline.return_value = json.dumps(init_response) + "\n"
    mock_proc.stderr.__iter__ = lambda x: iter([])

    # Create proper mock tools. ``name`` has to be assigned after
    # construction because Mock(name=...) names the mock itself.
    mock_tools = []
    for tool_name in ["setup", "evaluate"]:
        tool = Mock()
        tool.name = tool_name
        mock_tools.append(tool)

    with (
        patch("subprocess.run", return_value=mock_run_result),
        patch("subprocess.Popen", return_value=mock_proc),
        patch("hud.cli.debug.MCPClient") as MockClient,
    ):
        mock_client = MockClient.return_value
        mock_client.initialize = AsyncMock()
        mock_client.list_tools = AsyncMock(return_value=mock_tools)
        mock_client.list_resources = AsyncMock(return_value=[])
        mock_client.call_tool = AsyncMock()
        mock_client.shutdown = AsyncMock()

        # Fake clock: the first reading is 0 and every later reading is 5.
        # An unbounded iterator replaces the former five-element list, so
        # an extra time.time() call can no longer exhaust the side_effect
        # and raise StopIteration in the middle of the run.
        fake_clock = chain([0], repeat(5))
        with patch("time.time", side_effect=fake_clock):
            phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=4)
            assert phases == 4
            output = logger.get_output()
            assert "Total initialization time: 5.00s" in output
            # Should have tested setup and evaluate tools
            assert mock_client.call_tool.call_count == 2
296
+
297
@pytest.mark.asyncio
async def test_phase_4_slow_initialization(self) -> None:
    """Check that a startup slower than 30s triggers the slow-init warning.

    ``time.time`` is patched so the readings jump from 0 to 35. The warning
    assertions are only evaluated when the run reports at least 4 phases.
    """
    logger = CaptureLogger(print_output=False)

    # Mocked subprocess.run result with a zero exit status.
    run_ok = Mock(returncode=0)

    # Mocked server process: minimal JSON-RPC reply, empty stderr.
    handshake = {"jsonrpc": "2.0", "id": 1, "result": {}}
    server_proc = MagicMock()
    server_proc.stdout.readline.return_value = f"{json.dumps(handshake)}\n"
    server_proc.stderr.__iter__ = lambda _stream: iter([])

    with (
        patch("subprocess.run", return_value=run_ok),
        patch("subprocess.Popen", return_value=server_proc),
        patch("hud.cli.debug.MCPClient") as MockClient,
    ):
        client = MockClient.return_value
        client.initialize = AsyncMock()
        # ``name`` is set after construction; Mock(name=...) would name the
        # mock object instead of creating a ``name`` attribute.
        probe_tool = Mock()
        probe_tool.name = "test"
        client.list_tools = AsyncMock(return_value=[probe_tool])
        client.list_resources = AsyncMock(return_value=[])
        client.shutdown = AsyncMock()

        # Simulated clock readings: three at 0, then 35 (>30s elapsed).
        # The original author notes time.time() is read at start and after
        # phase 3 -- presumably that is what these positions correspond to.
        clock_readings = [0, 0, 0, 35, 35, 35]
        with patch("time.time", side_effect=clock_readings):
            phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=5)
            output = logger.get_output()
            # NOTE(review): the check is skipped entirely when fewer than 4
            # phases complete, so this test can pass without asserting.
            if phases >= 4:
                for expected in (
                    "Initialization took >30s",
                    "Consider optimizing startup time",
                ):
                    assert expected in output
334
+
335
@pytest.mark.asyncio
async def test_phase_5_concurrent_clients(self) -> None:
    """Exercise phase 5 with every concurrent client connecting successfully.

    Four distinct client mocks are handed out by the patched ``MCPClient``
    (one for the main run plus three concurrent ones). The run must report
    5 phases, log the concurrency messages and shut every client down.
    """
    logger = CaptureLogger(print_output=False)

    # Mocked subprocess.run result with a zero exit status.
    run_ok = Mock(returncode=0)

    # Mocked server process: minimal JSON-RPC reply, empty stderr.
    handshake = {"jsonrpc": "2.0", "id": 1, "result": {}}
    server_proc = MagicMock()
    server_proc.stdout.readline.return_value = f"{json.dumps(handshake)}\n"
    server_proc.stderr.__iter__ = lambda _stream: iter([])

    with (
        patch("subprocess.run", return_value=run_ok),
        patch("subprocess.Popen", return_value=server_proc),
        patch("hud.cli.debug.MCPClient") as MockClient,
    ):

        def build_client() -> MagicMock:
            """Return a client mock exposing a single tool named "test"."""
            stub = MagicMock()
            stub.initialize = AsyncMock()
            # ``name`` must be assigned after construction on a Mock.
            tool = Mock()
            tool.name = "test"
            stub.list_tools = AsyncMock(return_value=[tool])
            stub.list_resources = AsyncMock(return_value=[])
            stub.shutdown = AsyncMock()
            return stub

        # 1 main + 3 concurrent; each MCPClient() call yields the next one.
        mock_clients = [build_client() for _ in range(4)]
        MockClient.side_effect = mock_clients

        phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=5)
        assert phases == 5

        output = logger.get_output()
        for expected in (
            "Creating 3 concurrent MCP clients",
            "All concurrent clients connected",
        ):
            assert expected in output

        # Every client that was handed out must have been shut down.
        for stub in mock_clients:
            stub.shutdown.assert_called()
378
+
379
@pytest.mark.asyncio
async def test_phase_5_concurrent_failure(self) -> None:
    """Exercise phase 5 when one concurrent client fails to initialize.

    The patched ``MCPClient`` hands out three mocks in order: the client
    used by the earlier phases, a healthy concurrent client, and a
    concurrent client whose ``initialize`` raises. The failure message must
    show up in the captured log output.
    """
    logger = CaptureLogger(print_output=False)

    # Mocked subprocess.run result with a zero exit status.
    run_ok = Mock(returncode=0)

    # Mocked server process: minimal JSON-RPC reply, empty stderr.
    handshake = {"jsonrpc": "2.0", "id": 1, "result": {}}
    server_proc = MagicMock()
    server_proc.stdout.readline.return_value = f"{json.dumps(handshake)}\n"
    server_proc.stderr.__iter__ = lambda _stream: iter([])

    with (
        patch("subprocess.run", return_value=run_ok),
        patch("subprocess.Popen", return_value=server_proc),
        patch("hud.cli.debug.MCPClient") as MockClient,
    ):
        # One tool object shared by both healthy clients.
        shared_tool = Mock()
        shared_tool.name = "test"

        def healthy_client() -> MagicMock:
            """Return a client mock that initializes and lists the shared tool."""
            stub = MagicMock()
            stub.initialize = AsyncMock()
            stub.list_tools = AsyncMock(return_value=[shared_tool])
            stub.list_resources = AsyncMock(return_value=[])
            stub.shutdown = AsyncMock()
            return stub

        # Client for phases 1-4, followed by the first concurrent client.
        primary = healthy_client()
        concurrent_ok = healthy_client()

        # Second concurrent client: initialization blows up.
        concurrent_bad = MagicMock()
        concurrent_bad.initialize = AsyncMock(side_effect=Exception("Connection failed"))
        concurrent_bad.shutdown = AsyncMock()

        MockClient.side_effect = [primary, concurrent_ok, concurrent_bad]

        await debug_mcp_stdio(["test-cmd"], logger, max_phase=5)
        output = logger.get_output()
        assert "Concurrent test failed: Connection failed" in output
425
+
426
@pytest.mark.asyncio
async def test_docker_command_handling(self) -> None:
    """Check which argv reaches ``subprocess.run`` for a Docker command.

    Running phase 1 only with a ``docker run ...`` command line, the most
    recent ``subprocess.run`` call must have received ``["docker"]`` as its
    first positional argument.
    """
    logger = CaptureLogger(print_output=False)
    run_ok = Mock(returncode=0)
    docker_cmd = ["docker", "run", "--rm", "image:latest"]

    with patch("subprocess.run", return_value=run_ok) as mock_run:
        await debug_mcp_stdio(docker_cmd, logger, max_phase=1)
        # Only the executable name is expected in the probed command.
        probed_argv = mock_run.call_args.args[0]
        assert probed_argv == ["docker"]
439
+
440
@pytest.mark.asyncio
async def test_phase_exception_handling(self) -> None:
    """Check that an unexpected error in ``subprocess.run`` is reported.

    With ``subprocess.run`` raising, the run must report zero completed
    phases and log the startup failure together with the error text.
    """
    logger = CaptureLogger(print_output=False)
    failure = Exception("Unexpected error")

    with patch("subprocess.run", side_effect=failure):
        phases = await debug_mcp_stdio(["test-cmd"], logger, max_phase=5)
        assert phases == 0
        captured = logger.get_output()
        assert "Startup test failed: Unexpected error" in captured
450
+
451
+
452
if __name__ == "__main__":
    # Propagate pytest's exit code so that running this file directly
    # exits non-zero when tests fail (pytest.main() only returns the code).
    raise SystemExit(pytest.main([__file__]))