hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (130)
  1. hud/__init__.py +22 -22
  2. hud/agents/__init__.py +13 -15
  3. hud/agents/base.py +599 -599
  4. hud/agents/claude.py +373 -373
  5. hud/agents/langchain.py +261 -250
  6. hud/agents/misc/__init__.py +7 -7
  7. hud/agents/misc/response_agent.py +82 -80
  8. hud/agents/openai.py +352 -352
  9. hud/agents/openai_chat_generic.py +154 -154
  10. hud/agents/tests/__init__.py +1 -1
  11. hud/agents/tests/test_base.py +742 -742
  12. hud/agents/tests/test_claude.py +324 -324
  13. hud/agents/tests/test_client.py +363 -363
  14. hud/agents/tests/test_openai.py +237 -237
  15. hud/cli/__init__.py +617 -617
  16. hud/cli/__main__.py +8 -8
  17. hud/cli/analyze.py +371 -371
  18. hud/cli/analyze_metadata.py +230 -230
  19. hud/cli/build.py +498 -427
  20. hud/cli/clone.py +185 -185
  21. hud/cli/cursor.py +92 -92
  22. hud/cli/debug.py +392 -392
  23. hud/cli/docker_utils.py +83 -83
  24. hud/cli/init.py +280 -281
  25. hud/cli/interactive.py +353 -353
  26. hud/cli/mcp_server.py +764 -756
  27. hud/cli/pull.py +330 -336
  28. hud/cli/push.py +404 -370
  29. hud/cli/remote_runner.py +311 -311
  30. hud/cli/runner.py +160 -160
  31. hud/cli/tests/__init__.py +3 -3
  32. hud/cli/tests/test_analyze.py +284 -284
  33. hud/cli/tests/test_cli_init.py +265 -265
  34. hud/cli/tests/test_cli_main.py +27 -27
  35. hud/cli/tests/test_clone.py +142 -142
  36. hud/cli/tests/test_cursor.py +253 -253
  37. hud/cli/tests/test_debug.py +453 -453
  38. hud/cli/tests/test_mcp_server.py +139 -139
  39. hud/cli/tests/test_utils.py +388 -388
  40. hud/cli/utils.py +263 -263
  41. hud/clients/README.md +143 -143
  42. hud/clients/__init__.py +16 -16
  43. hud/clients/base.py +378 -379
  44. hud/clients/fastmcp.py +222 -222
  45. hud/clients/mcp_use.py +298 -278
  46. hud/clients/tests/__init__.py +1 -1
  47. hud/clients/tests/test_client_integration.py +111 -111
  48. hud/clients/tests/test_fastmcp.py +342 -342
  49. hud/clients/tests/test_protocol.py +188 -188
  50. hud/clients/utils/__init__.py +1 -1
  51. hud/clients/utils/retry_transport.py +160 -160
  52. hud/datasets.py +327 -322
  53. hud/misc/__init__.py +1 -1
  54. hud/misc/claude_plays_pokemon.py +292 -292
  55. hud/otel/__init__.py +35 -35
  56. hud/otel/collector.py +142 -142
  57. hud/otel/config.py +164 -164
  58. hud/otel/context.py +536 -536
  59. hud/otel/exporters.py +366 -366
  60. hud/otel/instrumentation.py +97 -97
  61. hud/otel/processors.py +118 -118
  62. hud/otel/tests/__init__.py +1 -1
  63. hud/otel/tests/test_processors.py +197 -197
  64. hud/server/__init__.py +5 -5
  65. hud/server/context.py +114 -114
  66. hud/server/helper/__init__.py +5 -5
  67. hud/server/low_level.py +132 -132
  68. hud/server/server.py +170 -166
  69. hud/server/tests/__init__.py +3 -3
  70. hud/settings.py +73 -73
  71. hud/shared/__init__.py +5 -5
  72. hud/shared/exceptions.py +180 -180
  73. hud/shared/requests.py +264 -264
  74. hud/shared/tests/test_exceptions.py +157 -157
  75. hud/shared/tests/test_requests.py +275 -275
  76. hud/telemetry/__init__.py +25 -25
  77. hud/telemetry/instrument.py +379 -379
  78. hud/telemetry/job.py +309 -309
  79. hud/telemetry/replay.py +74 -74
  80. hud/telemetry/trace.py +83 -83
  81. hud/tools/__init__.py +33 -33
  82. hud/tools/base.py +365 -365
  83. hud/tools/bash.py +161 -161
  84. hud/tools/computer/__init__.py +15 -15
  85. hud/tools/computer/anthropic.py +437 -437
  86. hud/tools/computer/hud.py +376 -376
  87. hud/tools/computer/openai.py +295 -295
  88. hud/tools/computer/settings.py +82 -82
  89. hud/tools/edit.py +314 -314
  90. hud/tools/executors/__init__.py +30 -30
  91. hud/tools/executors/base.py +539 -539
  92. hud/tools/executors/pyautogui.py +621 -621
  93. hud/tools/executors/tests/__init__.py +1 -1
  94. hud/tools/executors/tests/test_base_executor.py +338 -338
  95. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  96. hud/tools/executors/xdo.py +511 -511
  97. hud/tools/playwright.py +412 -412
  98. hud/tools/tests/__init__.py +3 -3
  99. hud/tools/tests/test_base.py +282 -282
  100. hud/tools/tests/test_bash.py +158 -158
  101. hud/tools/tests/test_bash_extended.py +197 -197
  102. hud/tools/tests/test_computer.py +425 -425
  103. hud/tools/tests/test_computer_actions.py +34 -34
  104. hud/tools/tests/test_edit.py +259 -259
  105. hud/tools/tests/test_init.py +27 -27
  106. hud/tools/tests/test_playwright_tool.py +183 -183
  107. hud/tools/tests/test_tools.py +145 -145
  108. hud/tools/tests/test_utils.py +156 -156
  109. hud/tools/types.py +72 -72
  110. hud/tools/utils.py +50 -50
  111. hud/types.py +136 -136
  112. hud/utils/__init__.py +10 -10
  113. hud/utils/async_utils.py +65 -65
  114. hud/utils/design.py +236 -168
  115. hud/utils/mcp.py +55 -55
  116. hud/utils/progress.py +149 -149
  117. hud/utils/telemetry.py +66 -66
  118. hud/utils/tests/test_async_utils.py +173 -173
  119. hud/utils/tests/test_init.py +17 -17
  120. hud/utils/tests/test_progress.py +261 -261
  121. hud/utils/tests/test_telemetry.py +82 -82
  122. hud/utils/tests/test_version.py +8 -8
  123. hud/version.py +7 -7
  124. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/METADATA +10 -8
  125. hud_python-0.4.3.dist-info/RECORD +131 -0
  126. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/licenses/LICENSE +21 -21
  127. hud/agents/art.py +0 -101
  128. hud_python-0.4.1.dist-info/RECORD +0 -132
  129. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/WHEEL +0 -0
  130. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/entry_points.txt +0 -0
hud/cli/debug.py CHANGED
@@ -1,392 +1,392 @@
1
- """Debug command implementation for MCP environments."""
2
-
3
- # ruff: noqa: G004
4
- from __future__ import annotations
5
-
6
- import asyncio
7
- import json
8
- import subprocess
9
- import threading
10
- import time
11
-
12
- from rich.console import Console
13
-
14
- from hud.clients import MCPClient
15
- from hud.utils.design import HUDDesign
16
-
17
- from .utils import CaptureLogger, Colors, analyze_error_for_hints
18
-
19
- console = Console()
20
-
21
-
22
- async def debug_mcp_stdio(command: list[str], logger: CaptureLogger, max_phase: int = 5) -> int:
23
- """
24
- Debug any stdio-based MCP server step by step.
25
-
26
- Args:
27
- command: Command and arguments to run the MCP server
28
- logger: CaptureLogger instance for output
29
- max_phase: Maximum phase to run (1-5, default 5 for all phases)
30
-
31
- Returns:
32
- Number of phases completed (0-5)
33
- """
34
- # Create design instance for initial output (before logger takes over)
35
- if logger.print_output:
36
- design = HUDDesign()
37
- design.header("MCP Server Debugger", icon="🔍")
38
- design.dim_info("Command:", " ".join(command))
39
- design.dim_info("Time:", time.strftime("%Y-%m-%d %H:%M:%S"))
40
-
41
- # Explain color coding using Rich formatting
42
- design.info("\nColor Key:")
43
- console.print(" [bold]■[/bold] Commands (bold)")
44
- console.print(" [rgb(192,150,12)]■[/rgb(192,150,12)] STDIO (MCP protocol)")
45
- console.print(" [dim]■[/dim] STDERR (server logs)")
46
- console.print(" [green]■[/green] Success messages")
47
- console.print(" [red]■[/red] Error messages")
48
- console.print(" ■ Info messages")
49
-
50
- phases_completed = 0
51
- total_phases = 5
52
- start_time = time.time()
53
-
54
- # Phase 1: Basic Server Test
55
- logger.phase(1, "Basic Server Startup Test")
56
-
57
- try:
58
- # Test if command runs at all
59
- test_cmd = command + (["echo", "Server OK"] if "docker" in command[0] else [])
60
- logger.command([*test_cmd[:3], "..."] if len(test_cmd) > 3 else test_cmd)
61
-
62
- result = subprocess.run( # noqa: S603, ASYNC221
63
- command[:1],
64
- capture_output=True,
65
- text=True,
66
- timeout=2,
67
- encoding="utf-8",
68
- errors="replace",
69
- )
70
-
71
- if result.returncode == 0 or "usage" in result.stderr.lower():
72
- logger.success("Command executable found")
73
- phases_completed = 1
74
- else:
75
- logger.error(f"Command failed with exit code {result.returncode}")
76
- if result.stderr:
77
- logger._log(
78
- f"Error output: {result.stderr}", Colors.RED if logger.print_output else ""
79
- )
80
- hint = analyze_error_for_hints(result.stderr)
81
- if hint:
82
- logger.hint(hint)
83
- logger.progress_bar(phases_completed, total_phases)
84
- return phases_completed
85
-
86
- # Check if we should stop here
87
- if max_phase <= 1:
88
- logger.info(f"Stopping at phase {max_phase} as requested")
89
- logger.progress_bar(phases_completed, total_phases)
90
- return phases_completed
91
-
92
- except FileNotFoundError:
93
- logger.error(f"Command not found: {command[0]}")
94
- logger.hint("Ensure the command is installed and in PATH")
95
- logger.progress_bar(phases_completed, total_phases)
96
- return phases_completed
97
- except Exception as e:
98
- logger.error(f"Startup test failed: {e}")
99
- logger.progress_bar(phases_completed, total_phases)
100
- return phases_completed
101
-
102
- # Phase 2: MCP Initialize Test
103
- logger.phase(2, "MCP Server Initialize Test")
104
-
105
- logger.info("STDIO is used for MCP protocol, STDERR for server logs")
106
-
107
- init_request = {
108
- "jsonrpc": "2.0",
109
- "id": 1,
110
- "method": "initialize",
111
- "params": {
112
- "protocolVersion": "2024-11-05",
113
- "capabilities": {"roots": {"listChanged": True}},
114
- "clientInfo": {"name": "DebugClient", "version": "1.0.0"},
115
- },
116
- }
117
-
118
- try:
119
- logger.command(command)
120
- logger.stdio(f"Sending: {json.dumps(init_request)}")
121
-
122
- proc = subprocess.Popen( # noqa: S603, ASYNC220
123
- command,
124
- stdin=subprocess.PIPE,
125
- stdout=subprocess.PIPE,
126
- stderr=subprocess.PIPE,
127
- text=True,
128
- bufsize=1,
129
- encoding="utf-8",
130
- errors="replace", # Replace invalid chars with � on Windows
131
- )
132
-
133
- # Ensure pipes are available
134
- if proc.stdin is None or proc.stdout is None or proc.stderr is None:
135
- raise RuntimeError("Failed to create subprocess pipes")
136
-
137
- # Send initialize
138
- proc.stdin.write(json.dumps(init_request) + "\n")
139
- proc.stdin.flush()
140
-
141
- # Collect stderr in background
142
- stderr_lines = []
143
-
144
- def read_stderr() -> None:
145
- if proc.stderr is None:
146
- return
147
- for line in proc.stderr:
148
- line = line.rstrip()
149
- if line:
150
- logger.stderr(line)
151
- stderr_lines.append(line)
152
-
153
- stderr_thread = threading.Thread(target=read_stderr)
154
- stderr_thread.daemon = True
155
- stderr_thread.start()
156
-
157
- # Wait for response
158
- response = None
159
- start = time.time()
160
- while time.time() - start < 15:
161
- line = proc.stdout.readline()
162
- if line:
163
- try:
164
- response = json.loads(line)
165
- if response.get("id") == 1:
166
- logger.stdio(f"Received: {json.dumps(response)}")
167
- break
168
- except Exception as e:
169
- logger.error(f"Failed to parse MCP response: {e}")
170
- continue
171
-
172
- if response and "result" in response:
173
- logger.success("MCP server initialized successfully")
174
- server_info = response["result"].get("serverInfo", {})
175
- logger.info(
176
- f"Server: {server_info.get('name', 'Unknown')} v{server_info.get('version', '?')}"
177
- )
178
-
179
- # Show capabilities
180
- caps = response["result"].get("capabilities", {})
181
- if caps:
182
- logger.info(f"Capabilities: {', '.join(caps.keys())}")
183
- phases_completed = 2
184
- else:
185
- logger.error("No valid MCP response received")
186
-
187
- # Analyze stderr for hints
188
- if stderr_lines:
189
- all_stderr = "\n".join(stderr_lines)
190
- hint = analyze_error_for_hints(all_stderr)
191
- if hint:
192
- logger.hint(hint)
193
- else:
194
- logger.hint("""MCP requires clean stdout. Ensure:
195
- - All print() statements use file=sys.stderr
196
- - Logging is configured to use stderr
197
- - No libraries are printing to stdout""")
198
-
199
- logger.progress_bar(phases_completed, total_phases)
200
- proc.terminate()
201
- try:
202
- proc.wait(timeout=5)
203
- except subprocess.TimeoutExpired:
204
- proc.kill()
205
- proc.wait()
206
- return phases_completed
207
-
208
- proc.terminate()
209
- try:
210
- proc.wait(timeout=5)
211
- except subprocess.TimeoutExpired:
212
- proc.kill()
213
- proc.wait()
214
-
215
- # Check if we should stop here
216
- if phases_completed >= max_phase:
217
- logger.info(f"Stopping at phase {max_phase} as requested")
218
- logger.progress_bar(phases_completed, total_phases)
219
- return phases_completed
220
-
221
- except Exception as e:
222
- logger.error(f"MCP test failed: {e}")
223
- hint = analyze_error_for_hints(str(e))
224
- if hint:
225
- logger.hint(hint)
226
- logger.progress_bar(phases_completed, total_phases)
227
- return phases_completed
228
-
229
- # Phase 3: Tool Discovery
230
- logger.phase(3, "MCP Tool Discovery Test")
231
-
232
- client = None
233
- try:
234
- # Create MCP config for the command
235
- mcp_config = {
236
- "test": {"command": command[0], "args": command[1:] if len(command) > 1 else []}
237
- }
238
-
239
- logger.command(command)
240
- logger.info("Creating MCP client via hud...")
241
-
242
- client = MCPClient(mcp_config=mcp_config, verbose=False, auto_trace=False)
243
- await client.initialize()
244
-
245
- # Wait for initialization
246
- logger.info("Waiting for server initialization...")
247
- await asyncio.sleep(5)
248
-
249
- # Get tools
250
- tools = await client.list_tools()
251
-
252
- if tools:
253
- logger.success(f"Found {len(tools)} tools")
254
-
255
- # Check for lifecycle tools
256
- tool_names = [t.name for t in tools]
257
- has_setup = "setup" in tool_names
258
- has_evaluate = "evaluate" in tool_names
259
-
260
- logger.info(
261
- f"Lifecycle tools: setup={'✅' if has_setup else '❌'}, evaluate={'✅' if has_evaluate else '❌'}" # noqa: E501
262
- )
263
-
264
- # Check for interaction tools
265
- interaction_tools = [
266
- name
267
- for name in tool_names
268
- if name in ["computer", "playwright", "click", "type", "interact", "move"]
269
- ]
270
- if interaction_tools:
271
- logger.info(f"Interaction tools: {', '.join(interaction_tools)}")
272
-
273
- # List all tools
274
- logger.info(f"All tools: {', '.join(tool_names)}")
275
-
276
- # Try to list resources
277
- try:
278
- resources = await client.list_resources()
279
- if resources:
280
- logger.info(
281
- f"Found {len(resources)} resources: {', '.join(str(r.uri) for r in resources[:3])}..." # noqa: E501
282
- )
283
- except Exception as e:
284
- logger.error(f"Failed to list resources: {e}")
285
-
286
- phases_completed = 3
287
-
288
- else:
289
- logger.error("No tools found")
290
- logger.hint("""No tools found. Ensure:
291
- - @mcp.tool() decorator is used on functions
292
- - Tools are registered before mcp.run()
293
- - No import errors preventing tool registration""")
294
- logger.progress_bar(phases_completed, total_phases)
295
- return phases_completed
296
-
297
- # Check if we should stop here
298
- if phases_completed >= max_phase:
299
- logger.info(f"Stopping at phase {max_phase} as requested")
300
- logger.progress_bar(phases_completed, total_phases)
301
- return phases_completed
302
-
303
- # Phase 4: Remote Deployment Readiness
304
- logger.phase(4, "Remote Deployment Readiness")
305
-
306
- # Test if setup/evaluate exist
307
- if "setup" in tool_names:
308
- try:
309
- logger.info("Testing setup tool...")
310
- await client.call_tool(name="setup", arguments={})
311
- logger.success("Setup tool responded")
312
- except Exception as e:
313
- logger.info(f"Setup tool test: {e}")
314
-
315
- if "evaluate" in tool_names:
316
- try:
317
- logger.info("Testing evaluate tool...")
318
- await client.call_tool(name="evaluate", arguments={})
319
- logger.success("Evaluate tool responded")
320
- except Exception as e:
321
- logger.info(f"Evaluate tool test: {e}")
322
-
323
- # Performance check
324
- init_time = time.time() - start_time
325
- logger.info(f"Total initialization time: {init_time:.2f}s")
326
-
327
- if init_time > 30:
328
- logger.error("Initialization took >30s - may be too slow")
329
- logger.hint("Consider optimizing startup time")
330
-
331
- phases_completed = 4
332
-
333
- # Check if we should stop here
334
- if phases_completed >= max_phase:
335
- logger.info(f"Stopping at phase {max_phase} as requested")
336
- logger.progress_bar(phases_completed, total_phases)
337
- return phases_completed
338
-
339
- # Phase 5: Concurrent Clients
340
- logger.phase(5, "Concurrent Clients Testing")
341
-
342
- concurrent_clients = []
343
- try:
344
- logger.info("Creating 3 concurrent MCP clients...")
345
-
346
- for i in range(3):
347
- client_config = {
348
- f"test_concurrent_{i}": {
349
- "command": command[0],
350
- "args": command[1:] if len(command) > 1 else [],
351
- }
352
- }
353
-
354
- concurrent_client = MCPClient(
355
- mcp_config=client_config, verbose=False, auto_trace=False
356
- )
357
- await concurrent_client.initialize()
358
- concurrent_clients.append(concurrent_client)
359
- logger.info(f"Client {i + 1} connected")
360
-
361
- logger.success("All concurrent clients connected")
362
-
363
- # Clean shutdown
364
- for i, c in enumerate(concurrent_clients):
365
- await c.shutdown()
366
- logger.info(f"Client {i + 1} disconnected")
367
-
368
- phases_completed = 5
369
-
370
- except Exception as e:
371
- logger.error(f"Concurrent test failed: {e}")
372
- finally:
373
- for c in concurrent_clients:
374
- try:
375
- await c.shutdown()
376
- except Exception as e:
377
- logger.error(f"Failed to close client: {e}")
378
-
379
- except Exception as e:
380
- logger.error(f"Tool discovery failed: {e}")
381
- logger.progress_bar(phases_completed, total_phases)
382
- return phases_completed
383
- finally:
384
- # Ensure client is closed even on exceptions
385
- if client:
386
- try:
387
- await client.shutdown()
388
- except Exception:
389
- logger.error("Failed to close client")
390
-
391
- logger.progress_bar(phases_completed, total_phases)
392
- return phases_completed
1
+ """Debug command implementation for MCP environments."""
2
+
3
+ # ruff: noqa: G004
4
+ from __future__ import annotations
5
+
6
+ import asyncio
7
+ import json
8
+ import subprocess
9
+ import threading
10
+ import time
11
+
12
+ from rich.console import Console
13
+
14
+ from hud.clients import MCPClient
15
+ from hud.utils.design import HUDDesign
16
+
17
+ from .utils import CaptureLogger, Colors, analyze_error_for_hints
18
+
19
+ console = Console()
20
+
21
+
22
def _stop_process(proc: subprocess.Popen) -> None:
    """Terminate *proc*, escalating to kill if it has not exited after 5 seconds."""
    proc.terminate()
    try:
        proc.wait(timeout=5)
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.wait()


def _initialize_handshake(
    command: list[str], logger: CaptureLogger, init_request: dict, timeout: float = 15.0
) -> tuple[dict | None, list[str]]:
    """
    Launch the server, send the initialize request and wait for its reply.

    Args:
        command: Command and arguments to run the MCP server
        logger: Logger that receives stderr lines, parse errors and the reply
        init_request: JSON-RPC request written to the server's stdin
        timeout: Maximum number of seconds to wait for the matching reply

    Returns:
        ``(response, stderr_lines)``. ``response`` is the JSON object whose ``id``
        equals the request's ``id``, or ``None`` when the server closed stdout or
        the timeout elapsed first. ``stderr_lines`` holds the non-empty stderr
        lines collected while the server was running.

    Raises:
        RuntimeError: If the subprocess pipes could not be created
        OSError: If the command could not be started
    """
    import queue  # function-scope import: only this helper needs it

    proc = subprocess.Popen(  # noqa: S603
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        bufsize=1,
        encoding="utf-8",
        errors="replace",  # Replace invalid chars instead of raising on decode errors
    )

    stderr_lines: list[str] = []
    stderr_thread: threading.Thread | None = None
    try:
        # Ensure pipes are available
        if proc.stdin is None or proc.stdout is None or proc.stderr is None:
            raise RuntimeError("Failed to create subprocess pipes")
        stdin_pipe, stdout_pipe, stderr_pipe = proc.stdin, proc.stdout, proc.stderr

        def pump_stderr() -> None:
            for raw in stderr_pipe:
                text = raw.rstrip()
                if text:
                    logger.stderr(text)
                    stderr_lines.append(text)

        # stdout is read on a thread and handed over through a queue so the wait
        # below can honour the deadline; a direct readline() would block forever
        # on a silent server. ``None`` marks end of stream.
        stdout_queue = queue.Queue()

        def pump_stdout() -> None:
            try:
                for raw in stdout_pipe:
                    stdout_queue.put(raw)
            finally:
                stdout_queue.put(None)

        # Start both readers before writing so output from a server that dies
        # immediately is still captured.
        stderr_thread = threading.Thread(target=pump_stderr, daemon=True)
        stderr_thread.start()
        threading.Thread(target=pump_stdout, daemon=True).start()

        try:
            stdin_pipe.write(json.dumps(init_request) + "\n")
            stdin_pipe.flush()
        except OSError as e:
            # Typically a broken pipe: the server exited before reading. Keep going
            # so the EOF on stdout ends the wait and stderr can be analysed.
            logger.error(f"Failed to send initialize request: {e}")

        expected_id = init_request.get("id")
        deadline = time.monotonic() + timeout
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            try:
                raw = stdout_queue.get(timeout=remaining)
            except queue.Empty:
                break
            if raw is None:
                # Server closed stdout (usually because it exited): stop waiting.
                break
            try:
                message = json.loads(raw)
            except ValueError as e:
                logger.error(f"Failed to parse MCP response: {e}")
                continue
            # Only the reply to our request counts; notifications are skipped.
            if isinstance(message, dict) and message.get("id") == expected_id:
                logger.stdio(f"Received: {json.dumps(message)}")
                return message, stderr_lines

        return None, stderr_lines
    finally:
        if proc.stdin is not None:
            try:
                proc.stdin.close()
            except OSError:
                # Flushing leftovers to an already-dead server fails; nothing to recover.
                pass
        _stop_process(proc)
        if stderr_thread is not None:
            # Give the reader a moment to drain what the server wrote before exiting.
            stderr_thread.join(timeout=1)


async def debug_mcp_stdio(command: list[str], logger: CaptureLogger, max_phase: int = 5) -> int:
    """
    Debug any stdio-based MCP server step by step.

    The phases run in order and the function returns as soon as one fails or
    ``max_phase`` is reached:

    1. Run the bare executable to confirm it can be started.
    2. Launch the full command and perform a raw JSON-RPC ``initialize`` handshake.
    3. Connect through ``MCPClient`` and list tools and resources.
    4. Call the ``setup`` / ``evaluate`` tools when present and report elapsed time.
    5. Connect three clients one after another while the earlier ones stay open.

    Args:
        command: Command and arguments to run the MCP server
        logger: CaptureLogger instance for output
        max_phase: Maximum phase to run (1-5, default 5 for all phases)

    Returns:
        Number of phases completed (0-5)
    """
    # Create design instance for initial output (before logger takes over)
    if logger.print_output:
        design = HUDDesign()
        design.header("MCP Server Debugger", icon="🔍")
        design.dim_info("Command:", " ".join(command))
        design.dim_info("Time:", time.strftime("%Y-%m-%d %H:%M:%S"))

        # Explain color coding using Rich formatting
        design.info("\nColor Key:")
        console.print(" [bold]■[/bold] Commands (bold)")
        console.print(" [rgb(192,150,12)]■[/rgb(192,150,12)] STDIO (MCP protocol)")
        console.print(" [dim]■[/dim] STDERR (server logs)")
        console.print(" [green]■[/green] Success messages")
        console.print(" [red]■[/red] Error messages")
        console.print(" ■ Info messages")

    phases_completed = 0
    total_phases = 5
    start_time = time.monotonic()

    # Phase 1: Basic Server Test
    logger.phase(1, "Basic Server Startup Test")

    try:
        # Test if command runs at all
        test_cmd = command + (["echo", "Server OK"] if "docker" in command[0] else [])
        logger.command([*test_cmd[:3], "..."] if len(test_cmd) > 3 else test_cmd)

        try:
            result = subprocess.run(  # noqa: S603, ASYNC221
                command[:1],
                capture_output=True,
                text=True,
                timeout=2,
                encoding="utf-8",
                errors="replace",
            )
        except subprocess.TimeoutExpired:
            # The bare executable started and kept running (for example an
            # interpreter waiting on stdin), so it exists and is launchable.
            logger.info("Command did not exit within 2s; executable is present")
        else:
            if result.returncode != 0 and "usage" not in result.stderr.lower():
                logger.error(f"Command failed with exit code {result.returncode}")
                if result.stderr:
                    logger._log(
                        f"Error output: {result.stderr}", Colors.RED if logger.print_output else ""
                    )
                    hint = analyze_error_for_hints(result.stderr)
                    if hint:
                        logger.hint(hint)
                logger.progress_bar(phases_completed, total_phases)
                return phases_completed

        logger.success("Command executable found")
        phases_completed = 1

        # Check if we should stop here
        if max_phase <= 1:
            logger.info(f"Stopping at phase {max_phase} as requested")
            logger.progress_bar(phases_completed, total_phases)
            return phases_completed

    except FileNotFoundError:
        logger.error(f"Command not found: {command[0]}")
        logger.hint("Ensure the command is installed and in PATH")
        logger.progress_bar(phases_completed, total_phases)
        return phases_completed
    except Exception as e:
        logger.error(f"Startup test failed: {e}")
        logger.progress_bar(phases_completed, total_phases)
        return phases_completed

    # Phase 2: MCP Initialize Test
    logger.phase(2, "MCP Server Initialize Test")

    logger.info("STDIO is used for MCP protocol, STDERR for server logs")

    init_request = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "initialize",
        "params": {
            "protocolVersion": "2024-11-05",
            "capabilities": {"roots": {"listChanged": True}},
            "clientInfo": {"name": "DebugClient", "version": "1.0.0"},
        },
    }

    try:
        logger.command(command)
        logger.stdio(f"Sending: {json.dumps(init_request)}")

        # The helper always stops the subprocess, including on errors.
        response, stderr_lines = _initialize_handshake(command, logger, init_request)

        if response and "result" in response:
            logger.success("MCP server initialized successfully")
            server_info = response["result"].get("serverInfo", {})
            logger.info(
                f"Server: {server_info.get('name', 'Unknown')} v{server_info.get('version', '?')}"
            )

            # Show capabilities
            caps = response["result"].get("capabilities", {})
            if caps:
                logger.info(f"Capabilities: {', '.join(caps.keys())}")
            phases_completed = 2
        else:
            logger.error("No valid MCP response received")

            # Analyze stderr for hints
            if stderr_lines:
                hint = analyze_error_for_hints("\n".join(stderr_lines))
                if hint:
                    logger.hint(hint)
            else:
                logger.hint(
                    "MCP requires clean stdout. Ensure:\n"
                    "- All print() statements use file=sys.stderr\n"
                    "- Logging is configured to use stderr\n"
                    "- No libraries are printing to stdout"
                )

            logger.progress_bar(phases_completed, total_phases)
            return phases_completed

        # Check if we should stop here
        if phases_completed >= max_phase:
            logger.info(f"Stopping at phase {max_phase} as requested")
            logger.progress_bar(phases_completed, total_phases)
            return phases_completed

    except Exception as e:
        logger.error(f"MCP test failed: {e}")
        hint = analyze_error_for_hints(str(e))
        if hint:
            logger.hint(hint)
        logger.progress_bar(phases_completed, total_phases)
        return phases_completed

    # Phase 3: Tool Discovery
    logger.phase(3, "MCP Tool Discovery Test")

    client = None
    try:
        # Create MCP config for the command
        mcp_config = {"test": {"command": command[0], "args": command[1:]}}

        logger.command(command)
        logger.info("Creating MCP client via hud...")

        client = MCPClient(mcp_config=mcp_config, verbose=False, auto_trace=False)
        await client.initialize()

        # Wait for initialization
        logger.info("Waiting for server initialization...")
        await asyncio.sleep(5)

        # Get tools
        tools = await client.list_tools()

        if not tools:
            logger.error("No tools found")
            logger.hint(
                "No tools found. Ensure:\n"
                "- @mcp.tool() decorator is used on functions\n"
                "- Tools are registered before mcp.run()\n"
                "- No import errors preventing tool registration"
            )
            logger.progress_bar(phases_completed, total_phases)
            return phases_completed

        logger.success(f"Found {len(tools)} tools")

        # Check for lifecycle tools
        tool_names = [t.name for t in tools]
        has_setup = "setup" in tool_names
        has_evaluate = "evaluate" in tool_names

        logger.info(
            f"Lifecycle tools: setup={'✅' if has_setup else '❌'}, evaluate={'✅' if has_evaluate else '❌'}"  # noqa: E501
        )

        # Check for interaction tools
        interaction_tools = [
            name
            for name in tool_names
            if name in ["computer", "playwright", "click", "type", "interact", "move"]
        ]
        if interaction_tools:
            logger.info(f"Interaction tools: {', '.join(interaction_tools)}")

        # List all tools
        logger.info(f"All tools: {', '.join(tool_names)}")

        # Try to list resources; a failure here is reported but does not fail the phase
        try:
            resources = await client.list_resources()
            if resources:
                logger.info(
                    f"Found {len(resources)} resources: {', '.join(str(r.uri) for r in resources[:3])}..."  # noqa: E501
                )
        except Exception as e:
            logger.error(f"Failed to list resources: {e}")

        phases_completed = 3

        # Check if we should stop here
        if phases_completed >= max_phase:
            logger.info(f"Stopping at phase {max_phase} as requested")
            logger.progress_bar(phases_completed, total_phases)
            return phases_completed

        # Phase 4: Remote Deployment Readiness
        logger.phase(4, "Remote Deployment Readiness")

        # Test if setup/evaluate exist; errors are informational because the tools
        # are called with empty arguments.
        if has_setup:
            try:
                logger.info("Testing setup tool...")
                await client.call_tool(name="setup", arguments={})
                logger.success("Setup tool responded")
            except Exception as e:
                logger.info(f"Setup tool test: {e}")

        if has_evaluate:
            try:
                logger.info("Testing evaluate tool...")
                await client.call_tool(name="evaluate", arguments={})
                logger.success("Evaluate tool responded")
            except Exception as e:
                logger.info(f"Evaluate tool test: {e}")

        # Performance check
        init_time = time.monotonic() - start_time
        logger.info(f"Total initialization time: {init_time:.2f}s")

        if init_time > 30:
            logger.error("Initialization took >30s - may be too slow")
            logger.hint("Consider optimizing startup time")

        phases_completed = 4

        # Check if we should stop here
        if phases_completed >= max_phase:
            logger.info(f"Stopping at phase {max_phase} as requested")
            logger.progress_bar(phases_completed, total_phases)
            return phases_completed

        # Phase 5: Concurrent Clients
        logger.phase(5, "Concurrent Clients Testing")

        concurrent_clients = []
        try:
            logger.info("Creating 3 concurrent MCP clients...")

            for i in range(3):
                client_config = {
                    f"test_concurrent_{i}": {
                        "command": command[0],
                        "args": command[1:],
                    }
                }

                concurrent_client = MCPClient(
                    mcp_config=client_config, verbose=False, auto_trace=False
                )
                await concurrent_client.initialize()
                concurrent_clients.append(concurrent_client)
                logger.info(f"Client {i + 1} connected")

            logger.success("All concurrent clients connected")

            # Clean shutdown. Each client is removed from the list before it is
            # closed so the ``finally`` below only touches clients still open
            # and never shuts the same client down twice.
            for i in range(len(concurrent_clients)):
                closing = concurrent_clients.pop(0)
                await closing.shutdown()
                logger.info(f"Client {i + 1} disconnected")

            phases_completed = 5

        except Exception as e:
            logger.error(f"Concurrent test failed: {e}")
        finally:
            for c in concurrent_clients:
                try:
                    await c.shutdown()
                except Exception as e:
                    logger.error(f"Failed to close client: {e}")

    except Exception as e:
        logger.error(f"Tool discovery failed: {e}")
        logger.progress_bar(phases_completed, total_phases)
        return phases_completed
    finally:
        # Ensure client is closed even on exceptions
        if client:
            try:
                await client.shutdown()
            except Exception:
                logger.error("Failed to close client")

    logger.progress_bar(phases_completed, total_phases)
    return phases_completed