hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274) hide show
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +11 -5
  3. hud/agents/base.py +220 -500
  4. hud/agents/claude.py +200 -240
  5. hud/agents/gemini.py +275 -0
  6. hud/agents/gemini_cua.py +335 -0
  7. hud/agents/grounded_openai.py +98 -100
  8. hud/agents/misc/integration_test_agent.py +51 -20
  9. hud/agents/misc/response_agent.py +41 -36
  10. hud/agents/openai.py +291 -292
  11. hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
  12. hud/agents/operator.py +211 -0
  13. hud/agents/tests/conftest.py +133 -0
  14. hud/agents/tests/test_base.py +300 -622
  15. hud/agents/tests/test_base_runtime.py +233 -0
  16. hud/agents/tests/test_claude.py +379 -210
  17. hud/agents/tests/test_client.py +9 -10
  18. hud/agents/tests/test_gemini.py +369 -0
  19. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  20. hud/agents/tests/test_openai.py +376 -140
  21. hud/agents/tests/test_operator.py +362 -0
  22. hud/agents/tests/test_run_eval.py +179 -0
  23. hud/cli/__init__.py +461 -545
  24. hud/cli/analyze.py +43 -5
  25. hud/cli/build.py +664 -110
  26. hud/cli/debug.py +8 -5
  27. hud/cli/dev.py +882 -734
  28. hud/cli/eval.py +782 -668
  29. hud/cli/flows/dev.py +167 -0
  30. hud/cli/flows/init.py +191 -0
  31. hud/cli/flows/tasks.py +153 -56
  32. hud/cli/flows/templates.py +151 -0
  33. hud/cli/flows/tests/__init__.py +1 -0
  34. hud/cli/flows/tests/test_dev.py +126 -0
  35. hud/cli/init.py +60 -58
  36. hud/cli/push.py +29 -11
  37. hud/cli/rft.py +311 -0
  38. hud/cli/rft_status.py +145 -0
  39. hud/cli/tests/test_analyze.py +5 -5
  40. hud/cli/tests/test_analyze_metadata.py +3 -2
  41. hud/cli/tests/test_analyze_module.py +120 -0
  42. hud/cli/tests/test_build.py +108 -6
  43. hud/cli/tests/test_build_failure.py +41 -0
  44. hud/cli/tests/test_build_module.py +50 -0
  45. hud/cli/tests/test_cli_init.py +6 -1
  46. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  47. hud/cli/tests/test_cli_root.py +140 -0
  48. hud/cli/tests/test_convert.py +361 -0
  49. hud/cli/tests/test_debug.py +12 -10
  50. hud/cli/tests/test_dev.py +197 -0
  51. hud/cli/tests/test_eval.py +251 -0
  52. hud/cli/tests/test_eval_bedrock.py +51 -0
  53. hud/cli/tests/test_init.py +124 -0
  54. hud/cli/tests/test_main_module.py +11 -5
  55. hud/cli/tests/test_mcp_server.py +12 -100
  56. hud/cli/tests/test_push_happy.py +74 -0
  57. hud/cli/tests/test_push_wrapper.py +23 -0
  58. hud/cli/tests/test_registry.py +1 -1
  59. hud/cli/tests/test_utils.py +1 -1
  60. hud/cli/{rl → utils}/celebrate.py +14 -12
  61. hud/cli/utils/config.py +18 -1
  62. hud/cli/utils/docker.py +130 -4
  63. hud/cli/utils/env_check.py +9 -9
  64. hud/cli/utils/git.py +136 -0
  65. hud/cli/utils/interactive.py +39 -5
  66. hud/cli/utils/metadata.py +69 -0
  67. hud/cli/utils/runner.py +1 -1
  68. hud/cli/utils/server.py +2 -2
  69. hud/cli/utils/source_hash.py +3 -3
  70. hud/cli/utils/tasks.py +4 -1
  71. hud/cli/utils/tests/__init__.py +0 -0
  72. hud/cli/utils/tests/test_config.py +58 -0
  73. hud/cli/utils/tests/test_docker.py +93 -0
  74. hud/cli/utils/tests/test_docker_hints.py +71 -0
  75. hud/cli/utils/tests/test_env_check.py +74 -0
  76. hud/cli/utils/tests/test_environment.py +42 -0
  77. hud/cli/utils/tests/test_git.py +142 -0
  78. hud/cli/utils/tests/test_interactive_module.py +60 -0
  79. hud/cli/utils/tests/test_local_runner.py +50 -0
  80. hud/cli/utils/tests/test_logging_utils.py +23 -0
  81. hud/cli/utils/tests/test_metadata.py +49 -0
  82. hud/cli/utils/tests/test_package_runner.py +35 -0
  83. hud/cli/utils/tests/test_registry_utils.py +49 -0
  84. hud/cli/utils/tests/test_remote_runner.py +25 -0
  85. hud/cli/utils/tests/test_runner_modules.py +52 -0
  86. hud/cli/utils/tests/test_source_hash.py +36 -0
  87. hud/cli/utils/tests/test_tasks.py +80 -0
  88. hud/cli/utils/version_check.py +258 -0
  89. hud/cli/{rl → utils}/viewer.py +2 -2
  90. hud/clients/README.md +12 -11
  91. hud/clients/__init__.py +4 -3
  92. hud/clients/base.py +166 -26
  93. hud/clients/environment.py +51 -0
  94. hud/clients/fastmcp.py +13 -6
  95. hud/clients/mcp_use.py +40 -15
  96. hud/clients/tests/test_analyze_scenarios.py +206 -0
  97. hud/clients/tests/test_protocol.py +9 -3
  98. hud/datasets/__init__.py +23 -20
  99. hud/datasets/loader.py +327 -0
  100. hud/datasets/runner.py +192 -105
  101. hud/datasets/tests/__init__.py +0 -0
  102. hud/datasets/tests/test_loader.py +221 -0
  103. hud/datasets/tests/test_utils.py +315 -0
  104. hud/datasets/utils.py +270 -90
  105. hud/environment/__init__.py +50 -0
  106. hud/environment/connection.py +206 -0
  107. hud/environment/connectors/__init__.py +33 -0
  108. hud/environment/connectors/base.py +68 -0
  109. hud/environment/connectors/local.py +177 -0
  110. hud/environment/connectors/mcp_config.py +109 -0
  111. hud/environment/connectors/openai.py +101 -0
  112. hud/environment/connectors/remote.py +172 -0
  113. hud/environment/environment.py +694 -0
  114. hud/environment/integrations/__init__.py +45 -0
  115. hud/environment/integrations/adk.py +67 -0
  116. hud/environment/integrations/anthropic.py +196 -0
  117. hud/environment/integrations/gemini.py +92 -0
  118. hud/environment/integrations/langchain.py +82 -0
  119. hud/environment/integrations/llamaindex.py +68 -0
  120. hud/environment/integrations/openai.py +238 -0
  121. hud/environment/mock.py +306 -0
  122. hud/environment/router.py +112 -0
  123. hud/environment/scenarios.py +493 -0
  124. hud/environment/tests/__init__.py +1 -0
  125. hud/environment/tests/test_connection.py +317 -0
  126. hud/environment/tests/test_connectors.py +218 -0
  127. hud/environment/tests/test_environment.py +161 -0
  128. hud/environment/tests/test_integrations.py +257 -0
  129. hud/environment/tests/test_local_connectors.py +201 -0
  130. hud/environment/tests/test_scenarios.py +280 -0
  131. hud/environment/tests/test_tools.py +208 -0
  132. hud/environment/types.py +23 -0
  133. hud/environment/utils/__init__.py +35 -0
  134. hud/environment/utils/formats.py +215 -0
  135. hud/environment/utils/schema.py +171 -0
  136. hud/environment/utils/tool_wrappers.py +113 -0
  137. hud/eval/__init__.py +67 -0
  138. hud/eval/context.py +674 -0
  139. hud/eval/display.py +299 -0
  140. hud/eval/instrument.py +185 -0
  141. hud/eval/manager.py +466 -0
  142. hud/eval/parallel.py +268 -0
  143. hud/eval/task.py +340 -0
  144. hud/eval/tests/__init__.py +1 -0
  145. hud/eval/tests/test_context.py +178 -0
  146. hud/eval/tests/test_eval.py +210 -0
  147. hud/eval/tests/test_manager.py +152 -0
  148. hud/eval/tests/test_parallel.py +168 -0
  149. hud/eval/tests/test_task.py +145 -0
  150. hud/eval/types.py +63 -0
  151. hud/eval/utils.py +183 -0
  152. hud/patches/__init__.py +19 -0
  153. hud/patches/mcp_patches.py +151 -0
  154. hud/patches/warnings.py +54 -0
  155. hud/samples/browser.py +4 -4
  156. hud/server/__init__.py +2 -1
  157. hud/server/low_level.py +2 -1
  158. hud/server/router.py +164 -0
  159. hud/server/server.py +567 -80
  160. hud/server/tests/test_mcp_server_integration.py +11 -11
  161. hud/server/tests/test_mcp_server_more.py +1 -1
  162. hud/server/tests/test_server_extra.py +2 -0
  163. hud/settings.py +45 -3
  164. hud/shared/exceptions.py +36 -10
  165. hud/shared/hints.py +26 -1
  166. hud/shared/requests.py +15 -3
  167. hud/shared/tests/test_exceptions.py +40 -31
  168. hud/shared/tests/test_hints.py +167 -0
  169. hud/telemetry/__init__.py +20 -19
  170. hud/telemetry/exporter.py +201 -0
  171. hud/telemetry/instrument.py +158 -253
  172. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  173. hud/telemetry/tests/test_exporter.py +258 -0
  174. hud/telemetry/tests/test_instrument.py +401 -0
  175. hud/tools/__init__.py +16 -2
  176. hud/tools/apply_patch.py +639 -0
  177. hud/tools/base.py +54 -4
  178. hud/tools/bash.py +2 -2
  179. hud/tools/computer/__init__.py +4 -0
  180. hud/tools/computer/anthropic.py +2 -2
  181. hud/tools/computer/gemini.py +385 -0
  182. hud/tools/computer/hud.py +23 -6
  183. hud/tools/computer/openai.py +20 -21
  184. hud/tools/computer/qwen.py +434 -0
  185. hud/tools/computer/settings.py +37 -0
  186. hud/tools/edit.py +3 -7
  187. hud/tools/executors/base.py +4 -2
  188. hud/tools/executors/pyautogui.py +1 -1
  189. hud/tools/grounding/grounded_tool.py +13 -18
  190. hud/tools/grounding/grounder.py +10 -31
  191. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  192. hud/tools/jupyter.py +330 -0
  193. hud/tools/playwright.py +18 -3
  194. hud/tools/shell.py +308 -0
  195. hud/tools/tests/test_apply_patch.py +718 -0
  196. hud/tools/tests/test_computer.py +4 -9
  197. hud/tools/tests/test_computer_actions.py +24 -2
  198. hud/tools/tests/test_jupyter_tool.py +181 -0
  199. hud/tools/tests/test_shell.py +596 -0
  200. hud/tools/tests/test_submit.py +85 -0
  201. hud/tools/tests/test_types.py +193 -0
  202. hud/tools/types.py +21 -1
  203. hud/types.py +167 -57
  204. hud/utils/__init__.py +2 -0
  205. hud/utils/env.py +67 -0
  206. hud/utils/hud_console.py +61 -3
  207. hud/utils/mcp.py +15 -58
  208. hud/utils/strict_schema.py +162 -0
  209. hud/utils/tests/test_init.py +1 -2
  210. hud/utils/tests/test_mcp.py +1 -28
  211. hud/utils/tests/test_pretty_errors.py +186 -0
  212. hud/utils/tests/test_tool_shorthand.py +154 -0
  213. hud/utils/tests/test_version.py +1 -1
  214. hud/utils/types.py +20 -0
  215. hud/version.py +1 -1
  216. hud_python-0.5.1.dist-info/METADATA +264 -0
  217. hud_python-0.5.1.dist-info/RECORD +299 -0
  218. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
  219. hud/agents/langchain.py +0 -261
  220. hud/agents/lite_llm.py +0 -72
  221. hud/cli/rl/__init__.py +0 -180
  222. hud/cli/rl/config.py +0 -101
  223. hud/cli/rl/display.py +0 -133
  224. hud/cli/rl/gpu.py +0 -63
  225. hud/cli/rl/gpu_utils.py +0 -321
  226. hud/cli/rl/local_runner.py +0 -595
  227. hud/cli/rl/presets.py +0 -96
  228. hud/cli/rl/remote_runner.py +0 -463
  229. hud/cli/rl/rl_api.py +0 -150
  230. hud/cli/rl/vllm.py +0 -177
  231. hud/cli/rl/wait_utils.py +0 -89
  232. hud/datasets/parallel.py +0 -687
  233. hud/misc/__init__.py +0 -1
  234. hud/misc/claude_plays_pokemon.py +0 -292
  235. hud/otel/__init__.py +0 -35
  236. hud/otel/collector.py +0 -142
  237. hud/otel/config.py +0 -181
  238. hud/otel/context.py +0 -570
  239. hud/otel/exporters.py +0 -369
  240. hud/otel/instrumentation.py +0 -135
  241. hud/otel/processors.py +0 -121
  242. hud/otel/tests/__init__.py +0 -1
  243. hud/otel/tests/test_processors.py +0 -197
  244. hud/rl/README.md +0 -30
  245. hud/rl/__init__.py +0 -1
  246. hud/rl/actor.py +0 -176
  247. hud/rl/buffer.py +0 -405
  248. hud/rl/chat_template.jinja +0 -101
  249. hud/rl/config.py +0 -192
  250. hud/rl/distributed.py +0 -132
  251. hud/rl/learner.py +0 -637
  252. hud/rl/tests/__init__.py +0 -1
  253. hud/rl/tests/test_learner.py +0 -186
  254. hud/rl/train.py +0 -382
  255. hud/rl/types.py +0 -101
  256. hud/rl/utils/start_vllm_server.sh +0 -30
  257. hud/rl/utils.py +0 -524
  258. hud/rl/vllm_adapter.py +0 -143
  259. hud/telemetry/job.py +0 -352
  260. hud/telemetry/replay.py +0 -74
  261. hud/telemetry/tests/test_replay.py +0 -40
  262. hud/telemetry/tests/test_trace.py +0 -63
  263. hud/telemetry/trace.py +0 -158
  264. hud/utils/agent_factories.py +0 -86
  265. hud/utils/async_utils.py +0 -65
  266. hud/utils/group_eval.py +0 -223
  267. hud/utils/progress.py +0 -149
  268. hud/utils/tasks.py +0 -127
  269. hud/utils/tests/test_async_utils.py +0 -173
  270. hud/utils/tests/test_progress.py +0 -261
  271. hud_python-0.4.45.dist-info/METADATA +0 -552
  272. hud_python-0.4.45.dist-info/RECORD +0 -228
  273. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
  274. {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
hud/server/server.py CHANGED
@@ -13,9 +13,13 @@ from typing import TYPE_CHECKING, Any
13
13
 
14
14
  import anyio
15
15
  from fastmcp.server.server import FastMCP, Transport
16
+ from starlette.requests import Request
16
17
  from starlette.responses import JSONResponse, Response
17
18
 
19
+ from hud.datasets import run_dataset
20
+ from hud.eval.task import Task
18
21
  from hud.server.low_level import LowLevelServerWithInit
22
+ from hud.types import LegacyTask
19
23
 
20
24
  if TYPE_CHECKING:
21
25
  from collections.abc import AsyncGenerator, Callable
@@ -133,7 +137,9 @@ class MCPServer(FastMCP):
133
137
  FastMCP ``FunctionTool`` interface.
134
138
  """
135
139
 
136
- def __init__(self, *, name: str | None = None, **fastmcp_kwargs: Any) -> None:
140
+ def __init__(
141
+ self, name: str | None = None, instructions: str | None = None, **fastmcp_kwargs: Any
142
+ ) -> None:
137
143
  # Store shutdown function placeholder before super().__init__
138
144
  self._shutdown_fn: Callable | None = None
139
145
 
@@ -179,7 +185,7 @@ class MCPServer(FastMCP):
179
185
 
180
186
  fastmcp_kwargs["lifespan"] = _lifespan
181
187
 
182
- super().__init__(name=name, **fastmcp_kwargs)
188
+ super().__init__(name=name, instructions=instructions, **fastmcp_kwargs)
183
189
  self._initializer_fn: Callable | None = None
184
190
  self._did_init = False
185
191
  self._replaced_server = False
@@ -237,6 +243,7 @@ class MCPServer(FastMCP):
237
243
  old_notification_handlers = self._mcp_server.notification_handlers
238
244
 
239
245
  self._mcp_server = LowLevelServerWithInit(
246
+ self, # Pass FastMCP instance as required by parent class
240
247
  name=self.name,
241
248
  version=self.version,
242
249
  instructions=self.instructions,
@@ -309,11 +316,35 @@ class MCPServer(FastMCP):
309
316
  if transport is None:
310
317
  transport = "stdio"
311
318
 
312
- # Register HTTP helpers for HTTP transport
319
+ # Register HTTP helpers and CORS for HTTP transport
313
320
  if transport in ("http", "sse"):
314
321
  self._register_hud_helpers()
315
322
  logger.info("Registered HUD helper endpoints at /hud/*")
316
323
 
324
+ # Add CORS middleware if not already provided
325
+ from starlette.middleware import Middleware
326
+ from starlette.middleware.cors import CORSMiddleware
327
+
328
+ # Get or create middleware list
329
+ middleware = transport_kwargs.get("middleware", [])
330
+ if isinstance(middleware, list):
331
+ # Check if CORS is already configured
332
+ has_cors = any(
333
+ isinstance(m, Middleware) and m.cls == CORSMiddleware for m in middleware
334
+ )
335
+ if not has_cors:
336
+ # Add CORS with permissive defaults for dev
337
+ cors_middleware = Middleware(
338
+ CORSMiddleware,
339
+ allow_origins=["*"],
340
+ allow_methods=["GET", "POST", "DELETE", "OPTIONS"],
341
+ allow_headers=["*"],
342
+ expose_headers=["Mcp-Session-Id"],
343
+ )
344
+ middleware = [cors_middleware, *middleware]
345
+ transport_kwargs["middleware"] = middleware
346
+ logger.info("Added CORS middleware for browser compatibility")
347
+
317
348
  try:
318
349
  await super().run_async(
319
350
  transport=transport, show_banner=show_banner, **transport_kwargs
@@ -382,90 +413,546 @@ class MCPServer(FastMCP):
382
413
 
383
414
  return _wrapper
384
415
 
416
+ def include_router(
417
+ self,
418
+ router: FastMCP,
419
+ prefix: str | None = None,
420
+ hidden: bool = False,
421
+ **kwargs: Any,
422
+ ) -> None:
423
+ """Include a router's tools/resources with optional hidden dispatcher pattern.
424
+
425
+ Uses import_server for fast static composition (unlike mount which is slower).
426
+
427
+ Args:
428
+ router: FastMCP router to include
429
+ prefix: Optional prefix for tools/resources (ignored if hidden=True)
430
+ hidden: If True, wrap in HiddenRouter (single dispatcher tool that calls sub-tools)
431
+ **kwargs: Additional arguments passed to import_server()
432
+
433
+ Examples:
434
+ # Direct include - tools appear at top level
435
+ mcp.include_router(tools_router)
436
+
437
+ # Prefixed include - tools get prefix
438
+ mcp.include_router(admin_router, prefix="admin")
439
+
440
+ # Hidden include - single dispatcher tool
441
+ mcp.include_router(setup_router, hidden=True)
442
+ """
443
+ if not hidden:
444
+ # Synchronous composition - directly copy tools/resources
445
+ self._sync_import_router(router, hidden=False, prefix=prefix, **kwargs)
446
+ return
447
+
448
+ # Hidden pattern: wrap in HiddenRouter before importing
449
+ from .router import HiddenRouter
450
+
451
+ # Import the hidden router (synchronous)
452
+ self._sync_import_router(HiddenRouter(router), hidden=True, prefix=prefix, **kwargs)
453
+
454
+ def _sync_import_router(
455
+ self,
456
+ router: FastMCP,
457
+ hidden: bool = False,
458
+ prefix: str | None = None,
459
+ **kwargs: Any,
460
+ ) -> None:
461
+ """Synchronously import tools/resources from a router.
462
+
463
+ This is a synchronous alternative to import_server for use at module import time.
464
+ """
465
+ import re
466
+
467
+ # Import tools directly - use internal dict to preserve keys
468
+ tools = (
469
+ router._tool_manager._tools.items() if not hidden else router._sync_list_tools().items() # type: ignore
470
+ )
471
+ for key, tool in tools:
472
+ # Validate tool name
473
+ if not re.match(r"^[a-zA-Z0-9_-]{1,128}$", key):
474
+ raise ValueError(
475
+ f"Tool name '{key}' must match ^[a-zA-Z0-9_-]{{1,128}}$ "
476
+ "(letters, numbers, underscore, hyphen only, 1-128 chars)"
477
+ )
478
+
479
+ new_key = f"{prefix}_{key}" if prefix else key
480
+ self._tool_manager._tools[new_key] = tool
481
+
482
+ # Import resources directly
483
+ for key, resource in router._resource_manager._resources.items():
484
+ new_key = f"{prefix}_{key}" if prefix else key
485
+ self._resource_manager._resources[new_key] = resource
486
+
487
+ # Import prompts directly
488
+ for key, prompt in router._prompt_manager._prompts.items():
489
+ new_key = f"{prefix}_{key}" if prefix else key
490
+ self._prompt_manager._prompts[new_key] = prompt
491
+
492
+ def _get_docker_logs(
493
+ self,
494
+ tail: int = 100,
495
+ since: str | None = None,
496
+ until: str | None = None,
497
+ timestamps: bool = False,
498
+ ) -> dict[str, Any]:
499
+ """Helper function to get Docker container logs.
500
+
501
+ Args:
502
+ tail: Number of lines to show from the end of the logs
503
+ since: Show logs since timestamp or relative time
504
+ until: Show logs before a timestamp or relative time
505
+ timestamps: Show timestamps in log output
506
+
507
+ Returns:
508
+ Dictionary with logs data or error information
509
+ """
510
+ import subprocess
511
+
512
+ container_name = os.environ.get("_HUD_DEV_DOCKER_CONTAINER")
513
+ if not container_name:
514
+ return {"items": [], "container_name": None, "error": "No container name found"}
515
+
516
+ # Build docker logs command
517
+ cmd = ["docker", "logs", "--tail", str(tail)]
518
+
519
+ if since:
520
+ cmd.extend(["--since", since])
521
+ if until:
522
+ cmd.extend(["--until", until])
523
+ if timestamps:
524
+ cmd.append("--timestamps")
525
+
526
+ cmd.append(container_name)
527
+
528
+ try:
529
+ # Run docker logs to get output
530
+ result = subprocess.run( # noqa: S603
531
+ cmd,
532
+ stdout=subprocess.PIPE,
533
+ stderr=subprocess.STDOUT,
534
+ text=True,
535
+ encoding="utf-8",
536
+ errors="replace",
537
+ timeout=5,
538
+ )
539
+
540
+ # Parse logs into items
541
+ items = []
542
+ lines = result.stdout.strip().split("\n") if result.stdout else []
543
+
544
+ for i, line in enumerate(lines):
545
+ if line.strip():
546
+ items.append(
547
+ {
548
+ "id": i,
549
+ "stream": "mixed",
550
+ "log": line,
551
+ "container_name": container_name,
552
+ }
553
+ )
554
+
555
+ return {
556
+ "items": items,
557
+ "container_name": container_name,
558
+ "total_lines": len(items),
559
+ }
560
+
561
+ except subprocess.TimeoutExpired:
562
+ return {"error": "Docker logs timeout", "container_name": container_name, "items": []}
563
+ except Exception as e:
564
+ return {
565
+ "error": f"Failed to get logs: {e!s}",
566
+ "container_name": container_name,
567
+ "items": [],
568
+ }
569
+
385
570
  def _register_hud_helpers(self) -> None:
386
- """Register HUD helper HTTP routes.
571
+ """Register development helper endpoints.
387
572
 
388
573
  This adds:
389
- - GET /hud - Overview of available endpoints
390
- - GET /hud/tools - List all registered tools with their schemas
391
- - GET /hud/resources - List all registered resources
392
- - GET /hud/prompts - List all registered prompts
574
+ - GET /docs - Interactive documentation and tool testing
575
+ - POST /api/tools/{name} - REST wrappers for MCP tools
576
+ - GET /openapi.json - OpenAPI spec for REST endpoints
577
+ - GET /logs - Development log endpoint (when provided by dev runtime)
578
+ - hud-logs tool - MCP tool for fetching logs (when in Docker mode)
393
579
  """
394
580
 
395
- @self.custom_route("/hud/tools", methods=["GET"])
396
- async def list_tools(request: Request) -> Response:
397
- """List all registered tools with their names, descriptions, and schemas."""
398
- tools = []
399
- # _tools is a mapping of tool_name -> FunctionTool/Tool instance
581
+ # Register REST wrapper for each tool
582
+ def create_tool_endpoint(key: str) -> Any:
583
+ """Create a REST endpoint for an MCP tool."""
584
+
585
+ async def tool_endpoint(request: Request) -> Response:
586
+ """Call MCP tool via REST endpoint."""
587
+ try:
588
+ data = await request.json()
589
+ except Exception:
590
+ data = {}
591
+
592
+ try:
593
+ result = await self._tool_manager.call_tool(key, data)
594
+
595
+ # Recursively serialize MCP objects
596
+ def serialize_obj(obj: Any) -> Any:
597
+ """Recursively serialize MCP objects to JSON-compatible format."""
598
+ if obj is None or isinstance(obj, str | int | float | bool):
599
+ return obj
600
+ if isinstance(obj, list | tuple):
601
+ return [serialize_obj(item) for item in obj]
602
+ if isinstance(obj, dict):
603
+ return {k: serialize_obj(v) for k, v in obj.items()}
604
+ if hasattr(obj, "model_dump"):
605
+ # Pydantic v2
606
+ return serialize_obj(obj.model_dump())
607
+ if hasattr(obj, "dict"):
608
+ # Pydantic v1
609
+ return serialize_obj(obj.dict())
610
+ if hasattr(obj, "__dict__"):
611
+ # Dataclass or regular class
612
+ return serialize_obj(obj.__dict__)
613
+ # Fallback: convert to string
614
+ return str(obj)
615
+
616
+ serialized = serialize_obj(result)
617
+ # Return the serialized CallToolResult directly (no wrapper)
618
+ return JSONResponse(serialized)
619
+ except Exception as e:
620
+ # Return a simple error object
621
+ return JSONResponse({"error": str(e)}, status_code=400)
622
+
623
+ return tool_endpoint
624
+
625
+ for tool_key in self._tool_manager._tools.keys(): # noqa: SIM118
626
+ endpoint = create_tool_endpoint(tool_key)
627
+ self.custom_route(f"/api/tools/{tool_key}", methods=["POST"])(endpoint)
628
+
629
+ # Development endpoints - only if dev runtime set a provider
630
+ provider = os.environ.get("_HUD_DEV_LOGS_PROVIDER")
631
+ if provider == "enabled":
632
+
633
+ @self.custom_route("/logs", methods=["GET"])
634
+ async def get_logs(request: Request) -> Response:
635
+ """Return Docker container logs on demand.
636
+
637
+ Query params:
638
+ - limit: max number of lines to return (default 100)
639
+ - tail: number of lines from end to return (default 100)
640
+ """
641
+ # Get query params
642
+ params = request.query_params
643
+ tail = int(params.get("tail", "100"))
644
+
645
+ # Use helper function to get logs
646
+ result = self._get_docker_logs(tail=tail)
647
+
648
+ # Add 'next' field for compatibility with existing API
649
+ if "error" in result:
650
+ return JSONResponse(result, status_code=500)
651
+ else:
652
+ items = result.get("items", [])
653
+ return JSONResponse(
654
+ {
655
+ "items": items,
656
+ "next": len(items) - 1 if items else None,
657
+ }
658
+ )
659
+
660
+ # Import existing types from the codebase
661
+ from pydantic import BaseModel
662
+
663
+ from hud.types import AgentType
664
+
665
+ class EvalRequest(BaseModel):
666
+ """Request model for /eval endpoint."""
667
+
668
+ tasks: list[dict[str, Any]] = []
669
+ agent: str = "claude"
670
+ model: str | None = None
671
+ max_steps: int = 10
672
+ verbose: bool = False
673
+ group_size: int = 1
674
+ name: str | None = None
675
+
676
+ @self.custom_route("/eval", methods=["POST"])
677
+ async def run_eval(request: Request) -> Response:
678
+ """Run evaluation on tasks using the current Docker environment."""
679
+ import asyncio
680
+ import json
681
+
682
+ try:
683
+ body = await request.body()
684
+ data = json.loads(body)
685
+
686
+ # Validate request using Pydantic model
687
+ try:
688
+ eval_request = EvalRequest(**data)
689
+ except Exception as e:
690
+ return JSONResponse({"error": f"Invalid request: {e!s}"}, status_code=400)
691
+
692
+ # Get the Docker MCP config from environment
693
+ docker_mcp_config = os.environ.get("_HUD_DEV_DOCKER_MCP_CONFIG")
694
+ if not docker_mcp_config:
695
+ return JSONResponse(
696
+ {"error": "Docker MCP config not available"}, status_code=500
697
+ )
698
+
699
+ docker_config = json.loads(docker_mcp_config)
700
+
701
+ # Simplify Docker config for evaluation
702
+ if "docker" in docker_config and "args" in docker_config["docker"]:
703
+ original_args = docker_config["docker"]["args"]
704
+ filtered_args = []
705
+ i = 0
706
+
707
+ while i < len(original_args):
708
+ arg = original_args[i]
709
+
710
+ # Skip volume mounts and their values
711
+ if arg in ["-v", "--volume"]:
712
+ i += 2 # Skip the flag and its value
713
+ continue
714
+
715
+ # Skip combined volume mount args
716
+ if arg.startswith(("-v", "--volume=")):
717
+ i += 1
718
+ continue
719
+
720
+ # Skip explicit container name to avoid collisions
721
+ if arg == "--name" and i + 1 < len(original_args):
722
+ i += 2 # Skip the --name and its value
723
+ continue
724
+
725
+ # Skip dev-specific environment variables
726
+ if arg == "-e" and i + 1 < len(original_args):
727
+ next_arg = original_args[i + 1]
728
+ if next_arg in [
729
+ "PYTHONPATH=/app",
730
+ "HUD_DEV=1",
731
+ "PYTHONUNBUFFERED=1",
732
+ ]:
733
+ i += 2 # Skip the -e and its value
734
+ continue
735
+
736
+ filtered_args.append(arg)
737
+ i += 1
738
+
739
+ # Update the docker args with filtered version
740
+ docker_config["docker"]["args"] = filtered_args
741
+
742
+ try:
743
+ agent_type = AgentType(eval_request.agent.lower())
744
+ except ValueError:
745
+ valid_agents = [
746
+ a.value for a in AgentType if a != AgentType.INTEGRATION_TEST
747
+ ]
748
+ return JSONResponse(
749
+ {
750
+ "error": f"Invalid agent type: {eval_request.agent}",
751
+ "valid_agents": valid_agents,
752
+ },
753
+ status_code=400,
754
+ )
755
+
756
+ # Add MCP config to each task and validate basic structure
757
+ task_objects: list[LegacyTask] = []
758
+ for task_data in eval_request.tasks:
759
+ task_data["mcp_config"] = docker_config
760
+ task_objects.append(LegacyTask.model_validate(task_data))
761
+
762
+ agent_params: dict[str, Any] = {}
763
+ if eval_request.model:
764
+ agent_params["checkpoint_name"] = eval_request.model
765
+
766
+ # Fire and forget - launch evaluation in background
767
+ async def run_eval_background() -> None:
768
+ await run_dataset(
769
+ [Task.from_v4(task) for task in task_objects],
770
+ agent_type=agent_type,
771
+ agent_params=agent_params,
772
+ max_steps=eval_request.max_steps,
773
+ group_size=eval_request.group_size,
774
+ )
775
+
776
+ # Start the evaluation in the background (fire and forget)
777
+ asyncio.create_task(run_eval_background()) # noqa: RUF006
778
+
779
+ # Return immediately
780
+ response_data = {
781
+ "status": "started",
782
+ "message": f"Evaluation launched with {len(task_objects)} task(s)",
783
+ "agent": eval_request.agent,
784
+ "model": eval_request.model,
785
+ "max_steps": eval_request.max_steps,
786
+ "verbose": eval_request.verbose,
787
+ }
788
+
789
+ # Include group_size if > 1
790
+ if eval_request.group_size > 1:
791
+ response_data["group_size"] = eval_request.group_size
792
+ response_data["total_episodes"] = (
793
+ len(task_objects) * eval_request.group_size
794
+ )
795
+
796
+ return JSONResponse(response_data)
797
+
798
+ except json.JSONDecodeError:
799
+ return JSONResponse({"error": "Invalid JSON in request body"}, status_code=400)
800
+ except Exception as e:
801
+ return JSONResponse(
802
+ {"error": f"Failed to run evaluation: {e!s}"}, status_code=500
803
+ )
804
+
805
+ @self.custom_route("/openapi.json", methods=["GET"])
806
+ async def openapi_spec(request: Request) -> Response:
807
+ """Generate OpenAPI spec from MCP tools."""
808
+ spec = {
809
+ "openapi": "3.1.0",
810
+ "info": {
811
+ "title": f"{self.name or 'MCP Server'} - Testing API",
812
+ "version": "1.0.0",
813
+ "description": (
814
+ "REST API wrappers for testing MCP tools. "
815
+ "These endpoints are for development/testing only. "
816
+ "Agents should connect via MCP protocol (JSON-RPC over stdio/HTTP)."
817
+ ),
818
+ },
819
+ "paths": {},
820
+ }
821
+
822
+ # Convert each MCP tool to an OpenAPI path
400
823
  for tool_key, tool in self._tool_manager._tools.items():
401
- tool_data = {"name": tool_key}
402
824
  try:
403
- # Prefer converting to MCP model for consistent fields
404
825
  mcp_tool = tool.to_mcp_tool()
405
- tool_data["description"] = getattr(mcp_tool, "description", "")
406
- if hasattr(mcp_tool, "inputSchema") and mcp_tool.inputSchema:
407
- tool_data["input_schema"] = mcp_tool.inputSchema # type: ignore[assignment]
408
- if hasattr(mcp_tool, "outputSchema") and mcp_tool.outputSchema:
409
- tool_data["output_schema"] = mcp_tool.outputSchema # type: ignore[assignment]
410
- except Exception:
411
- # Fallback to direct attributes on FunctionTool
412
- tool_data["description"] = getattr(tool, "description", "")
413
- params = getattr(tool, "parameters", None)
414
- if params:
415
- tool_data["input_schema"] = params
416
- tools.append(tool_data)
417
-
418
- return JSONResponse({"server": self.name, "tools": tools, "count": len(tools)})
419
-
420
- @self.custom_route("/hud/resources", methods=["GET"])
421
- async def list_resources(request: Request) -> Response:
422
- """List all registered resources."""
423
- resources = []
424
- for resource_key, resource in self._resource_manager._resources.items():
425
- resource_data = {
426
- "uri": resource_key,
427
- "name": resource.name,
428
- "description": resource.description,
429
- "mimeType": resource.mime_type,
430
- }
431
- resources.append(resource_data)
432
-
433
- return JSONResponse(
434
- {"server": self.name, "resources": resources, "count": len(resources)}
435
- )
826
+ input_schema = mcp_tool.inputSchema or {"type": "object"}
827
+
828
+ spec["paths"][f"/api/tools/{tool_key}"] = {
829
+ "post": {
830
+ "summary": tool_key,
831
+ "description": mcp_tool.description or "",
832
+ "operationId": f"call_{tool_key}",
833
+ "requestBody": {
834
+ "required": True,
835
+ "content": {"application/json": {"schema": input_schema}},
836
+ },
837
+ "responses": {
838
+ "200": {
839
+ "description": "Success",
840
+ "content": {
841
+ "application/json": {
842
+ "schema": {
843
+ "type": "object",
844
+ "properties": {
845
+ "success": {"type": "boolean"},
846
+ "result": {"type": "object"},
847
+ },
848
+ }
849
+ }
850
+ },
851
+ }
852
+ },
853
+ }
854
+ }
855
+ except Exception as e:
856
+ logger.warning("Failed to generate spec for %s: %s", tool_key, e)
857
+
858
+ return JSONResponse(spec)
859
+
860
+ # Register hud-logs tool when in Docker dev mode
861
+ container_name = os.environ.get("_HUD_DEV_DOCKER_CONTAINER")
862
+ if container_name:
863
+
864
+ @self.tool("hud-logs")
865
+ async def get_docker_logs(
866
+ tail: int = 100,
867
+ since: str | None = None,
868
+ until: str | None = None,
869
+ timestamps: bool = False,
870
+ ) -> dict[str, Any]:
871
+ """Get logs from the Docker container running the HUD environment.
872
+
873
+ Args:
874
+ tail: Number of lines to show from the end of the logs (default: 100)
875
+ since: Show logs since timestamp (e.g. 2013-01-02T13:23:37Z) or relative (42m)
876
+ until: Show logs before timestamp (e.g. 2013-01-02T13:23:37Z) or relative (42m)
877
+ timestamps: Show timestamps in log output
878
+
879
+ Returns:
880
+ Dictionary with:
881
+ - items: List of log entries
882
+ - container_name: Name of the container
883
+ - total_lines: Total number of log lines returned
884
+ - error: Error message if logs could not be retrieved
885
+ """
886
+ # Use helper function to get logs
887
+ return self._get_docker_logs(
888
+ tail=tail,
889
+ since=since,
890
+ until=until,
891
+ timestamps=timestamps,
892
+ )
893
+
894
+ @self.custom_route("/docs", methods=["GET"])
895
+ async def docs_page(request: Request) -> Response:
896
+ """Interactive documentation page."""
897
+ import base64
898
+ import json
436
899
 
437
- @self.custom_route("/hud/prompts", methods=["GET"])
438
- async def list_prompts(request: Request) -> Response:
439
- """List all registered prompts."""
440
- prompts = []
441
- for prompt_key, prompt in self._prompt_manager._prompts.items():
442
- prompt_data = {
443
- "name": prompt_key,
444
- "description": prompt.description,
445
- }
446
- # Check if it has arguments
447
- if hasattr(prompt, "arguments") and prompt.arguments:
448
- prompt_data["arguments"] = [
449
- {"name": arg.name, "description": arg.description, "required": arg.required}
450
- for arg in prompt.arguments
451
- ]
452
- prompts.append(prompt_data)
453
-
454
- return JSONResponse({"server": self.name, "prompts": prompts, "count": len(prompts)})
455
-
456
- @self.custom_route("/hud", methods=["GET"])
457
- async def hud_info(request: Request) -> Response:
458
- """Show available HUD helper endpoints."""
459
900
  base_url = str(request.base_url).rstrip("/")
460
- return JSONResponse(
461
- {
462
- "name": "HUD MCP Development Helpers",
463
- "server": self.name,
464
- "endpoints": {
465
- "tools": f"{base_url}/hud/tools",
466
- "resources": f"{base_url}/hud/resources",
467
- "prompts": f"{base_url}/hud/prompts",
468
- },
469
- "description": "These endpoints help you inspect your MCP server during development.", # noqa: E501
470
- }
471
- )
901
+ tool_count = len(self._tool_manager._tools)
902
+ resource_count = len(self._resource_manager._resources)
903
+
904
+ # Generate Cursor deeplink
905
+ server_config = {"url": f"{base_url}/mcp"}
906
+ config_json = json.dumps(server_config, indent=2)
907
+ config_base64 = base64.b64encode(config_json.encode()).decode()
908
+ cursor_deeplink = f"cursor://anysphere.cursor-deeplink/mcp/install?name={self.name or 'mcp-server'}&config={config_base64}" # noqa: E501
909
+
910
+ html = f"""
911
+ <!DOCTYPE html>
912
+ <html lang="en">
913
+ <head>
914
+ <meta charset="UTF-8">
915
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
916
+ <title>{self.name or "MCP Server"} - Documentation</title>
917
+ <link rel="stylesheet" href="https://unpkg.com/swagger-ui-dist@5/swagger-ui.css">
918
+ <style>
919
+ body {{ margin: 0; padding: 0; font-family: monospace; }}
920
+ .header {{ padding: 1.5rem; border-bottom: 1px solid #e0e0e0; background: #fafafa; }}
921
+ .header h1 {{ margin: 0 0 0.5rem 0; font-size: 1.5rem; color: #000; }}
922
+ .header .info {{ margin: 0.25rem 0; color: #666; font-size: 0.9rem; }}
923
+ .header .warning {{ margin: 0.75rem 0 0 0; padding: 0.5rem; background: #fff3cd; border-left: 3px solid #ffc107; color: #856404; font-size: 0.85rem; }}
924
+ .header a {{ color: #000; text-decoration: underline; }}
925
+ .header a:hover {{ color: #666; }}
926
+ .topbar {{ display: none; }}
927
+ </style>
928
+ </head>
929
+ <body>
930
+ <div class="header">
931
+ <h1>{self.name or "MCP Server"} - Development Tools</h1>
932
+ <div class="info">MCP Endpoint (use this with agents): <a href="{base_url}/mcp">{base_url}/mcp</a></div>
933
+ <div class="info">Tools: {tool_count} | Resources: {resource_count}</div>
934
+ <div class="info">Add to Cursor: <a href="{cursor_deeplink}">Click here to install</a></div>
935
+ <div class="warning">
936
+ ⚠️ The REST API below is for testing only. Agents connect via MCP protocol at <code>{base_url}/mcp</code>
937
+ </div>
938
+ </div>
939
+
940
+ <div id="swagger-ui"></div>
941
+ <script src="https://unpkg.com/swagger-ui-dist@5/swagger-ui-bundle.js"></script>
942
+ <script src="https://unpkg.com/swagger-ui-dist@5/swagger-ui-standalone-preset.js"></script>
943
+ <script>
944
+ window.onload = function() {{
945
+ SwaggerUIBundle({{
946
+ url: '/openapi.json',
947
+ dom_id: '#swagger-ui',
948
+ deepLinking: true,
949
+ presets: [SwaggerUIBundle.presets.apis, SwaggerUIStandalonePreset],
950
+ layout: "StandaloneLayout",
951
+ tryItOutEnabled: true
952
+ }})
953
+ }}
954
+ </script>
955
+ </body>
956
+ </html>
957
+ """ # noqa: E501
958
+ return Response(content=html, media_type="text/html")