hud-python 0.4.47__tar.gz → 0.4.48__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (249)
  1. {hud_python-0.4.47 → hud_python-0.4.48}/PKG-INFO +1 -1
  2. {hud_python-0.4.47 → hud_python-0.4.48}/hud/agents/base.py +49 -142
  3. {hud_python-0.4.47 → hud_python-0.4.48}/hud/agents/claude.py +5 -6
  4. {hud_python-0.4.47 → hud_python-0.4.48}/hud/agents/misc/integration_test_agent.py +2 -0
  5. {hud_python-0.4.47 → hud_python-0.4.48}/hud/agents/tests/test_base.py +2 -5
  6. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/__init__.py +2 -2
  7. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/eval.py +14 -9
  8. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/flows/tasks.py +2 -4
  9. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/rl/local_runner.py +25 -13
  10. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/rl/vllm.py +2 -0
  11. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/tests/test_analyze_metadata.py +3 -2
  12. hud_python-0.4.48/hud/cli/tests/test_eval.py +525 -0
  13. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/tests/test_utils.py +1 -1
  14. {hud_python-0.4.47 → hud_python-0.4.48}/hud/datasets/parallel.py +0 -12
  15. {hud_python-0.4.47 → hud_python-0.4.48}/hud/datasets/runner.py +1 -4
  16. {hud_python-0.4.47 → hud_python-0.4.48}/hud/rl/actor.py +4 -2
  17. {hud_python-0.4.47 → hud_python-0.4.48}/hud/rl/distributed.py +1 -1
  18. {hud_python-0.4.47 → hud_python-0.4.48}/hud/rl/learner.py +2 -1
  19. {hud_python-0.4.47 → hud_python-0.4.48}/hud/rl/train.py +1 -1
  20. {hud_python-0.4.47 → hud_python-0.4.48}/hud/telemetry/trace.py +1 -1
  21. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/base.py +11 -9
  22. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/computer/__init__.py +2 -0
  23. hud_python-0.4.48/hud/tools/computer/qwen.py +431 -0
  24. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/computer/settings.py +16 -0
  25. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/executors/pyautogui.py +1 -1
  26. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/playwright.py +1 -1
  27. {hud_python-0.4.47 → hud_python-0.4.48}/hud/types.py +2 -3
  28. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/tests/test_version.py +1 -1
  29. {hud_python-0.4.47 → hud_python-0.4.48}/hud/version.py +1 -1
  30. {hud_python-0.4.47 → hud_python-0.4.48}/pyproject.toml +1 -1
  31. {hud_python-0.4.47 → hud_python-0.4.48}/.gitignore +0 -0
  32. {hud_python-0.4.47 → hud_python-0.4.48}/LICENSE +0 -0
  33. {hud_python-0.4.47 → hud_python-0.4.48}/README.md +0 -0
  34. {hud_python-0.4.47 → hud_python-0.4.48}/environments/README.md +0 -0
  35. {hud_python-0.4.47 → hud_python-0.4.48}/environments/blank/README.md +0 -0
  36. {hud_python-0.4.47 → hud_python-0.4.48}/environments/blank/controller/README.md +0 -0
  37. {hud_python-0.4.47 → hud_python-0.4.48}/environments/blank/environment/README.md +0 -0
  38. {hud_python-0.4.47 → hud_python-0.4.48}/environments/blank/pyproject.toml +0 -0
  39. {hud_python-0.4.47 → hud_python-0.4.48}/environments/browser/README.md +0 -0
  40. {hud_python-0.4.47 → hud_python-0.4.48}/environments/browser/environment/2048/README.md +0 -0
  41. {hud_python-0.4.47 → hud_python-0.4.48}/environments/browser/environment/2048/backend/pyproject.toml +0 -0
  42. {hud_python-0.4.47 → hud_python-0.4.48}/environments/browser/environment/README.md +0 -0
  43. {hud_python-0.4.47 → hud_python-0.4.48}/environments/browser/environment/todo/README.md +0 -0
  44. {hud_python-0.4.47 → hud_python-0.4.48}/environments/browser/environment/todo/backend/pyproject.toml +0 -0
  45. {hud_python-0.4.47 → hud_python-0.4.48}/environments/browser/pyproject.toml +0 -0
  46. {hud_python-0.4.47 → hud_python-0.4.48}/environments/deepresearch/pyproject.toml +0 -0
  47. {hud_python-0.4.47 → hud_python-0.4.48}/environments/remote_browser/README.md +0 -0
  48. {hud_python-0.4.47 → hud_python-0.4.48}/environments/remote_browser/pyproject.toml +0 -0
  49. {hud_python-0.4.47 → hud_python-0.4.48}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
  50. {hud_python-0.4.47 → hud_python-0.4.48}/environments/text_2048/README.md +0 -0
  51. {hud_python-0.4.47 → hud_python-0.4.48}/environments/text_2048/pyproject.toml +0 -0
  52. {hud_python-0.4.47 → hud_python-0.4.48}/examples/README.md +0 -0
  53. {hud_python-0.4.47 → hud_python-0.4.48}/hud/__init__.py +0 -0
  54. {hud_python-0.4.47 → hud_python-0.4.48}/hud/__main__.py +0 -0
  55. {hud_python-0.4.47 → hud_python-0.4.48}/hud/agents/__init__.py +0 -0
  56. {hud_python-0.4.47 → hud_python-0.4.48}/hud/agents/grounded_openai.py +0 -0
  57. {hud_python-0.4.47 → hud_python-0.4.48}/hud/agents/langchain.py +0 -0
  58. {hud_python-0.4.47 → hud_python-0.4.48}/hud/agents/lite_llm.py +0 -0
  59. {hud_python-0.4.47 → hud_python-0.4.48}/hud/agents/misc/__init__.py +0 -0
  60. {hud_python-0.4.47 → hud_python-0.4.48}/hud/agents/misc/response_agent.py +0 -0
  61. {hud_python-0.4.47 → hud_python-0.4.48}/hud/agents/openai.py +0 -0
  62. {hud_python-0.4.47 → hud_python-0.4.48}/hud/agents/openai_chat_generic.py +0 -0
  63. {hud_python-0.4.47 → hud_python-0.4.48}/hud/agents/tests/__init__.py +0 -0
  64. {hud_python-0.4.47 → hud_python-0.4.48}/hud/agents/tests/test_claude.py +0 -0
  65. {hud_python-0.4.47 → hud_python-0.4.48}/hud/agents/tests/test_client.py +0 -0
  66. {hud_python-0.4.47 → hud_python-0.4.48}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
  67. {hud_python-0.4.47 → hud_python-0.4.48}/hud/agents/tests/test_openai.py +0 -0
  68. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/__main__.py +0 -0
  69. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/analyze.py +0 -0
  70. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/build.py +0 -0
  71. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/clone.py +0 -0
  72. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/debug.py +0 -0
  73. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/dev.py +0 -0
  74. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/flows/__init__.py +0 -0
  75. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/get.py +0 -0
  76. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/init.py +0 -0
  77. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/list_func.py +0 -0
  78. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/pull.py +0 -0
  79. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/push.py +0 -0
  80. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/remove.py +0 -0
  81. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/rl/__init__.py +0 -0
  82. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/rl/celebrate.py +0 -0
  83. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/rl/config.py +0 -0
  84. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/rl/display.py +0 -0
  85. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/rl/gpu.py +0 -0
  86. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/rl/gpu_utils.py +0 -0
  87. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/rl/presets.py +0 -0
  88. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/rl/remote_runner.py +0 -0
  89. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/rl/rl_api.py +0 -0
  90. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/rl/viewer.py +0 -0
  91. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/rl/wait_utils.py +0 -0
  92. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/tests/__init__.py +0 -0
  93. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/tests/test_analyze.py +0 -0
  94. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/tests/test_build.py +0 -0
  95. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/tests/test_cli_init.py +0 -0
  96. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/tests/test_cli_main.py +0 -0
  97. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/tests/test_clone.py +0 -0
  98. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/tests/test_cursor.py +0 -0
  99. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/tests/test_debug.py +0 -0
  100. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/tests/test_list_func.py +0 -0
  101. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/tests/test_main_module.py +0 -0
  102. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/tests/test_mcp_server.py +0 -0
  103. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/tests/test_pull.py +0 -0
  104. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/tests/test_push.py +0 -0
  105. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/tests/test_registry.py +0 -0
  106. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/utils/__init__.py +0 -0
  107. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/utils/config.py +0 -0
  108. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/utils/cursor.py +0 -0
  109. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/utils/docker.py +0 -0
  110. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/utils/env_check.py +0 -0
  111. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/utils/environment.py +0 -0
  112. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/utils/interactive.py +0 -0
  113. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/utils/local_runner.py +0 -0
  114. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/utils/logging.py +0 -0
  115. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/utils/metadata.py +0 -0
  116. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/utils/package_runner.py +0 -0
  117. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/utils/registry.py +0 -0
  118. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/utils/remote_runner.py +0 -0
  119. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/utils/runner.py +0 -0
  120. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/utils/server.py +0 -0
  121. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/utils/source_hash.py +0 -0
  122. {hud_python-0.4.47 → hud_python-0.4.48}/hud/cli/utils/tasks.py +0 -0
  123. {hud_python-0.4.47 → hud_python-0.4.48}/hud/clients/README.md +0 -0
  124. {hud_python-0.4.47 → hud_python-0.4.48}/hud/clients/__init__.py +0 -0
  125. {hud_python-0.4.47 → hud_python-0.4.48}/hud/clients/base.py +0 -0
  126. {hud_python-0.4.47 → hud_python-0.4.48}/hud/clients/fastmcp.py +0 -0
  127. {hud_python-0.4.47 → hud_python-0.4.48}/hud/clients/mcp_use.py +0 -0
  128. {hud_python-0.4.47 → hud_python-0.4.48}/hud/clients/tests/__init__.py +0 -0
  129. {hud_python-0.4.47 → hud_python-0.4.48}/hud/clients/tests/test_client_integration.py +0 -0
  130. {hud_python-0.4.47 → hud_python-0.4.48}/hud/clients/tests/test_fastmcp.py +0 -0
  131. {hud_python-0.4.47 → hud_python-0.4.48}/hud/clients/tests/test_mcp_use_retry.py +0 -0
  132. {hud_python-0.4.47 → hud_python-0.4.48}/hud/clients/tests/test_protocol.py +0 -0
  133. {hud_python-0.4.47 → hud_python-0.4.48}/hud/clients/utils/__init__.py +0 -0
  134. {hud_python-0.4.47 → hud_python-0.4.48}/hud/clients/utils/mcp_use_retry.py +0 -0
  135. {hud_python-0.4.47 → hud_python-0.4.48}/hud/clients/utils/retry.py +0 -0
  136. {hud_python-0.4.47 → hud_python-0.4.48}/hud/clients/utils/retry_transport.py +0 -0
  137. {hud_python-0.4.47 → hud_python-0.4.48}/hud/datasets/__init__.py +0 -0
  138. {hud_python-0.4.47 → hud_python-0.4.48}/hud/datasets/utils.py +0 -0
  139. {hud_python-0.4.47 → hud_python-0.4.48}/hud/misc/__init__.py +0 -0
  140. {hud_python-0.4.47 → hud_python-0.4.48}/hud/misc/claude_plays_pokemon.py +0 -0
  141. {hud_python-0.4.47 → hud_python-0.4.48}/hud/native/__init__.py +0 -0
  142. {hud_python-0.4.47 → hud_python-0.4.48}/hud/native/comparator.py +0 -0
  143. {hud_python-0.4.47 → hud_python-0.4.48}/hud/native/tests/__init__.py +0 -0
  144. {hud_python-0.4.47 → hud_python-0.4.48}/hud/native/tests/test_comparator.py +0 -0
  145. {hud_python-0.4.47 → hud_python-0.4.48}/hud/native/tests/test_native_init.py +0 -0
  146. {hud_python-0.4.47 → hud_python-0.4.48}/hud/otel/__init__.py +0 -0
  147. {hud_python-0.4.47 → hud_python-0.4.48}/hud/otel/collector.py +0 -0
  148. {hud_python-0.4.47 → hud_python-0.4.48}/hud/otel/config.py +0 -0
  149. {hud_python-0.4.47 → hud_python-0.4.48}/hud/otel/context.py +0 -0
  150. {hud_python-0.4.47 → hud_python-0.4.48}/hud/otel/exporters.py +0 -0
  151. {hud_python-0.4.47 → hud_python-0.4.48}/hud/otel/instrumentation.py +0 -0
  152. {hud_python-0.4.47 → hud_python-0.4.48}/hud/otel/processors.py +0 -0
  153. {hud_python-0.4.47 → hud_python-0.4.48}/hud/otel/tests/__init__.py +0 -0
  154. {hud_python-0.4.47 → hud_python-0.4.48}/hud/otel/tests/test_processors.py +0 -0
  155. {hud_python-0.4.47 → hud_python-0.4.48}/hud/py.typed +0 -0
  156. {hud_python-0.4.47 → hud_python-0.4.48}/hud/rl/README.md +0 -0
  157. {hud_python-0.4.47 → hud_python-0.4.48}/hud/rl/__init__.py +0 -0
  158. {hud_python-0.4.47 → hud_python-0.4.48}/hud/rl/buffer.py +0 -0
  159. {hud_python-0.4.47 → hud_python-0.4.48}/hud/rl/chat_template.jinja +0 -0
  160. {hud_python-0.4.47 → hud_python-0.4.48}/hud/rl/config.py +0 -0
  161. {hud_python-0.4.47 → hud_python-0.4.48}/hud/rl/tests/__init__.py +0 -0
  162. {hud_python-0.4.47 → hud_python-0.4.48}/hud/rl/tests/test_learner.py +0 -0
  163. {hud_python-0.4.47 → hud_python-0.4.48}/hud/rl/types.py +0 -0
  164. {hud_python-0.4.47 → hud_python-0.4.48}/hud/rl/utils/start_vllm_server.sh +0 -0
  165. {hud_python-0.4.47 → hud_python-0.4.48}/hud/rl/utils.py +0 -0
  166. {hud_python-0.4.47 → hud_python-0.4.48}/hud/rl/vllm_adapter.py +0 -0
  167. {hud_python-0.4.47 → hud_python-0.4.48}/hud/samples/__init__.py +0 -0
  168. {hud_python-0.4.47 → hud_python-0.4.48}/hud/samples/browser.py +0 -0
  169. {hud_python-0.4.47 → hud_python-0.4.48}/hud/server/__init__.py +0 -0
  170. {hud_python-0.4.47 → hud_python-0.4.48}/hud/server/context.py +0 -0
  171. {hud_python-0.4.47 → hud_python-0.4.48}/hud/server/helper/__init__.py +0 -0
  172. {hud_python-0.4.47 → hud_python-0.4.48}/hud/server/low_level.py +0 -0
  173. {hud_python-0.4.47 → hud_python-0.4.48}/hud/server/server.py +0 -0
  174. {hud_python-0.4.47 → hud_python-0.4.48}/hud/server/tests/__init__.py +0 -0
  175. {hud_python-0.4.47 → hud_python-0.4.48}/hud/server/tests/test_add_tool.py +0 -0
  176. {hud_python-0.4.47 → hud_python-0.4.48}/hud/server/tests/test_context.py +0 -0
  177. {hud_python-0.4.47 → hud_python-0.4.48}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  178. {hud_python-0.4.47 → hud_python-0.4.48}/hud/server/tests/test_mcp_server_integration.py +0 -0
  179. {hud_python-0.4.47 → hud_python-0.4.48}/hud/server/tests/test_mcp_server_more.py +0 -0
  180. {hud_python-0.4.47 → hud_python-0.4.48}/hud/server/tests/test_run_wrapper.py +0 -0
  181. {hud_python-0.4.47 → hud_python-0.4.48}/hud/server/tests/test_server_extra.py +0 -0
  182. {hud_python-0.4.47 → hud_python-0.4.48}/hud/server/tests/test_sigterm_runner.py +0 -0
  183. {hud_python-0.4.47 → hud_python-0.4.48}/hud/settings.py +0 -0
  184. {hud_python-0.4.47 → hud_python-0.4.48}/hud/shared/__init__.py +0 -0
  185. {hud_python-0.4.47 → hud_python-0.4.48}/hud/shared/exceptions.py +0 -0
  186. {hud_python-0.4.47 → hud_python-0.4.48}/hud/shared/hints.py +0 -0
  187. {hud_python-0.4.47 → hud_python-0.4.48}/hud/shared/requests.py +0 -0
  188. {hud_python-0.4.47 → hud_python-0.4.48}/hud/shared/tests/__init__.py +0 -0
  189. {hud_python-0.4.47 → hud_python-0.4.48}/hud/shared/tests/test_exceptions.py +0 -0
  190. {hud_python-0.4.47 → hud_python-0.4.48}/hud/shared/tests/test_requests.py +0 -0
  191. {hud_python-0.4.47 → hud_python-0.4.48}/hud/telemetry/__init__.py +0 -0
  192. {hud_python-0.4.47 → hud_python-0.4.48}/hud/telemetry/instrument.py +0 -0
  193. {hud_python-0.4.47 → hud_python-0.4.48}/hud/telemetry/job.py +0 -0
  194. {hud_python-0.4.47 → hud_python-0.4.48}/hud/telemetry/replay.py +0 -0
  195. {hud_python-0.4.47 → hud_python-0.4.48}/hud/telemetry/tests/__init__.py +0 -0
  196. {hud_python-0.4.47 → hud_python-0.4.48}/hud/telemetry/tests/test_replay.py +0 -0
  197. {hud_python-0.4.47 → hud_python-0.4.48}/hud/telemetry/tests/test_trace.py +0 -0
  198. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/__init__.py +0 -0
  199. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/bash.py +0 -0
  200. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/computer/anthropic.py +0 -0
  201. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/computer/hud.py +0 -0
  202. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/computer/openai.py +0 -0
  203. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/edit.py +0 -0
  204. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/executors/__init__.py +0 -0
  205. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/executors/base.py +0 -0
  206. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/executors/tests/__init__.py +0 -0
  207. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/executors/tests/test_base_executor.py +0 -0
  208. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  209. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/executors/xdo.py +0 -0
  210. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/grounding/__init__.py +0 -0
  211. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/grounding/config.py +0 -0
  212. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/grounding/grounded_tool.py +0 -0
  213. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/grounding/grounder.py +0 -0
  214. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/grounding/tests/__init__.py +0 -0
  215. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  216. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/response.py +0 -0
  217. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/submit.py +0 -0
  218. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/tests/__init__.py +0 -0
  219. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/tests/test_base.py +0 -0
  220. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/tests/test_bash.py +0 -0
  221. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/tests/test_bash_extended.py +0 -0
  222. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/tests/test_computer.py +0 -0
  223. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/tests/test_computer_actions.py +0 -0
  224. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/tests/test_edit.py +0 -0
  225. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/tests/test_init.py +0 -0
  226. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/tests/test_playwright_tool.py +0 -0
  227. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/tests/test_response.py +0 -0
  228. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/tests/test_tools.py +0 -0
  229. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/tests/test_tools_init.py +0 -0
  230. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/tests/test_utils.py +0 -0
  231. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/types.py +0 -0
  232. {hud_python-0.4.47 → hud_python-0.4.48}/hud/tools/utils.py +0 -0
  233. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/__init__.py +0 -0
  234. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/agent_factories.py +0 -0
  235. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/async_utils.py +0 -0
  236. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/group_eval.py +0 -0
  237. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/hud_console.py +0 -0
  238. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/mcp.py +0 -0
  239. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/pretty_errors.py +0 -0
  240. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/progress.py +0 -0
  241. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/tasks.py +0 -0
  242. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/telemetry.py +0 -0
  243. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/tests/__init__.py +0 -0
  244. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/tests/test_async_utils.py +0 -0
  245. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/tests/test_init.py +0 -0
  246. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/tests/test_mcp.py +0 -0
  247. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/tests/test_progress.py +0 -0
  248. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/tests/test_telemetry.py +0 -0
  249. {hud_python-0.4.47 → hud_python-0.4.48}/hud/utils/tool_shorthand.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.47
3
+ Version: 0.4.48
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -3,10 +3,11 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import asyncio
6
+ import fnmatch
6
7
  import json
7
8
  import logging
8
9
  from abc import ABC, abstractmethod
9
- from typing import TYPE_CHECKING, Any, ClassVar, Literal
10
+ from typing import TYPE_CHECKING, Any, ClassVar, List, Literal
10
11
 
11
12
  import mcp.types as types
12
13
 
@@ -96,12 +97,9 @@ class MCPAgent(ABC):
96
97
  self.console.set_verbose(True)
97
98
 
98
99
  # User filtering
99
- self.allowed_tools = allowed_tools
100
- self.disallowed_tools = disallowed_tools or []
101
-
102
- # Task filtering
103
- self.agent_tools = None
104
- self.lifecycle_tools = []
100
+ self.allowed_tools: List[str] | None = allowed_tools
101
+ self.disallowed_tools: List[str] | None = disallowed_tools
102
+ self._available_tools: List[types.Tool] | None = None
105
103
 
106
104
  # Messages
107
105
  self.system_prompt = system_prompt
@@ -109,7 +107,6 @@ class MCPAgent(ABC):
109
107
  self.initial_screenshot = initial_screenshot
110
108
 
111
109
  # Initialize these here so methods can be called before initialize()
112
- self._available_tools: list[types.Tool] = []
113
110
  self._tool_map: dict[str, types.Tool] = {} # Simplified: just name to tool
114
111
  self.response_tool_name = None
115
112
 
@@ -146,37 +143,48 @@ class MCPAgent(ABC):
146
143
  except Exception as e:
147
144
  self._handle_connection_error(e)
148
145
 
149
- # If task is provided, add lifecycle tools
146
+ # If task is provided, apply agent_config and add lifecycle tools
150
147
  if isinstance(task, Task):
151
- if task.agent_tools:
152
- self.agent_tools = task.agent_tools
153
- if task.setup_tool:
154
- if isinstance(task.setup_tool, list):
155
- for tool in task.setup_tool:
156
- if not self.agent_tools or (
157
- self.agent_tools and tool.name not in self.agent_tools
158
- ):
159
- self.lifecycle_tools.append(tool.name)
160
- elif not self.agent_tools or (
161
- self.agent_tools and task.setup_tool.name not in self.agent_tools
162
- ):
163
- self.lifecycle_tools.append(task.setup_tool.name)
164
- if task.evaluate_tool:
165
- if isinstance(task.evaluate_tool, list):
166
- for tool in task.evaluate_tool:
167
- if not self.agent_tools or (
168
- self.agent_tools and tool.name not in self.agent_tools
169
- ):
170
- self.lifecycle_tools.append(tool.name)
171
- elif not self.agent_tools or (
172
- self.agent_tools and task.evaluate_tool.name not in self.agent_tools
173
- ):
174
- self.lifecycle_tools.append(task.evaluate_tool.name)
175
- if task.system_prompt:
176
- self.system_prompt += "\n\n" + task.system_prompt
177
-
178
- # Re-apply filtering with updated lifecycle tools
179
- await self._filter_tools()
148
+ # Apply agent_config if present
149
+ if task.agent_config:
150
+ if "system_prompt" in task.agent_config and task.agent_config["system_prompt"]:
151
+ self.system_prompt += "\n\n" + task.agent_config["system_prompt"]
152
+ if "append_setup_output" in task.agent_config:
153
+ self.append_setup_output = task.agent_config["append_setup_output"]
154
+ if "initial_screenshot" in task.agent_config:
155
+ self.initial_screenshot = task.agent_config["initial_screenshot"]
156
+ if "allowed_tools" in task.agent_config:
157
+ # If allowed_tools has already been set, we take the intersection of the two
158
+ # If the list had been empty, we were allowing all tools, so we overwrite in this
159
+ if isinstance(self.allowed_tools, list) and len(self.allowed_tools) > 0:
160
+ self.allowed_tools = [tool for tool in self.allowed_tools if tool in task.agent_config["allowed_tools"]]
161
+ else: # If allowed_tools is None, we overwrite it
162
+ self.allowed_tools = task.agent_config["allowed_tools"]
163
+ if "disallowed_tools" in task.agent_config:
164
+ # If disallowed_tools has already been set, we take the union of the two
165
+ if isinstance(self.disallowed_tools, list):
166
+ self.disallowed_tools.extend(task.agent_config["disallowed_tools"])
167
+ else: # If disallowed_tools is None, we overwrite it
168
+ self.disallowed_tools = task.agent_config["disallowed_tools"]
169
+
170
+ all_tools = await self.mcp_client.list_tools()
171
+ self._available_tools = []
172
+
173
+ # Filter tools based on allowed and disallowed patterns
174
+ # No allowed tools and no disallowed tools -> we accept all tools
175
+ # No allowed tools and disallowed tools -> we accept all tools except the disallowed ones
176
+ for tool in all_tools:
177
+ if self.allowed_tools is not None:
178
+ if not any(fnmatch.fnmatch(tool.name, pattern) for pattern in self.allowed_tools):
179
+ continue
180
+ if self.disallowed_tools is not None:
181
+ if any(fnmatch.fnmatch(tool.name, pattern) for pattern in self.disallowed_tools):
182
+ continue
183
+ self._available_tools.append(tool)
184
+
185
+ self.console.info(
186
+ f"Agent initialized with {len(self.get_available_tools())} tools: {', '.join([t.name for t in self.get_available_tools()])}" # noqa: E501
187
+ )
180
188
 
181
189
  async def run(self, prompt_or_task: str | Task | dict[str, Any], max_steps: int = 10) -> Trace:
182
190
  """
@@ -575,108 +583,6 @@ class MCPAgent(ABC):
575
583
 
576
584
  return await self.format_blocks(blocks)
577
585
 
578
- async def _filter_tools(self) -> None:
579
- """Apply tool filtering based on allowed/disallowed lists."""
580
- # Get all tools from client
581
- if self.mcp_client is None:
582
- raise ValueError("MCP client is not initialized")
583
-
584
- all_tools = await self.mcp_client.list_tools()
585
-
586
- response_tools_by_server: dict[str, str] = {} # server_name -> tool_name
587
- for tool in all_tools:
588
- if "response" in tool.name or tool.name == "response":
589
- self.console.debug(f"Found response tool: '{tool.name}'")
590
- # Extract server name from tool name (e.g., "grader_response" -> "grader")
591
- if "_" in tool.name:
592
- server_name = tool.name.split("_", 1)[0]
593
- response_tools_by_server[server_name] = tool.name
594
- else:
595
- response_tools_by_server["_default"] = tool.name
596
-
597
- # Add response tool to lifecycle tools BEFORE filtering
598
- if response_tools_by_server and hasattr(self.mcp_client, "mcp_config"):
599
- # Get server names in order from mcp_config
600
- server_names = list(self.mcp_client.mcp_config.keys())
601
- self.console.debug(f"Server names: {server_names}")
602
-
603
- # Try to find response tool from last server first
604
- response_tool_name = None
605
- for server_name in reversed(server_names):
606
- if server_name in response_tools_by_server:
607
- response_tool_name = response_tools_by_server[server_name]
608
- self.console.debug(
609
- f"Found response tool '{response_tool_name}' from server '{server_name}'"
610
- )
611
- break
612
-
613
- # Fallback to any response tool
614
- if not response_tool_name and response_tools_by_server:
615
- response_tool_name = next(iter(response_tools_by_server.values()))
616
- self.console.debug(f"Using fallback response tool '{response_tool_name}'")
617
-
618
- # Add to lifecycle tools if found
619
- if response_tool_name and response_tool_name not in self.lifecycle_tools:
620
- self.console.debug(f"Auto-detected '{response_tool_name}' tool as a lifecycle tool")
621
- self.response_tool_name = response_tool_name
622
- self.lifecycle_tools.append(response_tool_name)
623
- elif response_tool_name:
624
- self.console.debug(
625
- f"Response tool '{response_tool_name}' already in lifecycle_tools"
626
- )
627
- self.response_tool_name = response_tool_name
628
- else:
629
- self.console.debug("No response tools found or no mcp_config")
630
-
631
- # Filter tools
632
- self._available_tools = []
633
- self._tool_map = {}
634
-
635
- self.console.debug(f"All tools: {[t.name for t in all_tools]}")
636
- self.console.debug(f"Allowed tools: {self.allowed_tools}")
637
- self.console.debug(f"Agent tools: {self.agent_tools}")
638
- self.console.debug(f"Disallowed tools: {self.disallowed_tools}")
639
- self.console.debug(f"Lifecycle tools: {self.lifecycle_tools}")
640
-
641
- for tool in all_tools:
642
- # Lifecycle tools (setup, evaluate, response) should always be included
643
- is_lifecycle = tool.name in self.lifecycle_tools
644
-
645
- # Check if tool should be included
646
- if not is_lifecycle:
647
- if self.allowed_tools and tool.name not in self.allowed_tools:
648
- self.console.debug(f"Skipping tool '{tool.name}' - not in allowed_tools")
649
- continue
650
- if self.agent_tools and tool.name not in self.agent_tools:
651
- self.console.debug(f"Skipping tool '{tool.name}' - not in agent_tools")
652
- continue
653
- if tool.name in self.disallowed_tools:
654
- self.console.debug(f"Skipping tool '{tool.name}' - in disallowed_tools")
655
- continue
656
-
657
- self.console.debug(
658
- f"Adding tool '{tool.name}' to available tools (lifecycle={is_lifecycle})"
659
- )
660
- self._available_tools.append(tool)
661
- self._tool_map[tool.name] = tool
662
-
663
- # Check if all required tools are available
664
- if self.required_tools:
665
- available_tool_names = {tool.name for tool in self._available_tools}
666
- missing_tools = [
667
- tool for tool in self.required_tools if tool not in available_tool_names
668
- ]
669
- if missing_tools:
670
- raise ValueError(
671
- f"Required tools not available: {missing_tools}. "
672
- f"Available tools: {list(available_tool_names)}"
673
- )
674
-
675
- available_tools = self.get_available_tools()
676
- self.console.info(
677
- f"Agent initialized with {len(available_tools)} tools: {', '.join([t.name for t in available_tools])}" # noqa: E501
678
- )
679
-
680
586
  async def _maybe_submit_response(self, response: AgentResponse, messages: list[Any]) -> None:
681
587
  """Submit response through lifecycle tool if available.
682
588
 
@@ -715,8 +621,9 @@ class MCPAgent(ABC):
715
621
 
716
622
  def get_available_tools(self) -> list[types.Tool]:
717
623
  """Get list of available MCP tools for LLM use (excludes lifecycle tools)."""
718
- lifecycle_tool_names = self.lifecycle_tools
719
- return [tool for tool in self._available_tools if tool.name not in lifecycle_tool_names]
624
+ if self._available_tools is None:
625
+ raise RuntimeError("Tools have not been initialized. Call initialize() before accessing available tools.")
626
+ return self._available_tools
720
627
 
721
628
  def get_tool_schemas(self) -> list[dict]:
722
629
  """Get tool schemas in a format suitable for the model."""
@@ -326,7 +326,7 @@ class ClaudeAgent(MCPAgent):
326
326
  selected_computer_tool = None
327
327
 
328
328
  for priority_name in computer_tool_priority:
329
- for tool in self._available_tools:
329
+ for tool in self.get_available_tools():
330
330
  # Check both exact match and suffix match (for prefixed tools)
331
331
  if tool.name == priority_name or tool.name.endswith(f"_{priority_name}"):
332
332
  selected_computer_tool = tool
@@ -350,13 +350,12 @@ class ClaudeAgent(MCPAgent):
350
350
  )
351
351
 
352
352
  # Add other non-computer tools
353
- for tool in self._available_tools:
354
- # Skip computer tools (already handled) and lifecycle tools
355
- is_computer_tool = any(
353
+ for tool in self.get_available_tools():
354
+ # Skip computer tools (already handled)
355
+ if any(
356
356
  tool.name == priority_name or tool.name.endswith(f"_{priority_name}")
357
357
  for priority_name in computer_tool_priority
358
- )
359
- if is_computer_tool or tool.name in self.lifecycle_tools:
358
+ ):
360
359
  continue
361
360
 
362
361
  claude_tool = {
@@ -17,6 +17,8 @@ class IntegrationTestRunner(MCPAgent):
17
17
  # Initialize using base to set up client and telemetry correctly
18
18
  await self.initialize(task)
19
19
 
20
+ self.console.info(f"Full system prompt: {self.system_prompt}")
21
+
20
22
  # Validate task shape
21
23
  if not getattr(task, "integration_test_tool", None):
22
24
  raise ValueError(
@@ -326,9 +326,6 @@ class TestBaseMCPAgent:
326
326
  """Test getting tool schemas."""
327
327
  agent = MockMCPAgent()
328
328
 
329
- # Add setup to lifecycle tools to test filtering
330
- agent.lifecycle_tools = ["setup"]
331
-
332
329
  agent._available_tools = [
333
330
  types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
334
331
  types.Tool(name="setup", description="Setup", inputSchema={"type": "object"}),
@@ -598,7 +595,7 @@ class TestMCPAgentExtended:
598
595
  agent = MockAgentExtended(mcp_client=mock_client, allowed_tools=["tool1", "tool3"])
599
596
  await agent.initialize("test")
600
597
 
601
- available_names = [tool.name for tool in agent._available_tools]
598
+ available_names = [tool.name for tool in agent.get_available_tools()]
602
599
  assert "tool1" in available_names
603
600
  assert "tool3" in available_names
604
601
  assert "tool2" not in available_names
@@ -617,7 +614,7 @@ class TestMCPAgentExtended:
617
614
  agent = MockAgentExtended(mcp_client=mock_client, disallowed_tools=["tool2"])
618
615
  await agent.initialize("test")
619
616
 
620
- available_names = [tool.name for tool in agent._available_tools]
617
+ available_names = [tool.name for tool in agent.get_available_tools()]
621
618
  assert "tool1" in available_names
622
619
  assert "tool3" in available_names
623
620
  assert "tool2" not in available_names
@@ -935,8 +935,8 @@ def eval(
935
935
  "--max-concurrent",
936
936
  help="Max concurrent tasks (prevents rate limits in both asyncio and parallel modes)",
937
937
  ),
938
- max_steps: int = typer.Option(
939
- 30,
938
+ max_steps: int | None = typer.Option(
939
+ None,
940
940
  "--max-steps",
941
941
  help="Maximum steps per task (default: 10 for single, 50 for full)",
942
942
  ),
@@ -199,6 +199,8 @@ async def run_single_task(
199
199
  ) -> None:
200
200
  """Load one task and execute it, or detect if JSON contains a list and run as dataset."""
201
201
 
202
+ # Provide early feedback to user
203
+ hud_console.info("🔧 Initializing evaluation...")
202
204
  # Import Task and run_dataset lazily
203
205
  try:
204
206
  from hud.utils.tasks import load_tasks
@@ -318,7 +320,10 @@ async def run_single_task(
318
320
  )
319
321
  display_group_statistics(stats, show_details=True)
320
322
  else:
321
- # Original single-run logic
323
+ # Enable agent step logging for single task mode
324
+ logging.getLogger("hud.agents").setLevel(logging.INFO)
325
+ logging.getLogger("hud.agents.base").setLevel(logging.INFO)
326
+
322
327
  with hud.trace(name=task_prompt):
323
328
  agent = build_agent(
324
329
  agent_type,
@@ -352,6 +357,9 @@ async def run_full_dataset(
352
357
  Uses either asyncio-based run_dataset or process-based parallel execution
353
358
  depending on the parallel flag."""
354
359
 
360
+ # Provide early feedback to user
361
+ hud_console.info("🔧 Initializing evaluation...")
362
+
355
363
  # Import run_dataset lazily
356
364
  try:
357
365
  from hud.datasets import run_dataset, run_dataset_parallel, run_dataset_parallel_manual
@@ -367,7 +375,7 @@ async def run_full_dataset(
367
375
  hud_console.info(f"📊 Loading tasks from: {source}…")
368
376
  tasks: list[Task] = load_tasks(source) # type: ignore[assignment]
369
377
 
370
- if not tasks:
378
+ if len(tasks) == 0:
371
379
  hud_console.error(f"No tasks found in: {source}")
372
380
  raise typer.Exit(1)
373
381
 
@@ -646,10 +654,10 @@ def eval_command(
646
654
  hud eval hud-evals/SheetBench-50 --full --agent claude
647
655
 
648
656
  # Run large dataset with PARALLEL execution (auto-optimized)
649
- hud eval hud-evals/OSWorld-Verified-XLang --full --parallel
657
+ hud eval hud-evals/OSWorld-Verified-Gold --full --parallel
650
658
 
651
659
  # Parallel mode with manual configuration (16 workers, 25 tasks each)
652
- hud eval hud-evals/OSWorld-Verified-XLang --full --parallel --max-workers 16
660
+ hud eval hud-evals/OSWorld-Verified-Gold --full --parallel --max-workers 16
653
661
 
654
662
  # Limit total concurrent tasks to prevent rate limits
655
663
  hud eval hud-evals/SheetBench-50 --full --parallel --max-concurrent 20
@@ -674,6 +682,8 @@ def eval_command(
674
682
  """
675
683
  from hud.settings import settings
676
684
 
685
+ # Always configure basic logging so agent steps can be logged
686
+ # Set to INFO by default for consistency with run_evaluation.py
677
687
  if very_verbose:
678
688
  logging.basicConfig(
679
689
  level=logging.DEBUG,
@@ -683,11 +693,6 @@ def eval_command(
683
693
  logging.getLogger("hud.agents").setLevel(logging.DEBUG)
684
694
  logging.getLogger("hud.agents.base").setLevel(logging.DEBUG)
685
695
  elif verbose:
686
- logging.basicConfig(
687
- level=logging.INFO,
688
- format="%(asctime)s - %(name)s - %(message)s",
689
- datefmt="%H:%M:%S",
690
- )
691
696
  logging.getLogger("hud.agents").setLevel(logging.INFO)
692
697
  logging.getLogger("hud.agents.base").setLevel(logging.INFO)
693
698
 
@@ -364,10 +364,8 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
364
364
  item["setup_tool"] = _simplify_tool_call(t.setup_tool)
365
365
  if t.evaluate_tool is not None:
366
366
  item["evaluate_tool"] = _simplify_tool_call(t.evaluate_tool)
367
- if t.agent_tools is not None:
368
- item["agent_tools"] = t.agent_tools
369
- if t.system_prompt is not None:
370
- item["system_prompt"] = t.system_prompt
367
+ if t.agent_config is not None:
368
+ item["agent_config"] = t.agent_config
371
369
  if t.metadata:
372
370
  item["metadata"] = t.metadata
373
371
  if t.id is not None:
@@ -230,19 +230,33 @@ def run_local_training(
230
230
  console.print("Enter the model name (HuggingFace ID):")
231
231
  model = input().strip()
232
232
 
233
- # Validate model is a VL model (whether provided via CLI or selected)
234
- if model:
233
+ # try to get model from config file
234
+ if config_file:
235
+ console.print(f"\n[cyan]Loading configuration from: {config_file}[/cyan]")
236
+ config = load_config(config_file)
237
+ if hasattr(config, "model") and hasattr(config.model, "base_model"):
238
+ if model is None:
239
+ model = config.model.base_model
240
+ else:
241
+ console.print(
242
+ f"[yellow]Model already set to {model}, using that instead "
243
+ f"of {config.model.base_model}[/yellow] (override)"
244
+ )
245
+
246
+ if model is None:
247
+ console.print("[red]❌ No model specified either through CLI or config file[/red]")
235
248
  try:
236
- validate_vl_model(model)
237
- except ValueError as e:
238
- console.print(f"\n[red]❌ {e}[/red]")
239
- try:
240
- import typer
249
+ import typer
241
250
 
242
- raise typer.Exit(1)
243
- except Exception:
244
- return
245
- else:
251
+ raise typer.Exit(1)
252
+ except Exception:
253
+ return
254
+
255
+ # Validate model is a VL model (whether provided via CLI or selected)
256
+ try:
257
+ validate_vl_model(model)
258
+ except ValueError as e:
259
+ console.print(f"\n[red]❌ {e}[/red]")
246
260
  try:
247
261
  import typer
248
262
 
@@ -488,7 +502,6 @@ def run_local_training(
488
502
  from .vllm import start_vllm_server, wait_for_vllm_server
489
503
 
490
504
  start_vllm_server(config.model.base_model, vllm_gpu_idx, restart=restart)
491
-
492
505
  server_ready = asyncio.run(wait_for_vllm_server())
493
506
  if not server_ready:
494
507
  console.print("[red]❌ Failed to start vLLM server[/red]")
@@ -507,7 +520,6 @@ def run_local_training(
507
520
  f"\n[bold green]🎯 Starting DDP training on {len(training_gpus)} GPUs...[/bold green]\n"
508
521
  )
509
522
  launch_ddp_training(training_gpus, tasks_file, temp_config_path, verbose)
510
- console.print("\n[green]✅ Training completed successfully![/green]")
511
523
  else:
512
524
  console.print("\n[bold green]🎯 Starting single-GPU training...[/bold green]\n")
513
525
  try:
@@ -165,6 +165,8 @@ async def wait_for_vllm_server(timeout: int = 360) -> bool: # noqa: ASYNC109
165
165
  if response.status_code == 200:
166
166
  console.print("[green]✅ vLLM server is ready![/green]")
167
167
  return True
168
+ except httpx.ConnectError:
169
+ pass
168
170
  except Exception as e:
169
171
  hud_console.error(f"Failed to connect to vLLM server: {e}")
170
172
 
@@ -214,6 +214,7 @@ class TestAnalyzeFromMetadata:
214
214
 
215
215
  @mock.patch("hud.cli.utils.metadata.check_local_cache")
216
216
  @mock.patch("hud.cli.utils.metadata.fetch_lock_from_registry")
217
+ @mock.patch("hud.cli.utils.metadata.hud_console")
217
218
  @mock.patch("hud.cli.utils.metadata.console")
218
219
  async def test_analyze_not_found(self, mock_console, mock_hud_console, mock_fetch, mock_check):
219
220
  """Test when environment not found anywhere."""
@@ -222,9 +223,9 @@ class TestAnalyzeFromMetadata:
222
223
 
223
224
  await analyze_from_metadata("test/notfound:latest", "json", verbose=False)
224
225
 
225
- # Should show error
226
+ # Should show error via hud_console
226
227
  mock_hud_console.error.assert_called_with("Environment metadata not found")
227
- # Should print suggestions
228
+ # Should print suggestions via console
228
229
  mock_console.print.assert_called()
229
230
 
230
231
  @mock.patch("hud.cli.utils.metadata.check_local_cache")