hud-python 0.5.24__tar.gz → 0.5.25__tar.gz

This diff shows the changes between two publicly available versions of this package, as released to one of the supported public registries. It is provided for informational purposes only.
Files changed (347)
  1. {hud_python-0.5.24 → hud_python-0.5.25}/PKG-INFO +1 -1
  2. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/openai_chat.py +8 -0
  3. hud_python-0.5.25/hud/agents/tests/test_integration_test_agent.py +42 -0
  4. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/types.py +7 -0
  5. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/dev.py +22 -20
  6. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/flows/dev.py +5 -3
  7. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/list_func.py +11 -10
  8. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/remove.py +1 -1
  9. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_build.py +2 -2
  10. {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/context.py +6 -0
  11. {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/tests/test_context.py +138 -0
  12. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/session.py +8 -6
  13. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/tests/test_bash.py +81 -0
  14. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/tests/test_shell.py +123 -0
  15. {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/tests/test_version.py +1 -1
  16. {hud_python-0.5.24 → hud_python-0.5.25}/hud/version.py +1 -1
  17. {hud_python-0.5.24 → hud_python-0.5.25}/pyproject.toml +1 -1
  18. {hud_python-0.5.24 → hud_python-0.5.25}/.gitignore +0 -0
  19. {hud_python-0.5.24 → hud_python-0.5.25}/LICENSE +0 -0
  20. {hud_python-0.5.24 → hud_python-0.5.25}/README.md +0 -0
  21. {hud_python-0.5.24 → hud_python-0.5.25}/examples/README.md +0 -0
  22. {hud_python-0.5.24 → hud_python-0.5.25}/hud/__init__.py +0 -0
  23. {hud_python-0.5.24 → hud_python-0.5.25}/hud/__main__.py +0 -0
  24. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/__init__.py +0 -0
  25. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/base.py +0 -0
  26. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/claude.py +0 -0
  27. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/gateway.py +0 -0
  28. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/gemini.py +0 -0
  29. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/gemini_cua.py +0 -0
  30. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/grounded_openai.py +0 -0
  31. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/misc/__init__.py +0 -0
  32. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/misc/integration_test_agent.py +0 -0
  33. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/misc/response_agent.py +0 -0
  34. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/openai.py +0 -0
  35. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/operator.py +0 -0
  36. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/resolver.py +0 -0
  37. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/__init__.py +0 -0
  38. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/conftest.py +0 -0
  39. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_base.py +0 -0
  40. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_base_runtime.py +0 -0
  41. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_claude.py +0 -0
  42. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_gemini.py +0 -0
  43. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
  44. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_openai.py +0 -0
  45. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_operator.py +0 -0
  46. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_resolver.py +0 -0
  47. {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_run_eval.py +0 -0
  48. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/__init__.py +0 -0
  49. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/__main__.py +0 -0
  50. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/analyze.py +0 -0
  51. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/build.py +0 -0
  52. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/clone.py +0 -0
  53. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/convert/__init__.py +0 -0
  54. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/convert/base.py +0 -0
  55. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/convert/harbor.py +0 -0
  56. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/convert/tests/__init__.py +0 -0
  57. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/convert/tests/conftest.py +0 -0
  58. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/convert/tests/test_harbor.py +0 -0
  59. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/debug.py +0 -0
  60. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/deploy.py +0 -0
  61. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/eval.py +0 -0
  62. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/flows/__init__.py +0 -0
  63. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/flows/init.py +0 -0
  64. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/flows/tasks.py +0 -0
  65. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/flows/templates.py +0 -0
  66. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/flows/tests/__init__.py +0 -0
  67. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/flows/tests/test_dev.py +0 -0
  68. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/get.py +0 -0
  69. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/init.py +0 -0
  70. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/link.py +0 -0
  71. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/pull.py +0 -0
  72. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/push.py +0 -0
  73. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/rft.py +0 -0
  74. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/rft_status.py +0 -0
  75. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/__init__.py +0 -0
  76. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_analyze.py +0 -0
  77. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_analyze_metadata.py +0 -0
  78. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_analyze_module.py +0 -0
  79. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_build_failure.py +0 -0
  80. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_build_module.py +0 -0
  81. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_cli_init.py +0 -0
  82. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_cli_main.py +0 -0
  83. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
  84. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_cli_root.py +0 -0
  85. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_clone.py +0 -0
  86. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_convert.py +0 -0
  87. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_cursor.py +0 -0
  88. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_debug.py +0 -0
  89. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_debug_directory_mode.py +0 -0
  90. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_deploy.py +0 -0
  91. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_dev.py +0 -0
  92. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_eval.py +0 -0
  93. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_eval_bedrock.py +0 -0
  94. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_init.py +0 -0
  95. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_list_func.py +0 -0
  96. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_main_module.py +0 -0
  97. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_mcp_server.py +0 -0
  98. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_pull.py +0 -0
  99. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_push.py +0 -0
  100. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_push_happy.py +0 -0
  101. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_push_wrapper.py +0 -0
  102. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_registry.py +0 -0
  103. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_utils.py +0 -0
  104. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/__init__.py +0 -0
  105. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/build_display.py +0 -0
  106. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/build_logs.py +0 -0
  107. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/celebrate.py +0 -0
  108. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/config.py +0 -0
  109. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/context.py +0 -0
  110. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/cursor.py +0 -0
  111. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/docker.py +0 -0
  112. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/env_check.py +0 -0
  113. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/environment.py +0 -0
  114. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/git.py +0 -0
  115. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/interactive.py +0 -0
  116. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/local_runner.py +0 -0
  117. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/logging.py +0 -0
  118. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/mcp.py +0 -0
  119. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/metadata.py +0 -0
  120. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/package_runner.py +0 -0
  121. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/registry.py +0 -0
  122. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/remote_runner.py +0 -0
  123. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/runner.py +0 -0
  124. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/server.py +0 -0
  125. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/source_hash.py +0 -0
  126. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tasks.py +0 -0
  127. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/__init__.py +0 -0
  128. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_config.py +0 -0
  129. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_docker.py +0 -0
  130. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_docker_hints.py +0 -0
  131. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_env_check.py +0 -0
  132. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_environment.py +0 -0
  133. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_git.py +0 -0
  134. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_interactive_module.py +0 -0
  135. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_local_runner.py +0 -0
  136. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_logging_utils.py +0 -0
  137. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_metadata.py +0 -0
  138. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_package_runner.py +0 -0
  139. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_registry_utils.py +0 -0
  140. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_remote_runner.py +0 -0
  141. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_runner_modules.py +0 -0
  142. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_source_hash.py +0 -0
  143. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_tasks.py +0 -0
  144. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/validation.py +0 -0
  145. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/version_check.py +0 -0
  146. {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/viewer.py +0 -0
  147. {hud_python-0.5.24 → hud_python-0.5.25}/hud/datasets/__init__.py +0 -0
  148. {hud_python-0.5.24 → hud_python-0.5.25}/hud/datasets/loader.py +0 -0
  149. {hud_python-0.5.24 → hud_python-0.5.25}/hud/datasets/runner.py +0 -0
  150. {hud_python-0.5.24 → hud_python-0.5.25}/hud/datasets/tests/__init__.py +0 -0
  151. {hud_python-0.5.24 → hud_python-0.5.25}/hud/datasets/tests/test_loader.py +0 -0
  152. {hud_python-0.5.24 → hud_python-0.5.25}/hud/datasets/tests/test_utils.py +0 -0
  153. {hud_python-0.5.24 → hud_python-0.5.25}/hud/datasets/utils.py +0 -0
  154. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/__init__.py +0 -0
  155. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/connection.py +0 -0
  156. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/connectors/__init__.py +0 -0
  157. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/connectors/base.py +0 -0
  158. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/connectors/local.py +0 -0
  159. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/connectors/mcp_config.py +0 -0
  160. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/connectors/openai.py +0 -0
  161. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/connectors/remote.py +0 -0
  162. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/environment.py +0 -0
  163. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/integrations/__init__.py +0 -0
  164. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/integrations/adk.py +0 -0
  165. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/integrations/anthropic.py +0 -0
  166. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/integrations/gemini.py +0 -0
  167. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/integrations/langchain.py +0 -0
  168. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/integrations/llamaindex.py +0 -0
  169. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/integrations/openai.py +0 -0
  170. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/mock.py +0 -0
  171. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/router.py +0 -0
  172. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/scenarios.py +0 -0
  173. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/tests/__init__.py +0 -0
  174. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/tests/test_connection.py +0 -0
  175. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/tests/test_connectors.py +0 -0
  176. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/tests/test_environment.py +0 -0
  177. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/tests/test_integrations.py +0 -0
  178. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/tests/test_local_connectors.py +0 -0
  179. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/tests/test_scenarios.py +0 -0
  180. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/tests/test_tools.py +0 -0
  181. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/types.py +0 -0
  182. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/utils/__init__.py +0 -0
  183. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/utils/formats.py +0 -0
  184. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/utils/schema.py +0 -0
  185. {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/utils/tool_wrappers.py +0 -0
  186. {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/__init__.py +0 -0
  187. {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/display.py +0 -0
  188. {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/instrument.py +0 -0
  189. {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/manager.py +0 -0
  190. {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/parallel.py +0 -0
  191. {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/task.py +0 -0
  192. {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/tests/__init__.py +0 -0
  193. {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/tests/test_eval.py +0 -0
  194. {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/tests/test_manager.py +0 -0
  195. {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/tests/test_parallel.py +0 -0
  196. {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/tests/test_task.py +0 -0
  197. {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/types.py +0 -0
  198. {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/utils.py +0 -0
  199. {hud_python-0.5.24 → hud_python-0.5.25}/hud/native/__init__.py +0 -0
  200. {hud_python-0.5.24 → hud_python-0.5.25}/hud/native/comparator.py +0 -0
  201. {hud_python-0.5.24 → hud_python-0.5.25}/hud/native/tests/__init__.py +0 -0
  202. {hud_python-0.5.24 → hud_python-0.5.25}/hud/native/tests/test_comparator.py +0 -0
  203. {hud_python-0.5.24 → hud_python-0.5.25}/hud/native/tests/test_native_init.py +0 -0
  204. {hud_python-0.5.24 → hud_python-0.5.25}/hud/patches/__init__.py +0 -0
  205. {hud_python-0.5.24 → hud_python-0.5.25}/hud/patches/mcp_patches.py +0 -0
  206. {hud_python-0.5.24 → hud_python-0.5.25}/hud/patches/warnings.py +0 -0
  207. {hud_python-0.5.24 → hud_python-0.5.25}/hud/py.typed +0 -0
  208. {hud_python-0.5.24 → hud_python-0.5.25}/hud/samples/__init__.py +0 -0
  209. {hud_python-0.5.24 → hud_python-0.5.25}/hud/samples/browser.py +0 -0
  210. {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/__init__.py +0 -0
  211. {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/context.py +0 -0
  212. {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/helper/__init__.py +0 -0
  213. {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/low_level.py +0 -0
  214. {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/router.py +0 -0
  215. {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/server.py +0 -0
  216. {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/__init__.py +0 -0
  217. {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/test_add_tool.py +0 -0
  218. {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/test_context.py +0 -0
  219. {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  220. {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/test_mcp_server_integration.py +0 -0
  221. {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/test_mcp_server_more.py +0 -0
  222. {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/test_run_wrapper.py +0 -0
  223. {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/test_server_extra.py +0 -0
  224. {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/test_sigterm_runner.py +0 -0
  225. {hud_python-0.5.24 → hud_python-0.5.25}/hud/settings.py +0 -0
  226. {hud_python-0.5.24 → hud_python-0.5.25}/hud/shared/__init__.py +0 -0
  227. {hud_python-0.5.24 → hud_python-0.5.25}/hud/shared/exceptions.py +0 -0
  228. {hud_python-0.5.24 → hud_python-0.5.25}/hud/shared/hints.py +0 -0
  229. {hud_python-0.5.24 → hud_python-0.5.25}/hud/shared/requests.py +0 -0
  230. {hud_python-0.5.24 → hud_python-0.5.25}/hud/shared/tests/__init__.py +0 -0
  231. {hud_python-0.5.24 → hud_python-0.5.25}/hud/shared/tests/test_exceptions.py +0 -0
  232. {hud_python-0.5.24 → hud_python-0.5.25}/hud/shared/tests/test_hints.py +0 -0
  233. {hud_python-0.5.24 → hud_python-0.5.25}/hud/shared/tests/test_requests.py +0 -0
  234. {hud_python-0.5.24 → hud_python-0.5.25}/hud/telemetry/__init__.py +0 -0
  235. {hud_python-0.5.24 → hud_python-0.5.25}/hud/telemetry/exporter.py +0 -0
  236. {hud_python-0.5.24 → hud_python-0.5.25}/hud/telemetry/instrument.py +0 -0
  237. {hud_python-0.5.24 → hud_python-0.5.25}/hud/telemetry/tests/__init__.py +0 -0
  238. {hud_python-0.5.24 → hud_python-0.5.25}/hud/telemetry/tests/test_eval_telemetry.py +0 -0
  239. {hud_python-0.5.24 → hud_python-0.5.25}/hud/telemetry/tests/test_exporter.py +0 -0
  240. {hud_python-0.5.24 → hud_python-0.5.25}/hud/telemetry/tests/test_instrument.py +0 -0
  241. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/__init__.py +0 -0
  242. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/agent.py +0 -0
  243. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/base.py +0 -0
  244. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/__init__.py +0 -0
  245. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/apply_patch.py +0 -0
  246. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/bash.py +0 -0
  247. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/edit.py +0 -0
  248. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/gemini_edit.py +0 -0
  249. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/gemini_shell.py +0 -0
  250. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/shell.py +0 -0
  251. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/tests/__init__.py +0 -0
  252. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/tests/test_apply_patch.py +0 -0
  253. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/tests/test_bash_extended.py +0 -0
  254. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/tests/test_bash_integration.py +0 -0
  255. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/tests/test_edit.py +0 -0
  256. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/tests/test_gemini_tools.py +0 -0
  257. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/utils.py +0 -0
  258. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/__init__.py +0 -0
  259. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/anthropic.py +0 -0
  260. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/gemini.py +0 -0
  261. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/glm.py +0 -0
  262. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/hud.py +0 -0
  263. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/openai.py +0 -0
  264. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/qwen.py +0 -0
  265. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/settings.py +0 -0
  266. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/tests/__init__.py +0 -0
  267. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/tests/test_computer.py +0 -0
  268. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/tests/test_computer_actions.py +0 -0
  269. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/tests/test_glm_computer.py +0 -0
  270. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/executors/__init__.py +0 -0
  271. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/executors/base.py +0 -0
  272. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/executors/pyautogui.py +0 -0
  273. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/executors/tests/__init__.py +0 -0
  274. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/executors/tests/test_base_executor.py +0 -0
  275. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  276. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/executors/xdo.py +0 -0
  277. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/__init__.py +0 -0
  278. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/base.py +0 -0
  279. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/gemini.py +0 -0
  280. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/glob.py +0 -0
  281. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/grep.py +0 -0
  282. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/list.py +0 -0
  283. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/read.py +0 -0
  284. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/tests/__init__.py +0 -0
  285. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/tests/test_glob.py +0 -0
  286. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/tests/test_grep.py +0 -0
  287. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/tests/test_list.py +0 -0
  288. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/tests/test_read.py +0 -0
  289. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/grounding/__init__.py +0 -0
  290. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/grounding/config.py +0 -0
  291. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/grounding/grounded_tool.py +0 -0
  292. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/grounding/grounder.py +0 -0
  293. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/grounding/tests/__init__.py +0 -0
  294. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  295. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/hosted/__init__.py +0 -0
  296. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/hosted/base.py +0 -0
  297. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/hosted/code_execution.py +0 -0
  298. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/hosted/google_search.py +0 -0
  299. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/hosted/url_context.py +0 -0
  300. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/hosted/web_fetch.py +0 -0
  301. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/hosted/web_search.py +0 -0
  302. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/jupyter.py +0 -0
  303. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/__init__.py +0 -0
  304. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/base.py +0 -0
  305. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/claude.py +0 -0
  306. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/gemini.py +0 -0
  307. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/session.py +0 -0
  308. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/tests/__init__.py +0 -0
  309. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/tests/test_claude.py +0 -0
  310. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/tests/test_gemini.py +0 -0
  311. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/tests/test_session.py +0 -0
  312. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/native_types.py +0 -0
  313. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/playwright.py +0 -0
  314. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/response.py +0 -0
  315. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/submit.py +0 -0
  316. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/__init__.py +0 -0
  317. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_agent_tool.py +0 -0
  318. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_base.py +0 -0
  319. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_init.py +0 -0
  320. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_jupyter_tool.py +0 -0
  321. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_native_tool_e2e.py +0 -0
  322. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_native_types.py +0 -0
  323. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_playwright_tool.py +0 -0
  324. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_response.py +0 -0
  325. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_submit.py +0 -0
  326. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_tools.py +0 -0
  327. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_tools_init.py +0 -0
  328. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_types.py +0 -0
  329. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_utils.py +0 -0
  330. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/types.py +0 -0
  331. {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/utils.py +0 -0
  332. {hud_python-0.5.24 → hud_python-0.5.25}/hud/types.py +0 -0
  333. {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/__init__.py +0 -0
  334. {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/env.py +0 -0
  335. {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/hud_console.py +0 -0
  336. {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/mcp.py +0 -0
  337. {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/pretty_errors.py +0 -0
  338. {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/strict_schema.py +0 -0
  339. {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/telemetry.py +0 -0
  340. {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/tests/__init__.py +0 -0
  341. {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/tests/test_init.py +0 -0
  342. {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/tests/test_mcp.py +0 -0
  343. {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/tests/test_pretty_errors.py +0 -0
  344. {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/tests/test_telemetry.py +0 -0
  345. {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/tests/test_tool_shorthand.py +0 -0
  346. {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/tool_shorthand.py +0 -0
  347. {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.5.24
3
+ Version: 0.5.25
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -88,6 +88,14 @@ class OpenAIChatAgent(MCPAgent):
88
88
  )
89
89
 
90
90
  self.completion_kwargs = dict(self.config.completion_kwargs)
91
+
92
+ # If a specific checkpoint is requested, inject it into extra_body
93
+ # so the HUD gateway routes to the exact checkpoint for inference.
94
+ if self.config.checkpoint:
95
+ extra_body = self.completion_kwargs.get("extra_body") or {}
96
+ extra_body["checkpoint"] = self.config.checkpoint
97
+ self.completion_kwargs["extra_body"] = extra_body
98
+
91
99
  self.mcp_schemas: list[ChatCompletionToolParam] = []
92
100
  self.hud_console = HUDConsole(logger=logger)
93
101
 
@@ -0,0 +1,42 @@
1
+ """Tests for IntegrationTestRunner."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+
7
+ import pytest
8
+
9
+ from hud.agents.misc import IntegrationTestRunner
10
+
11
+
12
+ def test_runs_all_integration_test_calls(mock_eval_context) -> None:
13
+ """Runner executes each configured integration test call in order."""
14
+
15
+ async def _run() -> None:
16
+ mock_eval_context._integration_test_calls = [
17
+ ("tool_a", {"x": 1}),
18
+ ("tool_b", {"y": "ok"}),
19
+ ]
20
+
21
+ runner = IntegrationTestRunner.create()
22
+ result = await runner.run(mock_eval_context)
23
+
24
+ assert result.done is True
25
+ assert mock_eval_context.tool_calls == [
26
+ ("tool_a", {"x": 1}),
27
+ ("tool_b", {"y": "ok"}),
28
+ ]
29
+
30
+ asyncio.run(_run())
31
+
32
+
33
+ def test_raises_when_no_integration_test_calls(mock_eval_context) -> None:
34
+ """Runner fails fast when no integration calls are configured."""
35
+
36
+ async def _run() -> None:
37
+ runner = IntegrationTestRunner.create()
38
+
39
+ with pytest.raises(ValueError, match="integration_test_tool"):
40
+ await runner.run(mock_eval_context)
41
+
42
+ asyncio.run(_run())
@@ -119,6 +119,13 @@ class OpenAIChatConfig(BaseAgentConfig):
119
119
 
120
120
  model_name: str = "OpenAI Chat"
121
121
  model: str = Field(default="gpt-5-mini", validation_alias=_model_alias)
122
+ checkpoint: str | None = Field(
123
+ default=None,
124
+ description="Specific checkpoint name for inference routing. "
125
+ "When set, the HUD gateway routes to this exact checkpoint rather than "
126
+ "the model's current active checkpoint. Passed as 'checkpoint' in the "
127
+ "request body's extra_body.",
128
+ )
122
129
  openai_client: Any = None # AsyncOpenAI
123
130
  api_key: str | None = None
124
131
  base_url: str | None = None
@@ -15,6 +15,7 @@ from pathlib import Path
15
15
  from typing import Any
16
16
 
17
17
  import typer
18
+ from rich.markup import escape
18
19
 
19
20
  from hud.utils.hud_console import HUDConsole
20
21
 
@@ -49,41 +50,42 @@ def show_dev_server_info(
49
50
 
50
51
  # Server section
51
52
  hud_console.section_title("Server")
52
- hud_console.info(f"{hud_console.sym.ITEM} {server_name}")
53
+ hud_console.print(f"{hud_console.sym.ITEM} {escape(server_name)}")
53
54
  if transport == "http":
54
- hud_console.info(f"{hud_console.sym.ITEM} http://localhost:{port}/mcp")
55
+ hud_console.print(f"{hud_console.sym.ITEM} http://localhost:{port}/mcp")
55
56
  else:
56
- hud_console.info(f"{hud_console.sym.ITEM} (stdio)")
57
+ hud_console.print(f"{hud_console.sym.ITEM} (stdio)")
57
58
 
58
59
  # Quick Links (only for HTTP mode)
59
60
  if transport == "http":
60
61
  hud_console.section_title("Quick Links")
61
- hud_console.info(f"{hud_console.sym.ITEM} Docs: http://localhost:{port}/docs")
62
- hud_console.info(f"{hud_console.sym.ITEM} Cursor:")
62
+ hud_console.print(f"{hud_console.sym.ITEM} Docs: http://localhost:{port}/docs")
63
+ hud_console.print(f"{hud_console.sym.ITEM} Cursor:")
63
64
  # Display the Cursor link on its own line to prevent wrapping
64
65
  hud_console.link(cursor_deeplink)
65
66
 
66
67
  # Show eval endpoint if in Docker mode
67
68
  if docker_mode:
68
- hud_console.info(
69
+ hud_console.print(
69
70
  f"{hud_console.sym.ITEM} Eval API: http://localhost:{port}/eval (POST)"
70
71
  )
71
72
 
72
73
  # Show debugging URLs from telemetry
73
74
  if telemetry:
74
75
  if "live_url" in telemetry:
75
- hud_console.info(f"{hud_console.sym.ITEM} Live URL: {telemetry['live_url']}")
76
+ url = escape(telemetry["live_url"])
77
+ hud_console.print(f"{hud_console.sym.ITEM} Live URL: {url}")
76
78
  if "vnc_url" in telemetry:
77
- hud_console.info(f"{hud_console.sym.ITEM} VNC URL: {telemetry['vnc_url']}")
79
+ hud_console.print(f"{hud_console.sym.ITEM} VNC URL: {escape(telemetry['vnc_url'])}")
78
80
  if "cdp_url" in telemetry:
79
- hud_console.info(f"{hud_console.sym.ITEM} CDP URL: {telemetry['cdp_url']}")
81
+ hud_console.print(f"{hud_console.sym.ITEM} CDP URL: {escape(telemetry['cdp_url'])}")
80
82
 
81
83
  # Check for VNC (browser environment)
82
84
  if env_dir and (env_dir / "environment" / "server.py").exists():
83
85
  try:
84
86
  content = (env_dir / "environment" / "server.py").read_text()
85
87
  if "x11vnc" in content.lower() or "vnc" in content.lower():
86
- hud_console.info(f"{hud_console.sym.ITEM} VNC: http://localhost:8080/vnc.html")
88
+ hud_console.print(f"{hud_console.sym.ITEM} VNC: http://localhost:8080/vnc.html")
87
89
  except Exception: # noqa: S110
88
90
  pass
89
91
 
@@ -91,13 +93,13 @@ def show_dev_server_info(
91
93
  if inspector or interactive:
92
94
  hud_console.info("")
93
95
  if inspector:
94
- hud_console.info(f"{hud_console.sym.SUCCESS} Inspector launching...")
96
+ hud_console.print(f"{hud_console.sym.SUCCESS} Inspector launching...")
95
97
  if interactive:
96
- hud_console.info(f"{hud_console.sym.SUCCESS} Interactive mode enabled")
98
+ hud_console.print(f"{hud_console.sym.SUCCESS} Interactive mode enabled")
97
99
 
98
100
  hud_console.info("")
99
101
  if hot_reload_enabled:
100
- hud_console.info(f"{hud_console.sym.SUCCESS} Hot-reload enabled")
102
+ hud_console.print(f"{hud_console.sym.SUCCESS} Hot-reload enabled")
101
103
  else:
102
104
  hud_console.info("Hot-reload disabled")
103
105
  hud_console.dim_info("Tip", "Pass --watch/-w to enable hot-reload")
@@ -230,7 +232,7 @@ async def run_mcp_module(
230
232
  hud_console.error(f"Failed to import module '{module_name}'")
231
233
  hud_console.info(f"Error: {e}")
232
234
  hud_console.info("")
233
- hud_console.info("[bold cyan]Troubleshooting:[/bold cyan]")
235
+ hud_console.print("[bold cyan]Troubleshooting:[/bold cyan]")
234
236
  hud_console.info(" • Verify module exists and is importable")
235
237
  hud_console.info(" • Check for __init__.py in module directory")
236
238
  hud_console.info(" • Check for import errors in the module")
@@ -238,7 +240,7 @@ async def run_mcp_module(
238
240
  import traceback
239
241
 
240
242
  hud_console.info("")
241
- hud_console.info("[bold cyan]Full traceback:[/bold cyan]")
243
+ hud_console.print("[bold cyan]Full traceback:[/bold cyan]")
242
244
  hud_console.info(traceback.format_exc())
243
245
  sys.exit(1)
244
246
 
@@ -271,14 +273,14 @@ async def run_mcp_module(
271
273
  available = [k for k in dir(module) if not k.startswith("_")]
272
274
  hud_console.info(f"Available in module: {available}")
273
275
  hud_console.info("")
274
- hud_console.info("[bold cyan]Expected structure:[/bold cyan]")
276
+ hud_console.print("[bold cyan]Expected structure:[/bold cyan]")
275
277
  hud_console.info(" from hud.environment import Environment")
276
278
  hud_console.info(" env = Environment('my-env') # or mcp = ...")
277
279
  raise AttributeError(f"Module '{module_name}' must define 'mcp', 'env', or 'environment'")
278
280
 
279
281
  # Only show full header on first run, brief message on reload
280
282
  if is_reload:
281
- hud_console.info(f"{hud_console.sym.SUCCESS} Reloaded")
283
+ hud_console.print(f"{hud_console.sym.SUCCESS} Reloaded")
282
284
  # Run server without showing full UI
283
285
  else:
284
286
  # Show full header on first run
@@ -344,7 +346,7 @@ async def run_mcp_module(
344
346
  env_dir = cwd.parent / "environment"
345
347
  if env_dir.exists() and (env_dir / "server.py").exists():
346
348
  hud_console.info("")
347
- hud_console.info(
349
+ hud_console.print(
348
350
  f"{hud_console.sym.FLOW} Don't forget to start the environment "
349
351
  "backend in another terminal:"
350
352
  )
@@ -976,11 +978,11 @@ def run_mcp_dev_server(
976
978
  if module is None:
977
979
  hud_console.error("Could not auto-detect module in current directory")
978
980
  hud_console.info("")
979
- hud_console.info("[bold cyan]Expected:[/bold cyan]")
981
+ hud_console.print("[bold cyan]Expected:[/bold cyan]")
980
982
  hud_console.info(" • __init__.py file in current directory")
981
983
  hud_console.info(" • Module must define 'mcp' or 'env' variable")
982
984
  hud_console.info("")
983
- hud_console.info("[bold cyan]Examples:[/bold cyan]")
985
+ hud_console.print("[bold cyan]Examples:[/bold cyan]")
984
986
  hud_console.info(" hud dev controller")
985
987
  hud_console.info(" cd controller && hud dev")
986
988
  hud_console.info(" hud dev --docker # For Docker-based environments")
@@ -6,6 +6,8 @@ import json
6
6
  import logging
7
7
  from typing import Any
8
8
 
9
+ from rich.markup import escape
10
+
9
11
  from hud.settings import settings
10
12
  from hud.shared.requests import make_request
11
13
  from hud.utils.hud_console import hud_console
@@ -136,13 +138,13 @@ def show_dev_ui(
136
138
  # Show other info below
137
139
  label = "Base image" if is_docker else "Server"
138
140
  hud_console.info("")
139
- hud_console.info(f"{hud_console.sym.ITEM} {label}: {server_name}")
140
- hud_console.info(f"{hud_console.sym.ITEM} Cursor:")
141
+ hud_console.print(f"{hud_console.sym.ITEM} {escape(label)}: {escape(server_name)}")
142
+ hud_console.print(f"{hud_console.sym.ITEM} Cursor:")
141
143
  # Display the Cursor link on its own line to prevent wrapping
142
144
  hud_console.link(cursor_deeplink)
143
145
  hud_console.info("")
144
146
  if hot_reload_enabled:
145
- hud_console.info(f"{hud_console.sym.SUCCESS} Hot-reload enabled")
147
+ hud_console.print(f"{hud_console.sym.SUCCESS} Hot-reload enabled")
146
148
  else:
147
149
  hud_console.info("Hot-reload disabled")
148
150
  hud_console.dim_info("Tip", "Pass --watch/-w to enable hot-reload")
@@ -6,6 +6,7 @@ from datetime import datetime
6
6
 
7
7
  import typer
8
8
  import yaml
9
+ from rich.markup import escape
9
10
  from rich.table import Table
10
11
 
11
12
  from hud.utils.hud_console import HUDConsole
@@ -59,8 +60,8 @@ def list_environments(
59
60
  else:
60
61
  hud_console.info("No environments found in local registry.")
61
62
  hud_console.info("")
62
- hud_console.info("Pull environments with: [cyan]hud pull <org/name:tag>[/cyan]")
63
- hud_console.info("Build environments with: [cyan]hud build[/cyan]")
63
+ hud_console.print("Pull environments with: [cyan]hud pull <org/name:tag>[/cyan]")
64
+ hud_console.print("Build environments with: [cyan]hud build[/cyan]")
64
65
  return
65
66
 
66
67
  # Collect all environments using the registry helper
@@ -131,8 +132,8 @@ def list_environments(
131
132
  if not environments:
132
133
  hud_console.info("No environments found matching criteria.")
133
134
  hud_console.info("")
134
- hud_console.info("Pull environments with: [cyan]hud pull <org/name:tag>[/cyan]")
135
- hud_console.info("Build environments with: [cyan]hud build[/cyan]")
135
+ hud_console.print("Pull environments with: [cyan]hud pull <org/name:tag>[/cyan]")
136
+ hud_console.print("Build environments with: [cyan]hud build[/cyan]")
136
137
  return
137
138
 
138
139
  # Create table
@@ -179,16 +180,16 @@ def list_environments(
179
180
  example_env = environments[0]
180
181
  example_ref = f"{example_env['name']}:{example_env['tag']}"
181
182
 
182
- hud_console.info(f"Run an environment: [cyan]hud run {example_ref}[/cyan]")
183
- hud_console.info(f"Analyze tools: [cyan]hud analyze {example_ref}[/cyan]")
184
- hud_console.info(f"Debug server: [cyan]hud debug {example_ref}[/cyan]")
183
+ hud_console.print(f"Run an environment: [cyan]hud run {escape(example_ref)}[/cyan]")
184
+ hud_console.print(f"Analyze tools: [cyan]hud analyze {escape(example_ref)}[/cyan]")
185
+ hud_console.print(f"Debug server: [cyan]hud debug {escape(example_ref)}[/cyan]")
185
186
 
186
- hud_console.info("Pull more environments: [cyan]hud pull <org/name:tag>[/cyan]")
187
- hud_console.info("Build new environments: [cyan]hud build[/cyan]")
187
+ hud_console.print("Pull more environments: [cyan]hud pull <org/name:tag>[/cyan]")
188
+ hud_console.print("Build new environments: [cyan]hud build[/cyan]")
188
189
 
189
190
  if verbose:
190
191
  hud_console.info("")
191
- hud_console.info(f"[dim]Registry location: {env_dir}[/dim]")
192
+ hud_console.print(f"[dim]Registry location: {escape(str(env_dir))}[/dim]")
192
193
 
193
194
 
194
195
  def list_command(
@@ -162,7 +162,7 @@ def remove_all_environments(
162
162
 
163
163
  hud_console.info("")
164
164
  hud_console.info("Note: Docker images may still exist locally.")
165
- hud_console.info("To remove them, use: [cyan]docker image prune[/cyan]")
165
+ hud_console.print("To remove them, use: [cyan]docker image prune[/cyan]")
166
166
 
167
167
 
168
168
  def remove_command(
@@ -60,12 +60,12 @@ class TestIncrementVersion:
60
60
  def test_increment_minor(self):
61
61
  """Test incrementing minor version."""
62
62
  assert increment_version("1.2.3", "minor") == "1.3.0"
63
- assert increment_version("0.5.24", "minor") == "0.6.0"
63
+ assert increment_version("0.5.25", "minor") == "0.6.0"
64
64
 
65
65
  def test_increment_major(self):
66
66
  """Test incrementing major version."""
67
67
  assert increment_version("1.2.3", "major") == "2.0.0"
68
- assert increment_version("0.5.24", "major") == "1.0.0"
68
+ assert increment_version("0.5.25", "major") == "1.0.0"
69
69
 
70
70
  def test_increment_with_v_prefix(self):
71
71
  """Test incrementing version with v prefix."""
@@ -356,6 +356,12 @@ class EvalContext(Environment):
356
356
  quiet=quiet,
357
357
  )
358
358
 
359
+ # v5 validation overrides any environment-level integration calls.
360
+ if task.validation is not None:
361
+ ctx._integration_test_calls = [
362
+ (call.name, call.arguments or {}) for call in task.validation
363
+ ]
364
+
359
365
  # Store task info for scenario execution
360
366
  ctx._task = task
361
367
 
@@ -187,3 +187,141 @@ class TestEvalContextFromEnvironment:
187
187
  assert ctx.variants == {"model": "gpt-4o"}
188
188
  assert ctx.group_id == "group-123"
189
189
  assert ctx.index == 5
190
+
191
+
192
class TestEvalContextFromTask:
    """Checks on how ``EvalContext.from_task`` derives integration test calls."""

    def test_v5_validation_populates_integration_calls(self) -> None:
        """Each ``Task.validation`` entry becomes a ``(name, arguments)`` pair."""
        from hud.environment import Environment
        from hud.eval.task import Task
        from hud.types import MCPToolCall

        task = Task(
            env=Environment("test-env"),
            scenario="demo",
            args={},
            validation=[
                MCPToolCall(name="tool_a", arguments={"x": 1}),
                MCPToolCall(name="tool_b", arguments={"y": "ok"}),
            ],
        )

        replay_plan = EvalContext.from_task(task)._integration_test_calls

        assert replay_plan == [("tool_a", {"x": 1}), ("tool_b", {"y": "ok"})]

    def test_v5_validation_overrides_environment_integration_calls(self) -> None:
        """Calls from ``Task.validation`` win over calls stored on the environment."""
        from hud.environment import Environment
        from hud.eval.task import Task
        from hud.types import MCPToolCall

        stale_env = Environment("test-env")
        stale_env._integration_test_calls = [("old_tool", {"stale": True})]
        replacement = MCPToolCall(name="new_tool", arguments={"fresh": True})

        task = Task(env=stale_env, scenario="demo", args={}, validation=[replacement])
        ctx = EvalContext.from_task(task)

        assert ctx._integration_test_calls == [("new_tool", {"fresh": True})]

    def test_v5_empty_validation_clears_environment_integration_calls(self) -> None:
        """An explicit empty ``validation`` list still replaces the environment's calls."""
        from hud.environment import Environment
        from hud.eval.task import Task

        stale_env = Environment("test-env")
        stale_env._integration_test_calls = [("old_tool", {"stale": True})]

        task = Task(env=stale_env, scenario="demo", args={}, validation=[])
        ctx = EvalContext.from_task(task)

        assert ctx._integration_test_calls == []

    def test_v4_integration_test_tool_remains_supported(self) -> None:
        """A v4 payload with ``integration_test_tool`` still yields integration calls."""
        from hud.eval.task import Task

        legacy_payload = {
            "prompt": "test",
            "mcp_config": {"server": {"url": "http://localhost"}},
            "evaluate_tool": {"name": "check", "arguments": {}},
            "integration_test_tool": [
                {"name": "legacy_tool", "arguments": {"v": 1}},
            ],
        }

        ctx = EvalContext.from_task(Task.from_v4(legacy_payload))

        assert ctx._integration_test_calls == [("legacy_tool", {"v": 1})]

    def test_v5_validation_replays_with_integration_runner(self) -> None:
        """``IntegrationTestRunner`` replays calls taken from ``Task.validation``.

        ``call_tool`` on the context is swapped for a recording stub, so the
        test observes exactly which ``(name, arguments)`` pairs the runner
        dispatched and in what order.
        """
        import asyncio

        from mcp import types as mcp_types

        from hud.agents.misc import IntegrationTestRunner
        from hud.environment import Environment
        from hud.eval.task import Task
        from hud.types import MCPToolCall, MCPToolResult

        recorded: list[tuple[str, dict[str, object]]] = []

        async def _recording_call_tool(call, /, **kwargs):
            # The stub accepts either a (name, arguments) tuple or a bare name.
            if isinstance(call, tuple):
                tool_name = str(call[0])
                tool_args = dict(call[1]) if len(call) > 1 else {}
            else:
                tool_name = str(call)
                tool_args = {}
            recorded.append((tool_name, tool_args))
            return MCPToolResult(
                content=[mcp_types.TextContent(type="text", text="ok")],
                isError=False,
            )

        async def _scenario() -> None:
            task = Task(
                env=Environment("test-env"),
                scenario="demo",
                args={},
                validation=[
                    MCPToolCall(name="tool_a", arguments={"x": 1}),
                    MCPToolCall(name="tool_b", arguments={"y": "ok"}),
                ],
            )
            ctx = EvalContext.from_task(task)
            ctx.call_tool = _recording_call_tool  # type: ignore[method-assign]

            outcome = await IntegrationTestRunner.create().run(ctx)
            assert outcome.done is True

        asyncio.run(_scenario())

        assert recorded == [("tool_a", {"x": 1}), ("tool_b", {"y": "ok"})]
@@ -153,18 +153,20 @@ class BashSession:
153
153
  assert self._process.stdout
154
154
  assert self._process.stderr
155
155
 
156
- # Send command with sentinel for exit code capture
157
- # Platform-specific syntax for command chaining and exit code
156
+ # Send command with sentinel for exit code capture.
157
+ # Use a newline before the sentinel echo (not ";" or "&") so that:
158
+ # 1. Heredoc delimiters aren't corrupted (e.g. EOF; echo '...' wouldn't match EOF)
159
+ # 2. The echo is a standalone command, avoiding syntax errors from leading ";"
158
160
  if sys.platform == "win32":
159
161
  if capture_exit_code:
160
- cmd_line = f"{command} & echo {self._sentinel}%errorlevel%\n"
162
+ cmd_line = f"{command}\necho {self._sentinel}%errorlevel%\n"
161
163
  else:
162
- cmd_line = f"{command} & echo {self._sentinel}\n"
164
+ cmd_line = f"{command}\necho {self._sentinel}\n"
163
165
  else:
164
166
  if capture_exit_code:
165
- cmd_line = f"{command}; echo '{self._sentinel}'$?\n"
167
+ cmd_line = f"{command}\necho '{self._sentinel}'$?\n"
166
168
  else:
167
- cmd_line = f"{command}; echo '{self._sentinel}'\n"
169
+ cmd_line = f"{command}\necho '{self._sentinel}'\n"
168
170
 
169
171
  self._process.stdin.write(cmd_line.encode())
170
172
  await self._process.stdin.drain()
@@ -73,6 +73,87 @@ class TestBashSession:
73
73
  assert result.error == ""
74
74
 
75
75
 
76
class TestBashSessionHeredoc:
    """Tests for heredoc handling in ClaudeBashSession.

    Every test in this class is POSIX-specific: heredocs are a POSIX-shell
    feature, and the sentinel assertion pins the single-quoted
    ``echo '<<exit>>'`` line. Each test therefore skips itself on Windows
    rather than failing for reasons unrelated to the code under test.
    """

    @staticmethod
    def _require_posix_shell() -> None:
        """Skip the calling test when running on Windows."""
        # Local import so this class does not depend on the module's imports.
        import sys

        if sys.platform == "win32":
            pytest.skip("heredoc tests require a POSIX shell")

    @pytest.mark.asyncio
    async def test_sentinel_on_own_line_after_heredoc(self):
        """Sentinel echo must be on its own line so heredoc terminators aren't corrupted."""
        # NOTE(review): the quoted-sentinel form asserted below is presumably
        # only produced by the non-Windows code path of the session — confirm.
        self._require_posix_shell()

        session = _BashSession()
        session._started = True

        # Fake subprocess: stdin captures what is written, stdout answers with
        # output terminated by the sentinel, stderr is empty.
        mock_process = MagicMock()
        mock_process.returncode = None
        mock_process.stdin = MagicMock()
        mock_process.stdin.write = MagicMock()
        mock_process.stdin.drain = AsyncMock()
        mock_process.stdout = MagicMock()
        mock_process.stdout.readuntil = AsyncMock(return_value=b"hello\n<<exit>>\n")
        mock_process.stderr = MagicMock()
        mock_process.stderr.read = AsyncMock(return_value=b"")

        session._process = mock_process

        heredoc_cmd = "python3 << 'EOF'\nprint('hello')\nEOF"
        await session.run(heredoc_cmd)

        # First positional argument of the last stdin.write call, as text.
        written = mock_process.stdin.write.call_args[0][0].decode()

        # EOF must be followed by newline, then the echo — never "EOF;" or "EOF echo"
        assert "EOF\necho '<<exit>>'\n" in written
        assert "EOF;" not in written
        assert "EOF echo" not in written

    @pytest.mark.asyncio
    async def test_heredoc_integration(self):
        """Integration test: a real heredoc command completes without hanging."""
        from hud.tools.coding.bash import ClaudeBashSession

        self._require_posix_shell()

        session = ClaudeBashSession()
        session._timeout = 5.0  # fail fast if sentinel is broken
        await session.start()
        try:
            result = await session.run("cat << 'EOF'\nhello from heredoc\nEOF")
            assert result.output is not None
            assert "hello from heredoc" in result.output
        finally:
            session.stop()

    @pytest.mark.asyncio
    async def test_heredoc_with_python_integration(self):
        """Integration test: python heredoc executes and returns output."""
        import shutil

        from hud.tools.coding.bash import ClaudeBashSession

        self._require_posix_shell()
        # The command below shells out to ``python3``; skip rather than fail
        # when that interpreter is not on PATH.
        if shutil.which("python3") is None:
            pytest.skip("python3 is not available on PATH")

        session = ClaudeBashSession()
        session._timeout = 5.0
        await session.start()
        try:
            result = await session.run("python3 << 'PYEOF'\nprint('result:', 2 + 2)\nPYEOF")
            assert result.output is not None
            assert "result: 4" in result.output
        finally:
            session.stop()

    @pytest.mark.asyncio
    async def test_command_after_heredoc_still_works(self):
        """Integration test: session is usable for further commands after a heredoc."""
        from hud.tools.coding.bash import ClaudeBashSession

        self._require_posix_shell()

        session = ClaudeBashSession()
        session._timeout = 5.0
        await session.start()
        try:
            r1 = await session.run("cat << 'EOF'\nfirst\nEOF")
            assert r1.output is not None
            assert "first" in r1.output

            # A second, plain command proves the heredoc left the shell in a
            # usable state.
            r2 = await session.run("echo second")
            assert r2.output is not None
            assert "second" in r2.output
        finally:
            session.stop()
155
+
156
+
76
157
  class TestBashTool:
77
158
  """Tests for BashTool."""
78
159