hud-python 0.4.53__tar.gz → 0.4.54__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (297)
  1. {hud_python-0.4.53 → hud_python-0.4.54}/PKG-INFO +8 -7
  2. {hud_python-0.4.53 → hud_python-0.4.54}/README.md +6 -6
  3. {hud_python-0.4.53 → hud_python-0.4.54}/environments/README.md +2 -2
  4. {hud_python-0.4.53 → hud_python-0.4.54}/environments/blank/server/pyproject.toml +1 -1
  5. {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/environment/todo/README.md +2 -2
  6. {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/server/pyproject.toml +1 -1
  7. {hud_python-0.4.53 → hud_python-0.4.54}/environments/deepresearch/server/pyproject.toml +1 -1
  8. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/__init__.py +14 -9
  9. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/dev.py +2 -2
  10. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/eval.py +24 -23
  11. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_eval.py +6 -6
  12. {hud_python-0.4.53 → hud_python-0.4.54}/hud/types.py +10 -0
  13. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_version.py +1 -1
  14. {hud_python-0.4.53 → hud_python-0.4.54}/hud/version.py +1 -1
  15. {hud_python-0.4.53 → hud_python-0.4.54}/pyproject.toml +2 -1
  16. {hud_python-0.4.53 → hud_python-0.4.54}/.gitignore +0 -0
  17. {hud_python-0.4.53 → hud_python-0.4.54}/LICENSE +0 -0
  18. {hud_python-0.4.53 → hud_python-0.4.54}/environments/blank/README.md +0 -0
  19. {hud_python-0.4.53 → hud_python-0.4.54}/environments/blank/environment/README.md +0 -0
  20. {hud_python-0.4.53 → hud_python-0.4.54}/environments/blank/environment/pyproject.toml +0 -0
  21. {hud_python-0.4.53 → hud_python-0.4.54}/environments/blank/server/README.md +0 -0
  22. {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/README.md +0 -0
  23. {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/environment/2048/README.md +0 -0
  24. {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/environment/2048/backend/pyproject.toml +0 -0
  25. {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/environment/README.md +0 -0
  26. {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/environment/pyproject.toml +0 -0
  27. {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/environment/todo/backend/pyproject.toml +0 -0
  28. {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/pyproject.toml +0 -0
  29. {hud_python-0.4.53 → hud_python-0.4.54}/environments/deepresearch/README.md +0 -0
  30. {hud_python-0.4.53 → hud_python-0.4.54}/environments/deepresearch/environment/pyproject.toml +0 -0
  31. {hud_python-0.4.53 → hud_python-0.4.54}/environments/deepresearch/pyproject.toml +0 -0
  32. {hud_python-0.4.53 → hud_python-0.4.54}/environments/remote_browser/README.md +0 -0
  33. {hud_python-0.4.53 → hud_python-0.4.54}/environments/remote_browser/pyproject.toml +0 -0
  34. {hud_python-0.4.53 → hud_python-0.4.54}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
  35. {hud_python-0.4.53 → hud_python-0.4.54}/environments/text_2048/README.md +0 -0
  36. {hud_python-0.4.53 → hud_python-0.4.54}/environments/text_2048/pyproject.toml +0 -0
  37. {hud_python-0.4.53 → hud_python-0.4.54}/examples/README.md +0 -0
  38. {hud_python-0.4.53 → hud_python-0.4.54}/hud/__init__.py +0 -0
  39. {hud_python-0.4.53 → hud_python-0.4.54}/hud/__main__.py +0 -0
  40. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/__init__.py +0 -0
  41. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/base.py +0 -0
  42. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/claude.py +0 -0
  43. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/grounded_openai.py +0 -0
  44. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/langchain.py +0 -0
  45. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/lite_llm.py +0 -0
  46. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/misc/__init__.py +0 -0
  47. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/misc/integration_test_agent.py +0 -0
  48. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/misc/response_agent.py +0 -0
  49. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/openai.py +0 -0
  50. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/openai_chat_generic.py +0 -0
  51. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/tests/__init__.py +0 -0
  52. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/tests/test_base.py +0 -0
  53. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/tests/test_base_runtime.py +0 -0
  54. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/tests/test_claude.py +0 -0
  55. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/tests/test_client.py +0 -0
  56. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
  57. {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/tests/test_openai.py +0 -0
  58. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/__main__.py +0 -0
  59. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/analyze.py +0 -0
  60. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/build.py +0 -0
  61. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/clone.py +0 -0
  62. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/debug.py +0 -0
  63. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/flows/__init__.py +0 -0
  64. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/flows/tasks.py +0 -0
  65. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/get.py +0 -0
  66. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/init.py +0 -0
  67. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/list_func.py +0 -0
  68. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/pull.py +0 -0
  69. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/push.py +0 -0
  70. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/remove.py +0 -0
  71. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/__init__.py +0 -0
  72. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/celebrate.py +0 -0
  73. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/config.py +0 -0
  74. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/display.py +0 -0
  75. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/gpu.py +0 -0
  76. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/gpu_utils.py +0 -0
  77. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/local_runner.py +0 -0
  78. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/presets.py +0 -0
  79. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/remote_runner.py +0 -0
  80. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/rl_api.py +0 -0
  81. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/viewer.py +0 -0
  82. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/vllm.py +0 -0
  83. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/wait_utils.py +0 -0
  84. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/__init__.py +0 -0
  85. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_analyze.py +0 -0
  86. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_analyze_metadata.py +0 -0
  87. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_analyze_module.py +0 -0
  88. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_build.py +0 -0
  89. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_build_failure.py +0 -0
  90. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_build_module.py +0 -0
  91. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_cli_init.py +0 -0
  92. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_cli_main.py +0 -0
  93. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
  94. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_cli_root.py +0 -0
  95. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_clone.py +0 -0
  96. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_cursor.py +0 -0
  97. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_debug.py +0 -0
  98. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_list_func.py +0 -0
  99. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_main_module.py +0 -0
  100. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_mcp_server.py +0 -0
  101. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_pull.py +0 -0
  102. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_push.py +0 -0
  103. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_push_happy.py +0 -0
  104. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_push_wrapper.py +0 -0
  105. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_registry.py +0 -0
  106. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_utils.py +0 -0
  107. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/__init__.py +0 -0
  108. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/config.py +0 -0
  109. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/cursor.py +0 -0
  110. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/docker.py +0 -0
  111. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/env_check.py +0 -0
  112. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/environment.py +0 -0
  113. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/interactive.py +0 -0
  114. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/local_runner.py +0 -0
  115. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/logging.py +0 -0
  116. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/metadata.py +0 -0
  117. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/package_runner.py +0 -0
  118. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/registry.py +0 -0
  119. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/remote_runner.py +0 -0
  120. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/runner.py +0 -0
  121. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/server.py +0 -0
  122. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/source_hash.py +0 -0
  123. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tasks.py +0 -0
  124. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/__init__.py +0 -0
  125. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_config.py +0 -0
  126. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_docker.py +0 -0
  127. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_docker_hints.py +0 -0
  128. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_env_check.py +0 -0
  129. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_environment.py +0 -0
  130. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_interactive_module.py +0 -0
  131. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_local_runner.py +0 -0
  132. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_logging_utils.py +0 -0
  133. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_metadata.py +0 -0
  134. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_package_runner.py +0 -0
  135. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_registry_utils.py +0 -0
  136. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_remote_runner.py +0 -0
  137. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_runner_modules.py +0 -0
  138. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_source_hash.py +0 -0
  139. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_tasks.py +0 -0
  140. {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/version_check.py +0 -0
  141. {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/README.md +0 -0
  142. {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/__init__.py +0 -0
  143. {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/base.py +0 -0
  144. {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/fastmcp.py +0 -0
  145. {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/mcp_use.py +0 -0
  146. {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/tests/__init__.py +0 -0
  147. {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/tests/test_client_integration.py +0 -0
  148. {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/tests/test_fastmcp.py +0 -0
  149. {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/tests/test_mcp_use_retry.py +0 -0
  150. {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/tests/test_protocol.py +0 -0
  151. {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/utils/__init__.py +0 -0
  152. {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/utils/mcp_use_retry.py +0 -0
  153. {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/utils/retry.py +0 -0
  154. {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/utils/retry_transport.py +0 -0
  155. {hud_python-0.4.53 → hud_python-0.4.54}/hud/datasets/__init__.py +0 -0
  156. {hud_python-0.4.53 → hud_python-0.4.54}/hud/datasets/parallel.py +0 -0
  157. {hud_python-0.4.53 → hud_python-0.4.54}/hud/datasets/runner.py +0 -0
  158. {hud_python-0.4.53 → hud_python-0.4.54}/hud/datasets/tests/__init__.py +0 -0
  159. {hud_python-0.4.53 → hud_python-0.4.54}/hud/datasets/tests/test_runner.py +0 -0
  160. {hud_python-0.4.53 → hud_python-0.4.54}/hud/datasets/tests/test_utils.py +0 -0
  161. {hud_python-0.4.53 → hud_python-0.4.54}/hud/datasets/utils.py +0 -0
  162. {hud_python-0.4.53 → hud_python-0.4.54}/hud/misc/__init__.py +0 -0
  163. {hud_python-0.4.53 → hud_python-0.4.54}/hud/misc/claude_plays_pokemon.py +0 -0
  164. {hud_python-0.4.53 → hud_python-0.4.54}/hud/native/__init__.py +0 -0
  165. {hud_python-0.4.53 → hud_python-0.4.54}/hud/native/comparator.py +0 -0
  166. {hud_python-0.4.53 → hud_python-0.4.54}/hud/native/tests/__init__.py +0 -0
  167. {hud_python-0.4.53 → hud_python-0.4.54}/hud/native/tests/test_comparator.py +0 -0
  168. {hud_python-0.4.53 → hud_python-0.4.54}/hud/native/tests/test_native_init.py +0 -0
  169. {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/__init__.py +0 -0
  170. {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/collector.py +0 -0
  171. {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/config.py +0 -0
  172. {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/context.py +0 -0
  173. {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/exporters.py +0 -0
  174. {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/instrumentation.py +0 -0
  175. {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/processors.py +0 -0
  176. {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/tests/__init__.py +0 -0
  177. {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/tests/test_instrumentation.py +0 -0
  178. {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/tests/test_processors.py +0 -0
  179. {hud_python-0.4.53 → hud_python-0.4.54}/hud/py.typed +0 -0
  180. {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/README.md +0 -0
  181. {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/__init__.py +0 -0
  182. {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/actor.py +0 -0
  183. {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/buffer.py +0 -0
  184. {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/chat_template.jinja +0 -0
  185. {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/config.py +0 -0
  186. {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/distributed.py +0 -0
  187. {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/learner.py +0 -0
  188. {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/tests/__init__.py +0 -0
  189. {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/tests/test_learner.py +0 -0
  190. {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/train.py +0 -0
  191. {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/types.py +0 -0
  192. {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/utils/start_vllm_server.sh +0 -0
  193. {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/utils.py +0 -0
  194. {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/vllm_adapter.py +0 -0
  195. {hud_python-0.4.53 → hud_python-0.4.54}/hud/samples/__init__.py +0 -0
  196. {hud_python-0.4.53 → hud_python-0.4.54}/hud/samples/browser.py +0 -0
  197. {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/__init__.py +0 -0
  198. {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/context.py +0 -0
  199. {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/helper/__init__.py +0 -0
  200. {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/low_level.py +0 -0
  201. {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/router.py +0 -0
  202. {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/server.py +0 -0
  203. {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/__init__.py +0 -0
  204. {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/test_add_tool.py +0 -0
  205. {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/test_context.py +0 -0
  206. {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  207. {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/test_mcp_server_integration.py +0 -0
  208. {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/test_mcp_server_more.py +0 -0
  209. {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/test_run_wrapper.py +0 -0
  210. {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/test_server_extra.py +0 -0
  211. {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/test_sigterm_runner.py +0 -0
  212. {hud_python-0.4.53 → hud_python-0.4.54}/hud/settings.py +0 -0
  213. {hud_python-0.4.53 → hud_python-0.4.54}/hud/shared/__init__.py +0 -0
  214. {hud_python-0.4.53 → hud_python-0.4.54}/hud/shared/exceptions.py +0 -0
  215. {hud_python-0.4.53 → hud_python-0.4.54}/hud/shared/hints.py +0 -0
  216. {hud_python-0.4.53 → hud_python-0.4.54}/hud/shared/requests.py +0 -0
  217. {hud_python-0.4.53 → hud_python-0.4.54}/hud/shared/tests/__init__.py +0 -0
  218. {hud_python-0.4.53 → hud_python-0.4.54}/hud/shared/tests/test_exceptions.py +0 -0
  219. {hud_python-0.4.53 → hud_python-0.4.54}/hud/shared/tests/test_hints.py +0 -0
  220. {hud_python-0.4.53 → hud_python-0.4.54}/hud/shared/tests/test_requests.py +0 -0
  221. {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/__init__.py +0 -0
  222. {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/async_context.py +0 -0
  223. {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/instrument.py +0 -0
  224. {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/job.py +0 -0
  225. {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/replay.py +0 -0
  226. {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/tests/__init__.py +0 -0
  227. {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/tests/test_async_context.py +0 -0
  228. {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/tests/test_instrument.py +0 -0
  229. {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/tests/test_job.py +0 -0
  230. {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/tests/test_replay.py +0 -0
  231. {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/tests/test_trace.py +0 -0
  232. {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/trace.py +0 -0
  233. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/__init__.py +0 -0
  234. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/base.py +0 -0
  235. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/bash.py +0 -0
  236. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/computer/__init__.py +0 -0
  237. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/computer/anthropic.py +0 -0
  238. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/computer/hud.py +0 -0
  239. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/computer/openai.py +0 -0
  240. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/computer/qwen.py +0 -0
  241. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/computer/settings.py +0 -0
  242. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/edit.py +0 -0
  243. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/executors/__init__.py +0 -0
  244. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/executors/base.py +0 -0
  245. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/executors/pyautogui.py +0 -0
  246. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/executors/tests/__init__.py +0 -0
  247. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/executors/tests/test_base_executor.py +0 -0
  248. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  249. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/executors/xdo.py +0 -0
  250. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/grounding/__init__.py +0 -0
  251. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/grounding/config.py +0 -0
  252. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/grounding/grounded_tool.py +0 -0
  253. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/grounding/grounder.py +0 -0
  254. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/grounding/tests/__init__.py +0 -0
  255. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  256. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/playwright.py +0 -0
  257. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/response.py +0 -0
  258. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/submit.py +0 -0
  259. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/__init__.py +0 -0
  260. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_base.py +0 -0
  261. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_bash.py +0 -0
  262. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_bash_extended.py +0 -0
  263. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_computer.py +0 -0
  264. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_computer_actions.py +0 -0
  265. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_edit.py +0 -0
  266. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_init.py +0 -0
  267. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_playwright_tool.py +0 -0
  268. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_response.py +0 -0
  269. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_submit.py +0 -0
  270. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_tools.py +0 -0
  271. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_tools_init.py +0 -0
  272. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_types.py +0 -0
  273. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_utils.py +0 -0
  274. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/types.py +0 -0
  275. {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/utils.py +0 -0
  276. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/__init__.py +0 -0
  277. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/agent_factories.py +0 -0
  278. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/async_utils.py +0 -0
  279. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/group_eval.py +0 -0
  280. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/hud_console.py +0 -0
  281. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/mcp.py +0 -0
  282. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/pretty_errors.py +0 -0
  283. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/progress.py +0 -0
  284. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/task_tracking.py +0 -0
  285. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tasks.py +0 -0
  286. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/telemetry.py +0 -0
  287. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/__init__.py +0 -0
  288. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_agent_factories.py +0 -0
  289. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_async_utils.py +0 -0
  290. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_init.py +0 -0
  291. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_mcp.py +0 -0
  292. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_pretty_errors.py +0 -0
  293. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_progress.py +0 -0
  294. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_tasks.py +0 -0
  295. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_telemetry.py +0 -0
  296. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_tool_shorthand.py +0 -0
  297. {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tool_shorthand.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.53
3
+ Version: 0.4.54
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -42,6 +42,7 @@ Requires-Dist: httpx<1,>=0.23.0
42
42
  Requires-Dist: hud-fastmcp-python-sdk>=0.1.2
43
43
  Requires-Dist: hud-mcp-python-sdk>=3.13.2
44
44
  Requires-Dist: hud-mcp-use-python-sdk==2.3.20
45
+ Requires-Dist: langchain==0.3.27
45
46
  Requires-Dist: numpy>=1.24.0
46
47
  Requires-Dist: openai
47
48
  Requires-Dist: opentelemetry-api>=1.34.1
@@ -247,8 +248,8 @@ The above example let's the agent play 2048 ([See replay](https://hud.so/trace/6
247
248
  RL using GRPO a Qwen2.5-VL model on any hud dataset:
248
249
 
249
250
  ```bash
250
- hud get hud-evals/basic-2048 # from HF
251
- hud rl basic-2048.json
251
+ hud get hud-evals/2048-basic # from HF
252
+ hud rl 2048-basic.json
252
253
  ```
253
254
 
254
255
  > See [agent training docs](https://docs.hud.so/train-agents/quickstart)
@@ -439,14 +440,14 @@ Train with the new interactive `hud rl` flow:
439
440
  uv tool install hud-python
440
441
 
441
442
  # Option A: Run directly from a HuggingFace dataset
442
- hud rl hud-evals/basic-2048
443
+ hud rl hud-evals/2048-basic
443
444
 
444
445
  # Option B: Download first, modify, then train
445
- hud get hud-evals/basic-2048
446
- hud rl basic-2048.json
446
+ hud get hud-evals/2048-basic
447
+ hud rl 2048-basic.json
447
448
 
448
449
  # Optional: baseline evaluation
449
- hud eval basic-2048.json
450
+ hud eval 2048-basic.json
450
451
  ```
451
452
 
452
453
  Supports multi‑turn RL for both:
@@ -109,8 +109,8 @@ The above example let's the agent play 2048 ([See replay](https://hud.so/trace/6
109
109
  RL using GRPO a Qwen2.5-VL model on any hud dataset:
110
110
 
111
111
  ```bash
112
- hud get hud-evals/basic-2048 # from HF
113
- hud rl basic-2048.json
112
+ hud get hud-evals/2048-basic # from HF
113
+ hud rl 2048-basic.json
114
114
  ```
115
115
 
116
116
  > See [agent training docs](https://docs.hud.so/train-agents/quickstart)
@@ -301,14 +301,14 @@ Train with the new interactive `hud rl` flow:
301
301
  uv tool install hud-python
302
302
 
303
303
  # Option A: Run directly from a HuggingFace dataset
304
- hud rl hud-evals/basic-2048
304
+ hud rl hud-evals/2048-basic
305
305
 
306
306
  # Option B: Download first, modify, then train
307
- hud get hud-evals/basic-2048
308
- hud rl basic-2048.json
307
+ hud get hud-evals/2048-basic
308
+ hud rl 2048-basic.json
309
309
 
310
310
  # Optional: baseline evaluation
311
- hud eval basic-2048.json
311
+ hud eval 2048-basic.json
312
312
  ```
313
313
 
314
314
  Supports multi‑turn RL for both:
@@ -804,9 +804,9 @@ class TodoCompleted:
804
804
  @problem("todo_basic", description="Complete two todo items", difficulty="easy")
805
805
  class TodoBasic:
806
806
  def get_setup(self):
807
- return {"function": "todo_seed", "args": {"num_items": 5}}
807
+ return {"name": "todo_seed", "arguments": {"num_items": 5}}
808
808
  def get_evaluation(self):
809
- return {"function": "todo_completed", "args": {"expected_count": 2}}
809
+ return {"name": "todo_completed", "arguments": {"expected_count": 2}}
810
810
  ```
811
811
 
812
812
  Decorators keep registration *next to the implementation* and avoid manual bookkeeping. The server simply exposes the combined metadata through an MCP **resource**. Follow `environments/browser/src/hud_controller/problems/registry.py` as a template and expose the JSON with `@mcp.resource("problems://registry")`.
@@ -4,7 +4,7 @@ version = "0.1.0"
4
4
  description = "MCP server for blank environment"
5
5
  requires-python = ">=3.11"
6
6
  dependencies = [
7
- "hud-python>=0.4.53",
7
+ "hud-python>=0.4.54",
8
8
  "httpx>=0.28.1",
9
9
  ]
10
10
 
@@ -47,8 +47,8 @@ await setup({"name": "todo_basic_usage"})
47
47
  await evaluate({"name": "todo_basic_usage"})
48
48
 
49
49
  # Direct function calls
50
- await setup({"function": "todo_reset", "args": {}})
51
- await evaluate({"function": "todo_completion_rate", "args": {"min_rate": 0.5}})
50
+ await setup({"name": "todo_reset", "arguments": {}})
51
+ await evaluate({"name": "todo_completion_rate", "arguments": {"min_rate": 0.5}})
52
52
 
53
53
  # MCP resource discovery
54
54
  todo_evaluators = await client.read_resource("evaluators://todo")
@@ -4,7 +4,7 @@ version = "0.1.0"
4
4
  description = "HUD Browser MCP Server"
5
5
  requires-python = ">=3.11,<3.14"
6
6
  dependencies = [
7
- "hud-python>=0.4.53",
7
+ "hud-python>=0.4.54",
8
8
  "httpx",
9
9
  "playwright",
10
10
  "pyautogui",
@@ -4,7 +4,7 @@ version = "0.1.0"
4
4
  description = "MCP server for DeepResearch environment"
5
5
  requires-python = ">=3.11"
6
6
  dependencies = [
7
- "hud-python>=0.4.53",
7
+ "hud-python>=0.4.54",
8
8
  "httpx>=0.24.0",
9
9
  ]
10
10
 
@@ -12,6 +12,8 @@ from rich.console import Console
12
12
  from rich.panel import Panel
13
13
  from rich.table import Table
14
14
 
15
+ from hud.types import AgentType
16
+
15
17
  from . import list_func as list_module
16
18
  from .analyze import (
17
19
  analyze_environment,
@@ -847,7 +849,7 @@ def eval(
847
849
  hud_console = HUDConsole()
848
850
 
849
851
  if integration_test:
850
- agent = "integration_test"
852
+ agent = AgentType.INTEGRATION_TEST
851
853
 
852
854
  # If no source provided, reuse RL helper to find a tasks file interactively
853
855
  if source is None:
@@ -894,17 +896,17 @@ def eval(
894
896
  # Add standard agent choices
895
897
  choices.extend(
896
898
  [
897
- {"name": "Claude 4 Sonnet", "value": "claude"},
898
- {"name": "OpenAI Computer Use", "value": "openai"},
899
- {"name": "vLLM (Local Server)", "value": "vllm"},
900
- {"name": "LiteLLM (Multi-provider)", "value": "litellm"},
899
+ {"name": "Claude 4 Sonnet", "value": AgentType.CLAUDE},
900
+ {"name": "OpenAI Computer Use", "value": AgentType.OPENAI},
901
+ {"name": "vLLM (Local Server)", "value": AgentType.VLLM},
902
+ {"name": "LiteLLM (Multi-provider)", "value": AgentType.LITELLM},
901
903
  ]
902
904
  )
903
905
 
904
906
  agent = hud_console.select("Select an agent to use:", choices=choices, default=0)
905
907
 
906
908
  # Handle HUD model selection
907
- if agent and agent not in ["claude", "openai", "vllm", "litellm", "integration_test"]:
909
+ if agent and agent not in [e.value for e in AgentType]:
908
910
  # Find remote model name
909
911
  model = agent
910
912
  if not vllm_base_url:
@@ -921,20 +923,23 @@ def eval(
921
923
  hud_console.error(f"Model {model} not found")
922
924
  raise typer.Exit(1)
923
925
  model = base_model
924
- agent = "vllm" # Use vLLM backend for HUD models
926
+ agent = AgentType.VLLM # Use vLLM backend for HUD models
925
927
  hud_console.info(f"Using HUD model: {model} (trained on {base_model})")
926
928
 
927
929
  # Validate agent choice
928
- valid_agents = ["claude", "openai", "vllm", "litellm", "integration_test"]
930
+ valid_agents = [e.value for e in AgentType]
929
931
  if agent not in valid_agents:
930
932
  hud_console.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
931
933
  raise typer.Exit(1)
932
934
 
935
+ # Type narrowing: agent is now guaranteed to be an AgentType value after validation
936
+ agent = AgentType(agent)
937
+
933
938
  # Run the command
934
939
  eval_command(
935
940
  source=source,
936
941
  full=full,
937
- agent=agent, # type: ignore
942
+ agent=agent,
938
943
  model=model,
939
944
  allowed_tools=allowed_tools,
940
945
  max_concurrent=max_concurrent,
@@ -238,9 +238,9 @@ async def run_mcp_module(
238
238
  if env_dir.exists() and (env_dir / "server.py").exists():
239
239
  hud_console.info("")
240
240
  hud_console.info(
241
- f"{hud_console.sym.FLOW} Don't forget to start the environment backend:"
241
+ f"{hud_console.sym.FLOW} Don't forget to start the environment backend in another terminal:"
242
242
  )
243
- hud_console.info(" cd ../environment && uvicorn server:app --reload")
243
+ hud_console.info(" cd environment && uv run python uvicorn server:app --reload")
244
244
 
245
245
  # Launch inspector if requested (first run only)
246
246
  if inspector and transport == "http":
@@ -5,13 +5,14 @@ from __future__ import annotations
5
5
  import asyncio
6
6
  import logging
7
7
  from pathlib import Path
8
- from typing import TYPE_CHECKING, Any, Literal
8
+ from typing import TYPE_CHECKING, Any
9
9
 
10
10
  import typer
11
11
 
12
12
  import hud
13
13
  from hud.cli.utils.env_check import ensure_built, find_environment_dir
14
14
  from hud.settings import settings
15
+ from hud.types import AgentType
15
16
  from hud.utils.group_eval import display_group_statistics, run_tasks_grouped
16
17
  from hud.utils.hud_console import HUDConsole
17
18
 
@@ -113,7 +114,7 @@ def _build_vllm_config(
113
114
 
114
115
 
115
116
  def build_agent(
116
- agent_type: Literal["claude", "openai", "vllm", "litellm", "integration_test"],
117
+ agent_type: AgentType,
117
118
  *,
118
119
  model: str | None = None,
119
120
  allowed_tools: list[str] | None = None,
@@ -123,11 +124,11 @@ def build_agent(
123
124
  """Create and return the requested agent type."""
124
125
 
125
126
  # Import agents lazily to avoid dependency issues
126
- if agent_type == "integration_test":
127
+ if agent_type == AgentType.INTEGRATION_TEST:
127
128
  from hud.agents.misc.integration_test_agent import IntegrationTestRunner
128
129
 
129
130
  return IntegrationTestRunner(verbose=verbose)
130
- elif agent_type == "vllm":
131
+ elif agent_type == AgentType.VLLM:
131
132
  # Create a generic OpenAI agent for vLLM server
132
133
  try:
133
134
  from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
@@ -147,7 +148,7 @@ def build_agent(
147
148
  )
148
149
  return GenericOpenAIChatAgent(**config)
149
150
 
150
- elif agent_type == "openai":
151
+ elif agent_type == AgentType.OPENAI:
151
152
  try:
152
153
  from hud.agents import OperatorAgent
153
154
  except ImportError as e:
@@ -165,7 +166,7 @@ def build_agent(
165
166
  else:
166
167
  return OperatorAgent(verbose=verbose)
167
168
 
168
- elif agent_type == "litellm":
169
+ elif agent_type == AgentType.LITELLM:
169
170
  try:
170
171
  from hud.agents.lite_llm import LiteAgent
171
172
  except ImportError as e:
@@ -209,7 +210,7 @@ def build_agent(
209
210
  async def run_single_task(
210
211
  source: str,
211
212
  *,
212
- agent_type: Literal["claude", "openai", "vllm", "litellm", "integration_test"] = "claude",
213
+ agent_type: AgentType = AgentType.CLAUDE,
213
214
  model: str | None = None,
214
215
  allowed_tools: list[str] | None = None,
215
216
  max_steps: int = 10,
@@ -268,14 +269,14 @@ async def run_single_task(
268
269
 
269
270
  # Use grouped evaluation if group_size > 1
270
271
  agent_config: dict[str, Any] = {}
271
- if agent_type == "integration_test":
272
+ if agent_type == AgentType.INTEGRATION_TEST:
272
273
  from hud.agents.misc.integration_test_agent import IntegrationTestRunner
273
274
 
274
275
  agent_class = IntegrationTestRunner
275
276
  agent_config = {"verbose": verbose}
276
277
  if allowed_tools:
277
278
  agent_config["allowed_tools"] = allowed_tools
278
- elif agent_type == "vllm":
279
+ elif agent_type == AgentType.VLLM:
279
280
  # Special handling for vLLM
280
281
  from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
281
282
 
@@ -288,14 +289,14 @@ async def run_single_task(
288
289
  allowed_tools=allowed_tools,
289
290
  verbose=verbose,
290
291
  )
291
- elif agent_type == "openai":
292
+ elif agent_type == AgentType.OPENAI:
292
293
  from hud.agents import OperatorAgent
293
294
 
294
295
  agent_class = OperatorAgent
295
296
  agent_config = {"verbose": verbose}
296
297
  if allowed_tools:
297
298
  agent_config["allowed_tools"] = allowed_tools
298
- elif agent_type == "litellm":
299
+ elif agent_type == AgentType.LITELLM:
299
300
  from hud.agents.lite_llm import LiteAgent
300
301
 
301
302
  agent_class = LiteAgent
@@ -305,7 +306,7 @@ async def run_single_task(
305
306
  }
306
307
  if allowed_tools:
307
308
  agent_config["allowed_tools"] = allowed_tools
308
- elif agent_type == "claude":
309
+ elif agent_type == AgentType.CLAUDE:
309
310
  from hud.agents import ClaudeAgent
310
311
 
311
312
  agent_class = ClaudeAgent
@@ -353,7 +354,7 @@ async def run_single_task(
353
354
  async def run_full_dataset(
354
355
  source: str,
355
356
  *,
356
- agent_type: Literal["claude", "openai", "vllm", "litellm", "integration_test"] = "claude",
357
+ agent_type: AgentType = AgentType.CLAUDE,
357
358
  model: str | None = None,
358
359
  allowed_tools: list[str] | None = None,
359
360
  max_concurrent: int = 30,
@@ -395,12 +396,12 @@ async def run_full_dataset(
395
396
 
396
397
  # Build agent class + config for run_dataset
397
398
  agent_config: dict[str, Any]
398
- if agent_type == "integration_test": # --integration-test mode
399
+ if agent_type == AgentType.INTEGRATION_TEST: # --integration-test mode
399
400
  from hud.agents.misc.integration_test_agent import IntegrationTestRunner
400
401
 
401
402
  agent_class = IntegrationTestRunner
402
403
  agent_config = {"verbose": verbose}
403
- elif agent_type == "vllm":
404
+ elif agent_type == AgentType.VLLM:
404
405
  try:
405
406
  from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
406
407
 
@@ -419,7 +420,7 @@ async def run_full_dataset(
419
420
  allowed_tools=allowed_tools,
420
421
  verbose=verbose,
421
422
  )
422
- elif agent_type == "openai":
423
+ elif agent_type == AgentType.OPENAI:
423
424
  try:
424
425
  from hud.agents import OperatorAgent
425
426
 
@@ -435,7 +436,7 @@ async def run_full_dataset(
435
436
  if allowed_tools:
436
437
  agent_config["allowed_tools"] = allowed_tools
437
438
 
438
- elif agent_type == "litellm":
439
+ elif agent_type == AgentType.LITELLM:
439
440
  try:
440
441
  from hud.agents.lite_llm import LiteAgent
441
442
 
@@ -539,8 +540,8 @@ def eval_command(
539
540
  "--full",
540
541
  help="Run the entire dataset (omit for single-task debug mode)",
541
542
  ),
542
- agent: Literal["claude", "openai", "vllm", "litellm", "integration_test"] = typer.Option(
543
- "claude",
543
+ agent: AgentType = typer.Option( # noqa: B008
544
+ AgentType.CLAUDE,
544
545
  "--agent",
545
546
  help="Agent backend to use (claude, openai, vllm for local server, or litellm)",
546
547
  ),
@@ -648,21 +649,21 @@ def eval_command(
648
649
 
649
650
  # We pass integration_test as the agent_type
650
651
  if integration_test:
651
- agent = "integration_test"
652
+ agent = AgentType.INTEGRATION_TEST
652
653
 
653
654
  # Check for required API keys
654
- if agent == "claude":
655
+ if agent == AgentType.CLAUDE:
655
656
  if not settings.anthropic_api_key:
656
657
  hud_console.error("ANTHROPIC_API_KEY is required for Claude agent")
657
658
  hud_console.info(
658
659
  "Set it in your environment or run: hud set ANTHROPIC_API_KEY=your-key-here"
659
660
  )
660
661
  raise typer.Exit(1)
661
- elif agent == "openai" and not settings.openai_api_key:
662
+ elif agent == AgentType.OPENAI and not settings.openai_api_key:
662
663
  hud_console.error("OPENAI_API_KEY is required for OpenAI agent")
663
664
  hud_console.info("Set it in your environment or run: hud set OPENAI_API_KEY=your-key-here")
664
665
  raise typer.Exit(1)
665
- elif agent == "vllm":
666
+ elif agent == AgentType.VLLM:
666
667
  if model:
667
668
  hud_console.info(f"Using vLLM with model: {model}")
668
669
  else:
@@ -11,7 +11,7 @@ from hud.cli.eval import (
11
11
  build_agent,
12
12
  run_single_task,
13
13
  )
14
- from hud.types import Task, Trace
14
+ from hud.types import AgentType, Task, Trace
15
15
 
16
16
 
17
17
  class TestBuildAgent:
@@ -26,7 +26,7 @@ class TestBuildAgent:
26
26
  mock_runner.return_value = mock_instance
27
27
 
28
28
  # Test with verbose=False
29
- result = build_agent("integration_test", verbose=False)
29
+ result = build_agent(AgentType.INTEGRATION_TEST, verbose=False)
30
30
 
31
31
  mock_runner.assert_called_once_with(verbose=False)
32
32
  assert result == mock_instance
@@ -40,7 +40,7 @@ class TestBuildAgent:
40
40
  mock_runner.return_value = mock_instance
41
41
 
42
42
  # Test with verbose=False
43
- result = build_agent("claude", verbose=False)
43
+ result = build_agent(AgentType.CLAUDE, verbose=False)
44
44
 
45
45
  mock_runner.assert_called_once_with(model="claude-sonnet-4-20250514", verbose=False)
46
46
  assert result == mock_instance
@@ -55,7 +55,7 @@ class TestBuildAgent:
55
55
 
56
56
  # Test with an explicit model, allowed_tools, and verbose=True
57
57
  result = build_agent(
58
- "claude",
58
+ AgentType.CLAUDE,
59
59
  model="claude-sonnet-4-20250514",
60
60
  allowed_tools=["act"],
61
61
  verbose=True,
@@ -97,7 +97,7 @@ class TestRunSingleTask:
97
97
  patch("hud.cli.eval.find_environment_dir", return_value=None),
98
98
  patch("hud.cli.eval.hud.trace"),
99
99
  ):
100
- await run_single_task("test.json", agent_type="integration_test", max_steps=10)
100
+ await run_single_task("test.json", agent_type=AgentType.INTEGRATION_TEST, max_steps=10)
101
101
 
102
102
  # Verify agent.run was called with the task containing agent_config
103
103
  mock_agent.run.assert_called_once()
@@ -119,7 +119,7 @@ class TestRunSingleTask:
119
119
  mock_grouped.return_value = [{"task": mock_task, "rewards": [1.0, 0.5]}]
120
120
 
121
121
  await run_single_task(
122
- "test.json", agent_type="integration_test", group_size=3, max_steps=10
122
+ "test.json", agent_type=AgentType.INTEGRATION_TEST, group_size=3, max_steps=10
123
123
  )
124
124
 
125
125
  # Verify run_tasks_grouped was called with correct group_size
@@ -5,6 +5,7 @@ import json
5
5
  import logging
6
6
  import uuid
7
7
  from collections import defaultdict
8
+ from enum import Enum
8
9
  from string import Template
9
10
  from typing import Any, Literal
10
11
 
@@ -21,6 +22,14 @@ logger = logging.getLogger(__name__)
21
22
  _missing_api_key_error_logged: bool = False
22
23
 
23
24
 
25
+ class AgentType(str, Enum):
26
+ CLAUDE = "claude"
27
+ OPENAI = "openai"
28
+ VLLM = "vllm"
29
+ LITELLM = "litellm"
30
+ INTEGRATION_TEST = "integration_test"
31
+
32
+
24
33
  class Task(BaseModel):
25
34
  """
26
35
  A task configuration that can be used to create a task.
@@ -325,6 +334,7 @@ class Trace(BaseModel):
325
334
 
326
335
  __all__ = [
327
336
  "AgentResponse",
337
+ "AgentType",
328
338
  "MCPToolCall",
329
339
  "MCPToolResult",
330
340
  "Trace",
@@ -5,4 +5,4 @@ def test_import():
5
5
  """Test that the package can be imported."""
6
6
  import hud
7
7
 
8
- assert hud.__version__ == "0.4.53"
8
+ assert hud.__version__ == "0.4.54"
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.4.53"
7
+ __version__ = "0.4.54"
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "hud-python"
3
- version = "0.4.53"
3
+ version = "0.4.54"
4
4
  description = "SDK for the HUD platform."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11, <3.13"
@@ -18,6 +18,7 @@ dependencies = [
18
18
  "hud-mcp-python-sdk>=3.13.2",
19
19
  "hud-fastmcp-python-sdk>=0.1.2",
20
20
  "hud-mcp-use-python-sdk==2.3.20",
21
+ "langchain==0.3.27",
21
22
  "pathspec>=0.12.1",
22
23
  "wrapt>=1.14.0",
23
24
  # CLI dependencies
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes