hud-python 0.4.68__tar.gz → 0.4.69__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (312)
  1. {hud_python-0.4.68 → hud_python-0.4.69}/PKG-INFO +3 -3
  2. {hud_python-0.4.68 → hud_python-0.4.69}/README.md +2 -2
  3. {hud_python-0.4.68 → hud_python-0.4.69}/environments/browser/pyproject.toml +1 -1
  4. {hud_python-0.4.68 → hud_python-0.4.69}/environments/online_mind2web/pyproject.toml +1 -1
  5. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/eval.py +21 -5
  6. {hud_python-0.4.68 → hud_python-0.4.69}/hud/datasets/utils.py +46 -16
  7. {hud_python-0.4.68 → hud_python-0.4.69}/hud/utils/tests/test_version.py +1 -1
  8. {hud_python-0.4.68 → hud_python-0.4.69}/hud/version.py +1 -1
  9. {hud_python-0.4.68 → hud_python-0.4.69}/pyproject.toml +1 -1
  10. {hud_python-0.4.68 → hud_python-0.4.69}/.gitignore +0 -0
  11. {hud_python-0.4.68 → hud_python-0.4.69}/LICENSE +0 -0
  12. {hud_python-0.4.68 → hud_python-0.4.69}/environments/README.md +0 -0
  13. {hud_python-0.4.68 → hud_python-0.4.69}/environments/blank/README.md +0 -0
  14. {hud_python-0.4.68 → hud_python-0.4.69}/environments/blank/environment/README.md +0 -0
  15. {hud_python-0.4.68 → hud_python-0.4.69}/environments/blank/environment/pyproject.toml +0 -0
  16. {hud_python-0.4.68 → hud_python-0.4.69}/environments/blank/server/README.md +0 -0
  17. {hud_python-0.4.68 → hud_python-0.4.69}/environments/blank/server/pyproject.toml +0 -0
  18. {hud_python-0.4.68 → hud_python-0.4.69}/environments/browser/README.md +0 -0
  19. {hud_python-0.4.68 → hud_python-0.4.69}/environments/browser/browser-base/README.md +0 -0
  20. {hud_python-0.4.68 → hud_python-0.4.69}/environments/browser/environment/2048/README.md +0 -0
  21. {hud_python-0.4.68 → hud_python-0.4.69}/environments/browser/environment/2048/backend/pyproject.toml +0 -0
  22. {hud_python-0.4.68 → hud_python-0.4.69}/environments/browser/environment/README.md +0 -0
  23. {hud_python-0.4.68 → hud_python-0.4.69}/environments/browser/environment/pyproject.toml +0 -0
  24. {hud_python-0.4.68 → hud_python-0.4.69}/environments/browser/environment/todo/README.md +0 -0
  25. {hud_python-0.4.68 → hud_python-0.4.69}/environments/browser/environment/todo/backend/pyproject.toml +0 -0
  26. {hud_python-0.4.68 → hud_python-0.4.69}/environments/browser/server/pyproject.toml +0 -0
  27. {hud_python-0.4.68 → hud_python-0.4.69}/environments/deepresearch/README.md +0 -0
  28. {hud_python-0.4.68 → hud_python-0.4.69}/environments/deepresearch/environment/pyproject.toml +0 -0
  29. {hud_python-0.4.68 → hud_python-0.4.69}/environments/deepresearch/pyproject.toml +0 -0
  30. {hud_python-0.4.68 → hud_python-0.4.69}/environments/deepresearch/server/pyproject.toml +0 -0
  31. {hud_python-0.4.68 → hud_python-0.4.69}/environments/jupyter/README.md +0 -0
  32. {hud_python-0.4.68 → hud_python-0.4.69}/environments/jupyter/server/pyproject.toml +0 -0
  33. {hud_python-0.4.68 → hud_python-0.4.69}/environments/online_mind2web/README.md +0 -0
  34. {hud_python-0.4.68 → hud_python-0.4.69}/environments/online_mind2web/src/hud_controller/providers/README.md +0 -0
  35. {hud_python-0.4.68 → hud_python-0.4.69}/environments/remote_browser/README.md +0 -0
  36. {hud_python-0.4.68 → hud_python-0.4.69}/environments/remote_browser/pyproject.toml +0 -0
  37. {hud_python-0.4.68 → hud_python-0.4.69}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
  38. {hud_python-0.4.68 → hud_python-0.4.69}/environments/rubrics/README.md +0 -0
  39. {hud_python-0.4.68 → hud_python-0.4.69}/environments/rubrics/environment/pyproject.toml +0 -0
  40. {hud_python-0.4.68 → hud_python-0.4.69}/environments/rubrics/pyproject.toml +0 -0
  41. {hud_python-0.4.68 → hud_python-0.4.69}/environments/rubrics/server/pyproject.toml +0 -0
  42. {hud_python-0.4.68 → hud_python-0.4.69}/environments/text_2048/README.md +0 -0
  43. {hud_python-0.4.68 → hud_python-0.4.69}/environments/text_2048/pyproject.toml +0 -0
  44. {hud_python-0.4.68 → hud_python-0.4.69}/examples/README.md +0 -0
  45. {hud_python-0.4.68 → hud_python-0.4.69}/hud/__init__.py +0 -0
  46. {hud_python-0.4.68 → hud_python-0.4.69}/hud/__main__.py +0 -0
  47. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/__init__.py +0 -0
  48. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/base.py +0 -0
  49. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/claude.py +0 -0
  50. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/gemini.py +0 -0
  51. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/grounded_openai.py +0 -0
  52. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/misc/__init__.py +0 -0
  53. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/misc/integration_test_agent.py +0 -0
  54. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/misc/response_agent.py +0 -0
  55. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/openai.py +0 -0
  56. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/openai_chat.py +0 -0
  57. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/operator.py +0 -0
  58. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/tests/__init__.py +0 -0
  59. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/tests/conftest.py +0 -0
  60. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/tests/test_base.py +0 -0
  61. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/tests/test_base_runtime.py +0 -0
  62. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/tests/test_claude.py +0 -0
  63. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/tests/test_client.py +0 -0
  64. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/tests/test_gemini.py +0 -0
  65. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
  66. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/tests/test_openai.py +0 -0
  67. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/tests/test_operator.py +0 -0
  68. {hud_python-0.4.68 → hud_python-0.4.69}/hud/agents/utils.py +0 -0
  69. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/__init__.py +0 -0
  70. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/__main__.py +0 -0
  71. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/analyze.py +0 -0
  72. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/build.py +0 -0
  73. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/clone.py +0 -0
  74. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/debug.py +0 -0
  75. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/dev.py +0 -0
  76. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/flows/__init__.py +0 -0
  77. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/flows/dev.py +0 -0
  78. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/flows/tasks.py +0 -0
  79. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/get.py +0 -0
  80. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/init.py +0 -0
  81. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/list_func.py +0 -0
  82. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/pull.py +0 -0
  83. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/push.py +0 -0
  84. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/remove.py +0 -0
  85. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/rft.py +0 -0
  86. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/rft_status.py +0 -0
  87. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/rl/__init__.py +0 -0
  88. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/rl/celebrate.py +0 -0
  89. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/rl/config.py +0 -0
  90. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/rl/display.py +0 -0
  91. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/rl/gpu.py +0 -0
  92. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/rl/gpu_utils.py +0 -0
  93. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/rl/local_runner.py +0 -0
  94. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/rl/presets.py +0 -0
  95. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/rl/remote_runner.py +0 -0
  96. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/rl/rl_api.py +0 -0
  97. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/rl/viewer.py +0 -0
  98. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/rl/vllm.py +0 -0
  99. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/rl/wait_utils.py +0 -0
  100. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/__init__.py +0 -0
  101. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_analyze.py +0 -0
  102. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_analyze_metadata.py +0 -0
  103. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_analyze_module.py +0 -0
  104. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_build.py +0 -0
  105. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_build_failure.py +0 -0
  106. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_build_module.py +0 -0
  107. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_cli_init.py +0 -0
  108. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_cli_main.py +0 -0
  109. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
  110. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_cli_root.py +0 -0
  111. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_clone.py +0 -0
  112. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_convert.py +0 -0
  113. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_cursor.py +0 -0
  114. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_debug.py +0 -0
  115. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_eval.py +0 -0
  116. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_list_func.py +0 -0
  117. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_main_module.py +0 -0
  118. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_mcp_server.py +0 -0
  119. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_pull.py +0 -0
  120. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_push.py +0 -0
  121. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_push_happy.py +0 -0
  122. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_push_wrapper.py +0 -0
  123. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_registry.py +0 -0
  124. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/tests/test_utils.py +0 -0
  125. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/__init__.py +0 -0
  126. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/config.py +0 -0
  127. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/cursor.py +0 -0
  128. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/docker.py +0 -0
  129. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/env_check.py +0 -0
  130. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/environment.py +0 -0
  131. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/interactive.py +0 -0
  132. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/local_runner.py +0 -0
  133. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/logging.py +0 -0
  134. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/metadata.py +0 -0
  135. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/package_runner.py +0 -0
  136. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/registry.py +0 -0
  137. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/remote_runner.py +0 -0
  138. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/runner.py +0 -0
  139. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/server.py +0 -0
  140. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/source_hash.py +0 -0
  141. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/tasks.py +0 -0
  142. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/tests/__init__.py +0 -0
  143. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/tests/test_config.py +0 -0
  144. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/tests/test_docker.py +0 -0
  145. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/tests/test_docker_hints.py +0 -0
  146. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/tests/test_env_check.py +0 -0
  147. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/tests/test_environment.py +0 -0
  148. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/tests/test_interactive_module.py +0 -0
  149. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/tests/test_local_runner.py +0 -0
  150. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/tests/test_logging_utils.py +0 -0
  151. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/tests/test_metadata.py +0 -0
  152. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/tests/test_package_runner.py +0 -0
  153. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/tests/test_registry_utils.py +0 -0
  154. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/tests/test_remote_runner.py +0 -0
  155. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/tests/test_runner_modules.py +0 -0
  156. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/tests/test_source_hash.py +0 -0
  157. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/tests/test_tasks.py +0 -0
  158. {hud_python-0.4.68 → hud_python-0.4.69}/hud/cli/utils/version_check.py +0 -0
  159. {hud_python-0.4.68 → hud_python-0.4.69}/hud/clients/README.md +0 -0
  160. {hud_python-0.4.68 → hud_python-0.4.69}/hud/clients/__init__.py +0 -0
  161. {hud_python-0.4.68 → hud_python-0.4.69}/hud/clients/base.py +0 -0
  162. {hud_python-0.4.68 → hud_python-0.4.69}/hud/clients/fastmcp.py +0 -0
  163. {hud_python-0.4.68 → hud_python-0.4.69}/hud/clients/mcp_use.py +0 -0
  164. {hud_python-0.4.68 → hud_python-0.4.69}/hud/clients/tests/__init__.py +0 -0
  165. {hud_python-0.4.68 → hud_python-0.4.69}/hud/clients/tests/test_client_integration.py +0 -0
  166. {hud_python-0.4.68 → hud_python-0.4.69}/hud/clients/tests/test_fastmcp.py +0 -0
  167. {hud_python-0.4.68 → hud_python-0.4.69}/hud/clients/tests/test_mcp_use_retry.py +0 -0
  168. {hud_python-0.4.68 → hud_python-0.4.69}/hud/clients/tests/test_protocol.py +0 -0
  169. {hud_python-0.4.68 → hud_python-0.4.69}/hud/clients/utils/__init__.py +0 -0
  170. {hud_python-0.4.68 → hud_python-0.4.69}/hud/clients/utils/mcp_use_retry.py +0 -0
  171. {hud_python-0.4.68 → hud_python-0.4.69}/hud/clients/utils/retry.py +0 -0
  172. {hud_python-0.4.68 → hud_python-0.4.69}/hud/clients/utils/retry_transport.py +0 -0
  173. {hud_python-0.4.68 → hud_python-0.4.69}/hud/datasets/__init__.py +0 -0
  174. {hud_python-0.4.68 → hud_python-0.4.69}/hud/datasets/runner.py +0 -0
  175. {hud_python-0.4.68 → hud_python-0.4.69}/hud/datasets/tests/__init__.py +0 -0
  176. {hud_python-0.4.68 → hud_python-0.4.69}/hud/datasets/tests/test_runner.py +0 -0
  177. {hud_python-0.4.68 → hud_python-0.4.69}/hud/datasets/tests/test_utils.py +0 -0
  178. {hud_python-0.4.68 → hud_python-0.4.69}/hud/misc/__init__.py +0 -0
  179. {hud_python-0.4.68 → hud_python-0.4.69}/hud/misc/claude_plays_pokemon.py +0 -0
  180. {hud_python-0.4.68 → hud_python-0.4.69}/hud/native/__init__.py +0 -0
  181. {hud_python-0.4.68 → hud_python-0.4.69}/hud/native/comparator.py +0 -0
  182. {hud_python-0.4.68 → hud_python-0.4.69}/hud/native/tests/__init__.py +0 -0
  183. {hud_python-0.4.68 → hud_python-0.4.69}/hud/native/tests/test_comparator.py +0 -0
  184. {hud_python-0.4.68 → hud_python-0.4.69}/hud/native/tests/test_native_init.py +0 -0
  185. {hud_python-0.4.68 → hud_python-0.4.69}/hud/otel/__init__.py +0 -0
  186. {hud_python-0.4.68 → hud_python-0.4.69}/hud/otel/collector.py +0 -0
  187. {hud_python-0.4.68 → hud_python-0.4.69}/hud/otel/config.py +0 -0
  188. {hud_python-0.4.68 → hud_python-0.4.69}/hud/otel/context.py +0 -0
  189. {hud_python-0.4.68 → hud_python-0.4.69}/hud/otel/exporters.py +0 -0
  190. {hud_python-0.4.68 → hud_python-0.4.69}/hud/otel/instrumentation.py +0 -0
  191. {hud_python-0.4.68 → hud_python-0.4.69}/hud/otel/processors.py +0 -0
  192. {hud_python-0.4.68 → hud_python-0.4.69}/hud/otel/tests/__init__.py +0 -0
  193. {hud_python-0.4.68 → hud_python-0.4.69}/hud/otel/tests/test_instrumentation.py +0 -0
  194. {hud_python-0.4.68 → hud_python-0.4.69}/hud/otel/tests/test_processors.py +0 -0
  195. {hud_python-0.4.68 → hud_python-0.4.69}/hud/py.typed +0 -0
  196. {hud_python-0.4.68 → hud_python-0.4.69}/hud/rl/README.md +0 -0
  197. {hud_python-0.4.68 → hud_python-0.4.69}/hud/rl/__init__.py +0 -0
  198. {hud_python-0.4.68 → hud_python-0.4.69}/hud/rl/actor.py +0 -0
  199. {hud_python-0.4.68 → hud_python-0.4.69}/hud/rl/buffer.py +0 -0
  200. {hud_python-0.4.68 → hud_python-0.4.69}/hud/rl/chat_template.jinja +0 -0
  201. {hud_python-0.4.68 → hud_python-0.4.69}/hud/rl/config.py +0 -0
  202. {hud_python-0.4.68 → hud_python-0.4.69}/hud/rl/distributed.py +0 -0
  203. {hud_python-0.4.68 → hud_python-0.4.69}/hud/rl/learner.py +0 -0
  204. {hud_python-0.4.68 → hud_python-0.4.69}/hud/rl/tests/__init__.py +0 -0
  205. {hud_python-0.4.68 → hud_python-0.4.69}/hud/rl/tests/test_learner.py +0 -0
  206. {hud_python-0.4.68 → hud_python-0.4.69}/hud/rl/train.py +0 -0
  207. {hud_python-0.4.68 → hud_python-0.4.69}/hud/rl/types.py +0 -0
  208. {hud_python-0.4.68 → hud_python-0.4.69}/hud/rl/utils/start_vllm_server.sh +0 -0
  209. {hud_python-0.4.68 → hud_python-0.4.69}/hud/rl/utils.py +0 -0
  210. {hud_python-0.4.68 → hud_python-0.4.69}/hud/rl/vllm_adapter.py +0 -0
  211. {hud_python-0.4.68 → hud_python-0.4.69}/hud/samples/__init__.py +0 -0
  212. {hud_python-0.4.68 → hud_python-0.4.69}/hud/samples/browser.py +0 -0
  213. {hud_python-0.4.68 → hud_python-0.4.69}/hud/server/__init__.py +0 -0
  214. {hud_python-0.4.68 → hud_python-0.4.69}/hud/server/context.py +0 -0
  215. {hud_python-0.4.68 → hud_python-0.4.69}/hud/server/helper/__init__.py +0 -0
  216. {hud_python-0.4.68 → hud_python-0.4.69}/hud/server/low_level.py +0 -0
  217. {hud_python-0.4.68 → hud_python-0.4.69}/hud/server/router.py +0 -0
  218. {hud_python-0.4.68 → hud_python-0.4.69}/hud/server/server.py +0 -0
  219. {hud_python-0.4.68 → hud_python-0.4.69}/hud/server/tests/__init__.py +0 -0
  220. {hud_python-0.4.68 → hud_python-0.4.69}/hud/server/tests/test_add_tool.py +0 -0
  221. {hud_python-0.4.68 → hud_python-0.4.69}/hud/server/tests/test_context.py +0 -0
  222. {hud_python-0.4.68 → hud_python-0.4.69}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  223. {hud_python-0.4.68 → hud_python-0.4.69}/hud/server/tests/test_mcp_server_integration.py +0 -0
  224. {hud_python-0.4.68 → hud_python-0.4.69}/hud/server/tests/test_mcp_server_more.py +0 -0
  225. {hud_python-0.4.68 → hud_python-0.4.69}/hud/server/tests/test_run_wrapper.py +0 -0
  226. {hud_python-0.4.68 → hud_python-0.4.69}/hud/server/tests/test_server_extra.py +0 -0
  227. {hud_python-0.4.68 → hud_python-0.4.69}/hud/server/tests/test_sigterm_runner.py +0 -0
  228. {hud_python-0.4.68 → hud_python-0.4.69}/hud/settings.py +0 -0
  229. {hud_python-0.4.68 → hud_python-0.4.69}/hud/shared/__init__.py +0 -0
  230. {hud_python-0.4.68 → hud_python-0.4.69}/hud/shared/exceptions.py +0 -0
  231. {hud_python-0.4.68 → hud_python-0.4.69}/hud/shared/hints.py +0 -0
  232. {hud_python-0.4.68 → hud_python-0.4.69}/hud/shared/requests.py +0 -0
  233. {hud_python-0.4.68 → hud_python-0.4.69}/hud/shared/tests/__init__.py +0 -0
  234. {hud_python-0.4.68 → hud_python-0.4.69}/hud/shared/tests/test_exceptions.py +0 -0
  235. {hud_python-0.4.68 → hud_python-0.4.69}/hud/shared/tests/test_hints.py +0 -0
  236. {hud_python-0.4.68 → hud_python-0.4.69}/hud/shared/tests/test_requests.py +0 -0
  237. {hud_python-0.4.68 → hud_python-0.4.69}/hud/telemetry/__init__.py +0 -0
  238. {hud_python-0.4.68 → hud_python-0.4.69}/hud/telemetry/async_context.py +0 -0
  239. {hud_python-0.4.68 → hud_python-0.4.69}/hud/telemetry/instrument.py +0 -0
  240. {hud_python-0.4.68 → hud_python-0.4.69}/hud/telemetry/job.py +0 -0
  241. {hud_python-0.4.68 → hud_python-0.4.69}/hud/telemetry/replay.py +0 -0
  242. {hud_python-0.4.68 → hud_python-0.4.69}/hud/telemetry/tests/__init__.py +0 -0
  243. {hud_python-0.4.68 → hud_python-0.4.69}/hud/telemetry/tests/test_async_context.py +0 -0
  244. {hud_python-0.4.68 → hud_python-0.4.69}/hud/telemetry/tests/test_instrument.py +0 -0
  245. {hud_python-0.4.68 → hud_python-0.4.69}/hud/telemetry/tests/test_job.py +0 -0
  246. {hud_python-0.4.68 → hud_python-0.4.69}/hud/telemetry/tests/test_replay.py +0 -0
  247. {hud_python-0.4.68 → hud_python-0.4.69}/hud/telemetry/tests/test_trace.py +0 -0
  248. {hud_python-0.4.68 → hud_python-0.4.69}/hud/telemetry/trace.py +0 -0
  249. {hud_python-0.4.68 → hud_python-0.4.69}/hud/telemetry/utils.py +0 -0
  250. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/__init__.py +0 -0
  251. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/base.py +0 -0
  252. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/bash.py +0 -0
  253. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/computer/__init__.py +0 -0
  254. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/computer/anthropic.py +0 -0
  255. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/computer/gemini.py +0 -0
  256. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/computer/hud.py +0 -0
  257. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/computer/openai.py +0 -0
  258. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/computer/qwen.py +0 -0
  259. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/computer/settings.py +0 -0
  260. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/edit.py +0 -0
  261. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/executors/__init__.py +0 -0
  262. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/executors/base.py +0 -0
  263. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/executors/pyautogui.py +0 -0
  264. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/executors/tests/__init__.py +0 -0
  265. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/executors/tests/test_base_executor.py +0 -0
  266. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  267. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/executors/xdo.py +0 -0
  268. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/grounding/__init__.py +0 -0
  269. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/grounding/config.py +0 -0
  270. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/grounding/grounded_tool.py +0 -0
  271. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/grounding/grounder.py +0 -0
  272. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/grounding/tests/__init__.py +0 -0
  273. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  274. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/jupyter.py +0 -0
  275. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/playwright.py +0 -0
  276. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/response.py +0 -0
  277. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/submit.py +0 -0
  278. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/tests/__init__.py +0 -0
  279. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/tests/test_base.py +0 -0
  280. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/tests/test_bash.py +0 -0
  281. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/tests/test_bash_extended.py +0 -0
  282. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/tests/test_computer.py +0 -0
  283. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/tests/test_computer_actions.py +0 -0
  284. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/tests/test_edit.py +0 -0
  285. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/tests/test_init.py +0 -0
  286. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/tests/test_jupyter_tool.py +0 -0
  287. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/tests/test_playwright_tool.py +0 -0
  288. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/tests/test_response.py +0 -0
  289. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/tests/test_submit.py +0 -0
  290. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/tests/test_tools.py +0 -0
  291. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/tests/test_tools_init.py +0 -0
  292. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/tests/test_types.py +0 -0
  293. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/tests/test_utils.py +0 -0
  294. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/types.py +0 -0
  295. {hud_python-0.4.68 → hud_python-0.4.69}/hud/tools/utils.py +0 -0
  296. {hud_python-0.4.68 → hud_python-0.4.69}/hud/types.py +0 -0
  297. {hud_python-0.4.68 → hud_python-0.4.69}/hud/utils/__init__.py +0 -0
  298. {hud_python-0.4.68 → hud_python-0.4.69}/hud/utils/hud_console.py +0 -0
  299. {hud_python-0.4.68 → hud_python-0.4.69}/hud/utils/mcp.py +0 -0
  300. {hud_python-0.4.68 → hud_python-0.4.69}/hud/utils/pretty_errors.py +0 -0
  301. {hud_python-0.4.68 → hud_python-0.4.69}/hud/utils/strict_schema.py +0 -0
  302. {hud_python-0.4.68 → hud_python-0.4.69}/hud/utils/tasks.py +0 -0
  303. {hud_python-0.4.68 → hud_python-0.4.69}/hud/utils/telemetry.py +0 -0
  304. {hud_python-0.4.68 → hud_python-0.4.69}/hud/utils/tests/__init__.py +0 -0
  305. {hud_python-0.4.68 → hud_python-0.4.69}/hud/utils/tests/test_init.py +0 -0
  306. {hud_python-0.4.68 → hud_python-0.4.69}/hud/utils/tests/test_mcp.py +0 -0
  307. {hud_python-0.4.68 → hud_python-0.4.69}/hud/utils/tests/test_pretty_errors.py +0 -0
  308. {hud_python-0.4.68 → hud_python-0.4.69}/hud/utils/tests/test_tasks.py +0 -0
  309. {hud_python-0.4.68 → hud_python-0.4.69}/hud/utils/tests/test_telemetry.py +0 -0
  310. {hud_python-0.4.68 → hud_python-0.4.69}/hud/utils/tests/test_tool_shorthand.py +0 -0
  311. {hud_python-0.4.68 → hud_python-0.4.69}/hud/utils/tool_shorthand.py +0 -0
  312. {hud_python-0.4.68 → hud_python-0.4.69}/hud/utils/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.68
3
+ Version: 0.4.69
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -520,8 +520,8 @@ Thanks to all our contributors!
520
520
 
521
521
  ```bibtex
522
522
  @software{hud2025agentevalplatform,
523
- author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Oskars Putans and Govind Pimpale and Mayank Singamreddy and Nguyen Nhat Minh},
524
- title = {HUD: An Evaluation Platform for Agents},
523
+ author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
524
+ title = {HUD: An Evaluation and RL Environments Platform for Agents},
525
525
  date = {2025-04},
526
526
  url = {https://github.com/hud-evals/hud-python},
527
527
  langid = {en}
@@ -403,8 +403,8 @@ Thanks to all our contributors!
403
403
 
404
404
  ```bibtex
405
405
  @software{hud2025agentevalplatform,
406
- author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Oskars Putans and Govind Pimpale and Mayank Singamreddy and Nguyen Nhat Minh},
407
- title = {HUD: An Evaluation Platform for Agents},
406
+ author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
407
+ title = {HUD: An Evaluation and RL Environments Platform for Agents},
408
408
  date = {2025-04},
409
409
  url = {https://github.com/hud-evals/hud-python},
410
410
  langid = {en}
@@ -3,7 +3,7 @@ name = "hud-browser-controller"
3
3
  version = "0.1.0"
4
4
  description = "HUD Browser Controller - MCP interface for browser environments"
5
5
  requires-python = ">=3.11,<3.14"
6
- dependencies = [ "pydantic>=2.6,<3", "pydantic-settings>=2.2,<3", "hud-python>=0.4.68", "playwright", "pyautogui", "httpx", "typer", "fastapi>=0.104.1", "uvicorn[standard]>=0.24.0", "python-multipart>=0.0.6",]
6
+ dependencies = [ "pydantic>=2.6,<3", "pydantic-settings>=2.2,<3", "hud-python>=0.4.69", "playwright", "pyautogui", "httpx", "typer", "fastapi>=0.104.1", "uvicorn[standard]>=0.24.0", "python-multipart>=0.0.6",]
7
7
 
8
8
  [build-system]
9
9
  requires = [ "hatchling",]
@@ -3,7 +3,7 @@ name = "hud-om2w"
3
3
  version = "0.1.0"
4
4
  description = "HUD Remote Browser Controller with MCP tools for cloud browser providers"
5
5
  requires-python = ">=3.11,<3.13"
6
- dependencies = [ "hud-python>=0.4.68", "anthropic>=0.74.0", "pyautogui", "playwright", "httpx", "typer", "google-api-python-client", "google-auth",]
6
+ dependencies = [ "hud-python>=0.4.69", "anthropic>=0.74.0", "pyautogui", "playwright", "httpx", "typer", "google-api-python-client", "google-auth",]
7
7
 
8
8
  [build-system]
9
9
  requires = [ "hatchling",]
@@ -53,13 +53,19 @@ _AGENT_PRESETS: list[AgentPreset] = [
53
53
  AgentPreset(
54
54
  "Gemini 2.5 Computer Use", AgentType.GEMINI, "gemini-2.5-computer-use-preview-10-2025"
55
55
  ),
56
- # HUD Gateway presets
56
+ # HUD Gateway presets (models via HUD Inference API)
57
57
  AgentPreset(
58
- "Grok 4.1 Fast",
58
+ "Grok 4-1 Fast (xAI)",
59
59
  AgentType.OPENAI_COMPATIBLE,
60
- "xai/grok-4-1-fast-reasoning",
60
+ "grok-4-1-fast",
61
61
  {"openai_compatible": {"base_url": settings.hud_gateway_url, "model_name": "Grok"}},
62
62
  ),
63
+ AgentPreset(
64
+ "GLM-4.5V (Z-AI)",
65
+ AgentType.OPENAI_COMPATIBLE,
66
+ "glm-4.5v",
67
+ {"openai_compatible": {"base_url": settings.hud_gateway_url, "model_name": "GLM"}},
68
+ ),
63
69
  ]
64
70
 
65
71
  _DEFAULT_CONFIG_TEMPLATE = """# HUD Eval Configuration
@@ -180,7 +186,7 @@ class EvalConfig(BaseModel):
180
186
  hud_console.error("HUD_API_KEY is required for remote execution")
181
187
  hud_console.info("Set it: hud set HUD_API_KEY=your-key-here")
182
188
  raise typer.Exit(1)
183
- if self.agent_type in (AgentType.GEMINI, AgentType.OPERATOR):
189
+ if self.agent_type == AgentType.GEMINI:
184
190
  hud_console.error(
185
191
  f"Remote execution is not supported for {self.agent_type.value} agent"
186
192
  )
@@ -229,6 +235,12 @@ class EvalConfig(BaseModel):
229
235
 
230
236
  if self.agent_type == AgentType.OPENAI_COMPATIBLE:
231
237
  base_url = kwargs.get("base_url", "")
238
+ model_name = kwargs.get("model_name", "")
239
+ if model_name:
240
+ kwargs["model_name"] = model_name
241
+ else:
242
+ kwargs["model_name"] = "OpenAI Compatible"
243
+
232
244
  if "api_key" not in kwargs:
233
245
  # Use HUD API key for gateway, otherwise fall back to OpenAI API key
234
246
  if settings.hud_gateway_url in base_url:
@@ -730,7 +742,11 @@ def eval_command(
730
742
 
731
743
  # Run
732
744
  start_time = time.time()
733
- results, tasks = asyncio.run(_run_evaluation(cfg))
745
+ try:
746
+ results, tasks = asyncio.run(_run_evaluation(cfg))
747
+ except ValueError as e:
748
+ hud_console.error(str(e))
749
+ raise typer.Exit(1) from None
734
750
  elapsed = time.time() - start_time
735
751
 
736
752
  if cfg.remote:
@@ -12,8 +12,10 @@ from pydantic import BaseModel, Field, field_validator, model_validator
12
12
 
13
13
  from hud.settings import settings
14
14
  from hud.types import AgentType, Task, Trace
15
+ from hud.utils.hud_console import HUDConsole
15
16
 
16
17
  logger = logging.getLogger(__name__)
18
+ hud_console = HUDConsole()
17
19
 
18
20
 
19
21
  class SingleTaskRequest(BaseModel):
@@ -92,6 +94,31 @@ async def submit_rollouts(
92
94
  if not settings.api_key:
93
95
  raise ValueError("HUD_API_KEY is required for remote execution")
94
96
 
97
+ # Validate tasks have remote-compatible mcp_config (URL-based, not command-based)
98
+ local_task_servers: list[tuple[int, str, str]] = [] # (task_idx, task_id, server_name)
99
+ affected_task_indices: set[int] = set()
100
+ for i, task in enumerate(tasks):
101
+ if task.mcp_config:
102
+ for server_name, server_cfg in task.mcp_config.items():
103
+ if (
104
+ isinstance(server_cfg, dict)
105
+ and "command" in server_cfg
106
+ and not server_cfg.get("url")
107
+ ):
108
+ local_task_servers.append((i, task.id or f"task_{i}", server_name))
109
+ affected_task_indices.add(i)
110
+
111
+ if local_task_servers:
112
+ task_details = ", ".join(f"{tid} ({srv})" for _, tid, srv in local_task_servers[:3])
113
+ if len(local_task_servers) > 3:
114
+ task_details += f", ... and {len(local_task_servers) - 3} more"
115
+ raise ValueError(
116
+ f"Remote execution requires URL-based mcp_config, but "
117
+ f"{len(affected_task_indices)} task(s) use local Docker configs "
118
+ f"(command-based): {task_details}. "
119
+ "Convert to remote with: hud convert <tasks_file>"
120
+ )
121
+
95
122
  # Build single task requests
96
123
  requests: list[SingleTaskRequest] = []
97
124
  for task_idx, task in enumerate(tasks):
@@ -136,31 +163,34 @@ async def submit_rollouts(
136
163
  total_accepted += result.get("accepted", 0)
137
164
  total_rejected += result.get("rejected", 0)
138
165
 
139
- logger.info(
140
- "Batch %d/%d: %d/%d accepted",
141
- (i // batch_size) + 1,
142
- (len(requests) + batch_size - 1) // batch_size,
143
- result.get("accepted", 0),
144
- len(batch),
166
+ for item in result.get("results", []):
167
+ if isinstance(item, dict) and item.get("status") == "rejected":
168
+ hud_console.warning(f"Task rejected: {item.get('error', 'Unknown reason')}")
169
+
170
+ batch_num = (i // batch_size) + 1
171
+ total_batches = (len(requests) + batch_size - 1) // batch_size
172
+ hud_console.info(
173
+ f"Batch {batch_num}/{total_batches}: "
174
+ f"{result.get('accepted', 0)}/{len(batch)} accepted"
145
175
  )
146
176
 
147
177
  except httpx.HTTPStatusError as exc:
148
- logger.error(
149
- "Batch submission failed: %s - %s", exc.response.status_code, exc.response.text
150
- )
178
+ if 400 <= exc.response.status_code < 500:
179
+ raise ValueError(f"Submission failed: {exc.response.text}") from exc
180
+ hud_console.error(f"Batch submission failed: {exc.response.status_code}")
151
181
  total_rejected += len(batch)
152
182
 
153
183
  except Exception as exc:
154
- logger.exception("Batch submission failed: %s", exc)
184
+ hud_console.error(f"Batch submission failed: {exc}")
155
185
  total_rejected += len(batch)
156
186
 
157
187
  # Log final summary
158
- logger.info(
159
- "Submitted %d/%d requests (%d rejected)",
160
- total_accepted,
161
- len(requests),
162
- total_rejected,
163
- )
188
+ if total_rejected > 0:
189
+ hud_console.warning(
190
+ f"Submitted {total_accepted}/{len(requests)} requests ({total_rejected} rejected)"
191
+ )
192
+ else:
193
+ hud_console.info(f"Submitted {total_accepted}/{len(requests)} requests")
164
194
 
165
195
 
166
196
  async def cancel_job(job_id: str) -> dict[str, Any]:
@@ -5,4 +5,4 @@ def test_import():
5
5
  """Test that the package can be imported."""
6
6
  import hud
7
7
 
8
- assert hud.__version__ == "0.4.68"
8
+ assert hud.__version__ == "0.4.69"
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.4.68"
7
+ __version__ = "0.4.69"
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "hud-python"
3
- version = "0.4.68"
3
+ version = "0.4.69"
4
4
  description = "SDK for the HUD platform."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11, <3.13"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes