hud-python 0.4.42__tar.gz → 0.4.43__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (246)
  1. {hud_python-0.4.42 → hud_python-0.4.43}/PKG-INFO +1 -1
  2. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/openai_chat_generic.py +1 -1
  3. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/__init__.py +6 -0
  4. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/dev.py +24 -2
  5. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/eval.py +10 -11
  6. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/flows/tasks.py +4 -5
  7. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/__init__.py +6 -0
  8. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/config.py +2 -2
  9. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/gpu_utils.py +5 -3
  10. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/remote_runner.py +18 -9
  11. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/rl_api.py +2 -2
  12. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/environment.py +1 -5
  13. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/config.py +14 -9
  14. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/train.py +9 -6
  15. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/group_eval.py +2 -2
  16. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tasks.py +1 -1
  17. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tests/test_version.py +1 -1
  18. {hud_python-0.4.42 → hud_python-0.4.43}/hud/version.py +1 -1
  19. {hud_python-0.4.42 → hud_python-0.4.43}/pyproject.toml +1 -1
  20. {hud_python-0.4.42 → hud_python-0.4.43}/.gitignore +0 -0
  21. {hud_python-0.4.42 → hud_python-0.4.43}/LICENSE +0 -0
  22. {hud_python-0.4.42 → hud_python-0.4.43}/README.md +0 -0
  23. {hud_python-0.4.42 → hud_python-0.4.43}/environments/README.md +0 -0
  24. {hud_python-0.4.42 → hud_python-0.4.43}/environments/blank/README.md +0 -0
  25. {hud_python-0.4.42 → hud_python-0.4.43}/environments/blank/controller/README.md +0 -0
  26. {hud_python-0.4.42 → hud_python-0.4.43}/environments/blank/environment/README.md +0 -0
  27. {hud_python-0.4.42 → hud_python-0.4.43}/environments/blank/pyproject.toml +0 -0
  28. {hud_python-0.4.42 → hud_python-0.4.43}/environments/browser/README.md +0 -0
  29. {hud_python-0.4.42 → hud_python-0.4.43}/environments/browser/environment/2048/README.md +0 -0
  30. {hud_python-0.4.42 → hud_python-0.4.43}/environments/browser/environment/2048/backend/pyproject.toml +0 -0
  31. {hud_python-0.4.42 → hud_python-0.4.43}/environments/browser/environment/README.md +0 -0
  32. {hud_python-0.4.42 → hud_python-0.4.43}/environments/browser/environment/todo/README.md +0 -0
  33. {hud_python-0.4.42 → hud_python-0.4.43}/environments/browser/environment/todo/backend/pyproject.toml +0 -0
  34. {hud_python-0.4.42 → hud_python-0.4.43}/environments/browser/pyproject.toml +0 -0
  35. {hud_python-0.4.42 → hud_python-0.4.43}/environments/deepresearch/pyproject.toml +0 -0
  36. {hud_python-0.4.42 → hud_python-0.4.43}/environments/remote_browser/README.md +0 -0
  37. {hud_python-0.4.42 → hud_python-0.4.43}/environments/remote_browser/pyproject.toml +0 -0
  38. {hud_python-0.4.42 → hud_python-0.4.43}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
  39. {hud_python-0.4.42 → hud_python-0.4.43}/environments/text_2048/README.md +0 -0
  40. {hud_python-0.4.42 → hud_python-0.4.43}/environments/text_2048/pyproject.toml +0 -0
  41. {hud_python-0.4.42 → hud_python-0.4.43}/examples/README.md +0 -0
  42. {hud_python-0.4.42 → hud_python-0.4.43}/hud/__init__.py +0 -0
  43. {hud_python-0.4.42 → hud_python-0.4.43}/hud/__main__.py +0 -0
  44. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/__init__.py +0 -0
  45. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/base.py +0 -0
  46. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/claude.py +0 -0
  47. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/grounded_openai.py +0 -0
  48. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/langchain.py +0 -0
  49. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/lite_llm.py +0 -0
  50. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/misc/__init__.py +0 -0
  51. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/misc/response_agent.py +0 -0
  52. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/openai.py +0 -0
  53. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/tests/__init__.py +0 -0
  54. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/tests/test_base.py +0 -0
  55. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/tests/test_claude.py +0 -0
  56. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/tests/test_client.py +0 -0
  57. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
  58. {hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/tests/test_openai.py +0 -0
  59. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/__main__.py +0 -0
  60. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/analyze.py +0 -0
  61. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/build.py +0 -0
  62. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/clone.py +0 -0
  63. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/debug.py +0 -0
  64. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/flows/__init__.py +0 -0
  65. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/get.py +0 -0
  66. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/init.py +0 -0
  67. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/list_func.py +0 -0
  68. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/pull.py +0 -0
  69. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/push.py +0 -0
  70. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/remove.py +0 -0
  71. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/celebrate.py +0 -0
  72. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/display.py +0 -0
  73. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/gpu.py +0 -0
  74. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/local_runner.py +0 -0
  75. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/presets.py +0 -0
  76. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/viewer.py +0 -0
  77. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/vllm.py +0 -0
  78. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/wait_utils.py +0 -0
  79. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/__init__.py +0 -0
  80. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_analyze.py +0 -0
  81. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_analyze_metadata.py +0 -0
  82. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_build.py +0 -0
  83. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_cli_init.py +0 -0
  84. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_cli_main.py +0 -0
  85. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_clone.py +0 -0
  86. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_cursor.py +0 -0
  87. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_debug.py +0 -0
  88. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_list_func.py +0 -0
  89. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_main_module.py +0 -0
  90. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_mcp_server.py +0 -0
  91. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_pull.py +0 -0
  92. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_push.py +0 -0
  93. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_registry.py +0 -0
  94. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/tests/test_utils.py +0 -0
  95. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/__init__.py +0 -0
  96. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/config.py +0 -0
  97. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/cursor.py +0 -0
  98. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/docker.py +0 -0
  99. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/env_check.py +0 -0
  100. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/interactive.py +0 -0
  101. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/local_runner.py +0 -0
  102. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/logging.py +0 -0
  103. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/metadata.py +0 -0
  104. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/package_runner.py +0 -0
  105. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/registry.py +0 -0
  106. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/remote_runner.py +0 -0
  107. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/runner.py +0 -0
  108. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/server.py +0 -0
  109. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/source_hash.py +0 -0
  110. {hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/tasks.py +0 -0
  111. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/README.md +0 -0
  112. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/__init__.py +0 -0
  113. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/base.py +0 -0
  114. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/fastmcp.py +0 -0
  115. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/mcp_use.py +0 -0
  116. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/tests/__init__.py +0 -0
  117. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/tests/test_client_integration.py +0 -0
  118. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/tests/test_fastmcp.py +0 -0
  119. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/tests/test_mcp_use_retry.py +0 -0
  120. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/tests/test_protocol.py +0 -0
  121. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/utils/__init__.py +0 -0
  122. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/utils/mcp_use_retry.py +0 -0
  123. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/utils/retry.py +0 -0
  124. {hud_python-0.4.42 → hud_python-0.4.43}/hud/clients/utils/retry_transport.py +0 -0
  125. {hud_python-0.4.42 → hud_python-0.4.43}/hud/datasets/__init__.py +0 -0
  126. {hud_python-0.4.42 → hud_python-0.4.43}/hud/datasets/parallel.py +0 -0
  127. {hud_python-0.4.42 → hud_python-0.4.43}/hud/datasets/runner.py +0 -0
  128. {hud_python-0.4.42 → hud_python-0.4.43}/hud/datasets/utils.py +0 -0
  129. {hud_python-0.4.42 → hud_python-0.4.43}/hud/misc/__init__.py +0 -0
  130. {hud_python-0.4.42 → hud_python-0.4.43}/hud/misc/claude_plays_pokemon.py +0 -0
  131. {hud_python-0.4.42 → hud_python-0.4.43}/hud/native/__init__.py +0 -0
  132. {hud_python-0.4.42 → hud_python-0.4.43}/hud/native/comparator.py +0 -0
  133. {hud_python-0.4.42 → hud_python-0.4.43}/hud/native/tests/__init__.py +0 -0
  134. {hud_python-0.4.42 → hud_python-0.4.43}/hud/native/tests/test_comparator.py +0 -0
  135. {hud_python-0.4.42 → hud_python-0.4.43}/hud/native/tests/test_native_init.py +0 -0
  136. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/__init__.py +0 -0
  137. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/collector.py +0 -0
  138. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/config.py +0 -0
  139. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/context.py +0 -0
  140. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/exporters.py +0 -0
  141. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/instrumentation.py +0 -0
  142. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/processors.py +0 -0
  143. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/tests/__init__.py +0 -0
  144. {hud_python-0.4.42 → hud_python-0.4.43}/hud/otel/tests/test_processors.py +0 -0
  145. {hud_python-0.4.42 → hud_python-0.4.43}/hud/py.typed +0 -0
  146. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/README.md +0 -0
  147. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/__init__.py +0 -0
  148. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/actor.py +0 -0
  149. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/buffer.py +0 -0
  150. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/chat_template.jinja +0 -0
  151. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/distributed.py +0 -0
  152. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/learner.py +0 -0
  153. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/tests/__init__.py +0 -0
  154. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/tests/test_learner.py +0 -0
  155. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/types.py +0 -0
  156. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/utils/start_vllm_server.sh +0 -0
  157. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/utils.py +0 -0
  158. {hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/vllm_adapter.py +0 -0
  159. {hud_python-0.4.42 → hud_python-0.4.43}/hud/samples/__init__.py +0 -0
  160. {hud_python-0.4.42 → hud_python-0.4.43}/hud/samples/browser.py +0 -0
  161. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/__init__.py +0 -0
  162. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/context.py +0 -0
  163. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/helper/__init__.py +0 -0
  164. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/low_level.py +0 -0
  165. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/server.py +0 -0
  166. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/__init__.py +0 -0
  167. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/test_add_tool.py +0 -0
  168. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/test_context.py +0 -0
  169. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  170. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/test_mcp_server_integration.py +0 -0
  171. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/test_mcp_server_more.py +0 -0
  172. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/test_run_wrapper.py +0 -0
  173. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/test_server_extra.py +0 -0
  174. {hud_python-0.4.42 → hud_python-0.4.43}/hud/server/tests/test_sigterm_runner.py +0 -0
  175. {hud_python-0.4.42 → hud_python-0.4.43}/hud/settings.py +0 -0
  176. {hud_python-0.4.42 → hud_python-0.4.43}/hud/shared/__init__.py +0 -0
  177. {hud_python-0.4.42 → hud_python-0.4.43}/hud/shared/exceptions.py +0 -0
  178. {hud_python-0.4.42 → hud_python-0.4.43}/hud/shared/hints.py +0 -0
  179. {hud_python-0.4.42 → hud_python-0.4.43}/hud/shared/requests.py +0 -0
  180. {hud_python-0.4.42 → hud_python-0.4.43}/hud/shared/tests/__init__.py +0 -0
  181. {hud_python-0.4.42 → hud_python-0.4.43}/hud/shared/tests/test_exceptions.py +0 -0
  182. {hud_python-0.4.42 → hud_python-0.4.43}/hud/shared/tests/test_requests.py +0 -0
  183. {hud_python-0.4.42 → hud_python-0.4.43}/hud/telemetry/__init__.py +0 -0
  184. {hud_python-0.4.42 → hud_python-0.4.43}/hud/telemetry/instrument.py +0 -0
  185. {hud_python-0.4.42 → hud_python-0.4.43}/hud/telemetry/job.py +0 -0
  186. {hud_python-0.4.42 → hud_python-0.4.43}/hud/telemetry/replay.py +0 -0
  187. {hud_python-0.4.42 → hud_python-0.4.43}/hud/telemetry/tests/__init__.py +0 -0
  188. {hud_python-0.4.42 → hud_python-0.4.43}/hud/telemetry/tests/test_replay.py +0 -0
  189. {hud_python-0.4.42 → hud_python-0.4.43}/hud/telemetry/tests/test_trace.py +0 -0
  190. {hud_python-0.4.42 → hud_python-0.4.43}/hud/telemetry/trace.py +0 -0
  191. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/__init__.py +0 -0
  192. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/base.py +0 -0
  193. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/bash.py +0 -0
  194. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/computer/__init__.py +0 -0
  195. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/computer/anthropic.py +0 -0
  196. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/computer/hud.py +0 -0
  197. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/computer/openai.py +0 -0
  198. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/computer/settings.py +0 -0
  199. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/edit.py +0 -0
  200. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/executors/__init__.py +0 -0
  201. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/executors/base.py +0 -0
  202. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/executors/pyautogui.py +0 -0
  203. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/executors/tests/__init__.py +0 -0
  204. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/executors/tests/test_base_executor.py +0 -0
  205. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  206. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/executors/xdo.py +0 -0
  207. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/grounding/__init__.py +0 -0
  208. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/grounding/config.py +0 -0
  209. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/grounding/grounded_tool.py +0 -0
  210. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/grounding/grounder.py +0 -0
  211. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/grounding/tests/__init__.py +0 -0
  212. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  213. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/playwright.py +0 -0
  214. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/response.py +0 -0
  215. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/submit.py +0 -0
  216. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/__init__.py +0 -0
  217. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_base.py +0 -0
  218. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_bash.py +0 -0
  219. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_bash_extended.py +0 -0
  220. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_computer.py +0 -0
  221. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_computer_actions.py +0 -0
  222. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_edit.py +0 -0
  223. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_init.py +0 -0
  224. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_playwright_tool.py +0 -0
  225. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_response.py +0 -0
  226. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_tools.py +0 -0
  227. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_tools_init.py +0 -0
  228. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/tests/test_utils.py +0 -0
  229. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/types.py +0 -0
  230. {hud_python-0.4.42 → hud_python-0.4.43}/hud/tools/utils.py +0 -0
  231. {hud_python-0.4.42 → hud_python-0.4.43}/hud/types.py +0 -0
  232. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/__init__.py +0 -0
  233. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/agent_factories.py +0 -0
  234. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/async_utils.py +0 -0
  235. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/hud_console.py +0 -0
  236. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/mcp.py +0 -0
  237. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/pretty_errors.py +0 -0
  238. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/progress.py +0 -0
  239. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/telemetry.py +0 -0
  240. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tests/__init__.py +0 -0
  241. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tests/test_async_utils.py +0 -0
  242. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tests/test_init.py +0 -0
  243. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tests/test_mcp.py +0 -0
  244. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tests/test_progress.py +0 -0
  245. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tests/test_telemetry.py +0 -0
  246. {hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tool_shorthand.py +0 -0
{hud_python-0.4.42 → hud_python-0.4.43}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hud-python
- Version: 0.4.42
+ Version: 0.4.43
  Summary: SDK for the HUD platform.
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
{hud_python-0.4.42 → hud_python-0.4.43}/hud/agents/openai_chat_generic.py
@@ -205,7 +205,7 @@ class GenericOpenAIChatAgent(MCPAgent):
  try:
  response = await self._invoke_chat_completion(
  messages=messages,
- tools=tools, # type: ignore
+ tools=tools, # type: ignore
  extra=extra,
  )
  except Exception as e:
{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/__init__.py
@@ -1178,6 +1178,11 @@ def rl(
  "--vllm-gpu",
  help="Specific GPU for vLLM server",
  ),
+ vllm_gpu_count: int = typer.Option(
+ 1,
+ "--vllm-gpu-count",
+ help="Number of GPUs for vLLM server",
+ ),
  skip_vllm_startup: bool = typer.Option(
  False,
  "--skip-vllm-startup",
@@ -1199,6 +1204,7 @@ def rl(
  no_ddp=no_ddp,
  ddp_gpus=ddp_gpus,
  vllm_gpu=vllm_gpu,
+ vllm_gpu_count=vllm_gpu_count,
  yes=yes,
  skip_vllm_startup=skip_vllm_startup,
  )
{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/dev.py
@@ -73,6 +73,24 @@ def create_proxy_server(
  "PYTHONUNBUFFERED=1", # Ensure Python output is not buffered
  ]

+ # Check for .env file in the project directory and add env vars
+ env_file = project_path / ".env"
+ loaded_env_vars = {}
+ if env_file.exists():
+ try:
+ from hud.cli.utils.config import parse_env_file
+
+ env_contents = env_file.read_text(encoding="utf-8")
+ loaded_env_vars = parse_env_file(env_contents)
+ for key, value in loaded_env_vars.items():
+ docker_cmd.extend(["-e", f"{key}={value}"])
+ if verbose and loaded_env_vars:
+ hud_console.info(
+ f"Loaded {len(loaded_env_vars)} environment variable(s) from .env file"
+ )
+ except Exception as e:
+ hud_console.warning(f"Failed to load .env file: {e}")
+
  # Add user-provided Docker arguments
  if docker_args:
  docker_cmd.extend(docker_args)
@@ -112,8 +130,12 @@ def create_proxy_server(
  hud_console.info("The container's CMD determines reload behavior")
  hud_console.command_example(f"docker logs -f {container_name}", "View container logs")

- # Show the full Docker command if there are environment variables
- if docker_args and any(arg == "-e" or arg.startswith("--env") for arg in docker_args):
+ # Show the full Docker command if there are environment variables (from .env or args)
+ has_env_from_args = docker_args and any(
+ arg == "-e" or arg.startswith("--env") for arg in docker_args
+ )
+ has_env_from_file = bool(loaded_env_vars)
+ if has_env_from_args or has_env_from_file:
  hud_console.info("")
  hud_console.info("Docker command with environment variables:")
  hud_console.info(" ".join(docker_cmd))
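The new block above forwards variables from a project-level .env file into the dev container. A minimal sketch of the effect, with made-up .env contents; parse_env_file is the helper the diff imports, approximated here as plain KEY=VALUE parsing:

    # Hypothetical .env contents in the environment directory (illustration only):
    env_contents = "OPENAI_API_KEY=sk-example\nBROWSER_HEADLESS=1\n"

    # Rough approximation of parse_env_file: one KEY=VALUE pair per non-empty line.
    loaded_env_vars = dict(
        line.split("=", 1) for line in env_contents.splitlines() if "=" in line
    )

    docker_cmd = ["docker", "run", "--rm"]
    for key, value in loaded_env_vars.items():
        docker_cmd.extend(["-e", f"{key}={value}"])
    # docker_cmd is now:
    # ['docker', 'run', '--rm', '-e', 'OPENAI_API_KEY=sk-example', '-e', 'BROWSER_HEADLESS=1']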
{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/eval.py
@@ -298,16 +298,15 @@ async def run_single_task(
  agent_config["allowed_tools"] = allowed_tools

  # Run with grouping
- with hud.trace(name=f"{task_prompt} (group_size={group_size})"):
- stats = await run_tasks_grouped(
- tasks=[task],
- agent_class=agent_class,
- agent_config=agent_config,
- group_size=group_size,
- max_parallel_episodes=48, # Same as RL default
- max_steps=max_steps,
- verbose=verbose,
- )
+ stats = await run_tasks_grouped(
+ tasks=[task],
+ agent_class=agent_class,
+ agent_config=agent_config,
+ group_size=group_size,
+ max_parallel_episodes=48, # Same as RL default
+ max_steps=max_steps,
+ verbose=verbose,
+ )

  # Display results
  display_group_statistics(stats, show_details=True)
@@ -499,7 +498,7 @@ async def run_full_dataset(
  )

  # Display results
- display_group_statistics(stats, show_details=len(stats) <= 20)
+ display_group_statistics(stats, show_details=len(stats) <= 50)

  # Return stats for consistency with other modes
  return stats
{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/flows/tasks.py
@@ -212,17 +212,14 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
  # Check if tasks already have remote URLs
  already_remote = _validate_tasks(tasks)

- # If tasks already reference a remote MCP URL, do not require a local environment
- # or attempt any image updates. Use the dataset as-is.
- if already_remote:
- return str(tasks_path)
-
  # Extract existing images from tasks
  existing_images = _extract_existing_images(tasks)

  # Locate environment
  env_dir = find_environment_dir(tasks_path)
  if not env_dir:
+ if already_remote:
+ return str(tasks_path)
  hud_console.error("Could not locate an environment directory (Dockerfile + pyproject.toml)")
  hud_console.hint("Ensure you're in or near your environment folder before running 'hud rl'")
  raise typer.Exit(1)
@@ -373,6 +370,8 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
  item["system_prompt"] = t.system_prompt
  if t.metadata:
  item["metadata"] = t.metadata
+ if t.id is not None:
+ item["id"] = t.id

  tasks_payload.append(item)

{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/__init__.py
@@ -78,6 +78,11 @@ def rl_command(
  "-y",
  help="Auto-accept all prompts and use defaults (lazy mode)",
  ),
+ vllm_gpu_count: int = typer.Option(
+ None,
+ "--vllm-gpu-count",
+ help="Number of GPUs for vLLM server",
+ ),
  skip_vllm_startup: bool = typer.Option(
  False,
  "--skip-vllm-startup",
@@ -145,6 +150,7 @@ def rl_command(
  model=model,
  config_file=config_file,
  output_dir=output_dir,
+ vllm_gpu_count=vllm_gpu_count,
  yes=yes,
  )
  return
{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/config.py
@@ -84,7 +84,7 @@ def save_config(config: Config, path: Path) -> None:
  """Save configuration to a JSON file."""
  config_dict = config.to_dict()

- with open(path, "w") as f:
+ with open(path, "w", encoding="utf-8") as f:
  json.dump(config_dict, f, indent=2)
  f.write("\n") # Add newline at end of file

@@ -94,7 +94,7 @@ def save_config(config: Config, path: Path) -> None:

  def load_config(path: Path) -> Config:
  """Load configuration from a JSON file."""
- with open(path) as f:
+ with open(path, encoding="utf-8") as f:
  data = json.load(f)

  # Use Config.from_dict which handles missing fields gracefully
{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/gpu_utils.py
@@ -245,10 +245,12 @@ def adjust_config_for_ddp(config: Config, num_gpus: int) -> Config:
  # Apply scaling rule
  if num_gpus == 1:
  # Special case: 2 groups for single GPU
+ groups_per_gpu = 2
  config.training.batch_size = 2 * group_size
  else:
- # Multi-GPU: each GPU processes 1 group
- config.training.batch_size = num_gpus * group_size
+ groups_per_gpu = config.training.batch_size // group_size
+ # Multi-GPU: each GPU processes groups_per_gpu groups
+ config.training.batch_size = num_gpus * group_size * groups_per_gpu

  # Update max_parallel_episodes to match
  config.actor.max_parallel_episodes = config.training.batch_size
@@ -263,7 +265,7 @@ def adjust_config_for_ddp(config: Config, num_gpus: int) -> Config:
  f"\n[cyan]📊 Adjusted batch_size to {config.training.batch_size} ({config.training.batch_size // group_size} groups)[/cyan]" # noqa: E501
  )
  console.print(
- f"[cyan] Each of the {num_gpus} GPU(s) will process {config.training.batch_size // group_size // num_gpus} group(s) in parallel[/cyan]" # noqa: E501
+ f"[cyan] Each of the {num_gpus} GPU(s) will process {groups_per_gpu} group(s) in parallel[/cyan]" # noqa: E501
  )

  return config
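A worked example of the revised scaling rule, using the new TrainingConfig defaults introduced elsewhere in this release (batch_size=16, group_size=8) and an assumed GPU count; the numbers are illustrative only:

    group_size = 8    # new TrainingConfig default in 0.4.43
    batch_size = 16   # new TrainingConfig default in 0.4.43
    num_gpus = 4      # assumed for illustration

    if num_gpus == 1:
        groups_per_gpu = 2
        batch_size = 2 * group_size                            # 16
    else:
        groups_per_gpu = batch_size // group_size              # 16 // 8 = 2
        batch_size = num_gpus * group_size * groups_per_gpu    # 4 * 8 * 2 = 64

    # 0.4.42 used num_gpus * group_size here (4 * 8 = 32), i.e. one group per GPU;
    # 0.4.43 keeps groups_per_gpu groups on each GPU instead.
    max_parallel_episodes = batch_size  # mirrors config.actor.max_parallel_episodes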
{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/remote_runner.py
@@ -32,7 +32,9 @@ GPU_PRICING = {
  }


- def ensure_vllm_deployed(model_name: str, gpu_type: str = "A100", timeout: int = 600) -> None:
+ def ensure_vllm_deployed(
+ model_name: str, gpu_type: str = "A100", gpu_count: int = 1, timeout: int = 600
+ ) -> None:
  """Deploy vLLM for a model if needed and wait until it's ready.

  Args:
@@ -47,7 +49,7 @@ def ensure_vllm_deployed(model_name: str, gpu_type: str = "A100", timeout: int =
  return

  hud_console.info(f"Deploying vLLM server for {model_name}...")
- rl_api.deploy_vllm(model_name, gpu_type=gpu_type)
+ rl_api.deploy_vllm(model_name, gpu_type=gpu_type, gpu_count=gpu_count)
  hud_console.success("vLLM deployment started")

  hud_console.info("Waiting for vLLM server to be ready...")
@@ -72,6 +74,7 @@ def run_remote_training(
  model: str | None,
  config_file: Path | None,
  output_dir: str,
+ vllm_gpu_count: int = 1,
  yes: bool = False,
  ) -> None:
  """Run RL training remotely via the API server following the new interactive flow."""
@@ -183,14 +186,18 @@ def run_remote_training(

  # Ask for model type
  if yes:
- model_type = "Qwen/Qwen2.5-VL-3B-Instruct" # Default model in yes mode
+ if config_file:
+ config = load_config(config_file)
+ model_type = config.model.base_model
+ else:
+ model_type = "Qwen/Qwen2.5-VL-3B-Instruct"
  hud_console.info(f"Auto-selecting base model: {model_type} (--yes mode)")
  else:
  model_type = hud_console.select(
  "Select base model type:",
  choices=[
  {"name": "Qwen2.5-VL-3B-Instruct", "value": "Qwen/Qwen2.5-VL-3B-Instruct"},
- # {"name": "Qwen2.5-VL-7B-Instruct", "value": "Qwen/Qwen2.5-VL-7B-Instruct"}, # noqa: E501
+ {"name": "Qwen2.5-3B-Instruct", "value": "Qwen/Qwen2.5-3B-Instruct"},
  ],
  default=0,
  )
@@ -218,7 +225,7 @@ def run_remote_training(
  try:
  rl_api.create_model(model_name, model_type)
  hud_console.success(f"Created model: {model_name}")
- ensure_vllm_deployed(model_name, gpu_type="A100")
+ ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)

  except Exception as e:
  # If the name already exists, suggest a new name and prompt once
@@ -247,7 +254,7 @@ def run_remote_training(
  rl_api.create_model(chosen, model_type)
  hud_console.success(f"Created model: {chosen}")
  model_name = chosen
- ensure_vllm_deployed(model_name, gpu_type="A100")
+ ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
  except Exception as e2:
  hud_console.error(f"Failed to create model: {e2}")
  raise
@@ -281,7 +288,7 @@ def run_remote_training(
  return

  # Ensure vLLM is deployed
- ensure_vllm_deployed(model_name, gpu_type="A100")
+ ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
  except KeyboardInterrupt:
  hud_console.dim_info("Training cancelled", "")
  return
@@ -323,7 +330,7 @@ def run_remote_training(
  )

  if yes:
- num_gpus = 2 # Default to 2 GPUs in yes mode
+ num_gpus = 2  # Default to 2 GPUs in yes mode
  hud_console.info(f"Auto-selecting {num_gpus} GPU(s) (--yes mode)")
  else:
  num_gpus = hud_console.select(
@@ -425,10 +432,12 @@ def run_remote_training(
  # Load provided config
  hud_console.info(f"Loading configuration from: {config_file}")
  config = load_config(config_file)
- config_dict = config.to_dict()
  gpu_choice = config.training.gpu_type
  num_gpus = config.training.num_gpus

+ config = adjust_config_for_ddp(config, int(num_gpus))
+ config_dict = config.to_dict()
+
  # Launch training
  try:
  # Little celebration before launching
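The remote_runner changes above and the rl_api change just below thread the new gpu_count value from the CLI flag down to the deploy request. A rough sketch of the chain, with a placeholder model name and an assumed count of 2 (running it for real would call the HUD API and requires an API key):

    # Assumed CLI invocation: hud rl --vllm-gpu-count 2 ...
    # The value is threaded through roughly as:
    #   run_remote_training(..., vllm_gpu_count=2)
    #     -> ensure_vllm_deployed("my-model", gpu_type="A100", gpu_count=2)
    #       -> rl_api.deploy_vllm("my-model", gpu_type="A100", gpu_count=2)
    # which POSTs this body to {settings.hud_rl_url}/models/my-model/deploy:
    payload = {"gpu_type": "A100", "gpu_count": 2}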
{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/rl/rl_api.py
@@ -61,12 +61,12 @@ def list_models() -> list[RLModelInfo]:
  ]


- def deploy_vllm(model_name: str, gpu_type: str = "A100") -> dict[str, Any]:
+ def deploy_vllm(model_name: str, gpu_type: str = "A100", gpu_count: int = 1) -> dict[str, Any]:
  """Deploy a vLLM server for a model."""
  return make_request_sync(
  method="POST",
  url=f"{settings.hud_rl_url}/models/{model_name}/deploy",
- json={"gpu_type": gpu_type},
+ json={"gpu_type": gpu_type, "gpu_count": gpu_count},
  api_key=settings.api_key,
  )

{hud_python-0.4.42 → hud_python-0.4.43}/hud/cli/utils/environment.py
@@ -127,8 +127,4 @@ def is_environment_directory(path: str | Path) -> bool:
  return False

  # Must have pyproject.toml
- if not (dir_path / "pyproject.toml").exists():
- hud_console.error("pyproject.toml not found")
- return False
-
- return True
+ return (dir_path / "pyproject.toml").exists()
{hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/config.py
@@ -13,6 +13,7 @@ SUPPORTED_MODELS = [
  "Qwen/Qwen2.5-VL-32B-Instruct",
  "Qwen/Qwen2.5-VL-72B-Instruct",
  "Qwen/Qwen2.5-7B-Instruct",
+ "Qwen/Qwen2.5-3B-Instruct",
  ]


@@ -39,9 +40,9 @@ class ModelConfig:
  """Model and LoRA configuration."""

  base_model: str = "Qwen/Qwen2.5-VL-3B-Instruct"
- lora_r: int = 8
- lora_alpha: int = 16
- lora_dropout: float = 0.05
+ lora_r: int = 16
+ lora_alpha: int = 32
+ lora_dropout: float = 0.1
  target_modules: tuple[str, ...] = (
  "q_proj",
  "k_proj",
@@ -61,6 +62,7 @@ class ModelConfig:
  @dataclass
  class TrainingConfig:
  """Training hyperparameters."""
+
  # GPU parameters
  gpu_type: str = "A100"
  num_gpus: int = 2
@@ -71,9 +73,9 @@ class TrainingConfig:
  save_every_batches: int = 1

  # Batching parameters
- epochs: int = 2
- batch_size: int = 24
- group_size: int = 4
+ epochs: int = 1
+ batch_size: int = 16
+ group_size: int = 8
  mini_batch_size: int = 1
  update_after_group: bool = True # Whether to update the policy after each task group
  accumulate_over_minibatches: bool = False # Whether to accumulate over minibatches
@@ -84,7 +86,7 @@ class TrainingConfig:
  leave_one_out: bool = True

  # Replay buffer parameters
- buffer_steps: int = 4
+ buffer_steps: int = 8
  select_strategy: Literal["recent", "variance", "random"] = "variance"

  # Aggregation parameters
@@ -92,8 +94,8 @@ class TrainingConfig:
  token_agg: Literal["mean", "sum"] = "mean" # noqa: S105

  # Regularization parameters
- kl_beta: float = 0.0
- entropy_beta: float = 0.0
+ kl_beta: float = 0.001
+ entropy_beta: float = 0.001
  top_eps: float = 0.2
  bottom_eps: float = 0.1

@@ -143,6 +145,7 @@ class Config:
  job_id: str | None = None # Use existing job ID if provided
  stats_interval: int = 1
  verbose: bool = False
+ very_verbose: bool = False

  # Paths
  out_dir: str = "./checkpoints"
@@ -166,6 +169,7 @@ class Config:
  job_id=d.get("job_id"),
  stats_interval=d.get("stats_interval", 1),
  verbose=d.get("verbose", False),
+ very_verbose=d.get("very_verbose", False),
  out_dir=d.get("out_dir", "./checkpoints"),
  adapter_prefix=d.get("adapter_prefix", "cua-grpo-step"),
  seed=d.get("seed", 1234),
@@ -181,6 +185,7 @@ class Config:
  "job_id": self.job_id,
  "stats_interval": self.stats_interval,
  "verbose": self.verbose,
+ "very_verbose": self.very_verbose,
  "out_dir": self.out_dir,
  "adapter_prefix": self.adapter_prefix,
  "seed": self.seed,
{hud_python-0.4.42 → hud_python-0.4.43}/hud/rl/train.py
@@ -56,6 +56,10 @@ async def train(config: Config, tasks: list[Task]) -> None:
  logging.basicConfig(level=logging.INFO)
  # Remove httpx logger
  logging.getLogger("httpx").setLevel(logging.WARNING)
+ if config.very_verbose:
+ logging.basicConfig(level=logging.DEBUG)
+ # Remove httpx logger
+ logging.getLogger("httpx").setLevel(logging.INFO)

  if is_main_process():
  hud_console.header("Starting GRPO Training")
@@ -103,10 +107,9 @@ async def train(config: Config, tasks: list[Task]) -> None:
  if is_main_process():
  hud_console.info(f"Creating job with config.job_id: {config.job_id}")
  job_obj = hud.create_job(
- job_id=config.job_id, name=config.job_name, metadata={
- "config": config.to_dict(),
- "agent_class": config.model.base_model
- }
+ job_id=config.job_id,
+ name=config.job_name,
+ metadata={"config": config.to_dict(), "agent_class": config.model.base_model},
  )
  hud_console.info(f"Created job with job_obj.id: {job_obj.id}")
  job_obj.update_status_sync("running")
@@ -299,7 +302,7 @@ async def main() -> None:

  # Load config
  if args.config:
- with open(args.config) as f: # noqa: ASYNC230
+ with open(args.config, encoding="utf-8") as f: # noqa: ASYNC230
  config_dict = json.load(f)
  config = Config.from_dict(config_dict)
  else:
@@ -337,7 +340,7 @@ async def main() -> None:
  # Load tasks
  if args.tasks_json:
  # Tasks provided as JSON list via command line
- tasks = load_tasks(args.tasks_jso)
+ tasks = load_tasks(args.tasks_json)
  elif args.tasks:
  # Tasks provided as file path or HuggingFace dataset
  tasks = load_tasks(args.tasks)
{hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/group_eval.py
@@ -189,7 +189,7 @@ def display_group_statistics(stats: list[dict[str, Any]], show_details: bool = T
  hud_console.info(f"Overall mean reward: {overall_mean:.3f} ± {overall_std:.3f}")

  # Detailed table
- if show_details and len(stats) <= 20: # Only show for reasonable dataset sizes
+ if show_details and len(stats) <= 50: # Only show for reasonable dataset sizes
  table = Table(title="\nPer-Task Performance Distribution")
  table.add_column("Task", style="cyan", no_wrap=True)
  table.add_column("Mean±Std", justify="right", style="green")
@@ -216,7 +216,7 @@ def display_group_statistics(stats: list[dict[str, Any]], show_details: bool = T
  # High variance tasks
  high_variance_tasks = [s for s in stats if s["std_reward"] > 0.3 and s["group_size"] > 1]
  if high_variance_tasks:
- hud_console.warning(f"\n⚠️ {len(high_variance_tasks)} tasks show high variance (std > 0.3)")
+ hud_console.warning(f"\n{len(high_variance_tasks)} tasks show high variance (std > 0.3)")
  for task in high_variance_tasks[:3]:
  hud_console.info(
  f" • {task['task_id']}: μ={task['mean_reward']:.3f}, σ={task['std_reward']:.3f}" # noqa: RUF001
{hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tasks.py
@@ -40,7 +40,7 @@ def load_tasks(tasks_input: str | list[dict], *, raw: bool = False) -> list[Task
  if Path(tasks_input).exists():
  file_path = Path(tasks_input)

- with open(file_path) as f:
+ with open(file_path, encoding="utf-8") as f:
  # Handle JSON files (array of tasks)
  if file_path.suffix.lower() == ".json":
  data = json.load(f)
{hud_python-0.4.42 → hud_python-0.4.43}/hud/utils/tests/test_version.py
@@ -5,4 +5,4 @@ def test_import():
  """Test that the package can be imported."""
  import hud

- assert hud.__version__ == "0.4.42"
+ assert hud.__version__ == "0.4.43"
{hud_python-0.4.42 → hud_python-0.4.43}/hud/version.py
@@ -4,4 +4,4 @@ Version information for the HUD SDK.

  from __future__ import annotations

- __version__ = "0.4.42"
+ __version__ = "0.4.43"
{hud_python-0.4.42 → hud_python-0.4.43}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "hud-python"
- version = "0.4.42"
+ version = "0.4.43"
  description = "SDK for the HUD platform."
  readme = "README.md"
  requires-python = ">=3.11, <3.13"