hud-python 0.4.42__tar.gz → 0.4.44__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (246)
  1. {hud_python-0.4.42 → hud_python-0.4.44}/PKG-INFO +1 -1
  2. {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/openai_chat_generic.py +1 -1
  3. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/__init__.py +6 -0
  4. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/dev.py +24 -2
  5. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/eval.py +10 -11
  6. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/flows/tasks.py +4 -5
  7. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/__init__.py +6 -0
  8. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/config.py +2 -2
  9. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/gpu_utils.py +5 -3
  10. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/remote_runner.py +18 -9
  11. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/rl_api.py +2 -2
  12. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/environment.py +1 -5
  13. {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/config.py +14 -9
  14. {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/distributed.py +34 -1
  15. {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/learner.py +28 -5
  16. {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/train.py +73 -50
  17. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/group_eval.py +2 -2
  18. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tasks.py +1 -1
  19. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tests/test_version.py +1 -1
  20. {hud_python-0.4.42 → hud_python-0.4.44}/hud/version.py +1 -1
  21. {hud_python-0.4.42 → hud_python-0.4.44}/pyproject.toml +1 -1
  22. {hud_python-0.4.42 → hud_python-0.4.44}/.gitignore +0 -0
  23. {hud_python-0.4.42 → hud_python-0.4.44}/LICENSE +0 -0
  24. {hud_python-0.4.42 → hud_python-0.4.44}/README.md +0 -0
  25. {hud_python-0.4.42 → hud_python-0.4.44}/environments/README.md +0 -0
  26. {hud_python-0.4.42 → hud_python-0.4.44}/environments/blank/README.md +0 -0
  27. {hud_python-0.4.42 → hud_python-0.4.44}/environments/blank/controller/README.md +0 -0
  28. {hud_python-0.4.42 → hud_python-0.4.44}/environments/blank/environment/README.md +0 -0
  29. {hud_python-0.4.42 → hud_python-0.4.44}/environments/blank/pyproject.toml +0 -0
  30. {hud_python-0.4.42 → hud_python-0.4.44}/environments/browser/README.md +0 -0
  31. {hud_python-0.4.42 → hud_python-0.4.44}/environments/browser/environment/2048/README.md +0 -0
  32. {hud_python-0.4.42 → hud_python-0.4.44}/environments/browser/environment/2048/backend/pyproject.toml +0 -0
  33. {hud_python-0.4.42 → hud_python-0.4.44}/environments/browser/environment/README.md +0 -0
  34. {hud_python-0.4.42 → hud_python-0.4.44}/environments/browser/environment/todo/README.md +0 -0
  35. {hud_python-0.4.42 → hud_python-0.4.44}/environments/browser/environment/todo/backend/pyproject.toml +0 -0
  36. {hud_python-0.4.42 → hud_python-0.4.44}/environments/browser/pyproject.toml +0 -0
  37. {hud_python-0.4.42 → hud_python-0.4.44}/environments/deepresearch/pyproject.toml +0 -0
  38. {hud_python-0.4.42 → hud_python-0.4.44}/environments/remote_browser/README.md +0 -0
  39. {hud_python-0.4.42 → hud_python-0.4.44}/environments/remote_browser/pyproject.toml +0 -0
  40. {hud_python-0.4.42 → hud_python-0.4.44}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
  41. {hud_python-0.4.42 → hud_python-0.4.44}/environments/text_2048/README.md +0 -0
  42. {hud_python-0.4.42 → hud_python-0.4.44}/environments/text_2048/pyproject.toml +0 -0
  43. {hud_python-0.4.42 → hud_python-0.4.44}/examples/README.md +0 -0
  44. {hud_python-0.4.42 → hud_python-0.4.44}/hud/__init__.py +0 -0
  45. {hud_python-0.4.42 → hud_python-0.4.44}/hud/__main__.py +0 -0
  46. {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/__init__.py +0 -0
  47. {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/base.py +0 -0
  48. {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/claude.py +0 -0
  49. {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/grounded_openai.py +0 -0
  50. {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/langchain.py +0 -0
  51. {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/lite_llm.py +0 -0
  52. {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/misc/__init__.py +0 -0
  53. {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/misc/response_agent.py +0 -0
  54. {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/openai.py +0 -0
  55. {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/tests/__init__.py +0 -0
  56. {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/tests/test_base.py +0 -0
  57. {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/tests/test_claude.py +0 -0
  58. {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/tests/test_client.py +0 -0
  59. {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
  60. {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/tests/test_openai.py +0 -0
  61. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/__main__.py +0 -0
  62. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/analyze.py +0 -0
  63. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/build.py +0 -0
  64. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/clone.py +0 -0
  65. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/debug.py +0 -0
  66. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/flows/__init__.py +0 -0
  67. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/get.py +0 -0
  68. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/init.py +0 -0
  69. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/list_func.py +0 -0
  70. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/pull.py +0 -0
  71. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/push.py +0 -0
  72. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/remove.py +0 -0
  73. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/celebrate.py +0 -0
  74. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/display.py +0 -0
  75. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/gpu.py +0 -0
  76. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/local_runner.py +0 -0
  77. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/presets.py +0 -0
  78. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/viewer.py +0 -0
  79. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/vllm.py +0 -0
  80. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/wait_utils.py +0 -0
  81. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/__init__.py +0 -0
  82. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_analyze.py +0 -0
  83. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_analyze_metadata.py +0 -0
  84. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_build.py +0 -0
  85. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_cli_init.py +0 -0
  86. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_cli_main.py +0 -0
  87. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_clone.py +0 -0
  88. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_cursor.py +0 -0
  89. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_debug.py +0 -0
  90. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_list_func.py +0 -0
  91. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_main_module.py +0 -0
  92. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_mcp_server.py +0 -0
  93. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_pull.py +0 -0
  94. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_push.py +0 -0
  95. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_registry.py +0 -0
  96. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_utils.py +0 -0
  97. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/__init__.py +0 -0
  98. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/config.py +0 -0
  99. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/cursor.py +0 -0
  100. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/docker.py +0 -0
  101. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/env_check.py +0 -0
  102. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/interactive.py +0 -0
  103. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/local_runner.py +0 -0
  104. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/logging.py +0 -0
  105. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/metadata.py +0 -0
  106. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/package_runner.py +0 -0
  107. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/registry.py +0 -0
  108. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/remote_runner.py +0 -0
  109. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/runner.py +0 -0
  110. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/server.py +0 -0
  111. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/source_hash.py +0 -0
  112. {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/tasks.py +0 -0
  113. {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/README.md +0 -0
  114. {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/__init__.py +0 -0
  115. {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/base.py +0 -0
  116. {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/fastmcp.py +0 -0
  117. {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/mcp_use.py +0 -0
  118. {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/tests/__init__.py +0 -0
  119. {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/tests/test_client_integration.py +0 -0
  120. {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/tests/test_fastmcp.py +0 -0
  121. {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/tests/test_mcp_use_retry.py +0 -0
  122. {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/tests/test_protocol.py +0 -0
  123. {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/utils/__init__.py +0 -0
  124. {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/utils/mcp_use_retry.py +0 -0
  125. {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/utils/retry.py +0 -0
  126. {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/utils/retry_transport.py +0 -0
  127. {hud_python-0.4.42 → hud_python-0.4.44}/hud/datasets/__init__.py +0 -0
  128. {hud_python-0.4.42 → hud_python-0.4.44}/hud/datasets/parallel.py +0 -0
  129. {hud_python-0.4.42 → hud_python-0.4.44}/hud/datasets/runner.py +0 -0
  130. {hud_python-0.4.42 → hud_python-0.4.44}/hud/datasets/utils.py +0 -0
  131. {hud_python-0.4.42 → hud_python-0.4.44}/hud/misc/__init__.py +0 -0
  132. {hud_python-0.4.42 → hud_python-0.4.44}/hud/misc/claude_plays_pokemon.py +0 -0
  133. {hud_python-0.4.42 → hud_python-0.4.44}/hud/native/__init__.py +0 -0
  134. {hud_python-0.4.42 → hud_python-0.4.44}/hud/native/comparator.py +0 -0
  135. {hud_python-0.4.42 → hud_python-0.4.44}/hud/native/tests/__init__.py +0 -0
  136. {hud_python-0.4.42 → hud_python-0.4.44}/hud/native/tests/test_comparator.py +0 -0
  137. {hud_python-0.4.42 → hud_python-0.4.44}/hud/native/tests/test_native_init.py +0 -0
  138. {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/__init__.py +0 -0
  139. {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/collector.py +0 -0
  140. {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/config.py +0 -0
  141. {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/context.py +0 -0
  142. {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/exporters.py +0 -0
  143. {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/instrumentation.py +0 -0
  144. {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/processors.py +0 -0
  145. {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/tests/__init__.py +0 -0
  146. {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/tests/test_processors.py +0 -0
  147. {hud_python-0.4.42 → hud_python-0.4.44}/hud/py.typed +0 -0
  148. {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/README.md +0 -0
  149. {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/__init__.py +0 -0
  150. {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/actor.py +0 -0
  151. {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/buffer.py +0 -0
  152. {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/chat_template.jinja +0 -0
  153. {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/tests/__init__.py +0 -0
  154. {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/tests/test_learner.py +0 -0
  155. {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/types.py +0 -0
  156. {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/utils/start_vllm_server.sh +0 -0
  157. {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/utils.py +0 -0
  158. {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/vllm_adapter.py +0 -0
  159. {hud_python-0.4.42 → hud_python-0.4.44}/hud/samples/__init__.py +0 -0
  160. {hud_python-0.4.42 → hud_python-0.4.44}/hud/samples/browser.py +0 -0
  161. {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/__init__.py +0 -0
  162. {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/context.py +0 -0
  163. {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/helper/__init__.py +0 -0
  164. {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/low_level.py +0 -0
  165. {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/server.py +0 -0
  166. {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/__init__.py +0 -0
  167. {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/test_add_tool.py +0 -0
  168. {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/test_context.py +0 -0
  169. {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/test_mcp_server_handlers.py +0 -0
  170. {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/test_mcp_server_integration.py +0 -0
  171. {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/test_mcp_server_more.py +0 -0
  172. {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/test_run_wrapper.py +0 -0
  173. {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/test_server_extra.py +0 -0
  174. {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/test_sigterm_runner.py +0 -0
  175. {hud_python-0.4.42 → hud_python-0.4.44}/hud/settings.py +0 -0
  176. {hud_python-0.4.42 → hud_python-0.4.44}/hud/shared/__init__.py +0 -0
  177. {hud_python-0.4.42 → hud_python-0.4.44}/hud/shared/exceptions.py +0 -0
  178. {hud_python-0.4.42 → hud_python-0.4.44}/hud/shared/hints.py +0 -0
  179. {hud_python-0.4.42 → hud_python-0.4.44}/hud/shared/requests.py +0 -0
  180. {hud_python-0.4.42 → hud_python-0.4.44}/hud/shared/tests/__init__.py +0 -0
  181. {hud_python-0.4.42 → hud_python-0.4.44}/hud/shared/tests/test_exceptions.py +0 -0
  182. {hud_python-0.4.42 → hud_python-0.4.44}/hud/shared/tests/test_requests.py +0 -0
  183. {hud_python-0.4.42 → hud_python-0.4.44}/hud/telemetry/__init__.py +0 -0
  184. {hud_python-0.4.42 → hud_python-0.4.44}/hud/telemetry/instrument.py +0 -0
  185. {hud_python-0.4.42 → hud_python-0.4.44}/hud/telemetry/job.py +0 -0
  186. {hud_python-0.4.42 → hud_python-0.4.44}/hud/telemetry/replay.py +0 -0
  187. {hud_python-0.4.42 → hud_python-0.4.44}/hud/telemetry/tests/__init__.py +0 -0
  188. {hud_python-0.4.42 → hud_python-0.4.44}/hud/telemetry/tests/test_replay.py +0 -0
  189. {hud_python-0.4.42 → hud_python-0.4.44}/hud/telemetry/tests/test_trace.py +0 -0
  190. {hud_python-0.4.42 → hud_python-0.4.44}/hud/telemetry/trace.py +0 -0
  191. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/__init__.py +0 -0
  192. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/base.py +0 -0
  193. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/bash.py +0 -0
  194. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/computer/__init__.py +0 -0
  195. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/computer/anthropic.py +0 -0
  196. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/computer/hud.py +0 -0
  197. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/computer/openai.py +0 -0
  198. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/computer/settings.py +0 -0
  199. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/edit.py +0 -0
  200. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/executors/__init__.py +0 -0
  201. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/executors/base.py +0 -0
  202. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/executors/pyautogui.py +0 -0
  203. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/executors/tests/__init__.py +0 -0
  204. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/executors/tests/test_base_executor.py +0 -0
  205. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  206. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/executors/xdo.py +0 -0
  207. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/grounding/__init__.py +0 -0
  208. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/grounding/config.py +0 -0
  209. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/grounding/grounded_tool.py +0 -0
  210. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/grounding/grounder.py +0 -0
  211. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/grounding/tests/__init__.py +0 -0
  212. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
  213. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/playwright.py +0 -0
  214. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/response.py +0 -0
  215. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/submit.py +0 -0
  216. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/__init__.py +0 -0
  217. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_base.py +0 -0
  218. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_bash.py +0 -0
  219. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_bash_extended.py +0 -0
  220. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_computer.py +0 -0
  221. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_computer_actions.py +0 -0
  222. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_edit.py +0 -0
  223. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_init.py +0 -0
  224. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_playwright_tool.py +0 -0
  225. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_response.py +0 -0
  226. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_tools.py +0 -0
  227. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_tools_init.py +0 -0
  228. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_utils.py +0 -0
  229. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/types.py +0 -0
  230. {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/utils.py +0 -0
  231. {hud_python-0.4.42 → hud_python-0.4.44}/hud/types.py +0 -0
  232. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/__init__.py +0 -0
  233. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/agent_factories.py +0 -0
  234. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/async_utils.py +0 -0
  235. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/hud_console.py +0 -0
  236. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/mcp.py +0 -0
  237. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/pretty_errors.py +0 -0
  238. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/progress.py +0 -0
  239. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/telemetry.py +0 -0
  240. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tests/__init__.py +0 -0
  241. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tests/test_async_utils.py +0 -0
  242. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tests/test_init.py +0 -0
  243. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tests/test_mcp.py +0 -0
  244. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tests/test_progress.py +0 -0
  245. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tests/test_telemetry.py +0 -0
  246. {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tool_shorthand.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.42
3
+ Version: 0.4.44
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -205,7 +205,7 @@ class GenericOpenAIChatAgent(MCPAgent):
205
205
  try:
206
206
  response = await self._invoke_chat_completion(
207
207
  messages=messages,
208
- tools=tools, # type: ignore
208
+ tools=tools, # type: ignore
209
209
  extra=extra,
210
210
  )
211
211
  except Exception as e:
@@ -1178,6 +1178,11 @@ def rl(
1178
1178
  "--vllm-gpu",
1179
1179
  help="Specific GPU for vLLM server",
1180
1180
  ),
1181
+ vllm_gpu_count: int = typer.Option(
1182
+ 1,
1183
+ "--vllm-gpu-count",
1184
+ help="Number of GPUs for vLLM server",
1185
+ ),
1181
1186
  skip_vllm_startup: bool = typer.Option(
1182
1187
  False,
1183
1188
  "--skip-vllm-startup",
@@ -1199,6 +1204,7 @@ def rl(
1199
1204
  no_ddp=no_ddp,
1200
1205
  ddp_gpus=ddp_gpus,
1201
1206
  vllm_gpu=vllm_gpu,
1207
+ vllm_gpu_count=vllm_gpu_count,
1202
1208
  yes=yes,
1203
1209
  skip_vllm_startup=skip_vllm_startup,
1204
1210
  )
@@ -73,6 +73,24 @@ def create_proxy_server(
73
73
  "PYTHONUNBUFFERED=1", # Ensure Python output is not buffered
74
74
  ]
75
75
 
76
+ # Check for .env file in the project directory and add env vars
77
+ env_file = project_path / ".env"
78
+ loaded_env_vars = {}
79
+ if env_file.exists():
80
+ try:
81
+ from hud.cli.utils.config import parse_env_file
82
+
83
+ env_contents = env_file.read_text(encoding="utf-8")
84
+ loaded_env_vars = parse_env_file(env_contents)
85
+ for key, value in loaded_env_vars.items():
86
+ docker_cmd.extend(["-e", f"{key}={value}"])
87
+ if verbose and loaded_env_vars:
88
+ hud_console.info(
89
+ f"Loaded {len(loaded_env_vars)} environment variable(s) from .env file"
90
+ )
91
+ except Exception as e:
92
+ hud_console.warning(f"Failed to load .env file: {e}")
93
+
76
94
  # Add user-provided Docker arguments
77
95
  if docker_args:
78
96
  docker_cmd.extend(docker_args)
@@ -112,8 +130,12 @@ def create_proxy_server(
112
130
  hud_console.info("The container's CMD determines reload behavior")
113
131
  hud_console.command_example(f"docker logs -f {container_name}", "View container logs")
114
132
 
115
- # Show the full Docker command if there are environment variables
116
- if docker_args and any(arg == "-e" or arg.startswith("--env") for arg in docker_args):
133
+ # Show the full Docker command if there are environment variables (from .env or args)
134
+ has_env_from_args = docker_args and any(
135
+ arg == "-e" or arg.startswith("--env") for arg in docker_args
136
+ )
137
+ has_env_from_file = bool(loaded_env_vars)
138
+ if has_env_from_args or has_env_from_file:
117
139
  hud_console.info("")
118
140
  hud_console.info("Docker command with environment variables:")
119
141
  hud_console.info(" ".join(docker_cmd))
@@ -298,16 +298,15 @@ async def run_single_task(
298
298
  agent_config["allowed_tools"] = allowed_tools
299
299
 
300
300
  # Run with grouping
301
- with hud.trace(name=f"{task_prompt} (group_size={group_size})"):
302
- stats = await run_tasks_grouped(
303
- tasks=[task],
304
- agent_class=agent_class,
305
- agent_config=agent_config,
306
- group_size=group_size,
307
- max_parallel_episodes=48, # Same as RL default
308
- max_steps=max_steps,
309
- verbose=verbose,
310
- )
301
+ stats = await run_tasks_grouped(
302
+ tasks=[task],
303
+ agent_class=agent_class,
304
+ agent_config=agent_config,
305
+ group_size=group_size,
306
+ max_parallel_episodes=48, # Same as RL default
307
+ max_steps=max_steps,
308
+ verbose=verbose,
309
+ )
311
310
 
312
311
  # Display results
313
312
  display_group_statistics(stats, show_details=True)
@@ -499,7 +498,7 @@ async def run_full_dataset(
499
498
  )
500
499
 
501
500
  # Display results
502
- display_group_statistics(stats, show_details=len(stats) <= 20)
501
+ display_group_statistics(stats, show_details=len(stats) <= 50)
503
502
 
504
503
  # Return stats for consistency with other modes
505
504
  return stats
@@ -212,17 +212,14 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
212
212
  # Check if tasks already have remote URLs
213
213
  already_remote = _validate_tasks(tasks)
214
214
 
215
- # If tasks already reference a remote MCP URL, do not require a local environment
216
- # or attempt any image updates. Use the dataset as-is.
217
- if already_remote:
218
- return str(tasks_path)
219
-
220
215
  # Extract existing images from tasks
221
216
  existing_images = _extract_existing_images(tasks)
222
217
 
223
218
  # Locate environment
224
219
  env_dir = find_environment_dir(tasks_path)
225
220
  if not env_dir:
221
+ if already_remote:
222
+ return str(tasks_path)
226
223
  hud_console.error("Could not locate an environment directory (Dockerfile + pyproject.toml)")
227
224
  hud_console.hint("Ensure you're in or near your environment folder before running 'hud rl'")
228
225
  raise typer.Exit(1)
@@ -373,6 +370,8 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
373
370
  item["system_prompt"] = t.system_prompt
374
371
  if t.metadata:
375
372
  item["metadata"] = t.metadata
373
+ if t.id is not None:
374
+ item["id"] = t.id
376
375
 
377
376
  tasks_payload.append(item)
378
377
 
@@ -78,6 +78,11 @@ def rl_command(
78
78
  "-y",
79
79
  help="Auto-accept all prompts and use defaults (lazy mode)",
80
80
  ),
81
+ vllm_gpu_count: int = typer.Option(
82
+ None,
83
+ "--vllm-gpu-count",
84
+ help="Number of GPUs for vLLM server",
85
+ ),
81
86
  skip_vllm_startup: bool = typer.Option(
82
87
  False,
83
88
  "--skip-vllm-startup",
@@ -145,6 +150,7 @@ def rl_command(
145
150
  model=model,
146
151
  config_file=config_file,
147
152
  output_dir=output_dir,
153
+ vllm_gpu_count=vllm_gpu_count,
148
154
  yes=yes,
149
155
  )
150
156
  return
@@ -84,7 +84,7 @@ def save_config(config: Config, path: Path) -> None:
84
84
  """Save configuration to a JSON file."""
85
85
  config_dict = config.to_dict()
86
86
 
87
- with open(path, "w") as f:
87
+ with open(path, "w", encoding="utf-8") as f:
88
88
  json.dump(config_dict, f, indent=2)
89
89
  f.write("\n") # Add newline at end of file
90
90
 
@@ -94,7 +94,7 @@ def save_config(config: Config, path: Path) -> None:
94
94
 
95
95
  def load_config(path: Path) -> Config:
96
96
  """Load configuration from a JSON file."""
97
- with open(path) as f:
97
+ with open(path, encoding="utf-8") as f:
98
98
  data = json.load(f)
99
99
 
100
100
  # Use Config.from_dict which handles missing fields gracefully
@@ -245,10 +245,12 @@ def adjust_config_for_ddp(config: Config, num_gpus: int) -> Config:
245
245
  # Apply scaling rule
246
246
  if num_gpus == 1:
247
247
  # Special case: 2 groups for single GPU
248
+ groups_per_gpu = 2
248
249
  config.training.batch_size = 2 * group_size
249
250
  else:
250
- # Multi-GPU: each GPU processes 1 group
251
- config.training.batch_size = num_gpus * group_size
251
+ groups_per_gpu = config.training.batch_size // group_size
252
+ # Multi-GPU: each GPU processes groups_per_gpu groups
253
+ config.training.batch_size = num_gpus * group_size * groups_per_gpu
252
254
 
253
255
  # Update max_parallel_episodes to match
254
256
  config.actor.max_parallel_episodes = config.training.batch_size
@@ -263,7 +265,7 @@ def adjust_config_for_ddp(config: Config, num_gpus: int) -> Config:
263
265
  f"\n[cyan]📊 Adjusted batch_size to {config.training.batch_size} ({config.training.batch_size // group_size} groups)[/cyan]" # noqa: E501
264
266
  )
265
267
  console.print(
266
- f"[cyan] Each of the {num_gpus} GPU(s) will process {config.training.batch_size // group_size // num_gpus} group(s) in parallel[/cyan]" # noqa: E501
268
+ f"[cyan] Each of the {num_gpus} GPU(s) will process {groups_per_gpu} group(s) in parallel[/cyan]" # noqa: E501
267
269
  )
268
270
 
269
271
  return config
@@ -32,7 +32,9 @@ GPU_PRICING = {
32
32
  }
33
33
 
34
34
 
35
- def ensure_vllm_deployed(model_name: str, gpu_type: str = "A100", timeout: int = 600) -> None:
35
+ def ensure_vllm_deployed(
36
+ model_name: str, gpu_type: str = "A100", gpu_count: int = 1, timeout: int = 600
37
+ ) -> None:
36
38
  """Deploy vLLM for a model if needed and wait until it's ready.
37
39
 
38
40
  Args:
@@ -47,7 +49,7 @@ def ensure_vllm_deployed(model_name: str, gpu_type: str = "A100", timeout: int =
47
49
  return
48
50
 
49
51
  hud_console.info(f"Deploying vLLM server for {model_name}...")
50
- rl_api.deploy_vllm(model_name, gpu_type=gpu_type)
52
+ rl_api.deploy_vllm(model_name, gpu_type=gpu_type, gpu_count=gpu_count)
51
53
  hud_console.success("vLLM deployment started")
52
54
 
53
55
  hud_console.info("Waiting for vLLM server to be ready...")
@@ -72,6 +74,7 @@ def run_remote_training(
72
74
  model: str | None,
73
75
  config_file: Path | None,
74
76
  output_dir: str,
77
+ vllm_gpu_count: int = 1,
75
78
  yes: bool = False,
76
79
  ) -> None:
77
80
  """Run RL training remotely via the API server following the new interactive flow."""
@@ -183,14 +186,18 @@ def run_remote_training(
183
186
 
184
187
  # Ask for model type
185
188
  if yes:
186
- model_type = "Qwen/Qwen2.5-VL-3B-Instruct" # Default model in yes mode
189
+ if config_file:
190
+ config = load_config(config_file)
191
+ model_type = config.model.base_model
192
+ else:
193
+ model_type = "Qwen/Qwen2.5-VL-3B-Instruct"
187
194
  hud_console.info(f"Auto-selecting base model: {model_type} (--yes mode)")
188
195
  else:
189
196
  model_type = hud_console.select(
190
197
  "Select base model type:",
191
198
  choices=[
192
199
  {"name": "Qwen2.5-VL-3B-Instruct", "value": "Qwen/Qwen2.5-VL-3B-Instruct"},
193
- # {"name": "Qwen2.5-VL-7B-Instruct", "value": "Qwen/Qwen2.5-VL-7B-Instruct"}, # noqa: E501
200
+ {"name": "Qwen2.5-3B-Instruct", "value": "Qwen/Qwen2.5-3B-Instruct"},
194
201
  ],
195
202
  default=0,
196
203
  )
@@ -218,7 +225,7 @@ def run_remote_training(
218
225
  try:
219
226
  rl_api.create_model(model_name, model_type)
220
227
  hud_console.success(f"Created model: {model_name}")
221
- ensure_vllm_deployed(model_name, gpu_type="A100")
228
+ ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
222
229
 
223
230
  except Exception as e:
224
231
  # If the name already exists, suggest a new name and prompt once
@@ -247,7 +254,7 @@ def run_remote_training(
247
254
  rl_api.create_model(chosen, model_type)
248
255
  hud_console.success(f"Created model: {chosen}")
249
256
  model_name = chosen
250
- ensure_vllm_deployed(model_name, gpu_type="A100")
257
+ ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
251
258
  except Exception as e2:
252
259
  hud_console.error(f"Failed to create model: {e2}")
253
260
  raise
@@ -281,7 +288,7 @@ def run_remote_training(
281
288
  return
282
289
 
283
290
  # Ensure vLLM is deployed
284
- ensure_vllm_deployed(model_name, gpu_type="A100")
291
+ ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
285
292
  except KeyboardInterrupt:
286
293
  hud_console.dim_info("Training cancelled", "")
287
294
  return
@@ -323,7 +330,7 @@ def run_remote_training(
323
330
  )
324
331
 
325
332
  if yes:
326
- num_gpus = 2 # Default to 2 GPUs in yes mode
333
+ num_gpus = 2 # Default to 2 GPUs in yes mode
327
334
  hud_console.info(f"Auto-selecting {num_gpus} GPU(s) (--yes mode)")
328
335
  else:
329
336
  num_gpus = hud_console.select(
@@ -425,10 +432,12 @@ def run_remote_training(
425
432
  # Load provided config
426
433
  hud_console.info(f"Loading configuration from: {config_file}")
427
434
  config = load_config(config_file)
428
- config_dict = config.to_dict()
429
435
  gpu_choice = config.training.gpu_type
430
436
  num_gpus = config.training.num_gpus
431
437
 
438
+ config = adjust_config_for_ddp(config, int(num_gpus))
439
+ config_dict = config.to_dict()
440
+
432
441
  # Launch training
433
442
  try:
434
443
  # Little celebration before launching
@@ -61,12 +61,12 @@ def list_models() -> list[RLModelInfo]:
61
61
  ]
62
62
 
63
63
 
64
- def deploy_vllm(model_name: str, gpu_type: str = "A100") -> dict[str, Any]:
64
+ def deploy_vllm(model_name: str, gpu_type: str = "A100", gpu_count: int = 1) -> dict[str, Any]:
65
65
  """Deploy a vLLM server for a model."""
66
66
  return make_request_sync(
67
67
  method="POST",
68
68
  url=f"{settings.hud_rl_url}/models/{model_name}/deploy",
69
- json={"gpu_type": gpu_type},
69
+ json={"gpu_type": gpu_type, "gpu_count": gpu_count},
70
70
  api_key=settings.api_key,
71
71
  )
72
72
 
@@ -127,8 +127,4 @@ def is_environment_directory(path: str | Path) -> bool:
127
127
  return False
128
128
 
129
129
  # Must have pyproject.toml
130
- if not (dir_path / "pyproject.toml").exists():
131
- hud_console.error("pyproject.toml not found")
132
- return False
133
-
134
- return True
130
+ return (dir_path / "pyproject.toml").exists()
@@ -13,6 +13,7 @@ SUPPORTED_MODELS = [
13
13
  "Qwen/Qwen2.5-VL-32B-Instruct",
14
14
  "Qwen/Qwen2.5-VL-72B-Instruct",
15
15
  "Qwen/Qwen2.5-7B-Instruct",
16
+ "Qwen/Qwen2.5-3B-Instruct",
16
17
  ]
17
18
 
18
19
 
@@ -39,9 +40,9 @@ class ModelConfig:
39
40
  """Model and LoRA configuration."""
40
41
 
41
42
  base_model: str = "Qwen/Qwen2.5-VL-3B-Instruct"
42
- lora_r: int = 8
43
- lora_alpha: int = 16
44
- lora_dropout: float = 0.05
43
+ lora_r: int = 16
44
+ lora_alpha: int = 32
45
+ lora_dropout: float = 0.1
45
46
  target_modules: tuple[str, ...] = (
46
47
  "q_proj",
47
48
  "k_proj",
@@ -61,6 +62,7 @@ class ModelConfig:
61
62
  @dataclass
62
63
  class TrainingConfig:
63
64
  """Training hyperparameters."""
65
+
64
66
  # GPU parameters
65
67
  gpu_type: str = "A100"
66
68
  num_gpus: int = 2
@@ -71,9 +73,9 @@ class TrainingConfig:
71
73
  save_every_batches: int = 1
72
74
 
73
75
  # Batching parameters
74
- epochs: int = 2
75
- batch_size: int = 24
76
- group_size: int = 4
76
+ epochs: int = 1
77
+ batch_size: int = 16
78
+ group_size: int = 8
77
79
  mini_batch_size: int = 1
78
80
  update_after_group: bool = True # Whether to update the policy after each task group
79
81
  accumulate_over_minibatches: bool = False # Whether to accumulate over minibatches
@@ -84,7 +86,7 @@ class TrainingConfig:
84
86
  leave_one_out: bool = True
85
87
 
86
88
  # Replay buffer parameters
87
- buffer_steps: int = 4
89
+ buffer_steps: int = 8
88
90
  select_strategy: Literal["recent", "variance", "random"] = "variance"
89
91
 
90
92
  # Aggregation parameters
@@ -92,8 +94,8 @@ class TrainingConfig:
92
94
  token_agg: Literal["mean", "sum"] = "mean" # noqa: S105
93
95
 
94
96
  # Regularization parameters
95
- kl_beta: float = 0.0
96
- entropy_beta: float = 0.0
97
+ kl_beta: float = 0.001
98
+ entropy_beta: float = 0.001
97
99
  top_eps: float = 0.2
98
100
  bottom_eps: float = 0.1
99
101
 
@@ -143,6 +145,7 @@ class Config:
143
145
  job_id: str | None = None # Use existing job ID if provided
144
146
  stats_interval: int = 1
145
147
  verbose: bool = False
148
+ very_verbose: bool = False
146
149
 
147
150
  # Paths
148
151
  out_dir: str = "./checkpoints"
@@ -166,6 +169,7 @@ class Config:
166
169
  job_id=d.get("job_id"),
167
170
  stats_interval=d.get("stats_interval", 1),
168
171
  verbose=d.get("verbose", False),
172
+ very_verbose=d.get("very_verbose", False),
169
173
  out_dir=d.get("out_dir", "./checkpoints"),
170
174
  adapter_prefix=d.get("adapter_prefix", "cua-grpo-step"),
171
175
  seed=d.get("seed", 1234),
@@ -181,6 +185,7 @@ class Config:
181
185
  "job_id": self.job_id,
182
186
  "stats_interval": self.stats_interval,
183
187
  "verbose": self.verbose,
188
+ "very_verbose": self.very_verbose,
184
189
  "out_dir": self.out_dir,
185
190
  "adapter_prefix": self.adapter_prefix,
186
191
  "seed": self.seed,
@@ -66,7 +66,13 @@ def all_reduce_mean(tensor: torch.Tensor) -> torch.Tensor:
66
66
 
67
67
 
68
68
  def broadcast_object(obj: Any, src: int = 0) -> Any:
69
- """Broadcast a Python object from src rank to all ranks."""
69
+ """Broadcast a Python object from src rank to all ranks.
70
+
71
+ Args:
72
+ obj: Object to broadcast (used on src rank)
73
+ src: Source rank
74
+ device: Device for temporary tensor buffer during pickling transfer
75
+ """
70
76
  if not dist.is_initialized():
71
77
  return obj
72
78
 
@@ -75,6 +81,33 @@ def broadcast_object(obj: Any, src: int = 0) -> Any:
75
81
  return obj_list[0]
76
82
 
77
83
 
84
+ def scatter_object(
85
+ obj_list: list[Any] | None,
86
+ src: int = 0,
87
+ ) -> Any:
88
+ """Scatter a list of Python objects from src so each rank receives one object.
89
+
90
+ Usage:
91
+ - On src rank: pass the full list (length == world_size)
92
+ - On non-src ranks: pass None
93
+
94
+ Returns:
95
+ The object intended for this rank.
96
+ """
97
+ if not dist.is_initialized():
98
+ # Single-process: return first element if provided, else None
99
+ if obj_list is None or len(obj_list) == 0:
100
+ return None
101
+ return obj_list[0]
102
+
103
+ out: list[Any] = [None]
104
+ if dist.get_rank() == src:
105
+ dist.scatter_object_list(out, obj_list, src=src)
106
+ else:
107
+ dist.scatter_object_list(out, None, src=src)
108
+ return out[0]
109
+
110
+
78
111
  def gather_tensors(tensor: torch.Tensor) -> list[torch.Tensor] | None:
79
112
  """Gather tensors from all ranks to rank 0.
80
113
 
@@ -240,6 +240,8 @@ class GRPOLearner:
240
240
  if sample.inputs:
241
241
  sample = sample.to_device(self.device)
242
242
  sample.old_logprobs, _ = self.compute_logprobs(self.policy, sample.inputs)
243
+ # Free GPU memory for this sample immediately
244
+ sample.to_device(torch.device("cpu"))
243
245
 
244
246
  policy_module = self.policy.module if hasattr(self.policy, "module") else self.policy
245
247
  with policy_module.disable_adapter():
@@ -247,7 +249,10 @@ class GRPOLearner:
247
249
  if is_main_process():
248
250
  progress.update(f"Processing batch of traces... {i}/{len(batch)}")
249
251
  if sample.inputs:
252
+ # Move back to GPU for reference computation, then free
253
+ sample = sample.to_device(self.device)
250
254
  sample.ref_logprobs, _ = self.compute_logprobs(self.policy, sample.inputs)
255
+ sample.to_device(torch.device("cpu"))
251
256
 
252
257
  hud_console.info_log("Creating mini-batches...")
253
258
  group_size = self.config.training.group_size
@@ -488,15 +493,21 @@ class GRPOLearner:
488
493
  out = model(**model_inputs)
489
494
 
490
495
  logits = out.logits / self.config.actor.temperature
491
- log_probs = F.log_softmax(logits, dim=-1)
492
496
 
497
+ # Compute token log-probs via negative cross-entropy to avoid materializing full log_probs
493
498
  targets = inputs["input_ids"][:, 1:]
494
- token_log_probs = log_probs[:, :-1].gather(-1, targets.unsqueeze(-1)).squeeze(-1)
499
+ logits_slice = logits[:, :-1, :]
500
+ loss_flat = F.cross_entropy(
501
+ logits_slice.reshape(-1, logits_slice.size(-1)),
502
+ targets.reshape(-1),
503
+ reduction="none",
504
+ )
505
+ token_log_probs = (-loss_flat).reshape_as(targets)
495
506
 
496
507
  # Compute entropy only for assistant tokens to save memory
497
508
  assistant_mask = inputs["assistant_mask"]
498
509
  entropy = torch.zeros_like(token_log_probs)
499
- if assistant_mask.any():
510
+ if assistant_mask.any() and getattr(self.config.training, "entropy_beta", 0.0) != 0.0:
500
511
  entropy[assistant_mask] = entropy_from_logits(logits[:, :-1][assistant_mask])
501
512
 
502
513
  return token_log_probs, entropy
@@ -506,8 +517,20 @@ class GRPOLearner:
506
517
  # Return dummy values that match expected shapes
507
518
  seq_len = inputs["input_ids"].shape[1] - 1 if "input_ids" in inputs else 0
508
519
  batch_size = inputs["input_ids"].shape[0] if "input_ids" in inputs else 1
509
- dummy_logprobs = torch.zeros(batch_size, seq_len, device=self.device)
510
- dummy_entropy = torch.zeros(batch_size, seq_len, device=self.device)
520
+ # Create dummy tensors that still participate in autograd so backward doesn't fail
521
+ try:
522
+ param_sum = torch.sum(
523
+ next(self.policy.parameters())
524
+ ) # touch params to build a graph
525
+ base = param_sum * 0.0
526
+ except StopIteration:
527
+ base = torch.tensor(0.0, device=self.device)
528
+ dummy_logprobs = (
529
+ base + torch.zeros(batch_size, seq_len, device=self.device)
530
+ ).requires_grad_(True)
531
+ dummy_entropy = (
532
+ base + torch.zeros(batch_size, seq_len, device=self.device)
533
+ ).requires_grad_(True)
511
534
  return dummy_logprobs, dummy_entropy
512
535
 
513
536
  def save(self, path: str) -> None: