hud-python 0.6.6__tar.gz → 0.6.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243) hide show
  1. {hud_python-0.6.6 → hud_python-0.6.7}/PKG-INFO +1 -1
  2. hud_python-0.6.7/cookbooks/fireworks-rl-training/README.md +129 -0
  3. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/deploy.py +41 -1
  4. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/tests/test_deploy.py +86 -0
  5. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/runtime.py +4 -1
  6. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/sync.py +2 -2
  7. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/tests/test_hosted.py +19 -0
  8. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/tests/test_sync.py +12 -0
  9. {hud_python-0.6.6 → hud_python-0.6.7}/hud/version.py +1 -1
  10. {hud_python-0.6.6 → hud_python-0.6.7}/pyproject.toml +1 -1
  11. hud_python-0.6.6/cookbooks/fireworks-rl-training/README.md +0 -114
  12. {hud_python-0.6.6 → hud_python-0.6.7}/.gitignore +0 -0
  13. {hud_python-0.6.6 → hud_python-0.6.7}/LICENSE +0 -0
  14. {hud_python-0.6.6 → hud_python-0.6.7}/README.md +0 -0
  15. {hud_python-0.6.6 → hud_python-0.6.7}/cookbooks/a2a-chat/README.md +0 -0
  16. {hud_python-0.6.6 → hud_python-0.6.7}/cookbooks/a2a-chat/pyproject.toml +0 -0
  17. {hud_python-0.6.6 → hud_python-0.6.7}/cookbooks/codex-coding/README.md +0 -0
  18. {hud_python-0.6.6 → hud_python-0.6.7}/cookbooks/codex-coding/pyproject.toml +0 -0
  19. {hud_python-0.6.6 → hud_python-0.6.7}/cookbooks/connect4-selfplay/README.md +0 -0
  20. {hud_python-0.6.6 → hud_python-0.6.7}/cookbooks/fireworks-rl-training/pyproject.toml +0 -0
  21. {hud_python-0.6.6 → hud_python-0.6.7}/cookbooks/rl-training/README.md +0 -0
  22. {hud_python-0.6.6 → hud_python-0.6.7}/cookbooks/rl-training/pyproject.toml +0 -0
  23. {hud_python-0.6.6 → hud_python-0.6.7}/hud/__init__.py +0 -0
  24. {hud_python-0.6.6 → hud_python-0.6.7}/hud/__main__.py +0 -0
  25. {hud_python-0.6.6 → hud_python-0.6.7}/hud/_legacy.py +0 -0
  26. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/__init__.py +0 -0
  27. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/base.py +0 -0
  28. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/browser_use/__init__.py +0 -0
  29. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/browser_use/agent.py +0 -0
  30. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/claude/__init__.py +0 -0
  31. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/claude/agent.py +0 -0
  32. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/claude/sdk/__init__.py +0 -0
  33. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/claude/sdk/agent.py +0 -0
  34. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/claude/sdk/computer_mcp.py +0 -0
  35. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/claude/tools/__init__.py +0 -0
  36. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/claude/tools/base.py +0 -0
  37. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/claude/tools/coding.py +0 -0
  38. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/claude/tools/computer.py +0 -0
  39. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/claude/tools/hosted.py +0 -0
  40. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/claude/tools/mcp_proxy.py +0 -0
  41. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/claude/tools/settings.py +0 -0
  42. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/claude/tools/tests/__init__.py +0 -0
  43. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/claude/tools/tests/test_computer.py +0 -0
  44. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/gemini/__init__.py +0 -0
  45. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/gemini/agent.py +0 -0
  46. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/gemini/settings.py +0 -0
  47. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/gemini/tools/__init__.py +0 -0
  48. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/gemini/tools/base.py +0 -0
  49. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/gemini/tools/coding.py +0 -0
  50. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/gemini/tools/computer.py +0 -0
  51. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/gemini/tools/filesystem.py +0 -0
  52. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/gemini/tools/hosted.py +0 -0
  53. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/gemini/tools/mcp_proxy.py +0 -0
  54. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/gemini/tools/tests/__init__.py +0 -0
  55. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/gemini/tools/tests/test_computer.py +0 -0
  56. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/misc/__init__.py +0 -0
  57. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/misc/response_automation.py +0 -0
  58. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai/__init__.py +0 -0
  59. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai/agent.py +0 -0
  60. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai/tools/__init__.py +0 -0
  61. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai/tools/apply_patch.py +0 -0
  62. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai/tools/base.py +0 -0
  63. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai/tools/coding.py +0 -0
  64. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai/tools/computer.py +0 -0
  65. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai/tools/hosted.py +0 -0
  66. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai/tools/mcp_proxy.py +0 -0
  67. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai/tools/strict_schema.py +0 -0
  68. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai/tools/tests/__init__.py +0 -0
  69. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai/tools/tests/test_computer.py +0 -0
  70. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai/tools/tests/test_strict_schema.py +0 -0
  71. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai_compatible/__init__.py +0 -0
  72. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai_compatible/agent.py +0 -0
  73. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai_compatible/tools/__init__.py +0 -0
  74. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai_compatible/tools/base.py +0 -0
  75. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai_compatible/tools/filesystem.py +0 -0
  76. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/openai_compatible/tools/mcp_proxy.py +0 -0
  77. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/robot/__init__.py +0 -0
  78. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/robot/_types.py +0 -0
  79. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/robot/adapter.py +0 -0
  80. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/robot/agent.py +0 -0
  81. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/robot/batching.py +0 -0
  82. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/robot/model.py +0 -0
  83. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/robot/record.py +0 -0
  84. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/robot/video.py +0 -0
  85. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tests/__init__.py +0 -0
  86. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tests/test_apply_patch.py +0 -0
  87. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tests/test_base.py +0 -0
  88. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tests/test_claude_agent.py +0 -0
  89. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tests/test_claude_sdk_agent.py +0 -0
  90. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tests/test_gemini_agent.py +0 -0
  91. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tests/test_openai_agent.py +0 -0
  92. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tests/test_openai_compatible_agent.py +0 -0
  93. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tests/test_provider_native_tools.py +0 -0
  94. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tests/test_tool_agent.py +0 -0
  95. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tests/test_trace.py +0 -0
  96. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tool_agent.py +0 -0
  97. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tools/__init__.py +0 -0
  98. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tools/base.py +0 -0
  99. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tools/hosted.py +0 -0
  100. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tools/mcp.py +0 -0
  101. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tools/rfb.py +0 -0
  102. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/tools/ssh.py +0 -0
  103. {hud_python-0.6.6 → hud_python-0.6.7}/hud/agents/types.py +0 -0
  104. {hud_python-0.6.6 → hud_python-0.6.7}/hud/capabilities/__init__.py +0 -0
  105. {hud_python-0.6.6 → hud_python-0.6.7}/hud/capabilities/base.py +0 -0
  106. {hud_python-0.6.6 → hud_python-0.6.7}/hud/capabilities/cdp.py +0 -0
  107. {hud_python-0.6.6 → hud_python-0.6.7}/hud/capabilities/filetracking.py +0 -0
  108. {hud_python-0.6.6 → hud_python-0.6.7}/hud/capabilities/mcp.py +0 -0
  109. {hud_python-0.6.6 → hud_python-0.6.7}/hud/capabilities/rfb.py +0 -0
  110. {hud_python-0.6.6 → hud_python-0.6.7}/hud/capabilities/robot.py +0 -0
  111. {hud_python-0.6.6 → hud_python-0.6.7}/hud/capabilities/ssh.py +0 -0
  112. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/__init__.py +0 -0
  113. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/__main__.py +0 -0
  114. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/cancel.py +0 -0
  115. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/client.py +0 -0
  116. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/eval.py +0 -0
  117. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/init.py +0 -0
  118. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/jobs.py +0 -0
  119. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/login.py +0 -0
  120. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/models.py +0 -0
  121. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/presets.py +0 -0
  122. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/serve.py +0 -0
  123. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/sync.py +0 -0
  124. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/task.py +0 -0
  125. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/templates.py +0 -0
  126. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/tests/__init__.py +0 -0
  127. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/tests/test_cli_init.py +0 -0
  128. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/tests/test_cli_main.py +0 -0
  129. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
  130. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/tests/test_eval_bedrock.py +0 -0
  131. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/tests/test_eval_config.py +0 -0
  132. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/tests/test_init.py +0 -0
  133. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/tests/test_main_module.py +0 -0
  134. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/tests/test_sync_export.py +0 -0
  135. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/trace.py +0 -0
  136. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/__init__.py +0 -0
  137. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/api.py +0 -0
  138. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/build_display.py +0 -0
  139. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/build_logs.py +0 -0
  140. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/config.py +0 -0
  141. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/context.py +0 -0
  142. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/display.py +0 -0
  143. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/jobs.py +0 -0
  144. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/registry.py +0 -0
  145. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/source.py +0 -0
  146. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/tasks.py +0 -0
  147. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/tests/__init__.py +0 -0
  148. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/tests/test_build_display.py +0 -0
  149. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/tests/test_config.py +0 -0
  150. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/tests/test_context.py +0 -0
  151. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/tests/test_registry.py +0 -0
  152. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/tests/test_source.py +0 -0
  153. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/tests/test_tasks.py +0 -0
  154. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/tests/test_version_check.py +0 -0
  155. {hud_python-0.6.6 → hud_python-0.6.7}/hud/cli/utils/version_check.py +0 -0
  156. {hud_python-0.6.6 → hud_python-0.6.7}/hud/clients/__init__.py +0 -0
  157. {hud_python-0.6.6 → hud_python-0.6.7}/hud/clients/client.py +0 -0
  158. {hud_python-0.6.6 → hud_python-0.6.7}/hud/clients/tests/__init__.py +0 -0
  159. {hud_python-0.6.6 → hud_python-0.6.7}/hud/clients/tests/test_connect.py +0 -0
  160. {hud_python-0.6.6 → hud_python-0.6.7}/hud/conftest.py +0 -0
  161. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/__init__.py +0 -0
  162. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/env.py +0 -0
  163. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/file_tracker.py +0 -0
  164. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/file_tracking.py +0 -0
  165. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/legacy.py +0 -0
  166. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/robot/__init__.py +0 -0
  167. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/robot/bridge.py +0 -0
  168. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/robot/endpoint.py +0 -0
  169. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/robot/sim_runner.py +0 -0
  170. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/server.py +0 -0
  171. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/tests/__init__.py +0 -0
  172. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/tests/conftest.py +0 -0
  173. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/tests/test_capability_backing.py +0 -0
  174. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/tests/test_file_tracker.py +0 -0
  175. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/tests/test_file_tracking.py +0 -0
  176. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/tests/test_legacy.py +0 -0
  177. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/tests/test_loader.py +0 -0
  178. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/tests/test_manifest.py +0 -0
  179. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/tests/test_server.py +0 -0
  180. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/tests/test_tunnel.py +0 -0
  181. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/utils.py +0 -0
  182. {hud_python-0.6.6 → hud_python-0.6.7}/hud/environment/workspace.py +0 -0
  183. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/__init__.py +0 -0
  184. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/chat.py +0 -0
  185. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/file_tracking.py +0 -0
  186. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/job.py +0 -0
  187. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/run.py +0 -0
  188. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/task.py +0 -0
  189. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/taskset.py +0 -0
  190. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/tests/__init__.py +0 -0
  191. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/tests/test_chat.py +0 -0
  192. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/tests/test_docker_provider.py +0 -0
  193. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/tests/test_file_tracking_observer.py +0 -0
  194. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/tests/test_job.py +0 -0
  195. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/tests/test_rollout.py +0 -0
  196. {hud_python-0.6.6 → hud_python-0.6.7}/hud/eval/tests/test_task.py +0 -0
  197. {hud_python-0.6.6 → hud_python-0.6.7}/hud/graders/__init__.py +0 -0
  198. {hud_python-0.6.6 → hud_python-0.6.7}/hud/graders/base.py +0 -0
  199. {hud_python-0.6.6 → hud_python-0.6.7}/hud/graders/bash.py +0 -0
  200. {hud_python-0.6.6 → hud_python-0.6.7}/hud/graders/combine.py +0 -0
  201. {hud_python-0.6.6 → hud_python-0.6.7}/hud/graders/judge.py +0 -0
  202. {hud_python-0.6.6 → hud_python-0.6.7}/hud/graders/results.py +0 -0
  203. {hud_python-0.6.6 → hud_python-0.6.7}/hud/graders/text.py +0 -0
  204. {hud_python-0.6.6 → hud_python-0.6.7}/hud/patches/__init__.py +0 -0
  205. {hud_python-0.6.6 → hud_python-0.6.7}/hud/patches/mcp_patches.py +0 -0
  206. {hud_python-0.6.6 → hud_python-0.6.7}/hud/patches/tests/__init__.py +0 -0
  207. {hud_python-0.6.6 → hud_python-0.6.7}/hud/patches/tests/test_warnings.py +0 -0
  208. {hud_python-0.6.6 → hud_python-0.6.7}/hud/patches/warnings.py +0 -0
  209. {hud_python-0.6.6 → hud_python-0.6.7}/hud/py.typed +0 -0
  210. {hud_python-0.6.6 → hud_python-0.6.7}/hud/server.py +0 -0
  211. {hud_python-0.6.6 → hud_python-0.6.7}/hud/settings.py +0 -0
  212. {hud_python-0.6.6 → hud_python-0.6.7}/hud/telemetry/__init__.py +0 -0
  213. {hud_python-0.6.6 → hud_python-0.6.7}/hud/telemetry/context.py +0 -0
  214. {hud_python-0.6.6 → hud_python-0.6.7}/hud/telemetry/exporter.py +0 -0
  215. {hud_python-0.6.6 → hud_python-0.6.7}/hud/telemetry/filetracking.py +0 -0
  216. {hud_python-0.6.6 → hud_python-0.6.7}/hud/telemetry/instrument.py +0 -0
  217. {hud_python-0.6.6 → hud_python-0.6.7}/hud/telemetry/span.py +0 -0
  218. {hud_python-0.6.6 → hud_python-0.6.7}/hud/telemetry/tests/__init__.py +0 -0
  219. {hud_python-0.6.6 → hud_python-0.6.7}/hud/telemetry/tests/test_exporter.py +0 -0
  220. {hud_python-0.6.6 → hud_python-0.6.7}/hud/telemetry/tests/test_filetracking.py +0 -0
  221. {hud_python-0.6.6 → hud_python-0.6.7}/hud/telemetry/tests/test_instrument.py +0 -0
  222. {hud_python-0.6.6 → hud_python-0.6.7}/hud/train/__init__.py +0 -0
  223. {hud_python-0.6.6 → hud_python-0.6.7}/hud/train/base.py +0 -0
  224. {hud_python-0.6.6 → hud_python-0.6.7}/hud/train/client.py +0 -0
  225. {hud_python-0.6.6 → hud_python-0.6.7}/hud/train/types.py +0 -0
  226. {hud_python-0.6.6 → hud_python-0.6.7}/hud/types.py +0 -0
  227. {hud_python-0.6.6 → hud_python-0.6.7}/hud/utils/__init__.py +0 -0
  228. {hud_python-0.6.6 → hud_python-0.6.7}/hud/utils/exceptions.py +0 -0
  229. {hud_python-0.6.6 → hud_python-0.6.7}/hud/utils/gateway.py +0 -0
  230. {hud_python-0.6.6 → hud_python-0.6.7}/hud/utils/hints.py +0 -0
  231. {hud_python-0.6.6 → hud_python-0.6.7}/hud/utils/hud_console.py +0 -0
  232. {hud_python-0.6.6 → hud_python-0.6.7}/hud/utils/modules.py +0 -0
  233. {hud_python-0.6.6 → hud_python-0.6.7}/hud/utils/platform.py +0 -0
  234. {hud_python-0.6.6 → hud_python-0.6.7}/hud/utils/requests.py +0 -0
  235. {hud_python-0.6.6 → hud_python-0.6.7}/hud/utils/serialization.py +0 -0
  236. {hud_python-0.6.6 → hud_python-0.6.7}/hud/utils/tests/__init__.py +0 -0
  237. {hud_python-0.6.6 → hud_python-0.6.7}/hud/utils/tests/test_exceptions.py +0 -0
  238. {hud_python-0.6.6 → hud_python-0.6.7}/hud/utils/tests/test_hints.py +0 -0
  239. {hud_python-0.6.6 → hud_python-0.6.7}/hud/utils/tests/test_hud_console.py +0 -0
  240. {hud_python-0.6.6 → hud_python-0.6.7}/hud/utils/tests/test_platform.py +0 -0
  241. {hud_python-0.6.6 → hud_python-0.6.7}/hud/utils/tests/test_requests.py +0 -0
  242. {hud_python-0.6.6 → hud_python-0.6.7}/hud/utils/tests/test_serialization.py +0 -0
  243. {hud_python-0.6.6 → hud_python-0.6.7}/hud/utils/time.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.6.6
3
+ Version: 0.6.7
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -0,0 +1,129 @@
1
+ # Fireworks RL Training
2
+
3
+ Direct Fireworks Training API loop over the same arithmetic preview task used by
4
+ `cookbooks/rl-training`.
5
+
6
+ This does **not** use Fireworks native datasets or RFT jobs. It follows the
7
+ Training API service path from the Fireworks docs:
8
+
9
+ 1. `FiretitanServiceClient.from_firetitan_config(...)`
10
+ 2. `create_deployment_sampler(...)` for high-parallel rollouts
11
+ 3. local grading of HUD-style multiplication tasks
12
+ 4. `forward_backward_custom(...)` + `optim_step(...)`
13
+ 5. `save_weights_for_sampler(...)` + sampler refresh
14
+
15
+ References:
16
+
17
+ - Fireworks Training API introduction: https://docs.fireworks.ai/fine-tuning/training-api/introduction
18
+ - Training and sampling lifecycle: https://docs.fireworks.ai/fine-tuning/training-api/training-and-sampling
19
+ - Loss functions / GRPO reference: https://docs.fireworks.ai/fine-tuning/training-api/loss-functions
20
+
21
+ ## Setup
22
+
23
+ The repo-level `.env` is loaded automatically. It must contain:
24
+
25
+ ```bash
26
+ FIREWORKS_API_KEY=...
27
+ FIREWORKS_ACCOUNT_ID=...
28
+ ```
29
+
30
+ Install the isolated cookbook environment:
31
+
32
+ ```bash
33
+ uv sync --pre
34
+ ```
35
+
36
+ ## Calibrate task difficulty first
37
+
38
+ What matters for GRPO is **within-group** reward spread: advantages are computed
39
+ within each prompt group, so a group whose rollouts all score the same (all 0 or
40
+ all 1) produces zero advantage and no gradient — even if the *overall* mean looks
41
+ healthy. Calibration reports `within_group_reward_std` for exactly this; treat
42
+ it, not `reward_mean`, as the signal that training has something to learn.
43
+
44
+ Two backends:
45
+
46
+ - `--calibration-backend inference` (default): Fireworks' OpenAI-compatible API.
47
+ Cheap, but samples `gpt-oss-120b` (`--inference-model`), not the training base —
48
+ the small serverless catalog on the `lorenss` key has no Qwen3 8B. Use it only
49
+ for a rough task sanity check.
50
+ - `--calibration-backend managed`: provisions the same deployment sampler that
51
+ training uses and samples the **actual base model** (Qwen3 8B). This is the
52
+ calibration that counts. It still skips the trainer and `optim_step`.
53
+
54
+ ```bash
55
+ uv run train.py --calibrate-only --calibration-backend managed \
56
+ --groups-per-step 6 --rollouts-per-prompt 6 --parallelism 18 --debug-samples 4
57
+ ```
58
+
59
+ `--debug-samples N` prints the first N rollouts (reward, output-token count,
60
+ text) so you can see *why* a group scored the way it did. Tune the multiplication
61
+ range until `within_group_reward_std` is clearly above zero:
62
+
63
+ - Groups all-correct (`within_group_reward_std ~= 0`) → make it harder
64
+ (`--min-a/--max-a/--min-b/--max-b`).
65
+ - Groups all-wrong → make it easier, or raise `--max-tokens` so the model can
66
+ finish its working before the budget cuts it off.
67
+
68
+ The shipped defaults (3-digit × 3-digit, `--max-tokens 512`, thinking disabled)
69
+ calibrate to `reward_mean ~= 0.47`, `within_group_reward_std ~= 0.20` on Qwen3 8B:
70
+ a regime where the same problem is sometimes solved (when the model shows its
71
+ work) and sometimes slipped (when it answers directly) — so RL has a gradient to
72
+ follow.
73
+
74
+ ### Reasoning models and the token budget
75
+
76
+ Qwen3 is a hybrid reasoning model: by default it opens a `<think>` block and, on
77
+ a tight `--max-tokens`, spends the whole budget reasoning and never emits the
78
+ answer (reward collapses to zero). This cookbook disables thinking by default
79
+ through the chat template so direct rollouts reach the integer. Pass
80
+ `--enable-thinking` to keep the reasoning block — and raise `--max-tokens`
81
+ accordingly so the answer still fits.
82
+
83
+ ## Train
84
+
85
+ Once calibration has non-trivial rewards:
86
+
87
+ ```bash
88
+ uv run train.py --steps 5 --groups-per-step 8 --rollouts-per-prompt 8 --parallelism 32
89
+ ```
90
+
91
+ This uses the direct Training API managed service path. If you want calibration
92
+ to go through the managed deployment sampler too, pass
93
+ `--calibration-backend managed`; this provisions the same resources as training.
94
+
95
+ ### Preview account constraints
96
+
97
+ On the `lorenss` preview account today:
98
+
99
+ - **Trainer creation works** end to end with a provisioned key: rollouts,
100
+ `forward_backward_custom`, `optim_step`, checkpoint save, and sampler hotload
101
+ all run, and multi-step training completes. (An earlier `unkey inference api id
102
+ is not configured` 500 on trainer creation was an account-side provisioning gap,
103
+ now resolved.)
104
+ - **LoRA is unavailable**: the validated `qwen3-8b-128k` shape only accepts
105
+ full-parameter training, so `--lora-rank > 0` fails at trainer creation with
106
+ `no validated training shape exists for ... trainer_mode=LORA_TRAINER`.
107
+ - **Hotloads sync full 8B weights** between steps and occasionally exceed the
108
+ SDK's 600s hotload budget (`RuntimeError: Hotload failed for sampler snapshot
109
+ ...`). This is transient preview-infra latency, not a loop bug — re-running the
110
+ same command generally proceeds. There is no clean knob to extend the timeout
111
+ on the managed sampler path.
112
+
113
+ Metrics are written to:
114
+
115
+ - `runs/fireworks-rl-preview/metrics.jsonl`
116
+ - `runs/fireworks-rl-preview/reward_loss.png` if `matplotlib` is installed
117
+
118
+ ## Notes
119
+
120
+ - Defaults use Qwen3 8B full-parameter training:
121
+ - `accounts/fireworks/models/qwen3-8b`
122
+ - `Qwen/Qwen3-8B`
123
+ - `accounts/fireworks/trainingShapes/qwen3-8b-128k`
124
+ - LoRA can be tested with `--lora-rank N`, but the validated Qwen3 8B training
125
+ shape currently rejects LoRA mode on the `lorenss` preview account.
126
+ - The first checkpoint sync happens after step 0 and subsequent rollouts sample
127
+ the updated weights through the same deployment.
128
+ - `--keep-trainer` and `--keep-deployment` are available for debugging. By
129
+ default the trainer is cleaned up and the deployment scales to zero on exit.
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import asyncio
6
+ import json
6
7
  import logging
7
8
  import os
8
9
  import time
@@ -12,6 +13,7 @@ from typing import Any
12
13
 
13
14
  import httpx
14
15
  import typer
16
+ from pydantic import ValidationError
15
17
 
16
18
  from hud.cli.utils.build_display import display_build_summary
17
19
  from hud.cli.utils.build_logs import poll_build_status, stream_build_logs
@@ -19,6 +21,7 @@ from hud.cli.utils.config import parse_env_file, parse_key_value
19
21
  from hud.cli.utils.context import create_build_context_tarball, format_size
20
22
  from hud.cli.utils.registry import get_registry_environment
21
23
  from hud.cli.utils.source import EnvironmentSource, normalize_environment_name
24
+ from hud.eval.runtime import RuntimeConfig
22
25
  from hud.utils.exceptions import HudRequestError
23
26
  from hud.utils.hud_console import HUDConsole
24
27
  from hud.utils.platform import PlatformClient
@@ -32,6 +35,7 @@ class _DeployPlan:
32
35
  name: str
33
36
  registry_id: str | None
34
37
  runtime: str | None
38
+ runtime_config: dict[str, Any] | None
35
39
  env_vars: dict[str, str]
36
40
  build_args: dict[str, str]
37
41
  build_secrets: dict[str, str]
@@ -75,6 +79,26 @@ def _normalize_runtime(runtime: str | None, console: HUDConsole) -> str | None:
75
79
  raise typer.Exit(1)
76
80
 
77
81
 
82
+ def _load_runtime_config(path: str | None, console: HUDConsole) -> dict[str, Any] | None:
83
+ if path is None:
84
+ return None
85
+ config_path = Path(path).expanduser()
86
+ try:
87
+ raw = json.loads(config_path.read_text(encoding="utf-8"))
88
+ config = RuntimeConfig.model_validate(raw)
89
+ except FileNotFoundError:
90
+ console.error(f"Runtime config file not found: {config_path}")
91
+ raise typer.Exit(1) from None
92
+ except json.JSONDecodeError as exc:
93
+ console.error(f"Invalid runtime config JSON in {config_path}: {exc.msg}")
94
+ raise typer.Exit(1) from exc
95
+ except ValidationError as exc:
96
+ console.error(f"Invalid runtime config in {config_path}: {exc}")
97
+ raise typer.Exit(1) from exc
98
+ payload = config.request_payload()
99
+ return payload or None
100
+
101
+
78
102
  def _load_env_vars(path: Path, console: HUDConsole, *, warn_missing: bool) -> dict[str, str]:
79
103
  if not path.exists():
80
104
  if warn_missing:
@@ -322,6 +346,7 @@ def _prepare_deploy_plan(
322
346
  build_args: list[str] | None,
323
347
  build_secrets: list[str] | None,
324
348
  runtime: str | None,
349
+ runtime_config: str | None,
325
350
  verbose: bool,
326
351
  platform: PlatformClient,
327
352
  console: HUDConsole,
@@ -357,11 +382,13 @@ def _prepare_deploy_plan(
357
382
  build_args_dict = _parse_key_value_flags(build_args, option="--build-arg", console=console)
358
383
  if build_args_dict and verbose:
359
384
  console.info(f"Build arguments: {', '.join(build_args_dict.keys())}")
385
+ normalized_runtime = _normalize_runtime(runtime, console)
360
386
 
361
387
  return _DeployPlan(
362
388
  name=resolved_name,
363
389
  registry_id=registry_id,
364
- runtime=_normalize_runtime(runtime, console),
390
+ runtime=normalized_runtime,
391
+ runtime_config=_load_runtime_config(runtime_config, console),
365
392
  env_vars=env_vars,
366
393
  build_args=build_args_dict,
367
394
  build_secrets=_collect_build_secrets(build_secrets, env_dir=env_dir, console=console),
@@ -379,6 +406,7 @@ def deploy_environment(
379
406
  build_args: list[str] | None = None,
380
407
  build_secrets: list[str] | None = None,
381
408
  runtime: str | None = None,
409
+ runtime_config: str | None = None,
382
410
  ) -> None:
383
411
  """Deploy one HUD environment to the platform."""
384
412
  hud_console = HUDConsole()
@@ -411,6 +439,7 @@ def deploy_environment(
411
439
  build_args=build_args,
412
440
  build_secrets=build_secrets,
413
441
  runtime=runtime,
442
+ runtime_config=runtime_config,
414
443
  verbose=verbose,
415
444
  platform=platform,
416
445
  console=hud_console,
@@ -485,6 +514,8 @@ async def _trigger_build(
485
514
  payload["registry_id"] = plan.registry_id
486
515
  if plan.runtime:
487
516
  payload["runtime_provider"] = plan.runtime
517
+ if plan.runtime_config:
518
+ payload["runtime_config"] = plan.runtime_config
488
519
  if plan.env_vars:
489
520
  payload["environment_variables"] = plan.env_vars
490
521
  if plan.build_args:
@@ -644,6 +675,7 @@ def deploy_all(
644
675
  build_args: list[str] | None = None,
645
676
  build_secrets: list[str] | None = None,
646
677
  runtime: str | None = None,
678
+ runtime_config: str | None = None,
647
679
  ) -> None:
648
680
  """Deploy each HUD environment under a parent directory."""
649
681
  hud_console = HUDConsole()
@@ -683,6 +715,7 @@ def deploy_all(
683
715
  build_args=build_args,
684
716
  build_secrets=build_secrets,
685
717
  runtime=runtime,
718
+ runtime_config=runtime_config,
686
719
  )
687
720
  succeeded.append(env_dir.name)
688
721
  except (typer.Exit, SystemExit):
@@ -762,6 +795,11 @@ def deploy_command(
762
795
  "--runtime",
763
796
  help="Persist Modal as the hosted runtime for this registry",
764
797
  ),
798
+ runtime_config: str | None = typer.Option(
799
+ None,
800
+ "--runtime-config",
801
+ help="Path to a JSON RuntimeConfig for hosted runs",
802
+ ),
765
803
  ) -> None:
766
804
  """Deploy HUD environment to the platform.
767
805
 
@@ -781,6 +819,7 @@ def deploy_command(
781
819
  build_args=build_args,
782
820
  build_secrets=secrets,
783
821
  runtime=runtime,
822
+ runtime_config=runtime_config,
784
823
  )
785
824
  return
786
825
 
@@ -795,4 +834,5 @@ def deploy_command(
795
834
  build_args=build_args,
796
835
  build_secrets=secrets,
797
836
  runtime=runtime,
837
+ runtime_config=runtime_config,
798
838
  )
@@ -179,6 +179,47 @@ class TestCollectEnvironmentVariables:
179
179
  assert "INVALID_FORMAT" not in result
180
180
 
181
181
 
182
+ class TestRuntimeConfigFile:
183
+ def test_load_runtime_config_uses_sdk_shape(self, tmp_path: Path) -> None:
184
+ from hud.cli.deploy import _load_runtime_config
185
+ from hud.utils.hud_console import HUDConsole
186
+
187
+ config_path = tmp_path / "runtime.json"
188
+ config_path.write_text(
189
+ json.dumps(
190
+ {
191
+ "resources": {"gpu": {"type": "A10G", "count": 2}},
192
+ "limits": {"startup_timeout_s": 300},
193
+ }
194
+ ),
195
+ encoding="utf-8",
196
+ )
197
+
198
+ assert _load_runtime_config(str(config_path), HUDConsole()) == {
199
+ "resources": {"gpu": {"type": "A10G", "count": 2}},
200
+ "limits": {"startup_timeout_s": 300},
201
+ }
202
+
203
+ def test_load_runtime_config_preserves_null_override(self, tmp_path: Path) -> None:
204
+ from hud.cli.deploy import _load_runtime_config
205
+ from hud.utils.hud_console import HUDConsole
206
+
207
+ config_path = tmp_path / "runtime.json"
208
+ config_path.write_text(json.dumps({"resources": None}), encoding="utf-8")
209
+
210
+ assert _load_runtime_config(str(config_path), HUDConsole()) == {"resources": None}
211
+
212
+ def test_load_runtime_config_rejects_unknown_fields(self, tmp_path: Path) -> None:
213
+ from hud.cli.deploy import _load_runtime_config
214
+ from hud.utils.hud_console import HUDConsole
215
+
216
+ config_path = tmp_path / "runtime.json"
217
+ config_path.write_text(json.dumps({"provider_config": {}}), encoding="utf-8")
218
+
219
+ with pytest.raises(typer.Exit):
220
+ _load_runtime_config(str(config_path), HUDConsole())
221
+
222
+
182
223
  class TestDeployEnvironment:
183
224
  """Tests for deploy_environment function."""
184
225
 
@@ -262,6 +303,7 @@ class TestDeployAsync:
262
303
  name="test-env",
263
304
  registry_id=None,
264
305
  runtime=None,
306
+ runtime_config=None,
265
307
  env_vars={},
266
308
  build_args={},
267
309
  build_secrets={},
@@ -292,6 +334,7 @@ class TestDeployAsync:
292
334
  name="test-env",
293
335
  registry_id=None,
294
336
  runtime=None,
337
+ runtime_config=None,
295
338
  env_vars={},
296
339
  build_args={},
297
340
  build_secrets={},
@@ -331,6 +374,7 @@ class TestDeployAsync:
331
374
  name="test-env",
332
375
  registry_id=None,
333
376
  runtime="modal",
377
+ runtime_config=None,
334
378
  env_vars={},
335
379
  build_args={},
336
380
  build_secrets={},
@@ -343,6 +387,48 @@ class TestDeployAsync:
343
387
  assert platform.payload is not None
344
388
  assert platform.payload["runtime_provider"] == "modal"
345
389
 
390
+ @pytest.mark.asyncio
391
+ async def test_trigger_build_sends_runtime_config(self) -> None:
392
+ from hud.cli.deploy import _DeployPlan, _trigger_build
393
+ from hud.utils.hud_console import HUDConsole
394
+ from hud.utils.platform import PlatformClient
395
+
396
+ class FakePlatform(PlatformClient):
397
+ payload: dict[str, object] | None = None
398
+
399
+ async def apost(
400
+ self,
401
+ path: str,
402
+ *,
403
+ json: object | None = None,
404
+ ) -> dict[str, object]:
405
+ assert path == "/builds/trigger"
406
+ assert isinstance(json, dict)
407
+ object.__setattr__(self, "payload", json)
408
+ return {"id": "build-1", "registry_id": "registry-1"}
409
+
410
+ runtime_config = {"resources": {"gpu": {"type": "A10G", "count": 1}}}
411
+ platform = FakePlatform("https://api.example", "key")
412
+ result = await _trigger_build(
413
+ platform,
414
+ build_id="build-1",
415
+ plan=_DeployPlan(
416
+ name="test-env",
417
+ registry_id=None,
418
+ runtime="modal",
419
+ runtime_config=runtime_config,
420
+ env_vars={},
421
+ build_args={},
422
+ build_secrets={},
423
+ ),
424
+ no_cache=False,
425
+ console=HUDConsole(),
426
+ )
427
+
428
+ assert result == {"id": "build-1", "registry_id": "registry-1"}
429
+ assert platform.payload is not None
430
+ assert platform.payload["runtime_config"] == runtime_config
431
+
346
432
 
347
433
  class TestSaveDeployLink:
348
434
  """Tests for _save_deploy_link function."""
@@ -108,6 +108,9 @@ class RuntimeConfig(BaseModel):
108
108
  self.model_dump() | override.model_dump(exclude_unset=True)
109
109
  )
110
110
 
111
+ def request_payload(self) -> dict[str, Any]:
112
+ return self.model_dump(mode="json", exclude_unset=True)
113
+
111
114
 
112
115
  class Provider(Protocol):
113
116
  """Server placement: called with the task row being placed, acquire one
@@ -925,7 +928,7 @@ class HostedRuntime:
925
928
  if group_id is not None:
926
929
  payload["group_id"] = group_id
927
930
  if task.runtime_config is not None:
928
- runtime_config = task.runtime_config.model_dump(mode="json", exclude_none=True)
931
+ runtime_config = task.runtime_config.request_payload()
929
932
  if runtime_config:
930
933
  payload["runtime_config"] = runtime_config
931
934
  await platform.apost("/rollouts/submit", json=payload)
@@ -163,7 +163,7 @@ def task_upload_payload(task: Task) -> dict[str, Any]:
163
163
  if task.columns:
164
164
  payload["columns"] = task.columns
165
165
  if task.runtime_config is not None:
166
- payload["runtime_config"] = task.runtime_config.model_dump(exclude_none=True)
166
+ payload["runtime_config"] = task.runtime_config.request_payload()
167
167
  return payload
168
168
 
169
169
 
@@ -176,7 +176,7 @@ def _task_signature(task: Task) -> str:
176
176
  if task.columns:
177
177
  sig_data["columns"] = task.columns
178
178
  if task.runtime_config is not None:
179
- sig_data["runtime_config"] = task.runtime_config.model_dump(exclude_none=True)
179
+ sig_data["runtime_config"] = task.runtime_config.request_payload()
180
180
  return f"{task.id}|" + json.dumps(
181
181
  sig_data,
182
182
  sort_keys=True,
@@ -164,6 +164,25 @@ async def test_run_submits_and_polls_to_terminal(monkeypatch: pytest.MonkeyPatch
164
164
  assert payload["agent"]["config"]["model"] == "test-model"
165
165
 
166
166
 
167
+ @pytest.mark.asyncio
168
+ async def test_run_preserves_runtime_config_null_override(
169
+ monkeypatch: pytest.MonkeyPatch,
170
+ ) -> None:
171
+ platform = _FakePlatform([{"status": "completed", "reward": 0.5}])
172
+ monkeypatch.setattr(
173
+ "hud.eval.runtime.PlatformClient.from_settings", classmethod(lambda cls: platform)
174
+ )
175
+
176
+ await HostedRuntime(poll_interval=0.0).run(
177
+ Task(env="sums", id="add", runtime_config=RuntimeConfig(resources=None)),
178
+ _agent(),
179
+ job_id=uuid.uuid4().hex,
180
+ trace_id=uuid.uuid4().hex,
181
+ )
182
+
183
+ assert platform.posts[0][1]["runtime_config"] == {"resources": None}
184
+
185
+
167
186
  @pytest.mark.asyncio
168
187
  async def test_run_timeout_requests_platform_cancel(monkeypatch: pytest.MonkeyPatch) -> None:
169
188
  platform = _FakePlatform([{"status": "running"}])
@@ -148,3 +148,15 @@ def test_task_upload_payload_includes_runtime_config() -> None:
148
148
  payload = task_upload_payload(task)
149
149
 
150
150
  assert payload["runtime_config"] == {"image": "img:tag"}
151
+
152
+
153
+ def test_task_upload_payload_preserves_runtime_config_null_override() -> None:
154
+ task = Task(
155
+ env="e",
156
+ id="solve",
157
+ runtime_config=RuntimeConfig(resources=None),
158
+ )
159
+
160
+ payload = task_upload_payload(task)
161
+
162
+ assert payload["runtime_config"] == {"resources": None}
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.6.6"
7
+ __version__ = "0.6.7"
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "hud-python"
3
- version = "0.6.6"
3
+ version = "0.6.7"
4
4
  description = "SDK for the HUD platform."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11, <3.13"
@@ -1,114 +0,0 @@
1
- # Fireworks RL Training
2
-
3
- Direct Fireworks Training API loop over the same arithmetic preview task used by
4
- `cookbooks/rl-training`.
5
-
6
- This does **not** use Fireworks native datasets or RFT jobs. It follows the
7
- Training API service path from the Fireworks docs:
8
-
9
- 1. `FiretitanServiceClient.from_firetitan_config(...)`
10
- 2. `create_deployment_sampler(...)` for high-parallel rollouts
11
- 3. local grading of HUD-style multiplication tasks
12
- 4. `forward_backward_custom(...)` + `optim_step(...)`
13
- 5. `save_weights_for_sampler(...)` + sampler refresh
14
-
15
- References:
16
-
17
- - Fireworks Training API introduction: https://docs.fireworks.ai/fine-tuning/training-api/introduction
18
- - Training and sampling lifecycle: https://docs.fireworks.ai/fine-tuning/training-api/training-and-sampling
19
- - Loss functions / GRPO reference: https://docs.fireworks.ai/fine-tuning/training-api/loss-functions
20
-
21
- ## Setup
22
-
23
- The repo-level `.env` is loaded automatically. It must contain:
24
-
25
- ```bash
26
- FIREWORKS_API_KEY=...
27
- FIREWORKS_ACCOUNT_ID=...
28
- ```
29
-
30
- Install the isolated cookbook environment:
31
-
32
- ```bash
33
- uv sync --pre
34
- ```
35
-
36
- ## Calibrate task difficulty first
37
-
38
- Calibration defaults to Fireworks' OpenAI-compatible inference API, so it does
39
- **not** create a trainer, provision a Training API deployment, or call
40
- `optim_step`. This is the cheap way to tune task difficulty before paying for a
41
- Training API run.
42
-
43
- The calibration model is separate from the training base model because the
44
- `lorenss` key currently exposes only a small serverless inference catalog (no
45
- Qwen3 8B deployment). Override it with `--inference-model` if you have a closer
46
- deployed model.
47
-
48
- ```bash
49
- uv run train.py --calibrate-only --groups-per-step 8 --rollouts-per-prompt 8 --parallelism 32
50
- ```
51
-
52
- The goal is a reward distribution with variance. If reward is all zero, make the
53
- task easier:
54
-
55
- ```bash
56
- uv run train.py --calibrate-only --min-a 10 --max-a 99 --min-b 2 --max-b 9
57
- ```
58
-
59
- If reward is all one, make the task harder:
60
-
61
- ```bash
62
- uv run train.py --calibrate-only --min-a 1000 --max-a 9999 --min-b 11 --max-b 99
63
- ```
64
-
65
- The current defaults are calibrated for the visible `gpt-oss-120b` inference
66
- model on the `lorenss` key: 2-digit by 1-digit multiplication with a direct
67
- "reply only with the integer" prompt. A 32-rollout calibration gave a non-trivial
68
- baseline (`reward_mean ~= 0.22`, `reward_std ~= 0.42`), while the original
69
- 3-digit by 2-digit range was all-zero.
70
-
71
- ## Train
72
-
73
- Once calibration has non-trivial rewards:
74
-
75
- ```bash
76
- uv run train.py --steps 5 --groups-per-step 8 --rollouts-per-prompt 8 --parallelism 32
77
- ```
78
-
79
- This uses the direct Training API managed service path. If you want calibration
80
- to go through the managed deployment sampler too, pass
81
- `--calibration-backend managed`; this provisions the same resources as training.
82
-
83
- ### Current Fireworks preview account blocker
84
-
85
- On the `lorenss` preview account, trainer creation currently fails before the
86
- first train step with:
87
-
88
- ```text
89
- failed to ensure FIREWORKS_API_KEY secret: unkey inference api id is not configured
90
- ```
91
-
92
- This happens even with `create_deployment=False`, so it is an account/control
93
- plane provisioning issue rather than a problem in the rollout or loss code. Once
94
- Fireworks enables the missing Unkey inference API config for the account, the
95
- same `uv run train.py ...` command should proceed to trainer startup and the
96
- first `forward_backward_custom(...)` call.
97
-
98
- Metrics are written to:
99
-
100
- - `runs/fireworks-rl-preview/metrics.jsonl`
101
- - `runs/fireworks-rl-preview/reward_loss.png` if `matplotlib` is installed
102
-
103
- ## Notes
104
-
105
- - Defaults use Qwen 3 8B full-parameter training:
106
- - `accounts/fireworks/models/qwen3-8b`
107
- - `Qwen/Qwen3-8B`
108
- - `accounts/fireworks/trainingShapes/qwen3-8b-128k`
109
- - LoRA can be tested with `--lora-rank N`, but the validated Qwen3 8B training
110
- shape currently rejects LoRA mode on the `lorenss` preview account.
111
- - The first checkpoint sync happens after step 0 and subsequent rollouts sample
112
- the updated weights through the same deployment.
113
- - `--keep-trainer` and `--keep-deployment` are available for debugging. By
114
- default the trainer is cleaned up and the deployment scales to zero on exit.
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes