hud-python 0.4.16__tar.gz → 0.4.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic.

Files changed (182)
  1. {hud_python-0.4.16 → hud_python-0.4.17}/PKG-INFO +1 -1
  2. {hud_python-0.4.16 → hud_python-0.4.17}/hud/agents/claude.py +8 -2
  3. {hud_python-0.4.16 → hud_python-0.4.17}/hud/agents/openai.py +8 -2
  4. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/rl/__init__.py +11 -2
  5. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/rl/pod.py +4 -0
  6. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/rl/ssh.py +34 -2
  7. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/rl/train.py +190 -51
  8. {hud_python-0.4.16 → hud_python-0.4.17}/hud/utils/tests/test_version.py +1 -1
  9. {hud_python-0.4.16 → hud_python-0.4.17}/hud/version.py +1 -1
  10. {hud_python-0.4.16 → hud_python-0.4.17}/pyproject.toml +1 -1
  11. {hud_python-0.4.16 → hud_python-0.4.17}/.gitignore +0 -0
  12. {hud_python-0.4.16 → hud_python-0.4.17}/LICENSE +0 -0
  13. {hud_python-0.4.16 → hud_python-0.4.17}/README.md +0 -0
  14. {hud_python-0.4.16 → hud_python-0.4.17}/environments/README.md +0 -0
  15. {hud_python-0.4.16 → hud_python-0.4.17}/environments/browser/README.md +0 -0
  16. {hud_python-0.4.16 → hud_python-0.4.17}/environments/browser/apps/2048/README.md +0 -0
  17. {hud_python-0.4.16 → hud_python-0.4.17}/environments/browser/apps/2048/backend/pyproject.toml +0 -0
  18. {hud_python-0.4.16 → hud_python-0.4.17}/environments/browser/apps/README.md +0 -0
  19. {hud_python-0.4.16 → hud_python-0.4.17}/environments/browser/apps/todo/README.md +0 -0
  20. {hud_python-0.4.16 → hud_python-0.4.17}/environments/browser/apps/todo/backend/pyproject.toml +0 -0
  21. {hud_python-0.4.16 → hud_python-0.4.17}/environments/browser/pyproject.toml +0 -0
  22. {hud_python-0.4.16 → hud_python-0.4.17}/environments/remote_browser/README.md +0 -0
  23. {hud_python-0.4.16 → hud_python-0.4.17}/environments/remote_browser/pyproject.toml +0 -0
  24. {hud_python-0.4.16 → hud_python-0.4.17}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
  25. {hud_python-0.4.16 → hud_python-0.4.17}/environments/text_2048/README.md +0 -0
  26. {hud_python-0.4.16 → hud_python-0.4.17}/environments/text_2048/pyproject.toml +0 -0
  27. {hud_python-0.4.16 → hud_python-0.4.17}/examples/README.md +0 -0
  28. {hud_python-0.4.16 → hud_python-0.4.17}/hud/__init__.py +0 -0
  29. {hud_python-0.4.16 → hud_python-0.4.17}/hud/__main__.py +0 -0
  30. {hud_python-0.4.16 → hud_python-0.4.17}/hud/agents/__init__.py +0 -0
  31. {hud_python-0.4.16 → hud_python-0.4.17}/hud/agents/base.py +0 -0
  32. {hud_python-0.4.16 → hud_python-0.4.17}/hud/agents/langchain.py +0 -0
  33. {hud_python-0.4.16 → hud_python-0.4.17}/hud/agents/misc/__init__.py +0 -0
  34. {hud_python-0.4.16 → hud_python-0.4.17}/hud/agents/misc/response_agent.py +0 -0
  35. {hud_python-0.4.16 → hud_python-0.4.17}/hud/agents/openai_chat_generic.py +0 -0
  36. {hud_python-0.4.16 → hud_python-0.4.17}/hud/agents/tests/__init__.py +0 -0
  37. {hud_python-0.4.16 → hud_python-0.4.17}/hud/agents/tests/test_base.py +0 -0
  38. {hud_python-0.4.16 → hud_python-0.4.17}/hud/agents/tests/test_claude.py +0 -0
  39. {hud_python-0.4.16 → hud_python-0.4.17}/hud/agents/tests/test_client.py +0 -0
  40. {hud_python-0.4.16 → hud_python-0.4.17}/hud/agents/tests/test_openai.py +0 -0
  41. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/__init__.py +0 -0
  42. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/__main__.py +0 -0
  43. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/analyze.py +0 -0
  44. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/build.py +0 -0
  45. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/clone.py +0 -0
  46. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/debug.py +0 -0
  47. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/dev.py +0 -0
  48. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/eval.py +0 -0
  49. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/hf.py +0 -0
  50. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/init.py +0 -0
  51. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/list_func.py +0 -0
  52. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/pull.py +0 -0
  53. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/push.py +0 -0
  54. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/remove.py +0 -0
  55. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/rl/README.md +0 -0
  56. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/rl/init.py +0 -0
  57. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/rl/utils.py +0 -0
  58. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/tests/__init__.py +0 -0
  59. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/tests/test_analyze.py +0 -0
  60. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/tests/test_analyze_metadata.py +0 -0
  61. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/tests/test_build.py +0 -0
  62. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/tests/test_cli_init.py +0 -0
  63. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/tests/test_cli_main.py +0 -0
  64. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/tests/test_clone.py +0 -0
  65. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/tests/test_cursor.py +0 -0
  66. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/tests/test_debug.py +0 -0
  67. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/tests/test_list_func.py +0 -0
  68. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/tests/test_main_module.py +0 -0
  69. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/tests/test_mcp_server.py +0 -0
  70. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/tests/test_pull.py +0 -0
  71. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/tests/test_push.py +0 -0
  72. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/tests/test_registry.py +0 -0
  73. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/tests/test_utils.py +0 -0
  74. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/utils/__init__.py +0 -0
  75. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/utils/cursor.py +0 -0
  76. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/utils/docker.py +0 -0
  77. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/utils/environment.py +0 -0
  78. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/utils/interactive.py +0 -0
  79. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/utils/logging.py +0 -0
  80. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/utils/metadata.py +0 -0
  81. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/utils/registry.py +0 -0
  82. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/utils/remote_runner.py +0 -0
  83. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/utils/runner.py +0 -0
  84. {hud_python-0.4.16 → hud_python-0.4.17}/hud/cli/utils/server.py +0 -0
  85. {hud_python-0.4.16 → hud_python-0.4.17}/hud/clients/README.md +0 -0
  86. {hud_python-0.4.16 → hud_python-0.4.17}/hud/clients/__init__.py +0 -0
  87. {hud_python-0.4.16 → hud_python-0.4.17}/hud/clients/base.py +0 -0
  88. {hud_python-0.4.16 → hud_python-0.4.17}/hud/clients/fastmcp.py +0 -0
  89. {hud_python-0.4.16 → hud_python-0.4.17}/hud/clients/mcp_use.py +0 -0
  90. {hud_python-0.4.16 → hud_python-0.4.17}/hud/clients/tests/__init__.py +0 -0
  91. {hud_python-0.4.16 → hud_python-0.4.17}/hud/clients/tests/test_client_integration.py +0 -0
  92. {hud_python-0.4.16 → hud_python-0.4.17}/hud/clients/tests/test_fastmcp.py +0 -0
  93. {hud_python-0.4.16 → hud_python-0.4.17}/hud/clients/tests/test_protocol.py +0 -0
  94. {hud_python-0.4.16 → hud_python-0.4.17}/hud/clients/utils/__init__.py +0 -0
  95. {hud_python-0.4.16 → hud_python-0.4.17}/hud/clients/utils/retry_transport.py +0 -0
  96. {hud_python-0.4.16 → hud_python-0.4.17}/hud/datasets/__init__.py +0 -0
  97. {hud_python-0.4.16 → hud_python-0.4.17}/hud/datasets/execution/__init__.py +0 -0
  98. {hud_python-0.4.16 → hud_python-0.4.17}/hud/datasets/execution/parallel.py +0 -0
  99. {hud_python-0.4.16 → hud_python-0.4.17}/hud/datasets/execution/runner.py +0 -0
  100. {hud_python-0.4.16 → hud_python-0.4.17}/hud/datasets/task.py +0 -0
  101. {hud_python-0.4.16 → hud_python-0.4.17}/hud/datasets/utils.py +0 -0
  102. {hud_python-0.4.16 → hud_python-0.4.17}/hud/misc/__init__.py +0 -0
  103. {hud_python-0.4.16 → hud_python-0.4.17}/hud/misc/claude_plays_pokemon.py +0 -0
  104. {hud_python-0.4.16 → hud_python-0.4.17}/hud/otel/__init__.py +0 -0
  105. {hud_python-0.4.16 → hud_python-0.4.17}/hud/otel/collector.py +0 -0
  106. {hud_python-0.4.16 → hud_python-0.4.17}/hud/otel/config.py +0 -0
  107. {hud_python-0.4.16 → hud_python-0.4.17}/hud/otel/context.py +0 -0
  108. {hud_python-0.4.16 → hud_python-0.4.17}/hud/otel/exporters.py +0 -0
  109. {hud_python-0.4.16 → hud_python-0.4.17}/hud/otel/instrumentation.py +0 -0
  110. {hud_python-0.4.16 → hud_python-0.4.17}/hud/otel/processors.py +0 -0
  111. {hud_python-0.4.16 → hud_python-0.4.17}/hud/otel/tests/__init__.py +0 -0
  112. {hud_python-0.4.16 → hud_python-0.4.17}/hud/otel/tests/test_processors.py +0 -0
  113. {hud_python-0.4.16 → hud_python-0.4.17}/hud/py.typed +0 -0
  114. {hud_python-0.4.16 → hud_python-0.4.17}/hud/server/__init__.py +0 -0
  115. {hud_python-0.4.16 → hud_python-0.4.17}/hud/server/context.py +0 -0
  116. {hud_python-0.4.16 → hud_python-0.4.17}/hud/server/helper/__init__.py +0 -0
  117. {hud_python-0.4.16 → hud_python-0.4.17}/hud/server/low_level.py +0 -0
  118. {hud_python-0.4.16 → hud_python-0.4.17}/hud/server/server.py +0 -0
  119. {hud_python-0.4.16 → hud_python-0.4.17}/hud/server/tests/__init__.py +0 -0
  120. {hud_python-0.4.16 → hud_python-0.4.17}/hud/settings.py +0 -0
  121. {hud_python-0.4.16 → hud_python-0.4.17}/hud/shared/__init__.py +0 -0
  122. {hud_python-0.4.16 → hud_python-0.4.17}/hud/shared/exceptions.py +0 -0
  123. {hud_python-0.4.16 → hud_python-0.4.17}/hud/shared/requests.py +0 -0
  124. {hud_python-0.4.16 → hud_python-0.4.17}/hud/shared/tests/__init__.py +0 -0
  125. {hud_python-0.4.16 → hud_python-0.4.17}/hud/shared/tests/test_exceptions.py +0 -0
  126. {hud_python-0.4.16 → hud_python-0.4.17}/hud/shared/tests/test_requests.py +0 -0
  127. {hud_python-0.4.16 → hud_python-0.4.17}/hud/telemetry/__init__.py +0 -0
  128. {hud_python-0.4.16 → hud_python-0.4.17}/hud/telemetry/instrument.py +0 -0
  129. {hud_python-0.4.16 → hud_python-0.4.17}/hud/telemetry/job.py +0 -0
  130. {hud_python-0.4.16 → hud_python-0.4.17}/hud/telemetry/replay.py +0 -0
  131. {hud_python-0.4.16 → hud_python-0.4.17}/hud/telemetry/tests/__init__.py +0 -0
  132. {hud_python-0.4.16 → hud_python-0.4.17}/hud/telemetry/tests/test_replay.py +0 -0
  133. {hud_python-0.4.16 → hud_python-0.4.17}/hud/telemetry/tests/test_trace.py +0 -0
  134. {hud_python-0.4.16 → hud_python-0.4.17}/hud/telemetry/trace.py +0 -0
  135. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/__init__.py +0 -0
  136. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/base.py +0 -0
  137. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/bash.py +0 -0
  138. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/computer/__init__.py +0 -0
  139. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/computer/anthropic.py +0 -0
  140. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/computer/hud.py +0 -0
  141. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/computer/openai.py +0 -0
  142. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/computer/settings.py +0 -0
  143. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/edit.py +0 -0
  144. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/executors/__init__.py +0 -0
  145. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/executors/base.py +0 -0
  146. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/executors/pyautogui.py +0 -0
  147. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/executors/tests/__init__.py +0 -0
  148. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/executors/tests/test_base_executor.py +0 -0
  149. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
  150. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/executors/xdo.py +0 -0
  151. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/playwright.py +0 -0
  152. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/response.py +0 -0
  153. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/tests/__init__.py +0 -0
  154. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/tests/test_base.py +0 -0
  155. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/tests/test_bash.py +0 -0
  156. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/tests/test_bash_extended.py +0 -0
  157. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/tests/test_computer.py +0 -0
  158. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/tests/test_computer_actions.py +0 -0
  159. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/tests/test_edit.py +0 -0
  160. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/tests/test_init.py +0 -0
  161. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/tests/test_playwright_tool.py +0 -0
  162. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/tests/test_response.py +0 -0
  163. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/tests/test_tools.py +0 -0
  164. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/tests/test_tools_init.py +0 -0
  165. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/tests/test_utils.py +0 -0
  166. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/types.py +0 -0
  167. {hud_python-0.4.16 → hud_python-0.4.17}/hud/tools/utils.py +0 -0
  168. {hud_python-0.4.16 → hud_python-0.4.17}/hud/types.py +0 -0
  169. {hud_python-0.4.16 → hud_python-0.4.17}/hud/utils/__init__.py +0 -0
  170. {hud_python-0.4.16 → hud_python-0.4.17}/hud/utils/async_utils.py +0 -0
  171. {hud_python-0.4.16 → hud_python-0.4.17}/hud/utils/design.py +0 -0
  172. {hud_python-0.4.16 → hud_python-0.4.17}/hud/utils/mcp.py +0 -0
  173. {hud_python-0.4.16 → hud_python-0.4.17}/hud/utils/progress.py +0 -0
  174. {hud_python-0.4.16 → hud_python-0.4.17}/hud/utils/telemetry.py +0 -0
  175. {hud_python-0.4.16 → hud_python-0.4.17}/hud/utils/tests/__init__.py +0 -0
  176. {hud_python-0.4.16 → hud_python-0.4.17}/hud/utils/tests/test_async_utils.py +0 -0
  177. {hud_python-0.4.16 → hud_python-0.4.17}/hud/utils/tests/test_init.py +0 -0
  178. {hud_python-0.4.16 → hud_python-0.4.17}/hud/utils/tests/test_mcp.py +0 -0
  179. {hud_python-0.4.16 → hud_python-0.4.17}/hud/utils/tests/test_progress.py +0 -0
  180. {hud_python-0.4.16 → hud_python-0.4.17}/hud/utils/tests/test_telemetry.py +0 -0
  181. {hud_python-0.4.16 → hud_python-0.4.17}/rl/README.md +0 -0
  182. {hud_python-0.4.16 → hud_python-0.4.17}/rl/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.16
3
+ Version: 0.4.17
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -85,8 +85,8 @@ class ClaudeAgent(MCPAgent):
85
85
  self._claude_to_mcp_tool_map: dict[str, str] = {}
86
86
  self.claude_tools: list[dict] = []
87
87
 
88
- # Base system prompt for autonomous operation
89
- self.system_prompt = """
88
+ # Append Claude-specific instructions to the base system prompt
89
+ claude_instructions = """
90
90
  You are Claude, an AI assistant created by Anthropic. You are helpful, harmless, and honest.
91
91
 
92
92
  When working on tasks:
@@ -99,6 +99,12 @@ class ClaudeAgent(MCPAgent):
99
99
  Remember: You are expected to complete tasks autonomously. The user trusts you to accomplish what they asked.
100
100
  """.strip() # noqa: E501
101
101
 
102
+ # Append Claude instructions to any base system prompt
103
+ if self.system_prompt:
104
+ self.system_prompt = f"{self.system_prompt}\n\n{claude_instructions}"
105
+ else:
106
+ self.system_prompt = claude_instructions
107
+
102
108
  async def initialize(self, task: str | Task | None = None) -> None:
103
109
  """Initialize the agent and build tool mappings."""
104
110
  await super().initialize(task)
@@ -78,8 +78,8 @@ class OperatorAgent(MCPAgent):
78
78
 
79
79
  self.model_name = "openai-" + self.model
80
80
 
81
- # Base system prompt for autonomous operation
82
- self.system_prompt = """
81
+ # Append OpenAI-specific instructions to the base system prompt
82
+ openai_instructions = """
83
83
  You are an autonomous computer-using agent. Follow these guidelines:
84
84
 
85
85
  1. NEVER ask for confirmation. Complete all tasks autonomously.
@@ -93,6 +93,12 @@ class OperatorAgent(MCPAgent):
93
93
  Remember: You are expected to complete tasks autonomously. The user trusts you to do what they asked.
94
94
  """.strip() # noqa: E501
95
95
 
96
+ # Append OpenAI instructions to any base system prompt
97
+ if self.system_prompt:
98
+ self.system_prompt = f"{self.system_prompt}\n\n{openai_instructions}"
99
+ else:
100
+ self.system_prompt = openai_instructions
101
+
96
102
  async def _run_context(self, context: list[types.ContentBlock], max_steps: int = 10) -> Trace:
97
103
  """
98
104
  Run the agent with the given prompt or task.
@@ -23,7 +23,10 @@ def rl_main(
23
23
  ctx: typer.Context,
24
24
  model: str = typer.Option("Qwen/Qwen2.5-3B-Instruct", "--model", "-m", help="Model to train"),
25
25
  dataset: str | None = typer.Option(
26
- None, "--dataset", "-d", help="Override dataset from lock file"
26
+ None,
27
+ "--dataset",
28
+ "-d",
29
+ help="Dataset: JSON file path or HuggingFace name (auto-detects if not provided)",
27
30
  ),
28
31
  config: Path | None = typer.Option(None, "--config", "-c", help="Config YAML path"), # noqa: B008
29
32
  gpus: str = typer.Option("2xA100", "--gpus", help="GPU configuration (e.g., 2xA100, 4xH100)"),
@@ -39,9 +42,15 @@ def rl_main(
39
42
  3. Push environment to registry if needed
40
43
  4. Start remote training on Prime Intellect
41
44
 
45
+ Dataset can be:
46
+ - A local JSON file with tasks (e.g., tasks.json)
47
+ - A HuggingFace dataset name (e.g., 'username/dataset-name')
48
+ - Auto-detected from current directory if not specified
49
+
42
50
  Examples:
43
- hud rl # Interactive mode with prompts
51
+ hud rl # Interactive mode, auto-detect tasks.json
44
52
  hud rl --model gpt2 # Train with specific model
53
+ hud rl --dataset tasks.json # Use local task file
45
54
  hud rl --gpus 4xH100 # Use different GPU configuration
46
55
  hud rl init my-env:latest # Generate config for environment
47
56
  """
@@ -62,6 +62,7 @@ async def create_and_connect_prime_pod(
62
62
  image: str,
63
63
  team_id: str | None = None,
64
64
  dataset_size: int | None = None,
65
+ is_json_file: bool = False,
65
66
  ) -> None:
66
67
  """Create a Prime Intellect pod and connect to it for training."""
67
68
  design.section_title("🌐 Creating Prime Intellect Pod")
@@ -330,6 +331,7 @@ async def create_and_connect_prime_pod(
330
331
  output_dir=output_dir,
331
332
  image=image,
332
333
  dataset_size=dataset_size,
334
+ is_json_file=is_json_file,
333
335
  )
334
336
  else:
335
337
  # Manual fallback
@@ -457,6 +459,7 @@ async def run_prime_training(
457
459
  auto_create_pod: str | None = None,
458
460
  team_id: str | None = None,
459
461
  dataset_size: int | None = None,
462
+ is_json_file: bool = False,
460
463
  ) -> None:
461
464
  """Run training on Prime Intellect infrastructure."""
462
465
  # Check API key
@@ -488,4 +491,5 @@ async def run_prime_training(
488
491
  image=image,
489
492
  team_id=team_id,
490
493
  dataset_size=dataset_size,
494
+ is_json_file=is_json_file,
491
495
  )
@@ -101,6 +101,7 @@ async def connect_and_train(
101
101
  output_dir: Path,
102
102
  image: str,
103
103
  dataset_size: int | None = None,
104
+ is_json_file: bool = False,
104
105
  ) -> None:
105
106
  """Connect to the pod via SSH and run training commands."""
106
107
  design.section_title("🚀 Starting Remote Training")
@@ -175,6 +176,37 @@ async def connect_and_train(
175
176
  design.info("Make sure scp is installed and in your PATH")
176
177
  raise typer.Exit(1) from e
177
178
 
179
+ # If dataset is a JSON file, copy it too
180
+ remote_dataset = dataset # Default to unchanged
181
+ if is_json_file:
182
+ design.info("Copying task file to pod...")
183
+ try:
184
+ # On Windows, we need to ensure proper path formatting
185
+ dataset_path = str(dataset).replace("\\", "/")
186
+ # Extract just the filename for the remote path
187
+ dataset_filename = os.path.basename(dataset)
188
+ remote_dataset = f"/root/{dataset_filename}"
189
+
190
+ scp_cmd = [
191
+ "scp",
192
+ "-i",
193
+ str(ssh_key_path),
194
+ "-P",
195
+ ssh_port,
196
+ "-o",
197
+ "StrictHostKeyChecking=no",
198
+ "-o",
199
+ "UserKnownHostsFile=/dev/null",
200
+ dataset_path,
201
+ f"{ssh_user_host}:{remote_dataset}",
202
+ ]
203
+ design.debug(f"Running: {' '.join(scp_cmd)}")
204
+ subprocess.run(scp_cmd, check=True) # noqa: S603, ASYNC221
205
+ design.success(f"Task file copied to {remote_dataset}")
206
+ except subprocess.CalledProcessError as e:
207
+ design.error(f"Failed to copy task file: {e}")
208
+ raise typer.Exit(1) from e
209
+
178
210
  design.info("Setting up environment and starting training...")
179
211
  design.info("This will take a few minutes for initial setup, then training will begin.")
180
212
  design.info("")
@@ -196,7 +228,7 @@ async def connect_and_train(
196
228
  "# Load environment",
197
229
  "env = vf.load_environment(",
198
230
  ' env_id="hud-vf-gym",',
199
- f' taskset="{dataset}",',
231
+ f' taskset="{remote_dataset}",',
200
232
  ' config_path="/root/config.yaml",',
201
233
  f" num_tasks={dataset_size},",
202
234
  ")",
@@ -242,7 +274,7 @@ async def connect_and_train(
242
274
  "uv venv --python 3.12 && "
243
275
  "source .venv/bin/activate && "
244
276
  # Install packages
245
- "prime env install hud/hud-vf-gym@0.1.0 && "
277
+ "prime env install hud/hud-vf-gym@0.1.1 && "
246
278
  "uv pip install 'verifiers[train]' && "
247
279
  "uv pip install flash-attn --no-build-isolation && "
248
280
  # Set environment variables
@@ -23,6 +23,40 @@ from .utils import (
23
23
  design = HUDDesign()
24
24
 
25
25
 
26
+ def find_task_json_files() -> list[Path]:
27
+ """Find JSON files containing tasks in the current directory."""
28
+ json_files = []
29
+ patterns = [
30
+ "*task*.json",
31
+ "*eval*.json",
32
+ "*Task*.json",
33
+ "*Eval*.json",
34
+ "*TASK*.json",
35
+ "*EVAL*.json",
36
+ "tasks.json", # Most common name
37
+ ]
38
+
39
+ # First check current directory
40
+ for pattern in patterns:
41
+ json_files.extend(Path(".").glob(pattern))
42
+
43
+ # If no files found, search one level deep
44
+ if not json_files:
45
+ for pattern in patterns:
46
+ json_files.extend(Path(".").glob(f"*/{pattern}"))
47
+
48
+ # Remove duplicates and sort, prioritizing "tasks.json"
49
+ json_files = sorted(set(json_files))
50
+
51
+ # Put tasks.json first if it exists
52
+ tasks_json = Path("tasks.json")
53
+ if tasks_json in json_files:
54
+ json_files.remove(tasks_json)
55
+ json_files.insert(0, tasks_json)
56
+
57
+ return json_files
58
+
59
+
26
60
  def train_command_wrapper(
27
61
  model: str,
28
62
  dataset: str | None,
@@ -128,45 +162,22 @@ def train_command_wrapper(
128
162
  raise typer.Exit(1)
129
163
 
130
164
  if "dataset" in missing:
131
- # Check if we have tasks.json
132
- tasks_file = Path("tasks.json")
133
- if tasks_file.exists():
134
- create_dataset = design.select(
135
- "Found tasks.json. Would you like to upload it as a dataset?",
136
- ["Yes, upload to HuggingFace", "No, I'll handle it manually"],
165
+ if missing["dataset"] == "multiple_json":
166
+ # Multiple JSON files found, let user choose
167
+ json_files = find_task_json_files()
168
+ design.info("Multiple task files found:")
169
+ file_choice = design.select(
170
+ "Select a task file to use:",
171
+ choices=[str(f) for f in json_files],
172
+ )
173
+ dataset = file_choice
174
+ design.success(f"Selected: {dataset}")
175
+ elif missing["dataset"] == "none":
176
+ design.error("No dataset specified and no task JSON files found")
177
+ design.info("Please use --dataset or create a tasks.json file")
178
+ design.hint(
179
+ "Example: hud hf --name my-org/my-tasks # Generate tasks from HUD evaluation"
137
180
  )
138
-
139
- if create_dataset == "Yes, upload to HuggingFace":
140
- dataset_name = typer.prompt("Enter dataset name (e.g., username/dataset-name)")
141
-
142
- if not validate_dataset_name(dataset_name):
143
- design.error("Invalid dataset name format. Expected: username/dataset-name")
144
- raise typer.Exit(1)
145
-
146
- design.info(f"Running 'hud hf tasks.json --name {dataset_name}'...")
147
- design.info("")
148
-
149
- # Run hf command
150
- result = subprocess.run( # noqa: S603
151
- ["hud", "hf", "tasks.json", "--name", dataset_name], # noqa: S607
152
- capture_output=True,
153
- text=True,
154
- )
155
-
156
- if result.returncode == 0:
157
- design.success("Dataset uploaded successfully")
158
- dataset = dataset_name
159
- else:
160
- design.error("Failed to upload dataset")
161
- if result.stderr:
162
- design.error(result.stderr)
163
- raise typer.Exit(1)
164
- else:
165
- design.info("Please specify a dataset with --dataset")
166
- raise typer.Exit(1)
167
- else:
168
- design.error("No dataset specified and no tasks.json found")
169
- design.info("Use --dataset to specify a HuggingFace dataset")
170
181
  raise typer.Exit(1)
171
182
 
172
183
  # Ask about pod creation for Prime training
@@ -247,9 +258,123 @@ async def train_command(
247
258
  design.hint("Run 'hud build' first or specify with 'hud rl init <image>'")
248
259
  raise typer.Exit(1)
249
260
 
250
- # Validate dataset has sufficient tasks for training
261
+ # Handle dataset (JSON file or HuggingFace dataset)
251
262
  dataset_size = None
252
- if dataset:
263
+ is_json_file = False
264
+
265
+ # Use dataset from command or look for JSON files
266
+ if not dataset:
267
+ # Check for JSON files if no dataset specified
268
+ json_files = find_task_json_files()
269
+ if json_files:
270
+ if len(json_files) == 1:
271
+ dataset = str(json_files[0])
272
+ design.info(f"Found task file: {dataset}")
273
+ is_json_file = True
274
+ else:
275
+ # This case should have been handled in train_command_wrapper
276
+ design.error("Multiple task files found but none selected")
277
+ raise typer.Exit(1)
278
+ else:
279
+ # Use dataset from lock file
280
+ dataset = get_primary_dataset()
281
+ if dataset:
282
+ design.info(f"Using dataset from lock file: {dataset}")
283
+
284
+ # Check if dataset is a file path
285
+ if dataset and Path(dataset).exists() and dataset.endswith(".json"):
286
+ is_json_file = True
287
+
288
+ # Validate dataset
289
+ if dataset and is_json_file:
290
+ # Load and validate JSON file
291
+ design.info(f"Validating task file: {dataset}")
292
+ try:
293
+ with open(dataset) as f: # noqa: ASYNC230
294
+ tasks_data = json.load(f)
295
+
296
+ # Handle both single task and array of tasks
297
+ if isinstance(tasks_data, dict):
298
+ tasks = [tasks_data]
299
+ elif isinstance(tasks_data, list):
300
+ tasks = tasks_data
301
+ else:
302
+ design.error("Invalid tasks file format")
303
+ raise typer.Exit(1)
304
+
305
+ dataset_size = len(tasks)
306
+ if dataset_size < 4:
307
+ design.error(f"Task file has only {dataset_size} tasks")
308
+ design.info("RL training requires at least 4 tasks for proper batching")
309
+ design.hint("Consider adding more tasks to your JSON file")
310
+ raise typer.Exit(1)
311
+
312
+ design.success(f"✓ Task file has {dataset_size} tasks")
313
+
314
+ # Check and convert MCP configs to remote if needed
315
+ if tasks:
316
+ sample_task = tasks[0]
317
+ sample_mcp_config = sample_task.get("mcp_config", {})
318
+
319
+ # Check if using local MCP configs
320
+ config_type = "unknown"
321
+ for server_config in sample_mcp_config.values():
322
+ if isinstance(server_config, dict) and "url" in server_config:
323
+ url = server_config.get("url", "")
324
+ if "mcp.hud.so" in url:
325
+ config_type = "remote"
326
+ break
327
+ else:
328
+ config_type = "local"
329
+
330
+ if config_type == "local":
331
+ design.info("Converting local MCP configs to remote for training...")
332
+
333
+ # Get the image name from lock file or environment
334
+ from .utils import get_image_from_lock
335
+
336
+ env_image = image or get_image_from_lock()
337
+
338
+ if not env_image:
339
+ design.error("No image found for remote MCP conversion")
340
+ design.hint("Run 'hud build' first")
341
+ raise typer.Exit(1)
342
+
343
+ # Check if image needs to be pushed
344
+ if "/" not in env_image or env_image.startswith("local/"):
345
+ design.warning(f"Image '{env_image}' appears to be local only")
346
+ design.info("Running 'hud push' to make it publicly available...")
347
+ from hud.cli.push import push_command
348
+
349
+ push_command(directory=".", yes=True)
350
+ design.success("Image pushed successfully")
351
+ # Re-read image name after push
352
+ env_image = get_image_from_lock()
353
+
354
+ # Convert all tasks to use remote MCP
355
+ for task in tasks:
356
+ remote_config = {
357
+ "hud": {
358
+ "url": "https://mcp.hud.so/v3/mcp",
359
+ "headers": {
360
+ "Authorization": "Bearer $HUD_API_KEY",
361
+ "Mcp-Image": env_image,
362
+ },
363
+ }
364
+ }
365
+ task["mcp_config"] = remote_config
366
+
367
+ design.success("✓ Converted all tasks to use remote MCP configs")
368
+
369
+ # Save the modified tasks back to the file
370
+ with open(dataset, "w") as f: # noqa: ASYNC230
371
+ json.dump(tasks, f, indent=2)
372
+ design.info("Updated task file with remote configs")
373
+ except json.JSONDecodeError as e:
374
+ design.error(f"Invalid JSON in task file: {e}")
375
+ raise typer.Exit(1) from e
376
+ elif dataset:
377
+ # Validate HuggingFace dataset
253
378
  design.info(f"Validating dataset: {dataset}")
254
379
  try:
255
380
  # Try to load dataset info from HuggingFace
@@ -273,12 +398,6 @@ async def train_command(
273
398
  design.warning(f"Could not validate dataset size: {e}")
274
399
  design.info("Proceeding with training - ensure dataset has at least 4 tasks")
275
400
 
276
- # Use dataset from command or lock file
277
- if not dataset:
278
- dataset = get_primary_dataset()
279
- if dataset:
280
- design.info(f"Using dataset from lock file: {dataset}")
281
-
282
401
  # Display configuration
283
402
  design.section_title("📋 Training Configuration")
284
403
  design.json_config(
@@ -318,6 +437,7 @@ async def train_command(
318
437
  auto_create_pod=auto_create_pod,
319
438
  team_id=team_id,
320
439
  dataset_size=dataset_size,
440
+ is_json_file=is_json_file,
321
441
  )
322
442
 
323
443
 
@@ -340,10 +460,19 @@ def check_requirements(config: Path | None, dataset: str | None) -> dict[str, An
340
460
 
341
461
  # Check dataset
342
462
  if not dataset:
343
- # Check lock file for dataset
344
- primary_dataset = get_primary_dataset()
345
- if not primary_dataset:
346
- missing["dataset"] = "none"
463
+ # First check for JSON files (preferred method)
464
+ json_files = find_task_json_files()
465
+ if json_files:
466
+ if len(json_files) == 1:
467
+ # Will be auto-selected
468
+ pass
469
+ else:
470
+ missing["dataset"] = "multiple_json"
471
+ else:
472
+ # Check lock file for HuggingFace dataset
473
+ primary_dataset = get_primary_dataset()
474
+ if not primary_dataset:
475
+ missing["dataset"] = "none"
347
476
 
348
477
  return missing
349
478
 
@@ -407,13 +536,23 @@ async def run_remote_training(
407
536
  auto_create_pod: str | None = None,
408
537
  team_id: str | None = None,
409
538
  dataset_size: int | None = None,
539
+ is_json_file: bool = False,
410
540
  ) -> None:
411
541
  """Run training on remote infrastructure."""
412
542
  design.section_title("🚀 Remote Training")
413
543
 
414
544
  if provider == "prime":
415
545
  await run_prime_training(
416
- model, dataset, config, gpus, output_dir, image, auto_create_pod, team_id, dataset_size
546
+ model,
547
+ dataset,
548
+ config,
549
+ gpus,
550
+ output_dir,
551
+ image,
552
+ auto_create_pod,
553
+ team_id,
554
+ dataset_size,
555
+ is_json_file,
417
556
  )
418
557
  else:
419
558
  design.error(f"Provider '{provider}' not yet supported")
@@ -5,4 +5,4 @@ def test_import():
5
5
  """Test that the package can be imported."""
6
6
  import hud
7
7
 
8
- assert hud.__version__ == "0.4.16"
8
+ assert hud.__version__ == "0.4.17"
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.4.16"
7
+ __version__ = "0.4.17"
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "hud-python"
3
- version = "0.4.16"
3
+ version = "0.4.17"
4
4
  description = "SDK for the HUD platform."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11, <3.14"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes