hud-python 0.6.3__tar.gz → 0.6.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246) hide show
  1. {hud_python-0.6.3 → hud_python-0.6.5}/PKG-INFO +2 -1
  2. hud_python-0.6.5/cookbooks/connect4-selfplay/README.md +57 -0
  3. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/__init__.py +11 -3
  4. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai_compatible/agent.py +15 -4
  5. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/robot/__init__.py +9 -3
  6. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/robot/adapter.py +10 -0
  7. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/robot/agent.py +26 -14
  8. hud_python-0.6.5/hud/agents/robot/batching.py +130 -0
  9. hud_python-0.6.5/hud/agents/robot/model.py +127 -0
  10. hud_python-0.6.5/hud/agents/robot/record.py +230 -0
  11. hud_python-0.6.5/hud/agents/robot/video.py +267 -0
  12. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_base.py +38 -2
  13. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_provider_native_tools.py +4 -4
  14. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/types.py +38 -21
  15. {hud_python-0.6.3 → hud_python-0.6.5}/hud/capabilities/robot.py +4 -0
  16. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/__init__.py +4 -0
  17. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/eval.py +26 -7
  18. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/init.py +65 -26
  19. hud_python-0.6.5/hud/cli/jobs.py +146 -0
  20. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/models.py +21 -3
  21. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/presets.py +67 -12
  22. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/test_eval_config.py +40 -0
  23. hud_python-0.6.5/hud/cli/tests/test_init.py +113 -0
  24. hud_python-0.6.5/hud/cli/trace.py +215 -0
  25. {hud_python-0.6.3 → hud_python-0.6.5}/hud/clients/client.py +1 -1
  26. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/run.py +23 -5
  27. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/runtime.py +51 -8
  28. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/test_hosted.py +48 -0
  29. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/test_rollout.py +26 -1
  30. {hud_python-0.6.3 → hud_python-0.6.5}/hud/settings.py +2 -2
  31. {hud_python-0.6.3 → hud_python-0.6.5}/hud/train/__init__.py +2 -0
  32. hud_python-0.6.5/hud/train/base.py +159 -0
  33. {hud_python-0.6.3 → hud_python-0.6.5}/hud/train/client.py +41 -17
  34. {hud_python-0.6.3 → hud_python-0.6.5}/hud/train/types.py +38 -4
  35. {hud_python-0.6.3 → hud_python-0.6.5}/hud/types.py +5 -13
  36. {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/gateway.py +23 -0
  37. {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/hud_console.py +24 -6
  38. hud_python-0.6.5/hud/utils/tests/test_hud_console.py +165 -0
  39. {hud_python-0.6.3 → hud_python-0.6.5}/hud/version.py +1 -1
  40. {hud_python-0.6.3 → hud_python-0.6.5}/pyproject.toml +2 -1
  41. hud_python-0.6.3/hud/agents/robot/model.py +0 -138
  42. hud_python-0.6.3/hud/cli/tests/test_init.py +0 -59
  43. hud_python-0.6.3/hud/train/base.py +0 -102
  44. hud_python-0.6.3/hud/utils/tests/test_hud_console.py +0 -62
  45. {hud_python-0.6.3 → hud_python-0.6.5}/.gitignore +0 -0
  46. {hud_python-0.6.3 → hud_python-0.6.5}/LICENSE +0 -0
  47. {hud_python-0.6.3 → hud_python-0.6.5}/README.md +0 -0
  48. {hud_python-0.6.3 → hud_python-0.6.5}/cookbooks/a2a-chat/README.md +0 -0
  49. {hud_python-0.6.3 → hud_python-0.6.5}/cookbooks/a2a-chat/pyproject.toml +0 -0
  50. {hud_python-0.6.3 → hud_python-0.6.5}/cookbooks/codex-coding/README.md +0 -0
  51. {hud_python-0.6.3 → hud_python-0.6.5}/cookbooks/codex-coding/pyproject.toml +0 -0
  52. {hud_python-0.6.3 → hud_python-0.6.5}/cookbooks/fireworks-rl-training/README.md +0 -0
  53. {hud_python-0.6.3 → hud_python-0.6.5}/cookbooks/fireworks-rl-training/pyproject.toml +0 -0
  54. {hud_python-0.6.3 → hud_python-0.6.5}/cookbooks/rl-training/README.md +0 -0
  55. {hud_python-0.6.3 → hud_python-0.6.5}/cookbooks/rl-training/pyproject.toml +0 -0
  56. {hud_python-0.6.3 → hud_python-0.6.5}/hud/__init__.py +0 -0
  57. {hud_python-0.6.3 → hud_python-0.6.5}/hud/__main__.py +0 -0
  58. {hud_python-0.6.3 → hud_python-0.6.5}/hud/_legacy.py +0 -0
  59. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/base.py +0 -0
  60. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/browser_use/__init__.py +0 -0
  61. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/browser_use/agent.py +0 -0
  62. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/__init__.py +0 -0
  63. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/agent.py +0 -0
  64. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/sdk/__init__.py +0 -0
  65. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/sdk/agent.py +0 -0
  66. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/sdk/computer_mcp.py +0 -0
  67. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/__init__.py +0 -0
  68. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/base.py +0 -0
  69. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/coding.py +0 -0
  70. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/computer.py +0 -0
  71. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/hosted.py +0 -0
  72. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/mcp_proxy.py +0 -0
  73. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/settings.py +0 -0
  74. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/tests/__init__.py +0 -0
  75. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/tests/test_computer.py +0 -0
  76. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/__init__.py +0 -0
  77. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/agent.py +0 -0
  78. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/settings.py +0 -0
  79. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/__init__.py +0 -0
  80. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/base.py +0 -0
  81. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/coding.py +0 -0
  82. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/computer.py +0 -0
  83. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/filesystem.py +0 -0
  84. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/hosted.py +0 -0
  85. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/mcp_proxy.py +0 -0
  86. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/tests/__init__.py +0 -0
  87. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/tests/test_computer.py +0 -0
  88. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/misc/__init__.py +0 -0
  89. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/misc/response_automation.py +0 -0
  90. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/__init__.py +0 -0
  91. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/agent.py +0 -0
  92. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/__init__.py +0 -0
  93. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/apply_patch.py +0 -0
  94. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/base.py +0 -0
  95. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/coding.py +0 -0
  96. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/computer.py +0 -0
  97. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/hosted.py +0 -0
  98. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/mcp_proxy.py +0 -0
  99. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/strict_schema.py +0 -0
  100. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/tests/__init__.py +0 -0
  101. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/tests/test_computer.py +0 -0
  102. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/tests/test_strict_schema.py +0 -0
  103. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai_compatible/__init__.py +0 -0
  104. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai_compatible/tools/__init__.py +0 -0
  105. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai_compatible/tools/base.py +0 -0
  106. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai_compatible/tools/filesystem.py +0 -0
  107. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai_compatible/tools/mcp_proxy.py +0 -0
  108. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/robot/_types.py +0 -0
  109. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/__init__.py +0 -0
  110. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_apply_patch.py +0 -0
  111. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_claude_agent.py +0 -0
  112. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_claude_sdk_agent.py +0 -0
  113. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_gemini_agent.py +0 -0
  114. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_openai_agent.py +0 -0
  115. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_openai_compatible_agent.py +0 -0
  116. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_tool_agent.py +0 -0
  117. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_trace.py +0 -0
  118. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tool_agent.py +0 -0
  119. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tools/__init__.py +0 -0
  120. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tools/base.py +0 -0
  121. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tools/hosted.py +0 -0
  122. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tools/mcp.py +0 -0
  123. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tools/rfb.py +0 -0
  124. {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tools/ssh.py +0 -0
  125. {hud_python-0.6.3 → hud_python-0.6.5}/hud/capabilities/__init__.py +0 -0
  126. {hud_python-0.6.3 → hud_python-0.6.5}/hud/capabilities/base.py +0 -0
  127. {hud_python-0.6.3 → hud_python-0.6.5}/hud/capabilities/cdp.py +0 -0
  128. {hud_python-0.6.3 → hud_python-0.6.5}/hud/capabilities/filetracking.py +0 -0
  129. {hud_python-0.6.3 → hud_python-0.6.5}/hud/capabilities/mcp.py +0 -0
  130. {hud_python-0.6.3 → hud_python-0.6.5}/hud/capabilities/rfb.py +0 -0
  131. {hud_python-0.6.3 → hud_python-0.6.5}/hud/capabilities/ssh.py +0 -0
  132. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/__main__.py +0 -0
  133. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/cancel.py +0 -0
  134. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/client.py +0 -0
  135. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/deploy.py +0 -0
  136. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/login.py +0 -0
  137. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/serve.py +0 -0
  138. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/sync.py +0 -0
  139. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/task.py +0 -0
  140. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/templates.py +0 -0
  141. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/__init__.py +0 -0
  142. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/test_cli_init.py +0 -0
  143. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/test_cli_main.py +0 -0
  144. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
  145. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/test_deploy.py +0 -0
  146. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/test_eval_bedrock.py +0 -0
  147. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/test_main_module.py +0 -0
  148. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/test_sync_export.py +0 -0
  149. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/__init__.py +0 -0
  150. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/api.py +0 -0
  151. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/build_display.py +0 -0
  152. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/build_logs.py +0 -0
  153. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/config.py +0 -0
  154. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/context.py +0 -0
  155. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/display.py +0 -0
  156. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/jobs.py +0 -0
  157. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/registry.py +0 -0
  158. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/source.py +0 -0
  159. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tasks.py +0 -0
  160. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tests/__init__.py +0 -0
  161. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tests/test_build_display.py +0 -0
  162. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tests/test_config.py +0 -0
  163. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tests/test_context.py +0 -0
  164. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tests/test_registry.py +0 -0
  165. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tests/test_source.py +0 -0
  166. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tests/test_tasks.py +0 -0
  167. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tests/test_version_check.py +0 -0
  168. {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/version_check.py +0 -0
  169. {hud_python-0.6.3 → hud_python-0.6.5}/hud/clients/__init__.py +0 -0
  170. {hud_python-0.6.3 → hud_python-0.6.5}/hud/clients/tests/__init__.py +0 -0
  171. {hud_python-0.6.3 → hud_python-0.6.5}/hud/clients/tests/test_connect.py +0 -0
  172. {hud_python-0.6.3 → hud_python-0.6.5}/hud/conftest.py +0 -0
  173. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/__init__.py +0 -0
  174. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/env.py +0 -0
  175. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/file_tracker.py +0 -0
  176. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/file_tracking.py +0 -0
  177. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/legacy.py +0 -0
  178. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/robot/__init__.py +0 -0
  179. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/robot/bridge.py +0 -0
  180. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/robot/endpoint.py +0 -0
  181. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/robot/sim_runner.py +0 -0
  182. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/server.py +0 -0
  183. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/__init__.py +0 -0
  184. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/conftest.py +0 -0
  185. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/test_capability_backing.py +0 -0
  186. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/test_file_tracker.py +0 -0
  187. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/test_file_tracking.py +0 -0
  188. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/test_legacy.py +0 -0
  189. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/test_loader.py +0 -0
  190. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/test_manifest.py +0 -0
  191. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/test_server.py +0 -0
  192. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/test_tunnel.py +0 -0
  193. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/utils.py +0 -0
  194. {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/workspace.py +0 -0
  195. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/__init__.py +0 -0
  196. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/chat.py +0 -0
  197. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/file_tracking.py +0 -0
  198. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/job.py +0 -0
  199. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/sync.py +0 -0
  200. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/task.py +0 -0
  201. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/taskset.py +0 -0
  202. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/__init__.py +0 -0
  203. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/test_chat.py +0 -0
  204. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/test_docker_provider.py +0 -0
  205. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/test_file_tracking_observer.py +0 -0
  206. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/test_job.py +0 -0
  207. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/test_sync.py +0 -0
  208. {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/test_task.py +0 -0
  209. {hud_python-0.6.3 → hud_python-0.6.5}/hud/graders/__init__.py +0 -0
  210. {hud_python-0.6.3 → hud_python-0.6.5}/hud/graders/base.py +0 -0
  211. {hud_python-0.6.3 → hud_python-0.6.5}/hud/graders/bash.py +0 -0
  212. {hud_python-0.6.3 → hud_python-0.6.5}/hud/graders/combine.py +0 -0
  213. {hud_python-0.6.3 → hud_python-0.6.5}/hud/graders/judge.py +0 -0
  214. {hud_python-0.6.3 → hud_python-0.6.5}/hud/graders/results.py +0 -0
  215. {hud_python-0.6.3 → hud_python-0.6.5}/hud/graders/text.py +0 -0
  216. {hud_python-0.6.3 → hud_python-0.6.5}/hud/patches/__init__.py +0 -0
  217. {hud_python-0.6.3 → hud_python-0.6.5}/hud/patches/mcp_patches.py +0 -0
  218. {hud_python-0.6.3 → hud_python-0.6.5}/hud/patches/tests/__init__.py +0 -0
  219. {hud_python-0.6.3 → hud_python-0.6.5}/hud/patches/tests/test_warnings.py +0 -0
  220. {hud_python-0.6.3 → hud_python-0.6.5}/hud/patches/warnings.py +0 -0
  221. {hud_python-0.6.3 → hud_python-0.6.5}/hud/py.typed +0 -0
  222. {hud_python-0.6.3 → hud_python-0.6.5}/hud/server.py +0 -0
  223. {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/__init__.py +0 -0
  224. {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/context.py +0 -0
  225. {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/exporter.py +0 -0
  226. {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/filetracking.py +0 -0
  227. {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/instrument.py +0 -0
  228. {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/span.py +0 -0
  229. {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/tests/__init__.py +0 -0
  230. {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/tests/test_exporter.py +0 -0
  231. {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/tests/test_filetracking.py +0 -0
  232. {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/tests/test_instrument.py +0 -0
  233. {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/__init__.py +0 -0
  234. {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/exceptions.py +0 -0
  235. {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/hints.py +0 -0
  236. {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/modules.py +0 -0
  237. {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/platform.py +0 -0
  238. {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/requests.py +0 -0
  239. {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/serialization.py +0 -0
  240. {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/tests/__init__.py +0 -0
  241. {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/tests/test_exceptions.py +0 -0
  242. {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/tests/test_hints.py +0 -0
  243. {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/tests/test_platform.py +0 -0
  244. {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/tests/test_requests.py +0 -0
  245. {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/tests/test_serialization.py +0 -0
  246. {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/time.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.6.3
3
+ Version: 0.6.5
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -70,6 +70,7 @@ Requires-Dist: ruff<0.15.0,>=0.11.8; extra == 'dev'
70
70
  Provides-Extra: modal
71
71
  Requires-Dist: modal>=1.0; extra == 'modal'
72
72
  Provides-Extra: robot
73
+ Requires-Dist: av>=12; extra == 'robot'
73
74
  Requires-Dist: numpy>=1.24; extra == 'robot'
74
75
  Requires-Dist: openpi-client>=0.1.2; extra == 'robot'
75
76
  Provides-Extra: train
@@ -0,0 +1,57 @@
1
+ # Connect Four self-play
2
+
3
+ Symmetric self-play RL on a 6×7 Connect Four board. Draws are rare (you need a
4
+ full 42-cell board with no four-in-a-row), so the win/loss reward signal
5
+ persists as the policy improves and the GRPO advantage stays non-zero.
6
+
7
+ ## How it works
8
+
9
+ - One agent ("outer") plays a full game against an inner model on the **same
10
+ slug** — true self-play. `seed % 2` decides who drops first, for symmetric
11
+ first-move coverage.
12
+ - Each game trains **both sides at once**: the outer agent's `Run` (reward from
13
+ its perspective) plus a hand-built `TrajectoryPayload` for the inner model
14
+ with the flipped reward (`1 - outer_reward`).
15
+ - `group_size=2` pairs each game's two trajectories so the GRPO advantage is
16
+ `reward - 0.5` per game.
17
+ - `loss_fn="ppo"` clips the importance-sampling ratio, so a single lucky game
18
+ can't blow up the update.
19
+
20
+ The training loop uses the public API directly — `forward_backward` accepts
21
+ `Run` and `TrajectoryPayload` mixed, so no private helpers are needed.
22
+
23
+ ## Setup
24
+
25
+ ```bash
26
+ hud models fork Qwen/Qwen3.5-4B --name c4-selfplay # prints a slug like c4-selfplay-<id>
27
+ ```
28
+
29
+ Put your `HUD_API_KEY` in a `.env` here (or the environment).
30
+
31
+ ## Run
32
+
33
+ Local sanity check (one game, cheap external model as the outer agent):
34
+
35
+ ```bash
36
+ hud eval env.py claude --model claude-haiku-4-5
37
+ ```
38
+
39
+ Train:
40
+
41
+ ```bash
42
+ python train.py --model c4-selfplay-<id> --steps 20 --group 4 --lr 1e-5
43
+ ```
44
+
45
+ ## Tuning notes
46
+
47
+ - **Memory scales with `tasks × group`.** Each task×rollout is a fresh `env.py`
48
+ subprocess. With 8 tasks and `--group 4` that's 32 concurrent games. Connect
49
+ Four games can run up to 42 plies, so they cost more tokens and time per game —
50
+ start at `--group 4` and raise only if you have RAM headroom.
51
+ - **Watch the server-side metrics.** The loop prints local win/draw/loss counts
52
+ each step and the last few checkpoints' `mean_reward` / `reward_std` via
53
+ `trainer.checkpoints()` at the end. A healthy run keeps non-trivial
54
+ `reward_std` (within-group spread); if it collapses, the policy has saturated.
55
+ - **Reset on changes.** If you edit the reward or the board, roll the head back
56
+ to a clean checkpoint (`hud models head <slug> --set <id>`) or fork fresh —
57
+ don't keep training a policy shaped by the old objective.
@@ -8,7 +8,12 @@ from __future__ import annotations
8
8
  from typing import TYPE_CHECKING, Any, cast
9
9
 
10
10
  from hud.types import AgentType
11
- from hud.utils.gateway import build_gateway_client, list_gateway_models
11
+ from hud.utils.gateway import (
12
+ build_gateway_client,
13
+ gateway_model_aliases,
14
+ list_gateway_models,
15
+ normalize_gateway_model_id,
16
+ )
12
17
 
13
18
  if TYPE_CHECKING:
14
19
  from typing import TypeAlias
@@ -27,6 +32,8 @@ def create_agent(model: str, **kwargs: Any) -> GatewayAgent:
27
32
 
28
33
  For direct API access with provider API keys, instantiate the agent classes directly.
29
34
  """
35
+ requested_model = model
36
+ model = normalize_gateway_model_id(model)
30
37
  agent_type = next((candidate for candidate in AgentType if candidate.value == model), None)
31
38
  if agent_type is not None:
32
39
  model_id = model
@@ -73,7 +80,8 @@ def create_agent(model: str, **kwargs: Any) -> GatewayAgent:
73
80
  for n in (gm.id, gm.name, gm.model_name)
74
81
  if isinstance(n, str)
75
82
  ]
76
- near = difflib.get_close_matches(model, known, n=3, cutoff=0.5)
83
+ known.extend(gateway_model_aliases())
84
+ near = difflib.get_close_matches(requested_model, known, n=3, cutoff=0.5)
77
85
  hint = (
78
86
  f" Did you mean: {', '.join(near)}?"
79
87
  if near
@@ -84,7 +92,7 @@ def create_agent(model: str, **kwargs: Any) -> GatewayAgent:
84
92
  if gateway_models
85
93
  else "the HUD gateway registry (empty — is HUD_API_KEY set?)"
86
94
  )
87
- raise ValueError(f"Model {model!r} not found in {source}.{hint}")
95
+ raise ValueError(f"Model {requested_model!r} not found in {source}.{hint}")
88
96
 
89
97
  kwargs.setdefault("model", model_id)
90
98
  kwargs.setdefault("model_client", build_gateway_client(provider_name))
@@ -193,16 +193,27 @@ class OpenAIChatAgent(ToolAgent[ChatCompletionMessageParam, OpenAIChatConfig]):
193
193
  sample: Sample | None = None
194
194
  if return_token_ids:
195
195
  prompt_token_ids = getattr(choice, "prompt_token_ids", None)
196
+ # Multimodal prompt (text + image chunks): the only prompt representation
197
+ # that survives image inputs; flat prompt_token_ids is null in that case.
198
+ prompt_chunks = getattr(choice, "prompt_chunks", None)
196
199
  token_ids = getattr(choice, "token_ids", None)
197
- if prompt_token_ids is not None and token_ids is not None:
198
- chat_state.continuation_token_ids = list(prompt_token_ids) + list(token_ids)
199
- chat_state.continuation_message_count = len(messages)
200
+ has_prompt = prompt_token_ids is not None or prompt_chunks is not None
201
+ if token_ids is not None and has_prompt:
200
202
  content_lp = choice.logprobs.content if choice.logprobs else None
201
203
  sample = Sample(
202
- prompt_token_ids=list(prompt_token_ids),
204
+ prompt_token_ids=list(prompt_token_ids) if prompt_token_ids is not None else [],
205
+ prompt_chunks=list(prompt_chunks) if prompt_chunks is not None else None,
203
206
  output_token_ids=list(token_ids),
204
207
  output_logprobs=[tok.logprob for tok in content_lp] if content_lp else [],
205
208
  )
209
+ # KV-cache continuation only applies to flat text prompts; clear any
210
+ # stale state when the gateway returns chunks-only (multimodal turn).
211
+ if prompt_token_ids is not None:
212
+ chat_state.continuation_token_ids = list(prompt_token_ids) + list(token_ids)
213
+ chat_state.continuation_message_count = len(messages)
214
+ else:
215
+ chat_state.continuation_token_ids = None
216
+ chat_state.continuation_message_count = None
206
217
 
207
218
  tool_calls: list[MCPToolCall] = []
208
219
  for tc in function_calls:
@@ -10,6 +10,9 @@ The harness splits a policy rollout into three seams, each replaceable on its ow
10
10
  - :class:`~hud.agents.robot.adapter.Adapter` — translate between the env's
11
11
  observation/action spaces (from the contract) and the policy's.
12
12
 
13
+ Wrap an agent in :class:`~hud.agents.robot.batching.BatchedAgent` to run many rollouts
14
+ concurrently off one batched GPU forward (``max_concurrent`` rollouts, shared model).
15
+
13
16
  Per-tick platform tracing is emitted by the loop itself: each step records an
14
17
  :class:`~hud.agents.types.ObservationStep`, and each re-inference an
15
18
  :class:`~hud.agents.types.InferenceStep`, so runs stream live into the HUD trace viewer.
@@ -20,16 +23,19 @@ This subpackage needs the ``robot`` extra (``pip install 'hud-python[robot]'``)
20
23
 
21
24
  from __future__ import annotations
22
25
 
23
- from .adapter import Adapter, LeRobotAdapter
26
+ from .adapter import Adapter, LeRobotAdapter, OpenPIAdapter
24
27
  from .agent import ROBOT_PROTOCOL, RobotAgent
25
- from .model import LeRobotModel, Model, lerobot_infer
28
+ from .batching import BatchedAgent, BatchedModel
29
+ from .model import LeRobotModel, Model
26
30
 
27
31
  __all__ = [
28
32
  "ROBOT_PROTOCOL",
29
33
  "Adapter",
34
+ "BatchedAgent",
35
+ "BatchedModel",
30
36
  "LeRobotAdapter",
31
37
  "LeRobotModel",
32
38
  "Model",
39
+ "OpenPIAdapter",
33
40
  "RobotAgent",
34
- "lerobot_infer",
35
41
  ]
@@ -89,7 +89,17 @@ class LeRobotAdapter(Adapter):
89
89
  return action
90
90
 
91
91
 
92
+ class OpenPIAdapter(Adapter):
93
+ """unwraps obs['data'] to OpenPI wire keys, attaches prompt; actions are passthrough"""
94
+
95
+ def adapt_observation(self, obs: dict[str, Any], prompt: str) -> dict[str, Any]:
96
+ out = dict(obs["data"])
97
+ out.setdefault("prompt", prompt)
98
+ return out
99
+
100
+
92
101
  __all__ = [
93
102
  "Adapter",
94
103
  "LeRobotAdapter",
104
+ "OpenPIAdapter",
95
105
  ]
@@ -5,8 +5,8 @@ Subclass :class:`RobotAgent`, set ``self.model`` and ``self.adapter`` in
5
5
 
6
6
  The base calls the adapter and model at the right moments::
7
7
 
8
- setup_robot -> adapter.bind(spaces) # once after connect
9
- on_episode_start -> model.reset(); adapter.reset() # once per episode
8
+ setup_robot -> adapter.bind(spaces) # once after connect
9
+ on_episode_start -> adapter.reset() # per episode; model is stateless
10
10
  select_action -> adapt_observation -> model.ainfer -> pop chunk -> adapt_action
11
11
 
12
12
  ``model.ainfer`` always returns a ``[T, A]`` chunk; :meth:`RobotAgent.select_action`
@@ -24,9 +24,10 @@ from typing import TYPE_CHECKING, Any, ClassVar
24
24
  import numpy as np
25
25
 
26
26
  from hud.agents.base import Agent
27
- from hud.agents.types import InferenceStep, ObservationStep
28
27
  from hud.capabilities.robot import RobotClient
29
28
 
29
+ from .record import Recorder
30
+
30
31
  if TYPE_CHECKING:
31
32
  from hud.eval.run import Run
32
33
 
@@ -57,6 +58,9 @@ class RobotAgent(Agent):
57
58
  robot_protocol: ClassVar[str] = ROBOT_PROTOCOL
58
59
  #: How often (in steps) to print a step-progress line. 0 = off.
59
60
  log_every: ClassVar[int] = 20
61
+ #: Opt-in: also save a LeRobot v3 dataset of every (obs, action) pair to disk
62
+ #: (the ``--save`` flag). Telemetry streams regardless; see :mod:`.record`.
63
+ save: bool = False
60
64
 
61
65
  #: Runs the policy (preprocess → forward → postprocess). Subclasses set this.
62
66
  model: Model | None = None
@@ -70,9 +74,11 @@ class RobotAgent(Agent):
70
74
  _env_obs_space: dict[str, Any]
71
75
  #: Unexecuted tail of the current policy chunk; popped one action per step.
72
76
  _active_chunk: deque[ActionArray]
73
- #: The live run + control-tick index, so ``select_action`` can record its own InferenceStep.
74
- _run: Run
77
+ #: Control-tick index, incremented per executed action.
75
78
  _tick: int
79
+ #: Records all telemetry (observation/inference steps + video) and, when ``save``, a
80
+ #: LeRobot dataset. Agent-lifetime (the dataset spans every episode); created lazily.
81
+ _recorder: Recorder | None = None
76
82
 
77
83
  def setup_robot(self, client: RobotClient) -> None:
78
84
  """Discover the env's action/observation layout and bind the adapter to it."""
@@ -81,16 +87,19 @@ class RobotAgent(Agent):
81
87
  self.adapter.bind(self._env_action_space, self._env_obs_space)
82
88
 
83
89
  def on_episode_start(self, run: Run, client: RobotClient, *, prompt: str) -> None:
84
- """Store the prompt and reset the model and adapter before the act loop.
90
+ """Store the prompt and reset per-episode state before the act loop.
85
91
 
86
- Override (calling ``super()`` first) only for extra per-episode setup.
92
+ The model is stateless (per-episode state lives here, not on the shared model), so
93
+ only the adapter is reset. Override (calling ``super()`` first) for extra setup.
87
94
  """
88
95
  self._prompt = prompt
89
96
  self._active_chunk = deque()
90
- self._run = run
91
97
  self._tick = 0
92
- if self.model is not None:
93
- self.model.reset()
98
+ # One recorder for the agent's life so its LeRobot dataset spans every episode;
99
+ # begin() opens this episode (fresh video stream, prompt) and takes the run it records onto.
100
+ if self._recorder is None:
101
+ self._recorder = Recorder(client, save=self.save)
102
+ self._recorder.begin(run, prompt)
94
103
  if self.adapter is not None:
95
104
  self.adapter.reset()
96
105
 
@@ -110,9 +119,8 @@ class RobotAgent(Agent):
110
119
  )
111
120
  chunk = np.atleast_2d(await self.model.ainfer(batch)) # [T, A]
112
121
  self._active_chunk = deque(chunk)
113
- self._run.record(
114
- InferenceStep(tick=self._tick, chunk=chunk.tolist(), chunk_length=len(chunk))
115
- )
122
+ assert self._recorder is not None # set in on_episode_start
123
+ self._recorder.record_inference(chunk, tick=self._tick)
116
124
  self._tick += 1
117
125
  raw = self._active_chunk.popleft()
118
126
  return raw if self.adapter is None else self.adapter.adapt_action(raw, obs)
@@ -131,15 +139,17 @@ class RobotAgent(Agent):
131
139
  self.on_episode_start(run, client, prompt=prompt)
132
140
  print(f"[agent] episode started: {prompt!r} (max_steps={step_limit})", flush=True)
133
141
 
142
+ assert self._recorder is not None # set in on_episode_start above
134
143
  for step in range(step_limit):
135
144
  obs = await client.get_observation()
136
- run.record(ObservationStep.from_obs(obs, tick=step, obs_space=self._env_obs_space))
145
+ self._recorder.record_observation(obs, tick=step)
137
146
 
138
147
  if self.should_stop(obs, step=step, max_steps=step_limit):
139
148
  print(f"[agent] env reported terminated at step {step}", flush=True)
140
149
  break
141
150
 
142
151
  action = await self.select_action(obs)
152
+ self._recorder.record_action(action)
143
153
  await client.send_action(action)
144
154
 
145
155
  if self.log_every and step % self.log_every == 0:
@@ -151,6 +161,8 @@ class RobotAgent(Agent):
151
161
  run.trace.status = "completed"
152
162
  run.trace.content = "done"
153
163
  finally:
164
+ if self._recorder is not None:
165
+ self._recorder.end() # flush video tails + commit the LeRobot episode
154
166
  await client.close()
155
167
 
156
168
 
@@ -0,0 +1,130 @@
1
+ """Batched inference for concurrent robot rollouts.
2
+
3
+ - BatchedModel: stacks concurrent ainfer calls into one infer
4
+ - BatchedAgent: gives each rollout its own state, shares one batched model
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ import copy
11
+ import importlib
12
+ from typing import TYPE_CHECKING, Any
13
+
14
+ from hud.agents.base import Agent
15
+
16
+ from .model import Model
17
+
18
+ if TYPE_CHECKING:
19
+ from hud.eval.run import Run
20
+
21
+ from ._types import ActionArray
22
+ from .agent import RobotAgent
23
+
24
+
25
class BatchedModel(Model):
    """Coalesce concurrent ``ainfer`` calls into one stacked ``inner.infer``.

    A lazily-started worker drains up to ``batch_size`` queued calls (or waits up to
    ``max_wait_s`` for stragglers — which avoids stalling when fewer rollouts are live,
    e.g. the tail of a suite), stacks them into one ``[N, ...]`` batch, runs a single
    forward, and scatters the ``[N, T, A]`` rows back to each caller.

    ``inner`` must be an in-process, stateless model whose :meth:`~Model.infer` runs the
    whole ``[N, ...]`` batch in one forward (e.g. :class:`~hud.agents.robot.model.LeRobotModel`).
    :class:`~hud.agents.robot.model.RemoteModel` is **not** supported: it does one WebSocket
    request per env and the OpenPI server protocol has no batched-request shape, so a stacked
    batch would be mis-sent as a single env. Run one agent per rollout against it instead.

    Lifecycle: the queue and worker are bound to the event loop of the first
    :meth:`ainfer` call. If that worker has since finished (e.g. it was cancelled) or a
    later call arrives on a different loop, a fresh queue and worker are created rather
    than enqueueing onto a worker that can never answer.
    """

    def __init__(self, inner: Model, *, batch_size: int, max_wait_s: float = 0.05) -> None:
        """Wrap ``inner``; no queue or task is created until the first :meth:`ainfer`.

        Args:
            inner: The model whose ``infer`` receives the stacked batch.
            batch_size: Maximum number of queued calls merged into one forward.
            max_wait_s: How long the worker waits for stragglers after the first call.
        """
        self.inner = inner
        self.batch_size = int(batch_size)
        self.max_wait_s = float(max_wait_s)
        # Bound to the running loop on first ainfer (the harness owns the loop).
        self._queue: asyncio.Queue[tuple[Any, asyncio.Future[ActionArray]]] | None = None
        self._worker: asyncio.Task[None] | None = None
        # Loop the current queue/worker belong to; used to detect a loop change.
        self._loop: asyncio.AbstractEventLoop | None = None

    def infer(self, batch: Any) -> ActionArray:
        """Synchronous pass-through to ``inner.infer`` (no batching involved)."""
        return self.inner.infer(batch)

    async def ainfer(self, batch: Any) -> ActionArray:
        """Queue one sample and await its row of the batched forward.

        Raises whatever ``inner.infer`` (or collation) raised for the batch this sample
        ended up in.
        """
        loop = asyncio.get_running_loop()
        worker = self._worker
        if worker is None or worker.done() or self._loop is not loop:
            # (Re)start: a finished worker or one living on another loop would leave
            # this caller waiting on a future nobody resolves.
            self._loop = loop
            self._queue = asyncio.Queue()
            self._worker = loop.create_task(self._batch_loop())
        assert self._queue is not None
        fut: asyncio.Future[ActionArray] = loop.create_future()
        await self._queue.put((batch, fut))
        return await fut

    @staticmethod
    def _collate(samples: list[Any]) -> dict[str, Any]:
        """Collate N raw observations into one ``[N, ...]`` batch.

        Tensor fields are stacked on a new leading dim; anything else (scalars,
        strings) is gathered into a list. Keys are taken from the first sample.
        """
        torch: Any = importlib.import_module("torch")
        first = samples[0]
        return {
            key: torch.stack([s[key] for s in samples])
            if torch.is_tensor(first[key])
            else [s[key] for s in samples]
            for key in first
        }

    async def _batch_loop(self) -> None:
        """Worker: gather a batch, run one forward, resolve the callers; repeat forever."""
        queue = self._queue  # pin this worker's queue; ``ainfer`` may rebind ``self._queue``
        assert queue is not None
        loop = asyncio.get_running_loop()
        while True:
            items = [await queue.get()]  # block for the first caller
            try:
                deadline = loop.time() + self.max_wait_s
                while len(items) < self.batch_size:
                    timeout = deadline - loop.time()
                    if timeout <= 0:
                        break
                    try:
                        items.append(await asyncio.wait_for(queue.get(), timeout))
                    except asyncio.TimeoutError:
                        # Alias of the builtin TimeoutError on 3.11+, a distinct class
                        # before that; this spelling is caught on both.
                        break
                # Callers cancelled while queued no longer need a row: skip them.
                live = [item for item in items if not item[1].done()]
                if not live:
                    continue
                stacked = self._collate([sample for sample, _ in live])
                arr = await asyncio.to_thread(self.inner.infer, stacked)  # [N, T, A]
                for (_, fut), chunk in zip(live, arr, strict=True):
                    if not fut.done():
                        fut.set_result(chunk)
            except asyncio.CancelledError:
                # Worker is being torn down: release the callers it already dequeued
                # instead of leaving them pending forever.
                for _, fut in items:
                    if not fut.done():
                        fut.cancel()
                raise
            except Exception as exc:  # isolate: a bad batch fails only its own callers
                for _, fut in items:
                    if not fut.done():
                        fut.set_exception(exc)
95
+
96
+
97
class BatchedAgent(Agent):
    """Run many rollouts at once against a single shared, batched model.

    Each call makes a shallow copy of the wrapped ``agent`` (so episode state assigned
    during the run lands on the copy) and gives that copy its own shallow copy of the
    adapter. The model is *not* copied: every per-run copy references the same
    :class:`BatchedModel`, which is what lets their concurrent ``ainfer`` calls merge
    into one forward. This relies on the agent assigning per-run state in
    ``on_episode_start`` rather than ``__init__``, and on the model being stateless
    (no per-episode ``reset``), since it is shared.

    An in-process batchable model is required; :class:`~hud.agents.robot.model.RemoteModel`
    is not supported (the OpenPI server protocol has no batched-request shape).

    Ownership: construction replaces ``agent.model`` with a :class:`BatchedModel` wrapper
    in place, so the passed-in instance stays batched afterwards. Give
    :class:`BatchedAgent` a dedicated agent and do not reuse that same instance for
    direct, unbatched :class:`RobotAgent` rollouts.
    """

    def __init__(self, agent: RobotAgent, *, batch_size: int, max_wait_s: float = 0.05) -> None:
        """Keep ``agent`` as the per-run template and wrap its model in a batcher.

        Raises:
            RuntimeError: If ``agent.model`` is ``None``.
        """
        if agent.model is None:
            raise RuntimeError("BatchedAgent needs agent.model set")
        self._template = agent
        # One wrapper, installed on the template itself, so every later copy of the
        # template points at the same batcher.
        batcher = BatchedModel(agent.model, batch_size=batch_size, max_wait_s=max_wait_s)
        agent.model = batcher

    async def __call__(self, run: Run, **kwargs: Any) -> None:
        """Drive one rollout on a fresh shallow copy of the template agent."""
        clone = copy.copy(self._template)  # new __dict__; model still shared by reference
        if clone.adapter is not None:
            # Defensive: a stateful custom adapter must not be shared between runs.
            clone.adapter = copy.copy(clone.adapter)
        await clone(run, **kwargs)
128
+
129
+
130
+ __all__ = ["BatchedAgent", "BatchedModel"]
@@ -0,0 +1,127 @@
1
+ """The ``Model``: wraps a policy and owns its inference mechanics.
2
+
3
+ A ``Model`` knows *how to run* a policy (preprocess → forward → postprocess); the
4
+ harness only awaits ``model.ainfer(batch)``. Use :class:`LeRobotModel` for stock
5
+ LeRobot checkpoints; subclass :class:`Model` and implement ``infer`` otherwise.
6
+
7
+ :meth:`Model.infer` is batch-shaped (one batch dict in, an ``[N, T, A]`` chunk out) and
8
+ stateless across calls, so one model can be shared and batched across concurrent rollouts
9
+ (see :mod:`hud.agents.robot.batching`); per-episode state belongs on the agent.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import asyncio
15
+ import importlib
16
+ from typing import TYPE_CHECKING, Any
17
+
18
+ import numpy as np
19
+
20
+ if TYPE_CHECKING:
21
+ from ._types import ActionArray
22
+
23
+
24
class Model:
    """Base class for something that owns a policy and knows how to run it.

    Stateless by contract: all per-episode state (the open-loop chunk) lives on the
    agent, which is what allows one model to be shared and batched across concurrent
    rollouts. There is intentionally no ``reset`` hook — per-episode resets belong on the
    agent. :class:`~hud.agents.robot.agent.RobotAgent` drives a model by awaiting
    :meth:`ainfer`.
    """

    def infer(self, batch: Any) -> ActionArray:
        """Run the policy on an ``[N, ...]`` batch and return an ``[N, T, A]`` chunk.

        Subclasses MUST keep the leading batch dim ``N`` even when ``N == 1``:
        :meth:`ainfer` takes element ``[0]`` and
        :class:`~hud.agents.robot.batching.BatchedModel` hands out rows along that dim,
        so returning a squeezed ``[T, A]`` silently breaks both.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    async def ainfer(self, batch: Any) -> ActionArray:
        """Single-rollout entry point: run :meth:`infer` in a thread, return row ``0``.

        Taking ``[0]`` assumes :meth:`infer` honors the ``[N, T, A]`` contract, so the
        value returned here is that call's single ``[T, A]`` row.
        """
        chunks = await asyncio.to_thread(self.infer, batch)
        return chunks[0]
47
+
48
+
49
class LeRobotModel(Model):
    """LeRobot policy plus its pre/post-processors.

    Pipeline: ``preprocess`` → ``policy.predict_action_chunk`` → ``postprocess``.
    ``preprocess`` adds the batch dim for an unbatched sample and is a no-op for an
    already-stacked one, so :meth:`infer` accepts both single and batched inputs.

    Stateless: ``predict_action_chunk`` is a pure forward and the agent owns the
    open-loop chunk, so LeRobot's internal action queue is never consumed here — which
    is why there is no ``reset``.
    """

    def __init__(self, policy: Any, preprocess: Any, postprocess: Any) -> None:
        """Store the policy and the two processor callables."""
        self.policy = policy
        self.preprocess = preprocess
        self.postprocess = postprocess
        #: True until the first forward completes; gates the one-time
        #: CUDA/flow-matching warmup messages.
        self._first_inference = True

    def infer(self, batch: Any) -> ActionArray:
        """Run one forward on a batch dict and return the ``[N, T, A]`` chunk as NumPy.

        The whole pipeline runs under ``torch.no_grad()``; the result is cast to float,
        moved to CPU and converted to a NumPy array.

        Raises:
            AssertionError: If the resulting array is not 3-dimensional.
        """
        torch: Any = importlib.import_module("torch")
        if self._first_inference:
            print(
                "[agent] first inference — flow-matching/CUDA warmup; this may take a while",
                flush=True,
            )
        with torch.no_grad():
            prepared = self.preprocess(batch)
            predicted = self.policy.predict_action_chunk(prepared)
            chunk = self.postprocess(predicted)
        if self._first_inference:
            print("[agent] first inference done — inference is now fast", flush=True)
            self._first_inference = False
        arr = chunk.float().cpu().numpy()
        # LeRobot keeps the N dim, so anything other than rank 3 is a contract violation.
        assert arr.ndim == 3, f"expected [N, T, A] chunk, got {arr.shape}"
        return arr
84
+
85
+
86
class RemoteModel(Model):
    """Weightless client for an OpenPI-WebSocket policy server.

    Sends the adapter's request dict and returns the server's chunk; all pre/post-processing
    lives in the adapter and the server.

    Not batchable: every :meth:`infer` is one WebSocket request for one env and always
    adds a single leading batch dim, and the OpenPI server protocol currently has no
    batched-request shape. Do not wrap it in
    :class:`~hud.agents.robot.batching.BatchedModel` — use one
    :class:`~hud.agents.robot.agent.RobotAgent` per concurrent rollout instead.
    """

    def __init__(
        self, host: str = "localhost", port: int = 8000, *, response_key: str = "actions"
    ) -> None:
        """Record the server address; the connection itself is opened lazily."""
        self.host = host
        self.port = port
        #: Key of the chunk in the server reply — "actions" (stock OpenPI) or
        #: "action" (Cosmos).
        self.response_key = response_key
        self._client: Any = None

    def connect(self) -> None:
        """Open the websocket if it is not open yet; blocks until the server is up."""
        if self._client is not None:
            return  # already connected — calling again is a no-op
        mod: Any = importlib.import_module("openpi_client.websocket_client_policy")

        print(
            f"[agent] connecting to openpi server ws://{self.host}:{self.port} — on hold...",
            flush=True,
        )
        self._client = mod.WebsocketClientPolicy(self.host, self.port)

    def infer(self, batch: Any) -> ActionArray:
        """Send one request dict; return the server's ``[T, A]`` chunk as ``[1, T, A]``."""
        self.connect()  # lazy connect on first call (blocks until the server is up)
        reply = self._client.infer(batch)
        chunk = np.asarray(reply[self.response_key], dtype=np.float32)
        return chunk[np.newaxis]  # prepend the N=1 batch dim
121
+
122
+
123
+ __all__ = [
124
+ "LeRobotModel",
125
+ "Model",
126
+ "RemoteModel",
127
+ ]