hud-python 0.6.3__tar.gz → 0.6.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hud_python-0.6.3 → hud_python-0.6.5}/PKG-INFO +2 -1
- hud_python-0.6.5/cookbooks/connect4-selfplay/README.md +57 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/__init__.py +11 -3
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai_compatible/agent.py +15 -4
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/robot/__init__.py +9 -3
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/robot/adapter.py +10 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/robot/agent.py +26 -14
- hud_python-0.6.5/hud/agents/robot/batching.py +130 -0
- hud_python-0.6.5/hud/agents/robot/model.py +127 -0
- hud_python-0.6.5/hud/agents/robot/record.py +230 -0
- hud_python-0.6.5/hud/agents/robot/video.py +267 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_base.py +38 -2
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_provider_native_tools.py +4 -4
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/types.py +38 -21
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/capabilities/robot.py +4 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/__init__.py +4 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/eval.py +26 -7
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/init.py +65 -26
- hud_python-0.6.5/hud/cli/jobs.py +146 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/models.py +21 -3
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/presets.py +67 -12
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/test_eval_config.py +40 -0
- hud_python-0.6.5/hud/cli/tests/test_init.py +113 -0
- hud_python-0.6.5/hud/cli/trace.py +215 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/clients/client.py +1 -1
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/run.py +23 -5
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/runtime.py +51 -8
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/test_hosted.py +48 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/test_rollout.py +26 -1
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/settings.py +2 -2
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/train/__init__.py +2 -0
- hud_python-0.6.5/hud/train/base.py +159 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/train/client.py +41 -17
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/train/types.py +38 -4
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/types.py +5 -13
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/gateway.py +23 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/hud_console.py +24 -6
- hud_python-0.6.5/hud/utils/tests/test_hud_console.py +165 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/version.py +1 -1
- {hud_python-0.6.3 → hud_python-0.6.5}/pyproject.toml +2 -1
- hud_python-0.6.3/hud/agents/robot/model.py +0 -138
- hud_python-0.6.3/hud/cli/tests/test_init.py +0 -59
- hud_python-0.6.3/hud/train/base.py +0 -102
- hud_python-0.6.3/hud/utils/tests/test_hud_console.py +0 -62
- {hud_python-0.6.3 → hud_python-0.6.5}/.gitignore +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/LICENSE +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/README.md +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/cookbooks/a2a-chat/README.md +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/cookbooks/a2a-chat/pyproject.toml +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/cookbooks/codex-coding/README.md +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/cookbooks/codex-coding/pyproject.toml +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/cookbooks/fireworks-rl-training/README.md +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/cookbooks/fireworks-rl-training/pyproject.toml +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/cookbooks/rl-training/README.md +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/cookbooks/rl-training/pyproject.toml +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/__main__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/_legacy.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/base.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/browser_use/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/browser_use/agent.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/agent.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/sdk/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/sdk/agent.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/sdk/computer_mcp.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/base.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/coding.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/computer.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/hosted.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/mcp_proxy.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/settings.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/tests/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/claude/tools/tests/test_computer.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/agent.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/settings.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/base.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/coding.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/computer.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/filesystem.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/hosted.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/mcp_proxy.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/tests/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/gemini/tools/tests/test_computer.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/misc/response_automation.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/agent.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/apply_patch.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/base.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/coding.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/computer.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/hosted.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/mcp_proxy.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/strict_schema.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/tests/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/tests/test_computer.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai/tools/tests/test_strict_schema.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai_compatible/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai_compatible/tools/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai_compatible/tools/base.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai_compatible/tools/filesystem.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/openai_compatible/tools/mcp_proxy.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/robot/_types.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_apply_patch.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_claude_agent.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_claude_sdk_agent.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_gemini_agent.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_openai_agent.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_openai_compatible_agent.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_tool_agent.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tests/test_trace.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tool_agent.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tools/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tools/base.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tools/hosted.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tools/mcp.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tools/rfb.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/agents/tools/ssh.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/capabilities/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/capabilities/base.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/capabilities/cdp.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/capabilities/filetracking.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/capabilities/mcp.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/capabilities/rfb.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/capabilities/ssh.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/__main__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/cancel.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/client.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/deploy.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/login.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/serve.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/sync.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/task.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/templates.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/test_deploy.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/test_eval_bedrock.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/tests/test_sync_export.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/api.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/build_display.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/build_logs.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/config.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/context.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/display.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/jobs.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/source.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tests/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tests/test_build_display.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tests/test_config.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tests/test_context.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tests/test_registry.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tests/test_source.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tests/test_tasks.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/tests/test_version_check.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/cli/utils/version_check.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/clients/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/clients/tests/test_connect.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/conftest.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/env.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/file_tracker.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/file_tracking.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/legacy.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/robot/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/robot/bridge.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/robot/endpoint.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/robot/sim_runner.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/server.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/conftest.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/test_capability_backing.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/test_file_tracker.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/test_file_tracking.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/test_legacy.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/test_loader.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/test_manifest.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/test_server.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/tests/test_tunnel.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/utils.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/environment/workspace.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/chat.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/file_tracking.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/job.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/sync.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/task.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/taskset.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/test_chat.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/test_docker_provider.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/test_file_tracking_observer.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/test_job.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/test_sync.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/eval/tests/test_task.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/graders/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/graders/base.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/graders/bash.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/graders/combine.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/graders/judge.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/graders/results.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/graders/text.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/patches/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/patches/mcp_patches.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/patches/tests/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/patches/tests/test_warnings.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/patches/warnings.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/py.typed +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/server.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/context.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/exporter.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/filetracking.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/span.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/tests/test_exporter.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/tests/test_filetracking.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/telemetry/tests/test_instrument.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/exceptions.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/hints.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/modules.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/platform.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/requests.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/serialization.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/tests/test_exceptions.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/tests/test_hints.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/tests/test_platform.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/tests/test_requests.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/tests/test_serialization.py +0 -0
- {hud_python-0.6.3 → hud_python-0.6.5}/hud/utils/time.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hud-python
|
|
3
|
-
Version: 0.6.3
|
|
3
|
+
Version: 0.6.5
|
|
4
4
|
Summary: SDK for the HUD platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hud-evals/hud-python
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
|
|
@@ -70,6 +70,7 @@ Requires-Dist: ruff<0.15.0,>=0.11.8; extra == 'dev'
|
|
|
70
70
|
Provides-Extra: modal
|
|
71
71
|
Requires-Dist: modal>=1.0; extra == 'modal'
|
|
72
72
|
Provides-Extra: robot
|
|
73
|
+
Requires-Dist: av>=12; extra == 'robot'
|
|
73
74
|
Requires-Dist: numpy>=1.24; extra == 'robot'
|
|
74
75
|
Requires-Dist: openpi-client>=0.1.2; extra == 'robot'
|
|
75
76
|
Provides-Extra: train
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Connect Four self-play
|
|
2
|
+
|
|
3
|
+
Symmetric self-play RL on a 6×7 Connect Four board. Draws are rare (you need a
|
|
4
|
+
full 42-cell board with no four-in-a-row), so the win/loss reward signal
|
|
5
|
+
persists as the policy improves and the GRPO advantage stays non-zero.
|
|
6
|
+
|
|
7
|
+
## How it works
|
|
8
|
+
|
|
9
|
+
- One agent ("outer") plays a full game against an inner model on the **same
|
|
10
|
+
slug** — true self-play. `seed % 2` decides who drops first, for symmetric
|
|
11
|
+
first-move coverage.
|
|
12
|
+
- Each game trains **both sides at once**: the outer agent's `Run` (reward from
|
|
13
|
+
its perspective) plus a hand-built `TrajectoryPayload` for the inner model
|
|
14
|
+
with the flipped reward (`1 - outer_reward`).
|
|
15
|
+
- `group_size=2` pairs each game's two trajectories so the GRPO advantage is
|
|
16
|
+
`reward - 0.5` per game.
|
|
17
|
+
- `loss_fn="ppo"` clips the importance-sampling ratio, so a single lucky game
|
|
18
|
+
can't blow up the update.
|
|
19
|
+
|
|
20
|
+
The training loop uses the public API directly — `forward_backward` accepts
|
|
21
|
+
`Run` and `TrajectoryPayload` mixed, so no private helpers are needed.
|
|
22
|
+
|
|
23
|
+
## Setup
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
hud models fork Qwen/Qwen3.5-4B --name c4-selfplay # prints a slug like c4-selfplay-<id>
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Put your `HUD_API_KEY` in a `.env` here (or the environment).
|
|
30
|
+
|
|
31
|
+
## Run
|
|
32
|
+
|
|
33
|
+
Local sanity check (one game, cheap external model as the outer agent):
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
hud eval env.py claude --model claude-haiku-4-5
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Train:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
python train.py --model c4-selfplay-<id> --steps 20 --group 4 --lr 1e-5
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Tuning notes
|
|
46
|
+
|
|
47
|
+
- **Memory scales with `tasks × group`.** Each task×rollout is a fresh `env.py`
|
|
48
|
+
subprocess. With 8 tasks and `--group 4` that's 32 concurrent games. Connect
|
|
49
|
+
Four games can run up to 42 plies, so they cost more tokens and time per game —
|
|
50
|
+
start at `--group 4` and raise only if you have RAM headroom.
|
|
51
|
+
- **Watch the server-side metrics.** The loop prints local win/draw/loss counts
|
|
52
|
+
each step and the last few checkpoints' `mean_reward` / `reward_std` via
|
|
53
|
+
`trainer.checkpoints()` at the end. A healthy run keeps non-trivial
|
|
54
|
+
`reward_std` (within-group spread); if it collapses, the policy has saturated.
|
|
55
|
+
- **Reset on changes.** If you edit the reward or the board, roll the head back
|
|
56
|
+
to a clean checkpoint (`hud models head <slug> --set <id>`) or fork fresh —
|
|
57
|
+
don't keep training a policy shaped by the old objective.
|
|
@@ -8,7 +8,12 @@ from __future__ import annotations
|
|
|
8
8
|
from typing import TYPE_CHECKING, Any, cast
|
|
9
9
|
|
|
10
10
|
from hud.types import AgentType
|
|
11
|
-
from hud.utils.gateway import
|
|
11
|
+
from hud.utils.gateway import (
|
|
12
|
+
build_gateway_client,
|
|
13
|
+
gateway_model_aliases,
|
|
14
|
+
list_gateway_models,
|
|
15
|
+
normalize_gateway_model_id,
|
|
16
|
+
)
|
|
12
17
|
|
|
13
18
|
if TYPE_CHECKING:
|
|
14
19
|
from typing import TypeAlias
|
|
@@ -27,6 +32,8 @@ def create_agent(model: str, **kwargs: Any) -> GatewayAgent:
|
|
|
27
32
|
|
|
28
33
|
For direct API access with provider API keys, instantiate the agent classes directly.
|
|
29
34
|
"""
|
|
35
|
+
requested_model = model
|
|
36
|
+
model = normalize_gateway_model_id(model)
|
|
30
37
|
agent_type = next((candidate for candidate in AgentType if candidate.value == model), None)
|
|
31
38
|
if agent_type is not None:
|
|
32
39
|
model_id = model
|
|
@@ -73,7 +80,8 @@ def create_agent(model: str, **kwargs: Any) -> GatewayAgent:
|
|
|
73
80
|
for n in (gm.id, gm.name, gm.model_name)
|
|
74
81
|
if isinstance(n, str)
|
|
75
82
|
]
|
|
76
|
-
|
|
83
|
+
known.extend(gateway_model_aliases())
|
|
84
|
+
near = difflib.get_close_matches(requested_model, known, n=3, cutoff=0.5)
|
|
77
85
|
hint = (
|
|
78
86
|
f" Did you mean: {', '.join(near)}?"
|
|
79
87
|
if near
|
|
@@ -84,7 +92,7 @@ def create_agent(model: str, **kwargs: Any) -> GatewayAgent:
|
|
|
84
92
|
if gateway_models
|
|
85
93
|
else "the HUD gateway registry (empty — is HUD_API_KEY set?)"
|
|
86
94
|
)
|
|
87
|
-
raise ValueError(f"Model {
|
|
95
|
+
raise ValueError(f"Model {requested_model!r} not found in {source}.{hint}")
|
|
88
96
|
|
|
89
97
|
kwargs.setdefault("model", model_id)
|
|
90
98
|
kwargs.setdefault("model_client", build_gateway_client(provider_name))
|
|
@@ -193,16 +193,27 @@ class OpenAIChatAgent(ToolAgent[ChatCompletionMessageParam, OpenAIChatConfig]):
|
|
|
193
193
|
sample: Sample | None = None
|
|
194
194
|
if return_token_ids:
|
|
195
195
|
prompt_token_ids = getattr(choice, "prompt_token_ids", None)
|
|
196
|
+
# Multimodal prompt (text + image chunks): the only prompt representation
|
|
197
|
+
# that survives image inputs; flat prompt_token_ids is null in that case.
|
|
198
|
+
prompt_chunks = getattr(choice, "prompt_chunks", None)
|
|
196
199
|
token_ids = getattr(choice, "token_ids", None)
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
chat_state.continuation_message_count = len(messages)
|
|
200
|
+
has_prompt = prompt_token_ids is not None or prompt_chunks is not None
|
|
201
|
+
if token_ids is not None and has_prompt:
|
|
200
202
|
content_lp = choice.logprobs.content if choice.logprobs else None
|
|
201
203
|
sample = Sample(
|
|
202
|
-
prompt_token_ids=list(prompt_token_ids),
|
|
204
|
+
prompt_token_ids=list(prompt_token_ids) if prompt_token_ids is not None else [],
|
|
205
|
+
prompt_chunks=list(prompt_chunks) if prompt_chunks is not None else None,
|
|
203
206
|
output_token_ids=list(token_ids),
|
|
204
207
|
output_logprobs=[tok.logprob for tok in content_lp] if content_lp else [],
|
|
205
208
|
)
|
|
209
|
+
# KV-cache continuation only applies to flat text prompts; clear any
|
|
210
|
+
# stale state when the gateway returns chunks-only (multimodal turn).
|
|
211
|
+
if prompt_token_ids is not None:
|
|
212
|
+
chat_state.continuation_token_ids = list(prompt_token_ids) + list(token_ids)
|
|
213
|
+
chat_state.continuation_message_count = len(messages)
|
|
214
|
+
else:
|
|
215
|
+
chat_state.continuation_token_ids = None
|
|
216
|
+
chat_state.continuation_message_count = None
|
|
206
217
|
|
|
207
218
|
tool_calls: list[MCPToolCall] = []
|
|
208
219
|
for tc in function_calls:
|
|
@@ -10,6 +10,9 @@ The harness splits a policy rollout into three seams, each replaceable on its ow
|
|
|
10
10
|
- :class:`~hud.agents.robot.adapter.Adapter` — translate between the env's
|
|
11
11
|
observation/action spaces (from the contract) and the policy's.
|
|
12
12
|
|
|
13
|
+
Wrap an agent in :class:`~hud.agents.robot.batching.BatchedAgent` to run many rollouts
|
|
14
|
+
concurrently off one batched GPU forward (``max_concurrent`` rollouts, shared model).
|
|
15
|
+
|
|
13
16
|
Per-tick platform tracing is emitted by the loop itself: each step records an
|
|
14
17
|
:class:`~hud.agents.types.ObservationStep`, and each re-inference an
|
|
15
18
|
:class:`~hud.agents.types.InferenceStep`, so runs stream live into the HUD trace viewer.
|
|
@@ -20,16 +23,19 @@ This subpackage needs the ``robot`` extra (``pip install 'hud-python[robot]'``)
|
|
|
20
23
|
|
|
21
24
|
from __future__ import annotations
|
|
22
25
|
|
|
23
|
-
from .adapter import Adapter, LeRobotAdapter
|
|
26
|
+
from .adapter import Adapter, LeRobotAdapter, OpenPIAdapter
|
|
24
27
|
from .agent import ROBOT_PROTOCOL, RobotAgent
|
|
25
|
-
from .
|
|
28
|
+
from .batching import BatchedAgent, BatchedModel
|
|
29
|
+
from .model import LeRobotModel, Model
|
|
26
30
|
|
|
27
31
|
__all__ = [
|
|
28
32
|
"ROBOT_PROTOCOL",
|
|
29
33
|
"Adapter",
|
|
34
|
+
"BatchedAgent",
|
|
35
|
+
"BatchedModel",
|
|
30
36
|
"LeRobotAdapter",
|
|
31
37
|
"LeRobotModel",
|
|
32
38
|
"Model",
|
|
39
|
+
"OpenPIAdapter",
|
|
33
40
|
"RobotAgent",
|
|
34
|
-
"lerobot_infer",
|
|
35
41
|
]
|
|
@@ -89,7 +89,17 @@ class LeRobotAdapter(Adapter):
|
|
|
89
89
|
return action
|
|
90
90
|
|
|
91
91
|
|
|
92
|
+
class OpenPIAdapter(Adapter):
|
|
93
|
+
"""unwraps obs['data'] to OpenPI wire keys, attaches prompt; actions are passthrough"""
|
|
94
|
+
|
|
95
|
+
def adapt_observation(self, obs: dict[str, Any], prompt: str) -> dict[str, Any]:
|
|
96
|
+
out = dict(obs["data"])
|
|
97
|
+
out.setdefault("prompt", prompt)
|
|
98
|
+
return out
|
|
99
|
+
|
|
100
|
+
|
|
92
101
|
__all__ = [
|
|
93
102
|
"Adapter",
|
|
94
103
|
"LeRobotAdapter",
|
|
104
|
+
"OpenPIAdapter",
|
|
95
105
|
]
|
|
@@ -5,8 +5,8 @@ Subclass :class:`RobotAgent`, set ``self.model`` and ``self.adapter`` in
|
|
|
5
5
|
|
|
6
6
|
The base calls the adapter and model at the right moments::
|
|
7
7
|
|
|
8
|
-
setup_robot -> adapter.bind(spaces)
|
|
9
|
-
on_episode_start ->
|
|
8
|
+
setup_robot -> adapter.bind(spaces) # once after connect
|
|
9
|
+
on_episode_start -> adapter.reset() # per episode; model is stateless
|
|
10
10
|
select_action -> adapt_observation -> model.ainfer -> pop chunk -> adapt_action
|
|
11
11
|
|
|
12
12
|
``model.ainfer`` always returns a ``[T, A]`` chunk; :meth:`RobotAgent.select_action`
|
|
@@ -24,9 +24,10 @@ from typing import TYPE_CHECKING, Any, ClassVar
|
|
|
24
24
|
import numpy as np
|
|
25
25
|
|
|
26
26
|
from hud.agents.base import Agent
|
|
27
|
-
from hud.agents.types import InferenceStep, ObservationStep
|
|
28
27
|
from hud.capabilities.robot import RobotClient
|
|
29
28
|
|
|
29
|
+
from .record import Recorder
|
|
30
|
+
|
|
30
31
|
if TYPE_CHECKING:
|
|
31
32
|
from hud.eval.run import Run
|
|
32
33
|
|
|
@@ -57,6 +58,9 @@ class RobotAgent(Agent):
|
|
|
57
58
|
robot_protocol: ClassVar[str] = ROBOT_PROTOCOL
|
|
58
59
|
#: How often (in steps) to print a step-progress line. 0 = off.
|
|
59
60
|
log_every: ClassVar[int] = 20
|
|
61
|
+
#: Opt-in: also save a LeRobot v3 dataset of every (obs, action) pair to disk
|
|
62
|
+
#: (the ``--save`` flag). Telemetry streams regardless; see :mod:`.record`.
|
|
63
|
+
save: bool = False
|
|
60
64
|
|
|
61
65
|
#: Runs the policy (preprocess → forward → postprocess). Subclasses set this.
|
|
62
66
|
model: Model | None = None
|
|
@@ -70,9 +74,11 @@ class RobotAgent(Agent):
|
|
|
70
74
|
_env_obs_space: dict[str, Any]
|
|
71
75
|
#: Unexecuted tail of the current policy chunk; popped one action per step.
|
|
72
76
|
_active_chunk: deque[ActionArray]
|
|
73
|
-
#:
|
|
74
|
-
_run: Run
|
|
77
|
+
#: Control-tick index, incremented per executed action.
|
|
75
78
|
_tick: int
|
|
79
|
+
#: Records all telemetry (observation/inference steps + video) and, when ``save``, a
|
|
80
|
+
#: LeRobot dataset. Agent-lifetime (the dataset spans every episode); created lazily.
|
|
81
|
+
_recorder: Recorder | None = None
|
|
76
82
|
|
|
77
83
|
def setup_robot(self, client: RobotClient) -> None:
|
|
78
84
|
"""Discover the env's action/observation layout and bind the adapter to it."""
|
|
@@ -81,16 +87,19 @@ class RobotAgent(Agent):
|
|
|
81
87
|
self.adapter.bind(self._env_action_space, self._env_obs_space)
|
|
82
88
|
|
|
83
89
|
def on_episode_start(self, run: Run, client: RobotClient, *, prompt: str) -> None:
|
|
84
|
-
"""Store the prompt and reset
|
|
90
|
+
"""Store the prompt and reset per-episode state before the act loop.
|
|
85
91
|
|
|
86
|
-
|
|
92
|
+
The model is stateless (per-episode state lives here, not on the shared model), so
|
|
93
|
+
only the adapter is reset. Override (calling ``super()`` first) for extra setup.
|
|
87
94
|
"""
|
|
88
95
|
self._prompt = prompt
|
|
89
96
|
self._active_chunk = deque()
|
|
90
|
-
self._run = run
|
|
91
97
|
self._tick = 0
|
|
92
|
-
|
|
93
|
-
|
|
98
|
+
# One recorder for the agent's life so its LeRobot dataset spans every episode;
|
|
99
|
+
# begin() opens this episode (fresh video stream, prompt) and takes the run it records onto.
|
|
100
|
+
if self._recorder is None:
|
|
101
|
+
self._recorder = Recorder(client, save=self.save)
|
|
102
|
+
self._recorder.begin(run, prompt)
|
|
94
103
|
if self.adapter is not None:
|
|
95
104
|
self.adapter.reset()
|
|
96
105
|
|
|
@@ -110,9 +119,8 @@ class RobotAgent(Agent):
|
|
|
110
119
|
)
|
|
111
120
|
chunk = np.atleast_2d(await self.model.ainfer(batch)) # [T, A]
|
|
112
121
|
self._active_chunk = deque(chunk)
|
|
113
|
-
self.
|
|
114
|
-
|
|
115
|
-
)
|
|
122
|
+
assert self._recorder is not None # set in on_episode_start
|
|
123
|
+
self._recorder.record_inference(chunk, tick=self._tick)
|
|
116
124
|
self._tick += 1
|
|
117
125
|
raw = self._active_chunk.popleft()
|
|
118
126
|
return raw if self.adapter is None else self.adapter.adapt_action(raw, obs)
|
|
@@ -131,15 +139,17 @@ class RobotAgent(Agent):
|
|
|
131
139
|
self.on_episode_start(run, client, prompt=prompt)
|
|
132
140
|
print(f"[agent] episode started: {prompt!r} (max_steps={step_limit})", flush=True)
|
|
133
141
|
|
|
142
|
+
assert self._recorder is not None # set in on_episode_start above
|
|
134
143
|
for step in range(step_limit):
|
|
135
144
|
obs = await client.get_observation()
|
|
136
|
-
|
|
145
|
+
self._recorder.record_observation(obs, tick=step)
|
|
137
146
|
|
|
138
147
|
if self.should_stop(obs, step=step, max_steps=step_limit):
|
|
139
148
|
print(f"[agent] env reported terminated at step {step}", flush=True)
|
|
140
149
|
break
|
|
141
150
|
|
|
142
151
|
action = await self.select_action(obs)
|
|
152
|
+
self._recorder.record_action(action)
|
|
143
153
|
await client.send_action(action)
|
|
144
154
|
|
|
145
155
|
if self.log_every and step % self.log_every == 0:
|
|
@@ -151,6 +161,8 @@ class RobotAgent(Agent):
|
|
|
151
161
|
run.trace.status = "completed"
|
|
152
162
|
run.trace.content = "done"
|
|
153
163
|
finally:
|
|
164
|
+
if self._recorder is not None:
|
|
165
|
+
self._recorder.end() # flush video tails + commit the LeRobot episode
|
|
154
166
|
await client.close()
|
|
155
167
|
|
|
156
168
|
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""Batched inference for concurrent robot rollouts.
|
|
2
|
+
|
|
3
|
+
- BatchedModel: stacks concurrent ainfer calls into one infer
|
|
4
|
+
- BatchedAgent: gives each rollout its own state, shares one batched model
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import copy
|
|
11
|
+
import importlib
|
|
12
|
+
from typing import TYPE_CHECKING, Any
|
|
13
|
+
|
|
14
|
+
from hud.agents.base import Agent
|
|
15
|
+
|
|
16
|
+
from .model import Model
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from hud.eval.run import Run
|
|
20
|
+
|
|
21
|
+
from ._types import ActionArray
|
|
22
|
+
from .agent import RobotAgent
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class BatchedModel(Model):
    """Coalesce concurrent ``ainfer`` calls into one stacked ``inner.infer``.

    A lazily-started worker drains up to ``batch_size`` queued calls (or waits up to
    ``max_wait_s`` for stragglers — which avoids stalling when fewer rollouts are live,
    e.g. the tail of a suite), stacks them into one ``[N, ...]`` batch, runs a single
    forward, and scatters the ``[N, T, A]`` rows back to each caller.

    The worker and its queue belong to the event loop that was running when they were
    created. If that worker has already finished (e.g. it was cancelled when its loop shut
    down) or a different loop is running now, the next ``ainfer`` starts a fresh worker
    instead of queueing work that nothing would ever drain.

    ``inner`` must be an in-process, stateless model whose :meth:`~Model.infer` runs the
    whole ``[N, ...]`` batch in one forward (e.g. :class:`~hud.agents.robot.model.LeRobotModel`).
    :class:`~hud.agents.robot.model.RemoteModel` is **not** supported: it does one WebSocket
    request per env and the OpenPI server protocol has no batched-request shape, so a stacked
    batch would be mis-sent as a single env. Run one agent per rollout against it instead.
    """

    def __init__(self, inner: Model, *, batch_size: int, max_wait_s: float = 0.05) -> None:
        """Wrap ``inner``.

        Args:
            inner: The model whose ``infer`` receives the stacked batch.
            batch_size: Maximum number of queued calls coalesced into one forward.
            max_wait_s: How long to wait for further calls after the first one arrived.
        """
        self.inner = inner
        self.batch_size = int(batch_size)
        self.max_wait_s = float(max_wait_s)
        # Bound to the running loop on first ainfer (the harness owns the loop).
        self._queue: asyncio.Queue[tuple[Any, asyncio.Future[ActionArray]]] | None = None
        self._worker: asyncio.Task[None] | None = None

    def infer(self, batch: Any) -> ActionArray:
        """Synchronous pass-through to ``inner.infer`` (no coalescing)."""
        return self.inner.infer(batch)

    async def ainfer(self, batch: Any) -> ActionArray:
        """Queue one sample for the batch worker and return its row of the batched result.

        Raises whatever exception the batched forward raised for the batch this sample
        ended up in.
        """
        loop = asyncio.get_running_loop()
        worker = self._worker
        if worker is not None and worker.get_loop() is not loop:
            # Left over from another event loop: neither the task nor its queue can
            # serve callers on this loop, so start from scratch.
            worker = None
            self._queue = None
        if self._queue is None:
            self._queue = asyncio.Queue()
        if worker is None or worker.done():
            # A finished worker never drains the queue again; without a restart every
            # later caller would wait on its future forever.
            self._worker = loop.create_task(self._batch_loop(self._queue))
        fut: asyncio.Future[ActionArray] = loop.create_future()
        await self._queue.put((batch, fut))
        return await fut

    async def _batch_loop(
        self, queue: asyncio.Queue[tuple[Any, asyncio.Future[ActionArray]]] | None = None
    ) -> None:
        """Worker: collect queued samples, run one stacked forward, resolve the futures.

        ``queue`` is the queue this worker drains; it defaults to ``self._queue`` so the
        method can still be called without arguments. Holding it locally keeps a worker on
        its own queue even if ``ainfer`` later installs a new one.
        """
        if queue is None:
            queue = self._queue
        assert queue is not None
        loop = asyncio.get_running_loop()
        while True:
            items = [await queue.get()]  # block for the first caller
            deadline = loop.time() + self.max_wait_s
            while len(items) < self.batch_size:
                timeout = deadline - loop.time()
                if timeout <= 0:
                    break
                try:
                    items.append(await asyncio.wait_for(queue.get(), timeout))
                except TimeoutError:
                    break
            samples = [b for b, _ in items]
            try:
                torch: Any = importlib.import_module("torch")

                # Collate N raw observations into one [N, ...] batch: stack tensor
                # fields on a new leading dim, gather scalars/strings into a list.
                stacked: dict[str, Any] = {
                    k: torch.stack([s[k] for s in samples])
                    if torch.is_tensor(samples[0][k])
                    else [s[k] for s in samples]
                    for k in samples[0]
                }
                arr = await asyncio.to_thread(self.inner.infer, stacked)  # [N, T, A]
                for (_, fut), chunk in zip(items, arr, strict=True):
                    if not fut.done():  # the caller may have been cancelled meanwhile
                        fut.set_result(chunk)
            except Exception as exc:  # isolate: a bad batch fails only its own callers
                for _, fut in items:
                    if not fut.done():
                        fut.set_exception(exc)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class BatchedAgent(Agent):
    """Run many rollouts at once through a single shared :class:`BatchedModel`.

    Each call works on a shallow copy of the wrapped agent, so every rollout has its own
    attribute dict (and its own shallow copy of the adapter) while all copies point at the
    same :class:`BatchedModel`; their concurrent ``ainfer`` calls are therefore coalesced
    into one forward. This relies on the agent assigning per-run state in
    ``on_episode_start`` rather than in ``__init__`` so the copies stay isolated, and on
    the model being stateless (no per-episode ``reset``) since it is shared across copies.

    Requires an in-process batchable model; :class:`~hud.agents.robot.model.RemoteModel`
    is not supported (the OpenPI server protocol has no batched-request shape).

    Takes ownership of ``agent``: ``agent.model`` is replaced in place by a
    :class:`BatchedModel` wrapper, so the passed-in instance is permanently batched. Give
    :class:`BatchedAgent` a dedicated agent and do not also use that instance for direct,
    unbatched :class:`RobotAgent` rollouts.
    """

    def __init__(self, agent: RobotAgent, *, batch_size: int, max_wait_s: float = 0.05) -> None:
        """Wrap ``agent.model`` in a :class:`BatchedModel` and keep ``agent`` as the template.

        Raises:
            RuntimeError: If ``agent.model`` is ``None``.
        """
        inner = agent.model
        if inner is None:
            raise RuntimeError("BatchedAgent needs agent.model set")
        # Wrapped once and in place, so every per-run copy of the template sees the same
        # batcher by reference.
        agent.model = BatchedModel(inner, batch_size=batch_size, max_wait_s=max_wait_s)
        self._template = agent

    async def __call__(self, run: Run, **kwargs: Any) -> None:
        """Drive ``run`` with a fresh shallow copy of the template agent."""
        clone = copy.copy(self._template)  # new __dict__, same batched model object
        adapter = clone.adapter
        if adapter is not None:
            # Defensive: a stateful custom adapter must not be shared between runs.
            clone.adapter = copy.copy(adapter)
        await clone(run, **kwargs)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
__all__ = ["BatchedAgent", "BatchedModel"]
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""The ``Model``: wraps a policy and owns its inference mechanics.
|
|
2
|
+
|
|
3
|
+
A ``Model`` knows *how to run* a policy (preprocess → forward → postprocess); the
|
|
4
|
+
harness only awaits ``model.ainfer(batch)``. Use :class:`LeRobotModel` for stock
|
|
5
|
+
LeRobot checkpoints; subclass :class:`Model` and implement ``infer`` otherwise.
|
|
6
|
+
|
|
7
|
+
:meth:`Model.infer` is batch-shaped (one batch dict in, an ``[N, T, A]`` chunk out) and
|
|
8
|
+
stateless across calls, so one model can be shared and batched across concurrent rollouts
|
|
9
|
+
(see :mod:`hud.agents.robot.batching`); per-episode state belongs on the agent.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import asyncio
|
|
15
|
+
import importlib
|
|
16
|
+
from typing import TYPE_CHECKING, Any
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from ._types import ActionArray
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Model:
    """Base class for something that owns a policy and knows how to run it.

    Stateless by contract: all per-episode state (the open-loop chunk) lives on the agent,
    which is what allows one model to be shared and batched across concurrent rollouts.
    There is deliberately no ``reset`` hook — anything that resets per episode belongs on
    the agent. :class:`~hud.agents.robot.agent.RobotAgent` drives a model by awaiting
    :meth:`ainfer`.
    """

    def infer(self, batch: Any) -> ActionArray:
        """Run the policy on an ``[N, ...]`` batch and return an ``[N, T, A]`` chunk.

        Subclasses must override this and MUST keep the leading batch dim ``N``, even when
        ``N == 1``: :meth:`ainfer` takes element ``[0]`` and
        :class:`~hud.agents.robot.batching.BatchedModel` hands out one row per caller, so a
        squeezed ``[T, A]`` result silently breaks both.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    async def ainfer(self, batch: Any) -> ActionArray:
        """Single-rollout entry point: run :meth:`infer` in a worker thread.

        Returns element ``[0]`` of the result, i.e. the single ``[T, A]`` row, which assumes
        :meth:`infer` honors the ``[N, T, A]`` contract.
        """
        chunks = await asyncio.to_thread(self.infer, batch)
        return chunks[0]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class LeRobotModel(Model):
    """LeRobot policy with pre/post-processors: ``preprocess`` → ``predict_action_chunk`` →
    ``postprocess``. ``preprocess`` adds the batch dim for an unbatched sample and is a no-op
    for an already-stacked one, so :meth:`infer` handles both single and batched inputs.

    Stateless: ``predict_action_chunk`` is a pure forward and the agent owns the open-loop
    chunk, so LeRobot's internal action queue is never consumed here — hence no ``reset``.
    """

    def __init__(self, policy: Any, preprocess: Any, postprocess: Any) -> None:
        """Store the policy and the two processors applied around its forward pass."""
        self.policy = policy
        self.preprocess = preprocess
        self.postprocess = postprocess
        #: Flipped to False after the first forward; used to print the one-time
        #: CUDA/flow-matching warmup message.
        self._first_inference = True

    def infer(self, batch: Any) -> ActionArray:
        """Run a batch dict (with the N dim) through the policy → ``[N, T, A]`` chunk.

        The forward runs under ``torch.no_grad()`` and the result is converted to a float
        NumPy array on the CPU.

        Raises:
            AssertionError: If the resulting array is not 3-dimensional.
        """
        torch: Any = importlib.import_module("torch")
        if self._first_inference:
            print(
                "[agent] first inference — flow-matching/CUDA warmup; this may take a while",
                flush=True,
            )
        with torch.no_grad():
            chunk = self.postprocess(self.policy.predict_action_chunk(self.preprocess(batch)))
        if self._first_inference:
            print("[agent] first inference done — inference is now fast", flush=True)
            self._first_inference = False
        arr = chunk.float().cpu().numpy()
        # Raised explicitly rather than via ``assert`` so the shape check still runs under
        # ``python -O``; the exception type and message are unchanged.
        if arr.ndim != 3:  # LeRobot keeps the N dim
            raise AssertionError(f"expected [N, T, A] chunk, got {arr.shape}")
        return arr
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class RemoteModel(Model):
    """Weightless client for an OpenPI-WebSocket policy server.

    It sends the adapter's request dict and returns the chunk the server answers with; all
    pre/post-processing lives in the adapter and on the server.

    Not batchable: every :meth:`infer` is one WebSocket request for one env and always adds
    a single leading batch dim, and the OpenPI server protocol currently has no
    batched-request shape. Do not wrap in :class:`~hud.agents.robot.batching.BatchedModel` —
    use one :class:`~hud.agents.robot.agent.RobotAgent` per concurrent rollout instead.
    """

    def __init__(
        self, host: str = "localhost", port: int = 8000, *, response_key: str = "actions"
    ) -> None:
        """Remember where the server lives; the connection itself is opened lazily."""
        self.host = host
        self.port = port
        #: Server chunk key — "actions" (stock OpenPI) or "action" (Cosmos).
        self.response_key = response_key
        self._client: Any = None

    def connect(self) -> None:
        """Open the websocket (idempotent); blocks until the server is up."""
        if self._client is not None:
            return  # already connected
        policy_module: Any = importlib.import_module("openpi_client.websocket_client_policy")

        print(
            f"[agent] connecting to openpi server ws://{self.host}:{self.port} — on hold...",
            flush=True,
        )
        self._client = policy_module.WebsocketClientPolicy(self.host, self.port)

    def infer(self, batch: Any) -> ActionArray:
        """Ship one request dict → the server's ``[T, A]`` chunk, returned as ``[1, T, A]``."""
        # Lazy connect on the first call (blocks until the server is up).
        self.connect()
        response = self._client.infer(batch)
        chunk = np.asarray(response[self.response_key], dtype=np.float32)
        # Prepend the N=1 batch dim required of every infer() result.
        return chunk[np.newaxis]
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
__all__ = [
|
|
124
|
+
"LeRobotModel",
|
|
125
|
+
"Model",
|
|
126
|
+
"RemoteModel",
|
|
127
|
+
]
|