hud-python 0.6.5__tar.gz → 0.6.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hud_python-0.6.5 → hud_python-0.6.7}/PKG-INFO +1 -1
- hud_python-0.6.7/cookbooks/fireworks-rl-training/README.md +129 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/deploy.py +41 -1
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/presets.py +16 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/tests/test_deploy.py +86 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/runtime.py +4 -1
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/sync.py +2 -2
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/tests/test_hosted.py +19 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/tests/test_sync.py +12 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/version.py +1 -1
- {hud_python-0.6.5 → hud_python-0.6.7}/pyproject.toml +1 -1
- hud_python-0.6.5/cookbooks/fireworks-rl-training/README.md +0 -114
- {hud_python-0.6.5 → hud_python-0.6.7}/.gitignore +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/LICENSE +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/README.md +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/cookbooks/a2a-chat/README.md +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/cookbooks/a2a-chat/pyproject.toml +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/cookbooks/codex-coding/README.md +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/cookbooks/codex-coding/pyproject.toml +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/cookbooks/connect4-selfplay/README.md +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/cookbooks/fireworks-rl-training/pyproject.toml +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/cookbooks/rl-training/README.md +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/cookbooks/rl-training/pyproject.toml +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/__main__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/_legacy.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/base.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/browser_use/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/browser_use/agent.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/claude/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/claude/agent.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/claude/sdk/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/claude/sdk/agent.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/claude/sdk/computer_mcp.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/claude/tools/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/claude/tools/base.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/claude/tools/coding.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/claude/tools/computer.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/claude/tools/hosted.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/claude/tools/mcp_proxy.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/claude/tools/settings.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/claude/tools/tests/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/claude/tools/tests/test_computer.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/gemini/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/gemini/agent.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/gemini/settings.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/gemini/tools/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/gemini/tools/base.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/gemini/tools/coding.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/gemini/tools/computer.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/gemini/tools/filesystem.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/gemini/tools/hosted.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/gemini/tools/mcp_proxy.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/gemini/tools/tests/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/gemini/tools/tests/test_computer.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/misc/response_automation.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai/agent.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai/tools/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai/tools/apply_patch.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai/tools/base.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai/tools/coding.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai/tools/computer.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai/tools/hosted.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai/tools/mcp_proxy.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai/tools/strict_schema.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai/tools/tests/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai/tools/tests/test_computer.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai/tools/tests/test_strict_schema.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai_compatible/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai_compatible/agent.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai_compatible/tools/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai_compatible/tools/base.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai_compatible/tools/filesystem.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/openai_compatible/tools/mcp_proxy.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/robot/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/robot/_types.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/robot/adapter.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/robot/agent.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/robot/batching.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/robot/model.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/robot/record.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/robot/video.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tests/test_apply_patch.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tests/test_claude_agent.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tests/test_claude_sdk_agent.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tests/test_gemini_agent.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tests/test_openai_agent.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tests/test_openai_compatible_agent.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tests/test_provider_native_tools.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tests/test_tool_agent.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tests/test_trace.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tool_agent.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tools/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tools/base.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tools/hosted.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tools/mcp.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tools/rfb.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/tools/ssh.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/agents/types.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/capabilities/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/capabilities/base.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/capabilities/cdp.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/capabilities/filetracking.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/capabilities/mcp.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/capabilities/rfb.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/capabilities/robot.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/capabilities/ssh.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/__main__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/cancel.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/client.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/eval.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/init.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/jobs.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/login.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/models.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/serve.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/sync.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/task.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/templates.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/tests/test_eval_bedrock.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/tests/test_eval_config.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/tests/test_init.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/tests/test_sync_export.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/trace.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/api.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/build_display.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/build_logs.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/config.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/context.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/display.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/jobs.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/source.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/tests/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/tests/test_build_display.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/tests/test_config.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/tests/test_context.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/tests/test_registry.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/tests/test_source.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/tests/test_tasks.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/tests/test_version_check.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/cli/utils/version_check.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/clients/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/clients/client.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/clients/tests/test_connect.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/conftest.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/env.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/file_tracker.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/file_tracking.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/legacy.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/robot/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/robot/bridge.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/robot/endpoint.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/robot/sim_runner.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/server.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/tests/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/tests/conftest.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/tests/test_capability_backing.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/tests/test_file_tracker.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/tests/test_file_tracking.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/tests/test_legacy.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/tests/test_loader.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/tests/test_manifest.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/tests/test_server.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/tests/test_tunnel.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/utils.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/environment/workspace.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/chat.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/file_tracking.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/job.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/run.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/task.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/taskset.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/tests/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/tests/test_chat.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/tests/test_docker_provider.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/tests/test_file_tracking_observer.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/tests/test_job.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/tests/test_rollout.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/eval/tests/test_task.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/graders/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/graders/base.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/graders/bash.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/graders/combine.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/graders/judge.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/graders/results.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/graders/text.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/patches/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/patches/mcp_patches.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/patches/tests/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/patches/tests/test_warnings.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/patches/warnings.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/py.typed +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/server.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/settings.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/telemetry/context.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/telemetry/exporter.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/telemetry/filetracking.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/telemetry/span.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/telemetry/tests/test_exporter.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/telemetry/tests/test_filetracking.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/telemetry/tests/test_instrument.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/train/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/train/base.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/train/client.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/train/types.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/types.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/utils/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/utils/exceptions.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/utils/gateway.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/utils/hints.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/utils/hud_console.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/utils/modules.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/utils/platform.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/utils/requests.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/utils/serialization.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/utils/tests/test_exceptions.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/utils/tests/test_hints.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/utils/tests/test_hud_console.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/utils/tests/test_platform.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/utils/tests/test_requests.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/utils/tests/test_serialization.py +0 -0
- {hud_python-0.6.5 → hud_python-0.6.7}/hud/utils/time.py +0 -0
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# Fireworks RL Training
|
|
2
|
+
|
|
3
|
+
Direct Fireworks Training API loop over the same arithmetic preview task used by
|
|
4
|
+
`cookbooks/rl-training`.
|
|
5
|
+
|
|
6
|
+
This does **not** use Fireworks native datasets or RFT jobs. It follows the
|
|
7
|
+
Training API service path from the Fireworks docs:
|
|
8
|
+
|
|
9
|
+
1. `FiretitanServiceClient.from_firetitan_config(...)`
|
|
10
|
+
2. `create_deployment_sampler(...)` for high-parallel rollouts
|
|
11
|
+
3. local grading of HUD-style multiplication tasks
|
|
12
|
+
4. `forward_backward_custom(...)` + `optim_step(...)`
|
|
13
|
+
5. `save_weights_for_sampler(...)` + sampler refresh
|
|
14
|
+
|
|
15
|
+
References:
|
|
16
|
+
|
|
17
|
+
- Fireworks Training API introduction: https://docs.fireworks.ai/fine-tuning/training-api/introduction
|
|
18
|
+
- Training and sampling lifecycle: https://docs.fireworks.ai/fine-tuning/training-api/training-and-sampling
|
|
19
|
+
- Loss functions / GRPO reference: https://docs.fireworks.ai/fine-tuning/training-api/loss-functions
|
|
20
|
+
|
|
21
|
+
## Setup
|
|
22
|
+
|
|
23
|
+
The repo-level `.env` is loaded automatically. It must contain:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
FIREWORKS_API_KEY=...
|
|
27
|
+
FIREWORKS_ACCOUNT_ID=...
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Install the isolated cookbook environment:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
uv sync --pre
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Calibrate task difficulty first
|
|
37
|
+
|
|
38
|
+
What matters for GRPO is **within-group** reward spread: advantages are computed
|
|
39
|
+
within each prompt group, so a group whose rollouts all score the same (all 0 or
|
|
40
|
+
all 1) produces zero advantage and no gradient — even if the *overall* mean looks
|
|
41
|
+
healthy. Calibration reports `within_group_reward_std` for exactly this; treat
|
|
42
|
+
it, not `reward_mean`, as the signal that training has something to learn.
|
|
43
|
+
|
|
44
|
+
Two backends:
|
|
45
|
+
|
|
46
|
+
- `--calibration-backend inference` (default): Fireworks' OpenAI-compatible API.
|
|
47
|
+
Cheap, but samples `gpt-oss-120b` (`--inference-model`), not the training base —
|
|
48
|
+
the small serverless catalog on the `lorenss` key has no Qwen3 8B. Use it only
|
|
49
|
+
for a rough task sanity check.
|
|
50
|
+
- `--calibration-backend managed`: provisions the same deployment sampler that
|
|
51
|
+
training uses and samples the **actual base model** (Qwen3 8B). This is the
|
|
52
|
+
calibration that counts. It still skips the trainer and `optim_step`.
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
uv run train.py --calibrate-only --calibration-backend managed \
|
|
56
|
+
--groups-per-step 6 --rollouts-per-prompt 6 --parallelism 18 --debug-samples 4
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
`--debug-samples N` prints the first N rollouts (reward, output-token count,
|
|
60
|
+
text) so you can see *why* a group scored the way it did. Tune the multiplication
|
|
61
|
+
range until `within_group_reward_std` is clearly above zero:
|
|
62
|
+
|
|
63
|
+
- Groups all-correct (`within_group_reward_std ~= 0`) → make it harder
|
|
64
|
+
(`--min-a/--max-a/--min-b/--max-b`).
|
|
65
|
+
- Groups all-wrong → make it easier, or raise `--max-tokens` so the model can
|
|
66
|
+
finish its working before the budget cuts it off.
|
|
67
|
+
|
|
68
|
+
The shipped defaults (3-digit × 3-digit, `--max-tokens 512`, thinking disabled)
|
|
69
|
+
calibrate to `reward_mean ~= 0.47`, `within_group_reward_std ~= 0.20` on Qwen3 8B:
|
|
70
|
+
a regime where the same problem is sometimes solved (when the model shows its
|
|
71
|
+
work) and sometimes missed (when it answers directly) — so RL has a gradient to
|
|
72
|
+
follow.
|
|
73
|
+
|
|
74
|
+
### Reasoning models and the token budget
|
|
75
|
+
|
|
76
|
+
Qwen3 is a hybrid reasoning model: by default it opens a `<think>` block and, on
|
|
77
|
+
a tight `--max-tokens`, spends the whole budget reasoning and never emits the
|
|
78
|
+
answer (reward collapses to zero). This cookbook disables thinking by default
|
|
79
|
+
through the chat template so direct rollouts reach the integer. Pass
|
|
80
|
+
`--enable-thinking` to keep the reasoning block — and raise `--max-tokens`
|
|
81
|
+
accordingly so the answer still fits.
|
|
82
|
+
|
|
83
|
+
## Train
|
|
84
|
+
|
|
85
|
+
Once calibration shows a non-trivial `within_group_reward_std`:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
uv run train.py --steps 5 --groups-per-step 8 --rollouts-per-prompt 8 --parallelism 32
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
This uses the direct Training API managed service path. If you want calibration
|
|
92
|
+
to go through the managed deployment sampler too, pass
|
|
93
|
+
`--calibration-backend managed`; this provisions the same resources as training.
|
|
94
|
+
|
|
95
|
+
### Preview account constraints
|
|
96
|
+
|
|
97
|
+
On the `lorenss` preview account today:
|
|
98
|
+
|
|
99
|
+
- **Trainer creation works** end to end with a provisioned key: rollouts,
|
|
100
|
+
`forward_backward_custom`, `optim_step`, checkpoint save, and sampler hotload
|
|
101
|
+
all run, and multi-step training completes. (An earlier `unkey inference api id
|
|
102
|
+
is not configured` 500 on trainer creation was an account-side provisioning gap,
|
|
103
|
+
now resolved.)
|
|
104
|
+
- **LoRA is unavailable**: the validated `qwen3-8b-128k` shape only accepts
|
|
105
|
+
full-parameter training, so `--lora-rank > 0` fails at trainer creation with
|
|
106
|
+
`no validated training shape exists for ... trainer_mode=LORA_TRAINER`.
|
|
107
|
+
- **Hotloads sync full 8B weights** between steps and occasionally exceed the
|
|
108
|
+
SDK's 600s hotload budget (`RuntimeError: Hotload failed for sampler snapshot
|
|
109
|
+
...`). This is transient preview-infra latency, not a loop bug — re-running the
|
|
110
|
+
same command generally succeeds. There is no clean knob to extend the timeout
|
|
111
|
+
on the managed sampler path.
|
|
112
|
+
|
|
113
|
+
Metrics are written to:
|
|
114
|
+
|
|
115
|
+
- `runs/fireworks-rl-preview/metrics.jsonl`
|
|
116
|
+
- `runs/fireworks-rl-preview/reward_loss.png` if `matplotlib` is installed
|
|
117
|
+
|
|
118
|
+
## Notes
|
|
119
|
+
|
|
120
|
+
- Defaults use Qwen3 8B full-parameter training:
|
|
121
|
+
- `accounts/fireworks/models/qwen3-8b`
|
|
122
|
+
- `Qwen/Qwen3-8B`
|
|
123
|
+
- `accounts/fireworks/trainingShapes/qwen3-8b-128k`
|
|
124
|
+
- LoRA can be tested with `--lora-rank N`, but the validated Qwen3 8B training
|
|
125
|
+
shape currently rejects LoRA mode on the `lorenss` preview account.
|
|
126
|
+
- The first checkpoint sync happens after step 0 and subsequent rollouts sample
|
|
127
|
+
the updated weights through the same deployment.
|
|
128
|
+
- `--keep-trainer` and `--keep-deployment` are available for debugging. By
|
|
129
|
+
default the trainer is cleaned up and the deployment scales to zero on exit.
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import asyncio
|
|
6
|
+
import json
|
|
6
7
|
import logging
|
|
7
8
|
import os
|
|
8
9
|
import time
|
|
@@ -12,6 +13,7 @@ from typing import Any
|
|
|
12
13
|
|
|
13
14
|
import httpx
|
|
14
15
|
import typer
|
|
16
|
+
from pydantic import ValidationError
|
|
15
17
|
|
|
16
18
|
from hud.cli.utils.build_display import display_build_summary
|
|
17
19
|
from hud.cli.utils.build_logs import poll_build_status, stream_build_logs
|
|
@@ -19,6 +21,7 @@ from hud.cli.utils.config import parse_env_file, parse_key_value
|
|
|
19
21
|
from hud.cli.utils.context import create_build_context_tarball, format_size
|
|
20
22
|
from hud.cli.utils.registry import get_registry_environment
|
|
21
23
|
from hud.cli.utils.source import EnvironmentSource, normalize_environment_name
|
|
24
|
+
from hud.eval.runtime import RuntimeConfig
|
|
22
25
|
from hud.utils.exceptions import HudRequestError
|
|
23
26
|
from hud.utils.hud_console import HUDConsole
|
|
24
27
|
from hud.utils.platform import PlatformClient
|
|
@@ -32,6 +35,7 @@ class _DeployPlan:
|
|
|
32
35
|
name: str
|
|
33
36
|
registry_id: str | None
|
|
34
37
|
runtime: str | None
|
|
38
|
+
runtime_config: dict[str, Any] | None
|
|
35
39
|
env_vars: dict[str, str]
|
|
36
40
|
build_args: dict[str, str]
|
|
37
41
|
build_secrets: dict[str, str]
|
|
@@ -75,6 +79,26 @@ def _normalize_runtime(runtime: str | None, console: HUDConsole) -> str | None:
|
|
|
75
79
|
raise typer.Exit(1)
|
|
76
80
|
|
|
77
81
|
|
|
82
|
+
def _load_runtime_config(path: str | None, console: HUDConsole) -> dict[str, Any] | None:
|
|
83
|
+
if path is None:
|
|
84
|
+
return None
|
|
85
|
+
config_path = Path(path).expanduser()
|
|
86
|
+
try:
|
|
87
|
+
raw = json.loads(config_path.read_text(encoding="utf-8"))
|
|
88
|
+
config = RuntimeConfig.model_validate(raw)
|
|
89
|
+
except FileNotFoundError:
|
|
90
|
+
console.error(f"Runtime config file not found: {config_path}")
|
|
91
|
+
raise typer.Exit(1) from None
|
|
92
|
+
except json.JSONDecodeError as exc:
|
|
93
|
+
console.error(f"Invalid runtime config JSON in {config_path}: {exc.msg}")
|
|
94
|
+
raise typer.Exit(1) from exc
|
|
95
|
+
except ValidationError as exc:
|
|
96
|
+
console.error(f"Invalid runtime config in {config_path}: {exc}")
|
|
97
|
+
raise typer.Exit(1) from exc
|
|
98
|
+
payload = config.request_payload()
|
|
99
|
+
return payload or None
|
|
100
|
+
|
|
101
|
+
|
|
78
102
|
def _load_env_vars(path: Path, console: HUDConsole, *, warn_missing: bool) -> dict[str, str]:
|
|
79
103
|
if not path.exists():
|
|
80
104
|
if warn_missing:
|
|
@@ -322,6 +346,7 @@ def _prepare_deploy_plan(
|
|
|
322
346
|
build_args: list[str] | None,
|
|
323
347
|
build_secrets: list[str] | None,
|
|
324
348
|
runtime: str | None,
|
|
349
|
+
runtime_config: str | None,
|
|
325
350
|
verbose: bool,
|
|
326
351
|
platform: PlatformClient,
|
|
327
352
|
console: HUDConsole,
|
|
@@ -357,11 +382,13 @@ def _prepare_deploy_plan(
|
|
|
357
382
|
build_args_dict = _parse_key_value_flags(build_args, option="--build-arg", console=console)
|
|
358
383
|
if build_args_dict and verbose:
|
|
359
384
|
console.info(f"Build arguments: {', '.join(build_args_dict.keys())}")
|
|
385
|
+
normalized_runtime = _normalize_runtime(runtime, console)
|
|
360
386
|
|
|
361
387
|
return _DeployPlan(
|
|
362
388
|
name=resolved_name,
|
|
363
389
|
registry_id=registry_id,
|
|
364
|
-
runtime=
|
|
390
|
+
runtime=normalized_runtime,
|
|
391
|
+
runtime_config=_load_runtime_config(runtime_config, console),
|
|
365
392
|
env_vars=env_vars,
|
|
366
393
|
build_args=build_args_dict,
|
|
367
394
|
build_secrets=_collect_build_secrets(build_secrets, env_dir=env_dir, console=console),
|
|
@@ -379,6 +406,7 @@ def deploy_environment(
|
|
|
379
406
|
build_args: list[str] | None = None,
|
|
380
407
|
build_secrets: list[str] | None = None,
|
|
381
408
|
runtime: str | None = None,
|
|
409
|
+
runtime_config: str | None = None,
|
|
382
410
|
) -> None:
|
|
383
411
|
"""Deploy one HUD environment to the platform."""
|
|
384
412
|
hud_console = HUDConsole()
|
|
@@ -411,6 +439,7 @@ def deploy_environment(
|
|
|
411
439
|
build_args=build_args,
|
|
412
440
|
build_secrets=build_secrets,
|
|
413
441
|
runtime=runtime,
|
|
442
|
+
runtime_config=runtime_config,
|
|
414
443
|
verbose=verbose,
|
|
415
444
|
platform=platform,
|
|
416
445
|
console=hud_console,
|
|
@@ -485,6 +514,8 @@ async def _trigger_build(
|
|
|
485
514
|
payload["registry_id"] = plan.registry_id
|
|
486
515
|
if plan.runtime:
|
|
487
516
|
payload["runtime_provider"] = plan.runtime
|
|
517
|
+
if plan.runtime_config:
|
|
518
|
+
payload["runtime_config"] = plan.runtime_config
|
|
488
519
|
if plan.env_vars:
|
|
489
520
|
payload["environment_variables"] = plan.env_vars
|
|
490
521
|
if plan.build_args:
|
|
@@ -644,6 +675,7 @@ def deploy_all(
|
|
|
644
675
|
build_args: list[str] | None = None,
|
|
645
676
|
build_secrets: list[str] | None = None,
|
|
646
677
|
runtime: str | None = None,
|
|
678
|
+
runtime_config: str | None = None,
|
|
647
679
|
) -> None:
|
|
648
680
|
"""Deploy each HUD environment under a parent directory."""
|
|
649
681
|
hud_console = HUDConsole()
|
|
@@ -683,6 +715,7 @@ def deploy_all(
|
|
|
683
715
|
build_args=build_args,
|
|
684
716
|
build_secrets=build_secrets,
|
|
685
717
|
runtime=runtime,
|
|
718
|
+
runtime_config=runtime_config,
|
|
686
719
|
)
|
|
687
720
|
succeeded.append(env_dir.name)
|
|
688
721
|
except (typer.Exit, SystemExit):
|
|
@@ -762,6 +795,11 @@ def deploy_command(
|
|
|
762
795
|
"--runtime",
|
|
763
796
|
help="Persist Modal as the hosted runtime for this registry",
|
|
764
797
|
),
|
|
798
|
+
runtime_config: str | None = typer.Option(
|
|
799
|
+
None,
|
|
800
|
+
"--runtime-config",
|
|
801
|
+
help="Path to a JSON RuntimeConfig for hosted runs",
|
|
802
|
+
),
|
|
765
803
|
) -> None:
|
|
766
804
|
"""Deploy HUD environment to the platform.
|
|
767
805
|
|
|
@@ -781,6 +819,7 @@ def deploy_command(
|
|
|
781
819
|
build_args=build_args,
|
|
782
820
|
build_secrets=secrets,
|
|
783
821
|
runtime=runtime,
|
|
822
|
+
runtime_config=runtime_config,
|
|
784
823
|
)
|
|
785
824
|
return
|
|
786
825
|
|
|
@@ -795,4 +834,5 @@ def deploy_command(
|
|
|
795
834
|
build_args=build_args,
|
|
796
835
|
build_secrets=secrets,
|
|
797
836
|
runtime=runtime,
|
|
837
|
+
runtime_config=runtime_config,
|
|
798
838
|
)
|
|
@@ -122,6 +122,14 @@ ENVIRONMENT_PRESETS: tuple[EnvironmentPreset, ...] = (
|
|
|
122
122
|
"hud-evals",
|
|
123
123
|
"worldsim-template",
|
|
124
124
|
),
|
|
125
|
+
EnvironmentPreset(
|
|
126
|
+
"robot",
|
|
127
|
+
"🤖",
|
|
128
|
+
"Robot",
|
|
129
|
+
"Robotics: run a VLA policy against a containerized robot sim, graded by task success.",
|
|
130
|
+
"hud-evals",
|
|
131
|
+
"robot-template",
|
|
132
|
+
),
|
|
125
133
|
EnvironmentPreset(
|
|
126
134
|
"videogamebench",
|
|
127
135
|
"🎮",
|
|
@@ -130,6 +138,14 @@ ENVIRONMENT_PRESETS: tuple[EnvironmentPreset, ...] = (
|
|
|
130
138
|
"hud-evals",
|
|
131
139
|
"videogamebench-template",
|
|
132
140
|
),
|
|
141
|
+
EnvironmentPreset(
|
|
142
|
+
"arc-agi-3",
|
|
143
|
+
"🧩",
|
|
144
|
+
"ARC-AGI-3",
|
|
145
|
+
"Interactive reasoning benchmark: agents play ARC-AGI-3 games.",
|
|
146
|
+
"hud-evals",
|
|
147
|
+
"ARC-AGI-3",
|
|
148
|
+
),
|
|
133
149
|
)
|
|
134
150
|
|
|
135
151
|
PRESETS_BY_ID: dict[str, EnvironmentPreset] = {p.id: p for p in ENVIRONMENT_PRESETS}
|
|
@@ -179,6 +179,47 @@ class TestCollectEnvironmentVariables:
|
|
|
179
179
|
assert "INVALID_FORMAT" not in result
|
|
180
180
|
|
|
181
181
|
|
|
182
|
+
class TestRuntimeConfigFile:
|
|
183
|
+
def test_load_runtime_config_uses_sdk_shape(self, tmp_path: Path) -> None:
|
|
184
|
+
from hud.cli.deploy import _load_runtime_config
|
|
185
|
+
from hud.utils.hud_console import HUDConsole
|
|
186
|
+
|
|
187
|
+
config_path = tmp_path / "runtime.json"
|
|
188
|
+
config_path.write_text(
|
|
189
|
+
json.dumps(
|
|
190
|
+
{
|
|
191
|
+
"resources": {"gpu": {"type": "A10G", "count": 2}},
|
|
192
|
+
"limits": {"startup_timeout_s": 300},
|
|
193
|
+
}
|
|
194
|
+
),
|
|
195
|
+
encoding="utf-8",
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
assert _load_runtime_config(str(config_path), HUDConsole()) == {
|
|
199
|
+
"resources": {"gpu": {"type": "A10G", "count": 2}},
|
|
200
|
+
"limits": {"startup_timeout_s": 300},
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
def test_load_runtime_config_preserves_null_override(self, tmp_path: Path) -> None:
|
|
204
|
+
from hud.cli.deploy import _load_runtime_config
|
|
205
|
+
from hud.utils.hud_console import HUDConsole
|
|
206
|
+
|
|
207
|
+
config_path = tmp_path / "runtime.json"
|
|
208
|
+
config_path.write_text(json.dumps({"resources": None}), encoding="utf-8")
|
|
209
|
+
|
|
210
|
+
assert _load_runtime_config(str(config_path), HUDConsole()) == {"resources": None}
|
|
211
|
+
|
|
212
|
+
def test_load_runtime_config_rejects_unknown_fields(self, tmp_path: Path) -> None:
|
|
213
|
+
from hud.cli.deploy import _load_runtime_config
|
|
214
|
+
from hud.utils.hud_console import HUDConsole
|
|
215
|
+
|
|
216
|
+
config_path = tmp_path / "runtime.json"
|
|
217
|
+
config_path.write_text(json.dumps({"provider_config": {}}), encoding="utf-8")
|
|
218
|
+
|
|
219
|
+
with pytest.raises(typer.Exit):
|
|
220
|
+
_load_runtime_config(str(config_path), HUDConsole())
|
|
221
|
+
|
|
222
|
+
|
|
182
223
|
class TestDeployEnvironment:
|
|
183
224
|
"""Tests for deploy_environment function."""
|
|
184
225
|
|
|
@@ -262,6 +303,7 @@ class TestDeployAsync:
|
|
|
262
303
|
name="test-env",
|
|
263
304
|
registry_id=None,
|
|
264
305
|
runtime=None,
|
|
306
|
+
runtime_config=None,
|
|
265
307
|
env_vars={},
|
|
266
308
|
build_args={},
|
|
267
309
|
build_secrets={},
|
|
@@ -292,6 +334,7 @@ class TestDeployAsync:
|
|
|
292
334
|
name="test-env",
|
|
293
335
|
registry_id=None,
|
|
294
336
|
runtime=None,
|
|
337
|
+
runtime_config=None,
|
|
295
338
|
env_vars={},
|
|
296
339
|
build_args={},
|
|
297
340
|
build_secrets={},
|
|
@@ -331,6 +374,7 @@ class TestDeployAsync:
|
|
|
331
374
|
name="test-env",
|
|
332
375
|
registry_id=None,
|
|
333
376
|
runtime="modal",
|
|
377
|
+
runtime_config=None,
|
|
334
378
|
env_vars={},
|
|
335
379
|
build_args={},
|
|
336
380
|
build_secrets={},
|
|
@@ -343,6 +387,48 @@ class TestDeployAsync:
|
|
|
343
387
|
assert platform.payload is not None
|
|
344
388
|
assert platform.payload["runtime_provider"] == "modal"
|
|
345
389
|
|
|
390
|
+
@pytest.mark.asyncio
|
|
391
|
+
async def test_trigger_build_sends_runtime_config(self) -> None:
|
|
392
|
+
from hud.cli.deploy import _DeployPlan, _trigger_build
|
|
393
|
+
from hud.utils.hud_console import HUDConsole
|
|
394
|
+
from hud.utils.platform import PlatformClient
|
|
395
|
+
|
|
396
|
+
class FakePlatform(PlatformClient):
|
|
397
|
+
payload: dict[str, object] | None = None
|
|
398
|
+
|
|
399
|
+
async def apost(
|
|
400
|
+
self,
|
|
401
|
+
path: str,
|
|
402
|
+
*,
|
|
403
|
+
json: object | None = None,
|
|
404
|
+
) -> dict[str, object]:
|
|
405
|
+
assert path == "/builds/trigger"
|
|
406
|
+
assert isinstance(json, dict)
|
|
407
|
+
object.__setattr__(self, "payload", json)
|
|
408
|
+
return {"id": "build-1", "registry_id": "registry-1"}
|
|
409
|
+
|
|
410
|
+
runtime_config = {"resources": {"gpu": {"type": "A10G", "count": 1}}}
|
|
411
|
+
platform = FakePlatform("https://api.example", "key")
|
|
412
|
+
result = await _trigger_build(
|
|
413
|
+
platform,
|
|
414
|
+
build_id="build-1",
|
|
415
|
+
plan=_DeployPlan(
|
|
416
|
+
name="test-env",
|
|
417
|
+
registry_id=None,
|
|
418
|
+
runtime="modal",
|
|
419
|
+
runtime_config=runtime_config,
|
|
420
|
+
env_vars={},
|
|
421
|
+
build_args={},
|
|
422
|
+
build_secrets={},
|
|
423
|
+
),
|
|
424
|
+
no_cache=False,
|
|
425
|
+
console=HUDConsole(),
|
|
426
|
+
)
|
|
427
|
+
|
|
428
|
+
assert result == {"id": "build-1", "registry_id": "registry-1"}
|
|
429
|
+
assert platform.payload is not None
|
|
430
|
+
assert platform.payload["runtime_config"] == runtime_config
|
|
431
|
+
|
|
346
432
|
|
|
347
433
|
class TestSaveDeployLink:
|
|
348
434
|
"""Tests for _save_deploy_link function."""
|
|
@@ -108,6 +108,9 @@ class RuntimeConfig(BaseModel):
|
|
|
108
108
|
self.model_dump() | override.model_dump(exclude_unset=True)
|
|
109
109
|
)
|
|
110
110
|
|
|
111
|
+
def request_payload(self) -> dict[str, Any]:
|
|
112
|
+
return self.model_dump(mode="json", exclude_unset=True)
|
|
113
|
+
|
|
111
114
|
|
|
112
115
|
class Provider(Protocol):
|
|
113
116
|
"""Server placement: called with the task row being placed, acquire one
|
|
@@ -925,7 +928,7 @@ class HostedRuntime:
|
|
|
925
928
|
if group_id is not None:
|
|
926
929
|
payload["group_id"] = group_id
|
|
927
930
|
if task.runtime_config is not None:
|
|
928
|
-
runtime_config = task.runtime_config.
|
|
931
|
+
runtime_config = task.runtime_config.request_payload()
|
|
929
932
|
if runtime_config:
|
|
930
933
|
payload["runtime_config"] = runtime_config
|
|
931
934
|
await platform.apost("/rollouts/submit", json=payload)
|
|
@@ -163,7 +163,7 @@ def task_upload_payload(task: Task) -> dict[str, Any]:
|
|
|
163
163
|
if task.columns:
|
|
164
164
|
payload["columns"] = task.columns
|
|
165
165
|
if task.runtime_config is not None:
|
|
166
|
-
payload["runtime_config"] = task.runtime_config.
|
|
166
|
+
payload["runtime_config"] = task.runtime_config.request_payload()
|
|
167
167
|
return payload
|
|
168
168
|
|
|
169
169
|
|
|
@@ -176,7 +176,7 @@ def _task_signature(task: Task) -> str:
|
|
|
176
176
|
if task.columns:
|
|
177
177
|
sig_data["columns"] = task.columns
|
|
178
178
|
if task.runtime_config is not None:
|
|
179
|
-
sig_data["runtime_config"] = task.runtime_config.
|
|
179
|
+
sig_data["runtime_config"] = task.runtime_config.request_payload()
|
|
180
180
|
return f"{task.id}|" + json.dumps(
|
|
181
181
|
sig_data,
|
|
182
182
|
sort_keys=True,
|
|
@@ -164,6 +164,25 @@ async def test_run_submits_and_polls_to_terminal(monkeypatch: pytest.MonkeyPatch
|
|
|
164
164
|
assert payload["agent"]["config"]["model"] == "test-model"
|
|
165
165
|
|
|
166
166
|
|
|
167
|
+
@pytest.mark.asyncio
|
|
168
|
+
async def test_run_preserves_runtime_config_null_override(
|
|
169
|
+
monkeypatch: pytest.MonkeyPatch,
|
|
170
|
+
) -> None:
|
|
171
|
+
platform = _FakePlatform([{"status": "completed", "reward": 0.5}])
|
|
172
|
+
monkeypatch.setattr(
|
|
173
|
+
"hud.eval.runtime.PlatformClient.from_settings", classmethod(lambda cls: platform)
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
await HostedRuntime(poll_interval=0.0).run(
|
|
177
|
+
Task(env="sums", id="add", runtime_config=RuntimeConfig(resources=None)),
|
|
178
|
+
_agent(),
|
|
179
|
+
job_id=uuid.uuid4().hex,
|
|
180
|
+
trace_id=uuid.uuid4().hex,
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
assert platform.posts[0][1]["runtime_config"] == {"resources": None}
|
|
184
|
+
|
|
185
|
+
|
|
167
186
|
@pytest.mark.asyncio
|
|
168
187
|
async def test_run_timeout_requests_platform_cancel(monkeypatch: pytest.MonkeyPatch) -> None:
|
|
169
188
|
platform = _FakePlatform([{"status": "running"}])
|
|
@@ -148,3 +148,15 @@ def test_task_upload_payload_includes_runtime_config() -> None:
|
|
|
148
148
|
payload = task_upload_payload(task)
|
|
149
149
|
|
|
150
150
|
assert payload["runtime_config"] == {"image": "img:tag"}
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def test_task_upload_payload_preserves_runtime_config_null_override() -> None:
|
|
154
|
+
task = Task(
|
|
155
|
+
env="e",
|
|
156
|
+
id="solve",
|
|
157
|
+
runtime_config=RuntimeConfig(resources=None),
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
payload = task_upload_payload(task)
|
|
161
|
+
|
|
162
|
+
assert payload["runtime_config"] == {"resources": None}
|
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
# Fireworks RL Training
|
|
2
|
-
|
|
3
|
-
Direct Fireworks Training API loop over the same arithmetic preview task used by
|
|
4
|
-
`cookbooks/rl-training`.
|
|
5
|
-
|
|
6
|
-
This does **not** use Fireworks native datasets or RFT jobs. It follows the
|
|
7
|
-
Training API service path from the Fireworks docs:
|
|
8
|
-
|
|
9
|
-
1. `FiretitanServiceClient.from_firetitan_config(...)`
|
|
10
|
-
2. `create_deployment_sampler(...)` for high-parallel rollouts
|
|
11
|
-
3. local grading of HUD-style multiplication tasks
|
|
12
|
-
4. `forward_backward_custom(...)` + `optim_step(...)`
|
|
13
|
-
5. `save_weights_for_sampler(...)` + sampler refresh
|
|
14
|
-
|
|
15
|
-
References:
|
|
16
|
-
|
|
17
|
-
- Fireworks Training API introduction: https://docs.fireworks.ai/fine-tuning/training-api/introduction
|
|
18
|
-
- Training and sampling lifecycle: https://docs.fireworks.ai/fine-tuning/training-api/training-and-sampling
|
|
19
|
-
- Loss functions / GRPO reference: https://docs.fireworks.ai/fine-tuning/training-api/loss-functions
|
|
20
|
-
|
|
21
|
-
## Setup
|
|
22
|
-
|
|
23
|
-
The repo-level `.env` is loaded automatically. It must contain:
|
|
24
|
-
|
|
25
|
-
```bash
|
|
26
|
-
FIREWORKS_API_KEY=...
|
|
27
|
-
FIREWORKS_ACCOUNT_ID=...
|
|
28
|
-
```
|
|
29
|
-
|
|
30
|
-
Install the isolated cookbook environment:
|
|
31
|
-
|
|
32
|
-
```bash
|
|
33
|
-
uv sync --pre
|
|
34
|
-
```
|
|
35
|
-
|
|
36
|
-
## Calibrate task difficulty first
|
|
37
|
-
|
|
38
|
-
Calibration defaults to Fireworks' OpenAI-compatible inference API, so it does
|
|
39
|
-
**not** create a trainer, provision a Training API deployment, or call
|
|
40
|
-
`optim_step`. This is the cheap way to tune task difficulty before paying for a
|
|
41
|
-
Training API run.
|
|
42
|
-
|
|
43
|
-
The calibration model is separate from the training base model because the
|
|
44
|
-
`lorenss` key currently exposes only a small serverless inference catalog (no
|
|
45
|
-
Qwen3 8B deployment). Override it with `--inference-model` if you have a closer
|
|
46
|
-
deployed model.
|
|
47
|
-
|
|
48
|
-
```bash
|
|
49
|
-
uv run train.py --calibrate-only --groups-per-step 8 --rollouts-per-prompt 8 --parallelism 32
|
|
50
|
-
```
|
|
51
|
-
|
|
52
|
-
The goal is a reward distribution with variance. If reward is all zero, make the
|
|
53
|
-
task easier:
|
|
54
|
-
|
|
55
|
-
```bash
|
|
56
|
-
uv run train.py --calibrate-only --min-a 10 --max-a 99 --min-b 2 --max-b 9
|
|
57
|
-
```
|
|
58
|
-
|
|
59
|
-
If reward is all one, make the task harder:
|
|
60
|
-
|
|
61
|
-
```bash
|
|
62
|
-
uv run train.py --calibrate-only --min-a 1000 --max-a 9999 --min-b 11 --max-b 99
|
|
63
|
-
```
|
|
64
|
-
|
|
65
|
-
The current defaults are calibrated for the visible `gpt-oss-120b` inference
|
|
66
|
-
model on the `lorenss` key: 2-digit by 1-digit multiplication with a direct
|
|
67
|
-
"reply only with the integer" prompt. A 32-rollout calibration gave a non-trivial
|
|
68
|
-
baseline (`reward_mean ~= 0.22`, `reward_std ~= 0.42`), while the original
|
|
69
|
-
3-digit by 2-digit range was all-zero.
|
|
70
|
-
|
|
71
|
-
## Train
|
|
72
|
-
|
|
73
|
-
Once calibration has non-trivial rewards:
|
|
74
|
-
|
|
75
|
-
```bash
|
|
76
|
-
uv run train.py --steps 5 --groups-per-step 8 --rollouts-per-prompt 8 --parallelism 32
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
This uses the direct Training API managed service path. If you want calibration
|
|
80
|
-
to go through the managed deployment sampler too, pass
|
|
81
|
-
`--calibration-backend managed`; this provisions the same resources as training.
|
|
82
|
-
|
|
83
|
-
### Current Fireworks preview account blocker
|
|
84
|
-
|
|
85
|
-
On the `lorenss` preview account, trainer creation currently fails before the
|
|
86
|
-
first train step with:
|
|
87
|
-
|
|
88
|
-
```text
|
|
89
|
-
failed to ensure FIREWORKS_API_KEY secret: unkey inference api id is not configured
|
|
90
|
-
```
|
|
91
|
-
|
|
92
|
-
This happens even with `create_deployment=False`, so it is an account/control
|
|
93
|
-
plane provisioning issue rather than a problem in the rollout or loss code. Once
|
|
94
|
-
Fireworks enables the missing Unkey inference API config for the account, the
|
|
95
|
-
same `uv run train.py ...` command should proceed to trainer startup and the
|
|
96
|
-
first `forward_backward_custom(...)` call.
|
|
97
|
-
|
|
98
|
-
Metrics are written to:
|
|
99
|
-
|
|
100
|
-
- `runs/fireworks-rl-preview/metrics.jsonl`
|
|
101
|
-
- `runs/fireworks-rl-preview/reward_loss.png` if `matplotlib` is installed
|
|
102
|
-
|
|
103
|
-
## Notes
|
|
104
|
-
|
|
105
|
-
- Defaults use Qwen 3 8B full-parameter training:
|
|
106
|
-
- `accounts/fireworks/models/qwen3-8b`
|
|
107
|
-
- `Qwen/Qwen3-8B`
|
|
108
|
-
- `accounts/fireworks/trainingShapes/qwen3-8b-128k`
|
|
109
|
-
- LoRA can be tested with `--lora-rank N`, but the validated Qwen3 8B training
|
|
110
|
-
shape currently rejects LoRA mode on the `lorenss` preview account.
|
|
111
|
-
- The first checkpoint sync happens after step 0 and subsequent rollouts sample
|
|
112
|
-
the updated weights through the same deployment.
|
|
113
|
-
- `--keep-trainer` and `--keep-deployment` are available for debugging. By
|
|
114
|
-
default the trainer is cleaned up and the deployment scales to zero on exit.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|