hud-python 0.6.6__tar.gz → 0.6.8.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/PKG-INFO +27 -24
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/README.md +26 -23
- hud_python-0.6.8.dev0/cookbooks/fireworks-rl-training/README.md +129 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai_compatible/agent.py +7 -3
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai_compatible/tools/__init__.py +4 -2
- hud_python-0.6.8.dev0/hud/agents/openai_compatible/tools/filesystem.py +332 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tests/test_provider_native_tools.py +135 -6
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/deploy.py +41 -1
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/tests/test_deploy.py +86 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/runtime.py +4 -1
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/sync.py +2 -2
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/tests/test_hosted.py +19 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/tests/test_rollout.py +90 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/tests/test_sync.py +12 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/version.py +1 -1
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/pyproject.toml +1 -1
- hud_python-0.6.6/cookbooks/fireworks-rl-training/README.md +0 -114
- hud_python-0.6.6/hud/agents/openai_compatible/tools/filesystem.py +0 -138
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/.gitignore +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/LICENSE +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/cookbooks/a2a-chat/README.md +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/cookbooks/a2a-chat/pyproject.toml +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/cookbooks/codex-coding/README.md +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/cookbooks/codex-coding/pyproject.toml +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/cookbooks/connect4-selfplay/README.md +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/cookbooks/fireworks-rl-training/pyproject.toml +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/cookbooks/rl-training/README.md +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/cookbooks/rl-training/pyproject.toml +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/__main__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/_legacy.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/base.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/browser_use/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/browser_use/agent.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/claude/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/claude/agent.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/claude/sdk/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/claude/sdk/agent.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/claude/sdk/computer_mcp.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/claude/tools/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/claude/tools/base.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/claude/tools/coding.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/claude/tools/computer.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/claude/tools/hosted.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/claude/tools/mcp_proxy.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/claude/tools/settings.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/claude/tools/tests/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/claude/tools/tests/test_computer.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/gemini/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/gemini/agent.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/gemini/settings.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/gemini/tools/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/gemini/tools/base.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/gemini/tools/coding.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/gemini/tools/computer.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/gemini/tools/filesystem.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/gemini/tools/hosted.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/gemini/tools/mcp_proxy.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/gemini/tools/tests/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/gemini/tools/tests/test_computer.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/misc/response_automation.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai/agent.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai/tools/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai/tools/apply_patch.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai/tools/base.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai/tools/coding.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai/tools/computer.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai/tools/hosted.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai/tools/mcp_proxy.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai/tools/strict_schema.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai/tools/tests/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai/tools/tests/test_computer.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai/tools/tests/test_strict_schema.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai_compatible/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai_compatible/tools/base.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/openai_compatible/tools/mcp_proxy.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/robot/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/robot/_types.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/robot/adapter.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/robot/agent.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/robot/batching.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/robot/model.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/robot/record.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/robot/video.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tests/test_apply_patch.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tests/test_claude_agent.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tests/test_claude_sdk_agent.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tests/test_gemini_agent.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tests/test_openai_agent.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tests/test_openai_compatible_agent.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tests/test_tool_agent.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tests/test_trace.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tool_agent.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tools/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tools/base.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tools/hosted.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tools/mcp.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tools/rfb.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/tools/ssh.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/agents/types.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/capabilities/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/capabilities/base.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/capabilities/cdp.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/capabilities/filetracking.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/capabilities/mcp.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/capabilities/rfb.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/capabilities/robot.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/capabilities/ssh.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/__main__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/cancel.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/client.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/eval.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/init.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/jobs.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/login.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/models.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/presets.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/serve.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/sync.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/task.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/templates.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/tests/test_eval_bedrock.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/tests/test_eval_config.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/tests/test_init.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/tests/test_sync_export.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/trace.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/api.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/build_display.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/build_logs.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/config.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/context.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/display.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/jobs.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/source.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/tests/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/tests/test_build_display.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/tests/test_config.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/tests/test_context.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/tests/test_registry.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/tests/test_source.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/tests/test_tasks.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/tests/test_version_check.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/cli/utils/version_check.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/clients/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/clients/client.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/clients/tests/test_connect.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/conftest.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/env.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/file_tracker.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/file_tracking.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/legacy.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/robot/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/robot/bridge.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/robot/endpoint.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/robot/sim_runner.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/server.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/tests/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/tests/conftest.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/tests/test_capability_backing.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/tests/test_file_tracker.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/tests/test_file_tracking.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/tests/test_legacy.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/tests/test_loader.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/tests/test_manifest.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/tests/test_server.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/tests/test_tunnel.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/utils.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/environment/workspace.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/chat.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/file_tracking.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/job.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/run.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/task.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/taskset.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/tests/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/tests/test_chat.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/tests/test_docker_provider.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/tests/test_file_tracking_observer.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/tests/test_job.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/eval/tests/test_task.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/graders/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/graders/base.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/graders/bash.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/graders/combine.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/graders/judge.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/graders/results.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/graders/text.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/patches/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/patches/mcp_patches.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/patches/tests/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/patches/tests/test_warnings.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/patches/warnings.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/py.typed +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/server.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/settings.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/telemetry/context.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/telemetry/exporter.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/telemetry/filetracking.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/telemetry/span.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/telemetry/tests/test_exporter.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/telemetry/tests/test_filetracking.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/telemetry/tests/test_instrument.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/train/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/train/base.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/train/client.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/train/types.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/types.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/utils/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/utils/exceptions.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/utils/gateway.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/utils/hints.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/utils/hud_console.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/utils/modules.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/utils/platform.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/utils/requests.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/utils/serialization.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/utils/tests/test_exceptions.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/utils/tests/test_hints.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/utils/tests/test_hud_console.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/utils/tests/test_platform.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/utils/tests/test_requests.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/utils/tests/test_serialization.py +0 -0
- {hud_python-0.6.6 → hud_python-0.6.8.dev0}/hud/utils/time.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hud-python
|
|
3
|
-
Version: 0.6.6
|
|
3
|
+
Version: 0.6.8.dev0
|
|
4
4
|
Summary: SDK for the HUD platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hud-evals/hud-python
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
|
|
@@ -87,7 +87,7 @@ Description-Content-Type: text/markdown
|
|
|
87
87
|
|
|
88
88
|
HUD is a platform for building RL environments for AI agents, across coding, browser, computer-use, and robotics. Define an environment, write tasks, and run them as evals and training across any model, at any scale.
|
|
89
89
|
|
|
90
|
-
To learn more, see the [documentation](https://docs.hud.ai) and [
|
|
90
|
+
To learn more, see the [documentation](https://docs.hud.ai) and [environment reference](https://docs.hud.ai/v6/core/environment).
|
|
91
91
|
|
|
92
92
|
[](https://pypi.org/project/hud-python/)
|
|
93
93
|
[](LICENSE)
|
|
@@ -120,7 +120,7 @@ Then scaffold your first environment:
|
|
|
120
120
|
hud init my-env
|
|
121
121
|
```
|
|
122
122
|
|
|
123
|
-

|
|
124
124
|
|
|
125
125
|
## The protocol
|
|
126
126
|
|
|
@@ -159,14 +159,14 @@ hud eval my-taskset --remote
|
|
|
159
159
|
For local iteration, the same protocol works against a container on your laptop:
|
|
160
160
|
|
|
161
161
|
```bash
|
|
162
|
-
|
|
163
|
-
docker run -d --name run1 my-env
|
|
164
|
-
|
|
165
|
-
|
|
162
|
+
docker build -f Dockerfile.hud -t my-env .
|
|
163
|
+
docker run -d --name run1 -p 8765:8765 my-env
|
|
164
|
+
hud task start fix_bug --url tcp://127.0.0.1:8765
|
|
165
|
+
hud task grade fix_bug --url tcp://127.0.0.1:8765 --answer "..."
|
|
166
166
|
docker rm -f run1
|
|
167
167
|
```
|
|
168
168
|
|
|
169
|
-
→ [
|
|
169
|
+
→ [Run & deploy](https://docs.hud.ai/v6/core/runtime)
|
|
170
170
|
|
|
171
171
|
## Environments & templates
|
|
172
172
|
|
|
@@ -193,7 +193,7 @@ hud eval tasks.py claude --group 3
|
|
|
193
193
|
|
|
194
194
|
Each graded evaluation is a **trace** (the SDK's live handle is a `Run`). With `HUD_API_KEY` set, every rollout is recorded on [hud.ai](https://hud.ai). Tasks that need a shell, browser, GUI, or robot declare **capabilities** (below); everything else — variants, grading, batching — stays identical.
|
|
195
195
|
|
|
196
|
-
→ [Quickstart](https://docs.hud.ai/quickstart) · [Tasks & tasksets](https://docs.hud.ai/
|
|
196
|
+
→ [Quickstart](https://docs.hud.ai/v6/start/quickstart) · [Tasks & tasksets](https://docs.hud.ai/v6/core/tasks)
|
|
197
197
|
|
|
198
198
|
## Capabilities & harnesses
|
|
199
199
|
|
|
@@ -211,39 +211,42 @@ A **capability** is a connection the environment exposes; a **harness** attaches
|
|
|
211
211
|
|
|
212
212
|
**Bring your own:** a harness attaches to a capability and defines a tool spec — wrap `browser-use` on `cdp`, a VLA policy on `robot`, or your own agent on `ssh` / `mcp`. No protocol work required.
|
|
213
213
|
|
|
214
|
-
→ [Capabilities](https://docs.hud.ai/
|
|
214
|
+
→ [Capabilities](https://docs.hud.ai/v6/core/capabilities) · [Models](https://docs.hud.ai/v6/core/agents) · [Robots](https://docs.hud.ai/v6/advanced/robots)
|
|
215
215
|
|
|
216
216
|
## Deploy on the platform
|
|
217
217
|
|
|
218
218
|
From the [platform UI](https://hud.ai) you can run batches, compare models on the same taskset, and inspect every trace.
|
|
219
219
|
|
|
220
|
-
→ [
|
|
220
|
+
→ [Run & deploy](https://docs.hud.ai/v6/core/runtime)
|
|
221
221
|
|
|
222
222
|
## Train on rewards
|
|
223
223
|
|
|
224
|
-
Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and
|
|
224
|
+
Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and pass the graded runs to `TrainingClient.step()`:
|
|
225
225
|
|
|
226
226
|
```python
|
|
227
|
+
from hud import TrainingClient
|
|
227
228
|
from hud.agents import create_agent
|
|
228
|
-
from hud.eval import
|
|
229
|
+
from hud.eval import Job
|
|
229
230
|
|
|
230
|
-
agent = create_agent("
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
231
|
+
agent = create_agent("arith-rl", completion_kwargs={"extra_body": {"return_token_ids": True}})
|
|
232
|
+
trainer = TrainingClient("arith-rl")
|
|
233
|
+
taskset, runtime = ... # your Taskset and where rollouts run
|
|
234
|
+
|
|
235
|
+
session = await Job.start("arith-rl", group=8)
|
|
236
|
+
start = len(session.runs)
|
|
237
|
+
await taskset.run(agent, runtime=runtime, group=8, job=session)
|
|
238
|
+
await trainer.step(session.runs[start:], learning_rate=1e-5, group_size=8)
|
|
235
239
|
```
|
|
236
240
|
|
|
237
241
|
HUD is the environment-and-reward source for your own GRPO/PPO loop — the same environment trains any model, text or multimodal, unchanged.
|
|
238
242
|
|
|
239
|
-
→ [Training](https://docs.hud.ai/
|
|
243
|
+
→ [Training](https://docs.hud.ai/v6/core/training) · [Designing tasks for signal](https://docs.hud.ai/v6/core/advice)
|
|
240
244
|
|
|
241
245
|
## Links
|
|
242
246
|
|
|
243
247
|
- [Documentation](https://docs.hud.ai)
|
|
244
|
-
- [Quickstart](https://docs.hud.ai/quickstart)
|
|
245
|
-
- [CLI reference](https://docs.hud.ai/
|
|
246
|
-
- [Leaderboards](https://hud.ai/leaderboards)
|
|
248
|
+
- [Quickstart](https://docs.hud.ai/v6/start/quickstart)
|
|
249
|
+
- [CLI reference](https://docs.hud.ai/v6/core/cli)
|
|
247
250
|
- [Environment templates](https://hud.ai/environments)
|
|
248
251
|
- [Supported models](https://hud.ai/models)
|
|
249
252
|
- [Discord](https://discord.gg/wkjtmHYYjm)
|
|
@@ -268,8 +271,8 @@ Key areas: [Agents](hud/agents/) · [Environments](hud/environment/) · [Capabil
|
|
|
268
271
|
|
|
269
272
|
```bibtex
|
|
270
273
|
@software{hud2025agentevalplatform,
|
|
271
|
-
author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
|
|
272
|
-
title = {HUD: An Evaluation and RL
|
|
274
|
+
author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep Chawla and Nguyen Nhat Minh},
|
|
275
|
+
title = {HUD: An Evaluation and RL Environments Platform for Agents},
|
|
273
276
|
date = {2025-04},
|
|
274
277
|
url = {https://github.com/hud-evals/hud-python},
|
|
275
278
|
langid = {en}
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
HUD is a platform for building RL environments for AI agents, across coding, browser, computer-use, and robotics. Define an environment, write tasks, and run them as evals and training across any model, at any scale.
|
|
10
10
|
|
|
11
|
-
To learn more, see the [documentation](https://docs.hud.ai) and [
|
|
11
|
+
To learn more, see the [documentation](https://docs.hud.ai) and [environment reference](https://docs.hud.ai/v6/core/environment).
|
|
12
12
|
|
|
13
13
|
[](https://pypi.org/project/hud-python/)
|
|
14
14
|
[](LICENSE)
|
|
@@ -41,7 +41,7 @@ Then scaffold your first environment:
|
|
|
41
41
|
hud init my-env
|
|
42
42
|
```
|
|
43
43
|
|
|
44
|
-

|
|
45
45
|
|
|
46
46
|
## The protocol
|
|
47
47
|
|
|
@@ -80,14 +80,14 @@ hud eval my-taskset --remote
|
|
|
80
80
|
For local iteration, the same protocol works against a container on your laptop:
|
|
81
81
|
|
|
82
82
|
```bash
|
|
83
|
-
|
|
84
|
-
docker run -d --name run1 my-env
|
|
85
|
-
|
|
86
|
-
|
|
83
|
+
docker build -f Dockerfile.hud -t my-env .
|
|
84
|
+
docker run -d --name run1 -p 8765:8765 my-env
|
|
85
|
+
hud task start fix_bug --url tcp://127.0.0.1:8765
|
|
86
|
+
hud task grade fix_bug --url tcp://127.0.0.1:8765 --answer "..."
|
|
87
87
|
docker rm -f run1
|
|
88
88
|
```
|
|
89
89
|
|
|
90
|
-
→ [
|
|
90
|
+
→ [Run & deploy](https://docs.hud.ai/v6/core/runtime)
|
|
91
91
|
|
|
92
92
|
## Environments & templates
|
|
93
93
|
|
|
@@ -114,7 +114,7 @@ hud eval tasks.py claude --group 3
|
|
|
114
114
|
|
|
115
115
|
Each graded evaluation is a **trace** (the SDK's live handle is a `Run`). With `HUD_API_KEY` set, every rollout is recorded on [hud.ai](https://hud.ai). Tasks that need a shell, browser, GUI, or robot declare **capabilities** (below); everything else — variants, grading, batching — stays identical.
|
|
116
116
|
|
|
117
|
-
→ [Quickstart](https://docs.hud.ai/quickstart) · [Tasks & tasksets](https://docs.hud.ai/
|
|
117
|
+
→ [Quickstart](https://docs.hud.ai/v6/start/quickstart) · [Tasks & tasksets](https://docs.hud.ai/v6/core/tasks)
|
|
118
118
|
|
|
119
119
|
## Capabilities & harnesses
|
|
120
120
|
|
|
@@ -132,39 +132,42 @@ A **capability** is a connection the environment exposes; a **harness** attaches
|
|
|
132
132
|
|
|
133
133
|
**Bring your own:** a harness attaches to a capability and defines a tool spec — wrap `browser-use` on `cdp`, a VLA policy on `robot`, or your own agent on `ssh` / `mcp`. No protocol work required.
|
|
134
134
|
|
|
135
|
-
→ [Capabilities](https://docs.hud.ai/
|
|
135
|
+
→ [Capabilities](https://docs.hud.ai/v6/core/capabilities) · [Models](https://docs.hud.ai/v6/core/agents) · [Robots](https://docs.hud.ai/v6/advanced/robots)
|
|
136
136
|
|
|
137
137
|
## Deploy on the platform
|
|
138
138
|
|
|
139
139
|
From the [platform UI](https://hud.ai) you can run batches, compare models on the same taskset, and inspect every trace.
|
|
140
140
|
|
|
141
|
-
→ [
|
|
141
|
+
→ [Run & deploy](https://docs.hud.ai/v6/core/runtime)
|
|
142
142
|
|
|
143
143
|
## Train on rewards
|
|
144
144
|
|
|
145
|
-
Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and
|
|
145
|
+
Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and pass the graded runs to `TrainingClient.step()`:
|
|
146
146
|
|
|
147
147
|
```python
|
|
148
|
+
from hud import TrainingClient
|
|
148
149
|
from hud.agents import create_agent
|
|
149
|
-
from hud.eval import
|
|
150
|
+
from hud.eval import Job
|
|
150
151
|
|
|
151
|
-
agent = create_agent("
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
152
|
+
agent = create_agent("arith-rl", completion_kwargs={"extra_body": {"return_token_ids": True}})
|
|
153
|
+
trainer = TrainingClient("arith-rl")
|
|
154
|
+
taskset, runtime = ... # your Taskset and where rollouts run
|
|
155
|
+
|
|
156
|
+
session = await Job.start("arith-rl", group=8)
|
|
157
|
+
start = len(session.runs)
|
|
158
|
+
await taskset.run(agent, runtime=runtime, group=8, job=session)
|
|
159
|
+
await trainer.step(session.runs[start:], learning_rate=1e-5, group_size=8)
|
|
156
160
|
```
|
|
157
161
|
|
|
158
162
|
HUD is the environment-and-reward source for your own GRPO/PPO loop — the same environment trains any model, text or multimodal, unchanged.
|
|
159
163
|
|
|
160
|
-
→ [Training](https://docs.hud.ai/
|
|
164
|
+
→ [Training](https://docs.hud.ai/v6/core/training) · [Designing tasks for signal](https://docs.hud.ai/v6/core/advice)
|
|
161
165
|
|
|
162
166
|
## Links
|
|
163
167
|
|
|
164
168
|
- [Documentation](https://docs.hud.ai)
|
|
165
|
-
- [Quickstart](https://docs.hud.ai/quickstart)
|
|
166
|
-
- [CLI reference](https://docs.hud.ai/
|
|
167
|
-
- [Leaderboards](https://hud.ai/leaderboards)
|
|
169
|
+
- [Quickstart](https://docs.hud.ai/v6/start/quickstart)
|
|
170
|
+
- [CLI reference](https://docs.hud.ai/v6/core/cli)
|
|
168
171
|
- [Environment templates](https://hud.ai/environments)
|
|
169
172
|
- [Supported models](https://hud.ai/models)
|
|
170
173
|
- [Discord](https://discord.gg/wkjtmHYYjm)
|
|
@@ -189,8 +192,8 @@ Key areas: [Agents](hud/agents/) · [Environments](hud/environment/) · [Capabil
|
|
|
189
192
|
|
|
190
193
|
```bibtex
|
|
191
194
|
@software{hud2025agentevalplatform,
|
|
192
|
-
author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
|
|
193
|
-
title = {HUD: An Evaluation and RL
|
|
195
|
+
author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep Chawla and Nguyen Nhat Minh},
|
|
196
|
+
title = {HUD: An Evaluation and RL Environments Platform for Agents},
|
|
194
197
|
date = {2025-04},
|
|
195
198
|
url = {https://github.com/hud-evals/hud-python},
|
|
196
199
|
langid = {en}
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# Fireworks RL Training
|
|
2
|
+
|
|
3
|
+
Direct Fireworks Training API loop over the same arithmetic preview task used by
|
|
4
|
+
`cookbooks/rl-training`.
|
|
5
|
+
|
|
6
|
+
This does **not** use Fireworks native datasets or RFT jobs. It follows the
|
|
7
|
+
Training API service path from the Fireworks docs:
|
|
8
|
+
|
|
9
|
+
1. `FiretitanServiceClient.from_firetitan_config(...)`
|
|
10
|
+
2. `create_deployment_sampler(...)` for high-parallel rollouts
|
|
11
|
+
3. local grading of HUD-style multiplication tasks
|
|
12
|
+
4. `forward_backward_custom(...)` + `optim_step(...)`
|
|
13
|
+
5. `save_weights_for_sampler(...)` + sampler refresh
|
|
14
|
+
|
|
15
|
+
References:
|
|
16
|
+
|
|
17
|
+
- Fireworks Training API introduction: https://docs.fireworks.ai/fine-tuning/training-api/introduction
|
|
18
|
+
- Training and sampling lifecycle: https://docs.fireworks.ai/fine-tuning/training-api/training-and-sampling
|
|
19
|
+
- Loss functions / GRPO reference: https://docs.fireworks.ai/fine-tuning/training-api/loss-functions
|
|
20
|
+
|
|
21
|
+
## Setup
|
|
22
|
+
|
|
23
|
+
The repo-level `.env` is loaded automatically. It must contain:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
FIREWORKS_API_KEY=...
|
|
27
|
+
FIREWORKS_ACCOUNT_ID=...
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Install the isolated cookbook environment:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
uv sync --pre
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Calibrate task difficulty first
|
|
37
|
+
|
|
38
|
+
What matters for GRPO is **within-group** reward spread: advantages are computed
|
|
39
|
+
within each prompt group, so a group whose rollouts all score the same (all 0 or
|
|
40
|
+
all 1) produces zero advantage and no gradient — even if the *overall* mean looks
|
|
41
|
+
healthy. Calibration reports `within_group_reward_std` for exactly this; treat
|
|
42
|
+
it, not `reward_mean`, as the signal that training has something to learn.
|
|
43
|
+
|
|
44
|
+
Two backends:
|
|
45
|
+
|
|
46
|
+
- `--calibration-backend inference` (default): Fireworks' OpenAI-compatible API.
|
|
47
|
+
Cheap, but samples `gpt-oss-120b` (`--inference-model`), not the training base —
|
|
48
|
+
the small serverless catalog on the `lorenss` key has no Qwen3 8B. Use it only
|
|
49
|
+
for a rough task sanity check.
|
|
50
|
+
- `--calibration-backend managed`: provisions the same deployment sampler that
|
|
51
|
+
training uses and samples the **actual base model** (Qwen3 8B). This is the
|
|
52
|
+
calibration that counts. It still skips the trainer and `optim_step`.
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
uv run train.py --calibrate-only --calibration-backend managed \
|
|
56
|
+
--groups-per-step 6 --rollouts-per-prompt 6 --parallelism 18 --debug-samples 4
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
`--debug-samples N` prints the first N rollouts (reward, output-token count,
|
|
60
|
+
text) so you can see *why* a group scored the way it did. Tune the multiplication
|
|
61
|
+
range until `within_group_reward_std` is clearly above zero:
|
|
62
|
+
|
|
63
|
+
- Groups all-correct (`within_group_reward_std ~= 0`) → make it harder
|
|
64
|
+
(`--min-a/--max-a/--min-b/--max-b`).
|
|
65
|
+
- Groups all-wrong → make it easier, or raise `--max-tokens` so the model can
|
|
66
|
+
finish its working before the budget cuts it off.
|
|
67
|
+
|
|
68
|
+
The shipped defaults (3-digit × 3-digit, `--max-tokens 512`, thinking disabled)
|
|
69
|
+
calibrate to `reward_mean ~= 0.47`, `within_group_reward_std ~= 0.20` on Qwen3 8B:
|
|
70
|
+
a regime where the same problem is sometimes solved (when the model shows its
|
|
71
|
+
work) and sometimes slipped (when it answers directly) — so RL has a gradient to
|
|
72
|
+
follow.
|
|
73
|
+
|
|
74
|
+
### Reasoning models and the token budget
|
|
75
|
+
|
|
76
|
+
Qwen3 is a hybrid reasoning model: by default it opens a `<think>` block and, on
|
|
77
|
+
a tight `--max-tokens`, spends the whole budget reasoning and never emits the
|
|
78
|
+
answer (reward collapses to zero). This cookbook disables thinking by default
|
|
79
|
+
through the chat template so direct rollouts reach the integer. Pass
|
|
80
|
+
`--enable-thinking` to keep the reasoning block — and raise `--max-tokens`
|
|
81
|
+
accordingly so the answer still fits.
|
|
82
|
+
|
|
83
|
+
## Train
|
|
84
|
+
|
|
85
|
+
Once calibration shows a non-trivial `within_group_reward_std`:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
uv run train.py --steps 5 --groups-per-step 8 --rollouts-per-prompt 8 --parallelism 32
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
This uses the direct Training API managed service path. If you want calibration
|
|
92
|
+
to go through the managed deployment sampler too, pass
|
|
93
|
+
`--calibration-backend managed`; this provisions the same resources as training.
|
|
94
|
+
|
|
95
|
+
### Preview account constraints
|
|
96
|
+
|
|
97
|
+
On the `lorenss` preview account today:
|
|
98
|
+
|
|
99
|
+
- **Trainer creation works** end to end with a provisioned key: rollouts,
|
|
100
|
+
`forward_backward_custom`, `optim_step`, checkpoint save, and sampler hotload
|
|
101
|
+
all run, and multi-step training completes. (An earlier `unkey inference api id
|
|
102
|
+
is not configured` 500 on trainer creation was an account-side provisioning gap,
|
|
103
|
+
now resolved.)
|
|
104
|
+
- **LoRA is unavailable**: the validated `qwen3-8b-128k` shape only accepts
|
|
105
|
+
full-parameter training, so `--lora-rank > 0` fails at trainer creation with
|
|
106
|
+
`no validated training shape exists for ... trainer_mode=LORA_TRAINER`.
|
|
107
|
+
- **Hotloads sync full 8B weights** between steps and occasionally exceed the
|
|
108
|
+
SDK's 600s hotload budget (`RuntimeError: Hotload failed for sampler snapshot
|
|
109
|
+
...`). This is transient preview-infra latency, not a loop bug — re-running the
|
|
110
|
+
same command generally proceeds. There is no clean knob to extend the timeout
|
|
111
|
+
on the managed sampler path.
|
|
112
|
+
|
|
113
|
+
Metrics are written to:
|
|
114
|
+
|
|
115
|
+
- `runs/fireworks-rl-preview/metrics.jsonl`
|
|
116
|
+
- `runs/fireworks-rl-preview/reward_loss.png` if `matplotlib` is installed
|
|
117
|
+
|
|
118
|
+
## Notes
|
|
119
|
+
|
|
120
|
+
- Defaults use Qwen3 8B full-parameter training:
|
|
121
|
+
- `accounts/fireworks/models/qwen3-8b`
|
|
122
|
+
- `Qwen/Qwen3-8B`
|
|
123
|
+
- `accounts/fireworks/trainingShapes/qwen3-8b-128k`
|
|
124
|
+
- LoRA can be tested with `--lora-rank N`, but the validated Qwen3 8B training
|
|
125
|
+
shape currently rejects LoRA mode on the `lorenss` preview account.
|
|
126
|
+
- The first checkpoint sync happens after step 0, and subsequent rollouts sample
|
|
127
|
+
the updated weights through the same deployment.
|
|
128
|
+
- `--keep-trainer` and `--keep-deployment` are available for debugging. By
|
|
129
|
+
default the trainer is cleaned up and the deployment scales to zero on exit.
|
|
@@ -17,11 +17,13 @@ from hud.types import MCPToolCall, MCPToolResult
|
|
|
17
17
|
from hud.utils import gateway
|
|
18
18
|
|
|
19
19
|
from .tools import (
|
|
20
|
+
BashTool,
|
|
21
|
+
EditTool,
|
|
20
22
|
GlobTool,
|
|
21
23
|
GrepTool,
|
|
22
|
-
ListTool,
|
|
23
24
|
OpenAICompatibleMCPProxyTool,
|
|
24
25
|
ReadTool,
|
|
26
|
+
WriteTool,
|
|
25
27
|
)
|
|
26
28
|
from .tools.base import format_chat_result
|
|
27
29
|
|
|
@@ -41,10 +43,12 @@ class OpenAIChatAgent(ToolAgent[ChatCompletionMessageParam, OpenAIChatConfig]):
|
|
|
41
43
|
"""OpenAI-compatible agent using the chat.completions protocol."""
|
|
42
44
|
|
|
43
45
|
tool_catalog = (
|
|
46
|
+
BashTool,
|
|
44
47
|
ReadTool,
|
|
45
|
-
GrepTool,
|
|
46
48
|
GlobTool,
|
|
47
|
-
|
|
49
|
+
GrepTool,
|
|
50
|
+
EditTool,
|
|
51
|
+
WriteTool,
|
|
48
52
|
OpenAICompatibleMCPProxyTool,
|
|
49
53
|
)
|
|
50
54
|
|
|
@@ -2,13 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from .filesystem import GlobTool, GrepTool,
|
|
5
|
+
from .filesystem import BashTool, EditTool, GlobTool, GrepTool, ReadTool, WriteTool
|
|
6
6
|
from .mcp_proxy import OpenAICompatibleMCPProxyTool
|
|
7
7
|
|
|
8
8
|
__all__ = [
|
|
9
|
+
"BashTool",
|
|
10
|
+
"EditTool",
|
|
9
11
|
"GlobTool",
|
|
10
12
|
"GrepTool",
|
|
11
|
-
"ListTool",
|
|
12
13
|
"OpenAICompatibleMCPProxyTool",
|
|
13
14
|
"ReadTool",
|
|
15
|
+
"WriteTool",
|
|
14
16
|
]
|