hud-python 0.6.1__tar.gz → 0.6.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hud_python-0.6.1 → hud_python-0.6.3}/PKG-INFO +1 -1
- hud_python-0.6.3/cookbooks/fireworks-rl-training/README.md +114 -0
- hud_python-0.6.3/cookbooks/fireworks-rl-training/pyproject.toml +19 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/cookbooks/rl-training/README.md +5 -4
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/eval.py +38 -4
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/init.py +3 -3
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/models.py +4 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/templates.py +4 -5
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/tests/test_deploy.py +1 -1
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/tests/test_eval_config.py +29 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/tests/test_init.py +8 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/job.py +33 -9
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/run.py +8 -1
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/taskset.py +18 -2
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/utils/gateway.py +1 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/version.py +1 -1
- {hud_python-0.6.1 → hud_python-0.6.3}/pyproject.toml +1 -1
- {hud_python-0.6.1 → hud_python-0.6.3}/.gitignore +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/LICENSE +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/README.md +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/cookbooks/a2a-chat/README.md +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/cookbooks/a2a-chat/pyproject.toml +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/cookbooks/codex-coding/README.md +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/cookbooks/codex-coding/pyproject.toml +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/cookbooks/rl-training/pyproject.toml +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/__main__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/_legacy.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/base.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/browser_use/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/browser_use/agent.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/claude/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/claude/agent.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/claude/sdk/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/claude/sdk/agent.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/claude/sdk/computer_mcp.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/claude/tools/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/claude/tools/base.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/claude/tools/coding.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/claude/tools/computer.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/claude/tools/hosted.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/claude/tools/mcp_proxy.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/claude/tools/settings.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/claude/tools/tests/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/claude/tools/tests/test_computer.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/gemini/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/gemini/agent.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/gemini/settings.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/gemini/tools/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/gemini/tools/base.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/gemini/tools/coding.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/gemini/tools/computer.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/gemini/tools/filesystem.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/gemini/tools/hosted.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/gemini/tools/mcp_proxy.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/gemini/tools/tests/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/gemini/tools/tests/test_computer.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/misc/response_automation.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai/agent.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai/tools/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai/tools/apply_patch.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai/tools/base.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai/tools/coding.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai/tools/computer.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai/tools/hosted.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai/tools/mcp_proxy.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai/tools/strict_schema.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai/tools/tests/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai/tools/tests/test_computer.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai/tools/tests/test_strict_schema.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai_compatible/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai_compatible/agent.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai_compatible/tools/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai_compatible/tools/base.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai_compatible/tools/filesystem.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/openai_compatible/tools/mcp_proxy.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/robot/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/robot/_types.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/robot/adapter.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/robot/agent.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/robot/model.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tests/test_apply_patch.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tests/test_claude_agent.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tests/test_claude_sdk_agent.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tests/test_gemini_agent.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tests/test_openai_agent.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tests/test_openai_compatible_agent.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tests/test_provider_native_tools.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tests/test_tool_agent.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tests/test_trace.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tool_agent.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tools/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tools/base.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tools/hosted.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tools/mcp.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tools/rfb.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/tools/ssh.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/agents/types.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/capabilities/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/capabilities/base.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/capabilities/cdp.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/capabilities/filetracking.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/capabilities/mcp.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/capabilities/rfb.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/capabilities/robot.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/capabilities/ssh.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/__main__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/cancel.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/client.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/deploy.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/login.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/presets.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/serve.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/sync.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/task.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/tests/test_eval_bedrock.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/tests/test_sync_export.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/api.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/build_display.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/build_logs.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/config.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/context.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/display.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/jobs.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/source.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/tests/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/tests/test_build_display.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/tests/test_config.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/tests/test_context.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/tests/test_registry.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/tests/test_source.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/tests/test_tasks.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/tests/test_version_check.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/cli/utils/version_check.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/clients/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/clients/client.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/clients/tests/test_connect.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/conftest.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/env.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/file_tracker.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/file_tracking.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/legacy.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/robot/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/robot/bridge.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/robot/endpoint.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/robot/sim_runner.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/server.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/tests/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/tests/conftest.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/tests/test_capability_backing.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/tests/test_file_tracker.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/tests/test_file_tracking.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/tests/test_legacy.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/tests/test_loader.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/tests/test_manifest.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/tests/test_server.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/tests/test_tunnel.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/utils.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/environment/workspace.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/chat.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/file_tracking.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/runtime.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/sync.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/task.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/tests/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/tests/test_chat.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/tests/test_docker_provider.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/tests/test_file_tracking_observer.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/tests/test_hosted.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/tests/test_job.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/tests/test_rollout.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/tests/test_sync.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/eval/tests/test_task.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/graders/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/graders/base.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/graders/bash.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/graders/combine.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/graders/judge.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/graders/results.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/graders/text.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/patches/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/patches/mcp_patches.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/patches/tests/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/patches/tests/test_warnings.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/patches/warnings.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/py.typed +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/server.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/settings.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/telemetry/context.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/telemetry/exporter.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/telemetry/filetracking.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/telemetry/span.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/telemetry/tests/test_exporter.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/telemetry/tests/test_filetracking.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/telemetry/tests/test_instrument.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/train/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/train/base.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/train/client.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/train/types.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/types.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/utils/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/utils/exceptions.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/utils/hints.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/utils/hud_console.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/utils/modules.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/utils/platform.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/utils/requests.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/utils/serialization.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/utils/tests/test_exceptions.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/utils/tests/test_hints.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/utils/tests/test_hud_console.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/utils/tests/test_platform.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/utils/tests/test_requests.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/utils/tests/test_serialization.py +0 -0
- {hud_python-0.6.1 → hud_python-0.6.3}/hud/utils/time.py +0 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# Fireworks RL Training
|
|
2
|
+
|
|
3
|
+
Direct Fireworks Training API loop over the same arithmetic preview task used by
|
|
4
|
+
`cookbooks/rl-training`.
|
|
5
|
+
|
|
6
|
+
This does **not** use Fireworks native datasets or RFT jobs. It follows the
|
|
7
|
+
Training API service path from the Fireworks docs:
|
|
8
|
+
|
|
9
|
+
1. `FiretitanServiceClient.from_firetitan_config(...)`
|
|
10
|
+
2. `create_deployment_sampler(...)` for high-parallel rollouts
|
|
11
|
+
3. local grading of HUD-style multiplication tasks
|
|
12
|
+
4. `forward_backward_custom(...)` + `optim_step(...)`
|
|
13
|
+
5. `save_weights_for_sampler(...)` + sampler refresh
|
|
14
|
+
|
|
15
|
+
References:
|
|
16
|
+
|
|
17
|
+
- Fireworks Training API introduction: https://docs.fireworks.ai/fine-tuning/training-api/introduction
|
|
18
|
+
- Training and sampling lifecycle: https://docs.fireworks.ai/fine-tuning/training-api/training-and-sampling
|
|
19
|
+
- Loss functions / GRPO reference: https://docs.fireworks.ai/fine-tuning/training-api/loss-functions
|
|
20
|
+
|
|
21
|
+
## Setup
|
|
22
|
+
|
|
23
|
+
The repo-level `.env` is loaded automatically. It must contain:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
FIREWORKS_API_KEY=...
|
|
27
|
+
FIREWORKS_ACCOUNT_ID=...
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Install the isolated cookbook environment:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
uv sync --pre
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Calibrate task difficulty first
|
|
37
|
+
|
|
38
|
+
Calibration defaults to Fireworks' OpenAI-compatible inference API, so it does
|
|
39
|
+
**not** create a trainer, provision a Training API deployment, or call
|
|
40
|
+
`optim_step`. This is the cheap way to tune task difficulty before paying for a
|
|
41
|
+
Training API run.
|
|
42
|
+
|
|
43
|
+
The calibration model is separate from the training base model because the
|
|
44
|
+
`lorenss` key currently exposes only a small serverless inference catalog (no
|
|
45
|
+
Qwen3 8B deployment). Override it with `--inference-model` if you have a closer
|
|
46
|
+
deployed model.
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
uv run train.py --calibrate-only --groups-per-step 8 --rollouts-per-prompt 8 --parallelism 32
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
The goal is a reward distribution with variance. If reward is all zero, make the
|
|
53
|
+
task easier:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
uv run train.py --calibrate-only --min-a 10 --max-a 99 --min-b 2 --max-b 9
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
If reward is all one, make the task harder:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
uv run train.py --calibrate-only --min-a 1000 --max-a 9999 --min-b 11 --max-b 99
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
The current defaults are calibrated for the visible `gpt-oss-120b` inference
|
|
66
|
+
model on the `lorenss` key: 2-digit by 1-digit multiplication with a direct
|
|
67
|
+
"reply only with the integer" prompt. A 32-rollout calibration gave a non-trivial
|
|
68
|
+
baseline (`reward_mean ~= 0.22`, `reward_std ~= 0.42`), while the original
|
|
69
|
+
3-digit by 2-digit range was all-zero.
|
|
70
|
+
|
|
71
|
+
## Train
|
|
72
|
+
|
|
73
|
+
Once calibration has non-trivial rewards:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
uv run train.py --steps 5 --groups-per-step 8 --rollouts-per-prompt 8 --parallelism 32
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
This uses the direct Training API managed service path. If you want calibration
|
|
80
|
+
to go through the managed deployment sampler too, pass
|
|
81
|
+
`--calibration-backend managed`; this provisions the same resources as training.
|
|
82
|
+
|
|
83
|
+
### Current Fireworks preview account blocker
|
|
84
|
+
|
|
85
|
+
On the `lorenss` preview account, trainer creation currently fails before the
|
|
86
|
+
first train step with:
|
|
87
|
+
|
|
88
|
+
```text
|
|
89
|
+
failed to ensure FIREWORKS_API_KEY secret: unkey inference api id is not configured
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
This happens even with `create_deployment=False`, so it is an account/control
|
|
93
|
+
plane provisioning issue rather than a problem in the rollout or loss code. Once
|
|
94
|
+
Fireworks enables the missing Unkey inference API config for the account, the
|
|
95
|
+
same `uv run train.py ...` command should proceed to trainer startup and the
|
|
96
|
+
first `forward_backward_custom(...)` call.
|
|
97
|
+
|
|
98
|
+
Metrics are written to:
|
|
99
|
+
|
|
100
|
+
- `runs/fireworks-rl-preview/metrics.jsonl`
|
|
101
|
+
- `runs/fireworks-rl-preview/reward_loss.png` if `matplotlib` is installed
|
|
102
|
+
|
|
103
|
+
## Notes
|
|
104
|
+
|
|
105
|
+
- Defaults use Qwen 3 8B full-parameter training:
|
|
106
|
+
- `accounts/fireworks/models/qwen3-8b`
|
|
107
|
+
- `Qwen/Qwen3-8B`
|
|
108
|
+
- `accounts/fireworks/trainingShapes/qwen3-8b-128k`
|
|
109
|
+
- LoRA can be tested with `--lora-rank N`, but the validated Qwen3 8B training
|
|
110
|
+
shape currently rejects LoRA mode on the `lorenss` preview account.
|
|
111
|
+
- The first checkpoint sync happens after step 0 and subsequent rollouts sample
|
|
112
|
+
the updated weights through the same deployment.
|
|
113
|
+
- `--keep-trainer` and `--keep-deployment` are available for debugging. By
|
|
114
|
+
default the trainer is cleaned up and the deployment scales to zero on exit.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "fireworks-rl-training"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Direct Fireworks Training API RL loop over HUD-style arithmetic tasks"
|
|
5
|
+
requires-python = ">=3.11,<3.13"
|
|
6
|
+
dependencies = [
|
|
7
|
+
"fireworks-ai[training]",
|
|
8
|
+
"hud-python",
|
|
9
|
+
"matplotlib",
|
|
10
|
+
"python-dotenv",
|
|
11
|
+
"torch>=2",
|
|
12
|
+
"transformers>=4.55",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
[tool.uv]
|
|
16
|
+
package = false
|
|
17
|
+
|
|
18
|
+
[tool.uv.sources]
|
|
19
|
+
hud-python = { path = "../..", editable = true }
|
|
@@ -18,12 +18,13 @@ each `optim_step` closes the on-policy loop.
|
|
|
18
18
|
|
|
19
19
|
## Run
|
|
20
20
|
|
|
21
|
-
Needs `HUD_API_KEY` (from your environment or `.env`). List the
|
|
22
|
-
|
|
23
|
-
the top of `simple_train.py` /
|
|
21
|
+
Needs `HUD_API_KEY` (from your environment or `.env`). List the gateway models
|
|
22
|
+
on your account, pick a trainable one (the **Trainable** column marks them), and
|
|
23
|
+
set it as the `MODEL` constant at the top of `simple_train.py` /
|
|
24
|
+
`ppo_custom_loss.py`:
|
|
24
25
|
|
|
25
26
|
```bash
|
|
26
|
-
hud models
|
|
27
|
+
hud models list # Name | Model (API) | ID | Provider | Agent | Trainable
|
|
27
28
|
```
|
|
28
29
|
|
|
29
30
|
**Train on a deployed taskset (the real flow).** You've built a taskset and
|
|
@@ -5,6 +5,7 @@ Config Override Order: CLI arguments > .hud_eval.toml > defaults
|
|
|
5
5
|
|
|
6
6
|
from __future__ import annotations
|
|
7
7
|
|
|
8
|
+
import ast
|
|
8
9
|
import asyncio
|
|
9
10
|
import logging
|
|
10
11
|
import os
|
|
@@ -665,13 +666,46 @@ def _build_agent(cfg: EvalConfig) -> Any:
|
|
|
665
666
|
return cast("Any", cfg.agent_type.cls)(config=config)
|
|
666
667
|
|
|
667
668
|
|
|
669
|
+
def _python_defines_environment(path: Path) -> bool:
|
|
670
|
+
"""Return True when ``path`` constructs a v6 :class:`~hud.environment.Environment`."""
|
|
671
|
+
try:
|
|
672
|
+
tree = ast.parse(path.read_text(encoding="utf-8"))
|
|
673
|
+
except (OSError, SyntaxError):
|
|
674
|
+
return False
|
|
675
|
+
for node in ast.walk(tree):
|
|
676
|
+
if not isinstance(node, ast.Call):
|
|
677
|
+
continue
|
|
678
|
+
callee = node.func
|
|
679
|
+
callee_name = (
|
|
680
|
+
callee.id
|
|
681
|
+
if isinstance(callee, ast.Name)
|
|
682
|
+
else callee.attr
|
|
683
|
+
if isinstance(callee, ast.Attribute)
|
|
684
|
+
else None
|
|
685
|
+
)
|
|
686
|
+
if callee_name == "Environment":
|
|
687
|
+
return True
|
|
688
|
+
return False
|
|
689
|
+
|
|
690
|
+
|
|
668
691
|
def _spawn_target(source: Path) -> Path:
|
|
669
|
-
"""The path the ``LocalRuntime`` provider serves
|
|
670
|
-
|
|
671
|
-
|
|
692
|
+
"""The path the ``LocalRuntime`` provider serves.
|
|
693
|
+
|
|
694
|
+
Directories and env-defining ``.py`` files are served as-is. Task-only
|
|
695
|
+
sources (``tasks.py`` importing from ``env.py``) resolve to a sibling
|
|
696
|
+
``env.py`` or the containing directory. JSON/JSONL data files use the
|
|
697
|
+
surrounding directory (the env source lives next to the tasks file).
|
|
698
|
+
"""
|
|
672
699
|
resolved = source.resolve()
|
|
673
|
-
if resolved.is_dir()
|
|
700
|
+
if resolved.is_dir():
|
|
701
|
+
return resolved
|
|
702
|
+
if resolved.suffix != ".py":
|
|
703
|
+
return resolved.parent
|
|
704
|
+
if _python_defines_environment(resolved):
|
|
674
705
|
return resolved
|
|
706
|
+
env_py = resolved.parent / "env.py"
|
|
707
|
+
if env_py.is_file():
|
|
708
|
+
return env_py
|
|
675
709
|
return resolved.parent
|
|
676
710
|
|
|
677
711
|
|
|
@@ -76,8 +76,8 @@ def init_command(
|
|
|
76
76
|
None,
|
|
77
77
|
"--preset",
|
|
78
78
|
"-p",
|
|
79
|
-
help="Starter preset to download from GitHub (e.g. blank,
|
|
80
|
-
"deepresearch,
|
|
79
|
+
help="Starter preset to download from GitHub (e.g. blank, browser, "
|
|
80
|
+
"deepresearch, cua, autonomous-businesses, verilog). Omit for an interactive picker; in a "
|
|
81
81
|
"non-interactive shell, omitting it writes the minimal local scaffold.",
|
|
82
82
|
),
|
|
83
83
|
) -> None:
|
|
@@ -89,7 +89,7 @@ def init_command(
|
|
|
89
89
|
|
|
90
90
|
Examples:
|
|
91
91
|
hud init my-env # interactive picker (or local scaffold)
|
|
92
|
-
hud init my-env --preset
|
|
92
|
+
hud init my-env --preset browser # download the browser starter
|
|
93
93
|
hud init my-env --dir envs # create ./envs/my-env[/not dim]
|
|
94
94
|
"""
|
|
95
95
|
hud_console = HUDConsole()
|
|
@@ -56,14 +56,18 @@ def list_models(
|
|
|
56
56
|
table = Table()
|
|
57
57
|
table.add_column("Name", style="cyan")
|
|
58
58
|
table.add_column("Model (API)", style="green")
|
|
59
|
+
table.add_column("ID", style="blue", no_wrap=True)
|
|
59
60
|
table.add_column("Provider", style="yellow")
|
|
60
61
|
table.add_column("Agent", style="magenta")
|
|
62
|
+
table.add_column("Trainable", style="green", justify="center")
|
|
61
63
|
for model in models_list:
|
|
62
64
|
table.add_row(
|
|
63
65
|
model.name or model.id or "-",
|
|
64
66
|
model.model_name or model.id or "-",
|
|
67
|
+
model.id or "-",
|
|
65
68
|
model.provider.name or "-",
|
|
66
69
|
model.sdk_agent_type or "-",
|
|
70
|
+
"✓" if model.is_trainable else "",
|
|
67
71
|
)
|
|
68
72
|
console.print(table)
|
|
69
73
|
console.print(f"\n[dim]Gateway: {settings.hud_gateway_url}[/dim]")
|
|
@@ -13,7 +13,7 @@ COPY . .
|
|
|
13
13
|
|
|
14
14
|
# Serve the Environment's control channel (tcp JSON-RPC) on 8765.
|
|
15
15
|
EXPOSE 8765
|
|
16
|
-
CMD ["uv", "run", "
|
|
16
|
+
CMD ["uv", "run", "hud", "serve", "env:env", "--host", "0.0.0.0", "--port", "8765"]
|
|
17
17
|
"""
|
|
18
18
|
|
|
19
19
|
# fmt: off
|
|
@@ -78,7 +78,7 @@ async def count(sentence: str, letter: str):
|
|
|
78
78
|
|
|
79
79
|
|
|
80
80
|
# =============================================================================
|
|
81
|
-
# TEST - run with: python env.py
|
|
81
|
+
# TEST - run with: uv run python env.py
|
|
82
82
|
# =============================================================================
|
|
83
83
|
|
|
84
84
|
async def test():
|
|
@@ -136,7 +136,6 @@ version = "0.1.0"
|
|
|
136
136
|
requires-python = ">=3.11"
|
|
137
137
|
dependencies = ["hud-python"]
|
|
138
138
|
|
|
139
|
-
[
|
|
140
|
-
|
|
141
|
-
build-backend = "hatchling.build"
|
|
139
|
+
[tool.uv]
|
|
140
|
+
package = false
|
|
142
141
|
"""
|
|
@@ -48,7 +48,7 @@ class TestResolveEnvironmentName:
|
|
|
48
48
|
|
|
49
49
|
def test_entrypoint_disambiguates_subagent(self, tmp_path: Path) -> None:
|
|
50
50
|
(tmp_path / "Dockerfile").write_text(
|
|
51
|
-
'CMD ["hud", "
|
|
51
|
+
'CMD ["hud", "serve", "env:env", "--port", "8765"]\n', encoding="utf-8"
|
|
52
52
|
)
|
|
53
53
|
(tmp_path / "env.py").write_text('env = Environment("trace-explorer")\n', encoding="utf-8")
|
|
54
54
|
(tmp_path / "verify.py").write_text(
|
|
@@ -237,3 +237,32 @@ def test_eval_max_steps_lands_in_agent_config() -> None:
|
|
|
237
237
|
)
|
|
238
238
|
agent = eval_mod._build_agent(cfg)
|
|
239
239
|
assert agent.config.max_steps == 17
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def test_spawn_target_serves_single_file_env(tmp_path: Path) -> None:
|
|
243
|
+
env_py = tmp_path / "tasks.py"
|
|
244
|
+
env_py.write_text(
|
|
245
|
+
'from hud import Environment\nenv = Environment(name="demo")\n',
|
|
246
|
+
encoding="utf-8",
|
|
247
|
+
)
|
|
248
|
+
assert eval_mod._spawn_target(env_py) == env_py.resolve()
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def test_spawn_target_resolves_split_tasks_layout(tmp_path: Path) -> None:
|
|
252
|
+
(tmp_path / "env.py").write_text(
|
|
253
|
+
'from hud.environment import Environment\nenv = Environment(name="demo")\n',
|
|
254
|
+
encoding="utf-8",
|
|
255
|
+
)
|
|
256
|
+
tasks_py = tmp_path / "tasks.py"
|
|
257
|
+
tasks_py.write_text("from env import env\n\ntasks = []\n", encoding="utf-8")
|
|
258
|
+
assert eval_mod._spawn_target(tasks_py) == (tmp_path / "env.py").resolve()
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def test_spawn_target_json_uses_parent_directory(tmp_path: Path) -> None:
|
|
262
|
+
tasks_json = tmp_path / "tasks.json"
|
|
263
|
+
tasks_json.write_text("[]", encoding="utf-8")
|
|
264
|
+
assert eval_mod._spawn_target(tasks_json) == tmp_path.resolve()
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def test_spawn_target_directory_is_served_as_is(tmp_path: Path) -> None:
|
|
268
|
+
assert eval_mod._spawn_target(tmp_path) == tmp_path.resolve()
|
|
@@ -29,6 +29,14 @@ def test_init_scaffolds_a_runnable_package(tmp_path: Path) -> None:
|
|
|
29
29
|
assert (target / "tasks.py").read_text().startswith('"""')
|
|
30
30
|
assert 'name = "my-cool-env"' in (target / "pyproject.toml").read_text()
|
|
31
31
|
|
|
32
|
+
pyproject = (target / "pyproject.toml").read_text()
|
|
33
|
+
assert "package = false" in pyproject
|
|
34
|
+
assert "[build-system]" not in pyproject
|
|
35
|
+
|
|
36
|
+
dockerfile = (target / "Dockerfile.hud").read_text()
|
|
37
|
+
assert 'CMD ["uv", "run", "hud", "serve"' in dockerfile
|
|
38
|
+
assert '"dev"' not in dockerfile
|
|
39
|
+
|
|
32
40
|
|
|
33
41
|
def test_init_refuses_to_clobber_nonempty_directory(tmp_path: Path) -> None:
|
|
34
42
|
target = tmp_path / "taken"
|
|
@@ -38,17 +38,20 @@ class Job:
|
|
|
38
38
|
name: str
|
|
39
39
|
runs: list[Run] = field(default_factory=list)
|
|
40
40
|
group: int = 1
|
|
41
|
+
#: Platform taskset id this job runs, when it's a synced taskset
|
|
42
|
+
#: (``Taskset.from_api``). Links the job to that taskset on the platform.
|
|
43
|
+
taskset_id: str | None = None
|
|
41
44
|
|
|
42
45
|
@classmethod
|
|
43
|
-
async def start(cls, name: str, *, group: int = 1) -> Job:
|
|
46
|
+
async def start(cls, name: str, *, group: int = 1, taskset_id: str | None = None) -> Job:
|
|
44
47
|
"""Open a job spanning multiple scheduler calls.
|
|
45
48
|
|
|
46
49
|
A scheduler call mints its own job by default; pass a started job as
|
|
47
50
|
``job=`` to ``Task.run`` / ``Taskset.run`` to accumulate every run of a
|
|
48
51
|
longer arc — a training session, a chat conversation — under one id.
|
|
49
52
|
"""
|
|
50
|
-
job = cls(id=uuid.uuid4().hex, name=name, group=group)
|
|
51
|
-
await job_enter(job.id, name=name, group=group)
|
|
53
|
+
job = cls(id=uuid.uuid4().hex, name=name, group=group, taskset_id=taskset_id)
|
|
54
|
+
await job_enter(job.id, name=name, group=group, taskset_id=taskset_id)
|
|
52
55
|
return job
|
|
53
56
|
|
|
54
57
|
@property
|
|
@@ -79,21 +82,42 @@ def _reporting_enabled() -> bool:
|
|
|
79
82
|
return bool(settings.telemetry_enabled and settings.api_key)
|
|
80
83
|
|
|
81
84
|
|
|
82
|
-
async def job_enter(job_id: str, *, name: str, group: int) -> None:
|
|
83
|
-
"""Register a batch job with the platform.
|
|
85
|
+
async def job_enter(job_id: str, *, name: str, group: int, taskset_id: str | None = None) -> None:
|
|
86
|
+
"""Register a batch job with the platform.
|
|
87
|
+
|
|
88
|
+
``taskset_id`` links the job to a synced taskset (set when running
|
|
89
|
+
``Taskset.from_api``); ``None`` for ad-hoc/local tasksets. The platform
|
|
90
|
+
creates no taskset on its own — remote rollouts carry the scenario inline.
|
|
91
|
+
"""
|
|
84
92
|
if not _reporting_enabled():
|
|
85
93
|
return
|
|
86
|
-
await _report(
|
|
94
|
+
await _report(
|
|
95
|
+
f"/trace/job/{job_id}/enter",
|
|
96
|
+
{"name": name, "group": group, "taskset_id": taskset_id},
|
|
97
|
+
)
|
|
87
98
|
from hud.settings import settings
|
|
88
99
|
|
|
89
100
|
logger.info("job: %s/jobs/%s", settings.hud_web_url, job_id)
|
|
90
101
|
|
|
91
102
|
|
|
92
|
-
async def trace_enter(
|
|
93
|
-
|
|
103
|
+
async def trace_enter(
|
|
104
|
+
trace_id: str,
|
|
105
|
+
*,
|
|
106
|
+
job_id: str | None,
|
|
107
|
+
group_id: str | None,
|
|
108
|
+
model: str | None = None,
|
|
109
|
+
) -> None:
|
|
110
|
+
"""Report that one rollout started.
|
|
111
|
+
|
|
112
|
+
``model`` is the model string the agent will sample (when known); the
|
|
113
|
+
platform resolves it and attributes the trace immediately on enter.
|
|
114
|
+
"""
|
|
94
115
|
if not _reporting_enabled():
|
|
95
116
|
return
|
|
96
|
-
await _report(
|
|
117
|
+
await _report(
|
|
118
|
+
f"/trace/{trace_id}/enter",
|
|
119
|
+
{"job_id": job_id, "group_id": group_id, "model": model},
|
|
120
|
+
)
|
|
97
121
|
|
|
98
122
|
|
|
99
123
|
async def trace_exit(run: Run) -> None:
|
|
@@ -295,8 +295,15 @@ async def rollout(
|
|
|
295
295
|
job_id = uuid.uuid4().hex
|
|
296
296
|
await job_enter(job_id, name=task.id, group=1)
|
|
297
297
|
trace_id = trace_id or uuid.uuid4().hex
|
|
298
|
+
# Report the model the agent will sample so the platform attributes the
|
|
299
|
+
# trace to it on enter. Only LLM tool agents carry an inference-model slug
|
|
300
|
+
# (``config.model``); robot/other agents have none. Local import avoids an
|
|
301
|
+
# eval<->agents import cycle.
|
|
302
|
+
from hud.agents.tool_agent import ToolAgent
|
|
303
|
+
|
|
304
|
+
agent_model = agent.config.model if isinstance(agent, ToolAgent) else None
|
|
298
305
|
with set_trace_context(trace_id):
|
|
299
|
-
await trace_enter(trace_id, job_id=job_id, group_id=group_id)
|
|
306
|
+
await trace_enter(trace_id, job_id=job_id, group_id=group_id, model=agent_model)
|
|
300
307
|
run: Run | None = None
|
|
301
308
|
_phase = "provisioning"
|
|
302
309
|
|
|
@@ -59,6 +59,17 @@ class Taskset:
|
|
|
59
59
|
self.origin = origin
|
|
60
60
|
self.tasks: dict[str, Task] = self._index_by_slug(list(tasks))
|
|
61
61
|
|
|
62
|
+
@property
|
|
63
|
+
def api_id(self) -> str | None:
|
|
64
|
+
"""The platform taskset id when loaded via :meth:`from_api`, else None.
|
|
65
|
+
|
|
66
|
+
Threaded into the job so a remote run of a synced taskset links to it;
|
|
67
|
+
ad-hoc/file/module tasksets have none and create no taskset.
|
|
68
|
+
"""
|
|
69
|
+
if self.origin and self.origin.startswith("api:"):
|
|
70
|
+
return self.origin[len("api:") :]
|
|
71
|
+
return None
|
|
72
|
+
|
|
62
73
|
@classmethod
|
|
63
74
|
def from_file(cls, path: str | Path) -> Taskset:
|
|
64
75
|
"""Load a taskset from ``.py`` source, a directory, or JSON/JSONL data.
|
|
@@ -242,8 +253,13 @@ class Taskset:
|
|
|
242
253
|
expanded.extend((task, group_id) for _ in range(group))
|
|
243
254
|
|
|
244
255
|
if job is None:
|
|
245
|
-
job = Job(
|
|
246
|
-
|
|
256
|
+
job = Job(
|
|
257
|
+
id=uuid.uuid4().hex,
|
|
258
|
+
name=_job_name(self.name, task_list, group),
|
|
259
|
+
group=group,
|
|
260
|
+
taskset_id=self.api_id,
|
|
261
|
+
)
|
|
262
|
+
await job_enter(job.id, name=job.name, group=group, taskset_id=self.api_id)
|
|
247
263
|
job_id = job.id
|
|
248
264
|
|
|
249
265
|
# Placement is chosen once for the batch: HostedRuntime delegates the
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|