hud-python 0.4.42__tar.gz → 0.4.44__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic.
- {hud_python-0.4.42 → hud_python-0.4.44}/PKG-INFO +1 -1
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/openai_chat_generic.py +1 -1
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/__init__.py +6 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/dev.py +24 -2
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/eval.py +10 -11
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/flows/tasks.py +4 -5
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/__init__.py +6 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/config.py +2 -2
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/gpu_utils.py +5 -3
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/remote_runner.py +18 -9
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/rl_api.py +2 -2
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/environment.py +1 -5
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/config.py +14 -9
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/distributed.py +34 -1
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/learner.py +28 -5
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/train.py +73 -50
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/group_eval.py +2 -2
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tasks.py +1 -1
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tests/test_version.py +1 -1
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/version.py +1 -1
- {hud_python-0.4.42 → hud_python-0.4.44}/pyproject.toml +1 -1
- {hud_python-0.4.42 → hud_python-0.4.44}/.gitignore +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/LICENSE +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/README.md +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/README.md +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/blank/README.md +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/blank/controller/README.md +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/blank/environment/README.md +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/blank/pyproject.toml +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/browser/README.md +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/browser/environment/2048/README.md +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/browser/environment/2048/backend/pyproject.toml +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/browser/environment/README.md +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/browser/environment/todo/README.md +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/browser/environment/todo/backend/pyproject.toml +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/browser/pyproject.toml +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/deepresearch/pyproject.toml +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/remote_browser/README.md +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/remote_browser/pyproject.toml +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/text_2048/README.md +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/environments/text_2048/pyproject.toml +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/examples/README.md +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/__main__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/base.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/claude.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/grounded_openai.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/langchain.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/lite_llm.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/misc/response_agent.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/openai.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/tests/test_claude.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/tests/test_client.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/agents/tests/test_openai.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/__main__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/analyze.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/build.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/clone.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/debug.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/flows/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/get.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/init.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/list_func.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/pull.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/push.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/remove.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/celebrate.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/display.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/gpu.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/local_runner.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/presets.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/viewer.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/vllm.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/rl/wait_utils.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_analyze.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_build.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_clone.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_cursor.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_debug.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_list_func.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_mcp_server.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_pull.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_push.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_registry.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/config.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/cursor.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/docker.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/env_check.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/interactive.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/local_runner.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/metadata.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/package_runner.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/remote_runner.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/runner.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/server.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/source_hash.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/README.md +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/base.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/fastmcp.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/mcp_use.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/tests/test_client_integration.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/tests/test_fastmcp.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/tests/test_mcp_use_retry.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/tests/test_protocol.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/utils/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/utils/mcp_use_retry.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/utils/retry.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/clients/utils/retry_transport.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/datasets/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/datasets/parallel.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/datasets/runner.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/datasets/utils.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/misc/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/misc/claude_plays_pokemon.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/native/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/native/comparator.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/native/tests/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/native/tests/test_comparator.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/native/tests/test_native_init.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/collector.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/config.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/context.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/exporters.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/instrumentation.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/processors.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/tests/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/otel/tests/test_processors.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/py.typed +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/README.md +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/actor.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/buffer.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/chat_template.jinja +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/tests/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/tests/test_learner.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/types.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/utils/start_vllm_server.sh +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/utils.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/rl/vllm_adapter.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/samples/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/samples/browser.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/context.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/low_level.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/server.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/test_add_tool.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/test_context.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/test_mcp_server_handlers.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/test_mcp_server_integration.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/test_mcp_server_more.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/test_run_wrapper.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/test_server_extra.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/server/tests/test_sigterm_runner.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/settings.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/shared/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/shared/exceptions.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/shared/hints.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/shared/requests.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/shared/tests/test_exceptions.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/telemetry/job.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/telemetry/replay.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/telemetry/tests/test_replay.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/telemetry/tests/test_trace.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/telemetry/trace.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/base.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/bash.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/computer/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/computer/anthropic.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/computer/hud.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/computer/openai.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/edit.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/executors/base.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/executors/tests/test_base_executor.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/grounding/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/grounding/config.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/grounding/grounded_tool.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/grounding/grounder.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/grounding/tests/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/playwright.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/response.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/submit.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_bash.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_bash_extended.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_computer.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_computer_actions.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_edit.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_tools_init.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/types.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/tools/utils.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/types.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/agent_factories.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/async_utils.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/hud_console.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/mcp.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/pretty_errors.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/progress.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/telemetry.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tests/test_async_utils.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tests/test_mcp.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tests/test_progress.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tests/test_telemetry.py +0 -0
- {hud_python-0.4.42 → hud_python-0.4.44}/hud/utils/tool_shorthand.py +0 -0
hud/cli/__init__.py

@@ -1178,6 +1178,11 @@ def rl(
         "--vllm-gpu",
         help="Specific GPU for vLLM server",
     ),
+    vllm_gpu_count: int = typer.Option(
+        1,
+        "--vllm-gpu-count",
+        help="Number of GPUs for vLLM server",
+    ),
     skip_vllm_startup: bool = typer.Option(
         False,
         "--skip-vllm-startup",
@@ -1199,6 +1204,7 @@ def rl(
         no_ddp=no_ddp,
         ddp_gpus=ddp_gpus,
         vllm_gpu=vllm_gpu,
+        vllm_gpu_count=vllm_gpu_count,
         yes=yes,
         skip_vllm_startup=skip_vllm_startup,
     )
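The new flag threads from the CLI entry point down to the deploy call (see the rl_api.py hunk below). A plausible invocation, where the task file name is illustrative only:

    hud rl tasks.json --vllm-gpu-count 2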
hud/cli/dev.py

@@ -73,6 +73,24 @@ def create_proxy_server(
         "PYTHONUNBUFFERED=1",  # Ensure Python output is not buffered
     ]
 
+    # Check for .env file in the project directory and add env vars
+    env_file = project_path / ".env"
+    loaded_env_vars = {}
+    if env_file.exists():
+        try:
+            from hud.cli.utils.config import parse_env_file
+
+            env_contents = env_file.read_text(encoding="utf-8")
+            loaded_env_vars = parse_env_file(env_contents)
+            for key, value in loaded_env_vars.items():
+                docker_cmd.extend(["-e", f"{key}={value}"])
+            if verbose and loaded_env_vars:
+                hud_console.info(
+                    f"Loaded {len(loaded_env_vars)} environment variable(s) from .env file"
+                )
+        except Exception as e:
+            hud_console.warning(f"Failed to load .env file: {e}")
+
     # Add user-provided Docker arguments
     if docker_args:
         docker_cmd.extend(docker_args)
@@ -112,8 +130,12 @@ def create_proxy_server(
     hud_console.info("The container's CMD determines reload behavior")
     hud_console.command_example(f"docker logs -f {container_name}", "View container logs")
 
-    # Show the full Docker command if there are environment variables
-
+    # Show the full Docker command if there are environment variables (from .env or args)
+    has_env_from_args = docker_args and any(
+        arg == "-e" or arg.startswith("--env") for arg in docker_args
+    )
+    has_env_from_file = bool(loaded_env_vars)
+    if has_env_from_args or has_env_from_file:
         hud_console.info("")
         hud_console.info("Docker command with environment variables:")
         hud_console.info(" ".join(docker_cmd))
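The hunk above relies on parse_env_file from hud.cli.utils.config, whose implementation is not part of this diff. As a rough mental model only, a minimal .env parser along these lines would satisfy the call site (this sketch is an assumption, not the package's actual code):

    # Hypothetical stand-in for hud.cli.utils.config.parse_env_file
    def parse_env_file(contents: str) -> dict[str, str]:
        env: dict[str, str] = {}
        for raw in contents.splitlines():
            line = raw.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue  # skip blanks, comments, and malformed lines
            key, _, value = line.partition("=")
            env[key.strip()] = value.strip().strip("'\"")
        return env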
hud/cli/eval.py

@@ -298,16 +298,15 @@ async def run_single_task(
         agent_config["allowed_tools"] = allowed_tools
 
     # Run with grouping
-
-
-
-
-
-
-
-
-
-    )
+    stats = await run_tasks_grouped(
+        tasks=[task],
+        agent_class=agent_class,
+        agent_config=agent_config,
+        group_size=group_size,
+        max_parallel_episodes=48,  # Same as RL default
+        max_steps=max_steps,
+        verbose=verbose,
+    )
 
     # Display results
     display_group_statistics(stats, show_details=True)
@@ -499,7 +498,7 @@ async def run_full_dataset(
     )
 
     # Display results
-    display_group_statistics(stats, show_details=len(stats) <=
+    display_group_statistics(stats, show_details=len(stats) <= 50)
 
     # Return stats for consistency with other modes
     return stats
hud/cli/flows/tasks.py

@@ -212,17 +212,14 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
     # Check if tasks already have remote URLs
     already_remote = _validate_tasks(tasks)
 
-    # If tasks already reference a remote MCP URL, do not require a local environment
-    # or attempt any image updates. Use the dataset as-is.
-    if already_remote:
-        return str(tasks_path)
-
     # Extract existing images from tasks
     existing_images = _extract_existing_images(tasks)
 
     # Locate environment
     env_dir = find_environment_dir(tasks_path)
     if not env_dir:
+        if already_remote:
+            return str(tasks_path)
         hud_console.error("Could not locate an environment directory (Dockerfile + pyproject.toml)")
         hud_console.hint("Ensure you're in or near your environment folder before running 'hud rl'")
         raise typer.Exit(1)
@@ -373,6 +370,8 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
         item["system_prompt"] = t.system_prompt
         if t.metadata:
             item["metadata"] = t.metadata
+        if t.id is not None:
+            item["id"] = t.id
 
         tasks_payload.append(item)
 
hud/cli/rl/__init__.py

@@ -78,6 +78,11 @@ def rl_command(
         "-y",
         help="Auto-accept all prompts and use defaults (lazy mode)",
     ),
+    vllm_gpu_count: int = typer.Option(
+        None,
+        "--vllm-gpu-count",
+        help="Number of GPUs for vLLM server",
+    ),
     skip_vllm_startup: bool = typer.Option(
         False,
         "--skip-vllm-startup",
@@ -145,6 +150,7 @@ def rl_command(
             model=model,
             config_file=config_file,
             output_dir=output_dir,
+            vllm_gpu_count=vllm_gpu_count,
             yes=yes,
         )
         return
hud/cli/rl/config.py

@@ -84,7 +84,7 @@ def save_config(config: Config, path: Path) -> None:
     """Save configuration to a JSON file."""
     config_dict = config.to_dict()
 
-    with open(path, "w") as f:
+    with open(path, "w", encoding="utf-8") as f:
         json.dump(config_dict, f, indent=2)
         f.write("\n")  # Add newline at end of file
 
@@ -94,7 +94,7 @@ def save_config(config: Config, path: Path) -> None:
 
 def load_config(path: Path) -> Config:
     """Load configuration from a JSON file."""
-    with open(path) as f:
+    with open(path, encoding="utf-8") as f:
         data = json.load(f)
 
     # Use Config.from_dict which handles missing fields gracefully
hud/cli/rl/gpu_utils.py

@@ -245,10 +245,12 @@ def adjust_config_for_ddp(config: Config, num_gpus: int) -> Config:
     # Apply scaling rule
     if num_gpus == 1:
         # Special case: 2 groups for single GPU
+        groups_per_gpu = 2
         config.training.batch_size = 2 * group_size
     else:
-
-
+        groups_per_gpu = config.training.batch_size // group_size
+        # Multi-GPU: each GPU processes groups_per_gpu groups
+        config.training.batch_size = num_gpus * group_size * groups_per_gpu
 
     # Update max_parallel_episodes to match
     config.actor.max_parallel_episodes = config.training.batch_size
@@ -263,7 +265,7 @@ def adjust_config_for_ddp(config: Config, num_gpus: int) -> Config:
         f"\n[cyan]📊 Adjusted batch_size to {config.training.batch_size} ({config.training.batch_size // group_size} groups)[/cyan]"  # noqa: E501
     )
     console.print(
-        f"[cyan]   Each of the {num_gpus} GPU(s) will process {
+        f"[cyan]   Each of the {num_gpus} GPU(s) will process {groups_per_gpu} group(s) in parallel[/cyan]"  # noqa: E501
     )
 
     return config
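To sanity-check the multi-GPU branch with the batch defaults restored elsewhere in this release (batch_size=16, group_size=8; the GPU count here is illustrative):

    num_gpus, group_size, batch_size = 4, 8, 16
    groups_per_gpu = batch_size // group_size          # 16 // 8 = 2 groups per GPU
    adjusted = num_gpus * group_size * groups_per_gpu  # 4 * 8 * 2 = 64
    assert adjusted == 64  # max_parallel_episodes is then set to match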
hud/cli/rl/remote_runner.py

@@ -32,7 +32,9 @@ GPU_PRICING = {
 }
 
 
-def ensure_vllm_deployed(model_name: str, gpu_type: str = "A100", timeout: int = 600) -> None:
+def ensure_vllm_deployed(
+    model_name: str, gpu_type: str = "A100", gpu_count: int = 1, timeout: int = 600
+) -> None:
     """Deploy vLLM for a model if needed and wait until it's ready.
 
     Args:
@@ -47,7 +49,7 @@ def ensure_vllm_deployed(
         return
 
     hud_console.info(f"Deploying vLLM server for {model_name}...")
-    rl_api.deploy_vllm(model_name, gpu_type=gpu_type)
+    rl_api.deploy_vllm(model_name, gpu_type=gpu_type, gpu_count=gpu_count)
     hud_console.success("vLLM deployment started")
 
     hud_console.info("Waiting for vLLM server to be ready...")
@@ -72,6 +74,7 @@ def run_remote_training(
     model: str | None,
     config_file: Path | None,
     output_dir: str,
+    vllm_gpu_count: int = 1,
     yes: bool = False,
 ) -> None:
     """Run RL training remotely via the API server following the new interactive flow."""
@@ -183,14 +186,18 @@ def run_remote_training(
 
     # Ask for model type
     if yes:
-
+        if config_file:
+            config = load_config(config_file)
+            model_type = config.model.base_model
+        else:
+            model_type = "Qwen/Qwen2.5-VL-3B-Instruct"
         hud_console.info(f"Auto-selecting base model: {model_type} (--yes mode)")
     else:
         model_type = hud_console.select(
             "Select base model type:",
             choices=[
                 {"name": "Qwen2.5-VL-3B-Instruct", "value": "Qwen/Qwen2.5-VL-3B-Instruct"},
-
+                {"name": "Qwen2.5-3B-Instruct", "value": "Qwen/Qwen2.5-3B-Instruct"},
             ],
             default=0,
         )
@@ -218,7 +225,7 @@ def run_remote_training(
     try:
         rl_api.create_model(model_name, model_type)
         hud_console.success(f"Created model: {model_name}")
-        ensure_vllm_deployed(model_name, gpu_type="A100")
+        ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
 
     except Exception as e:
         # If the name already exists, suggest a new name and prompt once
@@ -247,7 +254,7 @@ def run_remote_training(
                 rl_api.create_model(chosen, model_type)
                 hud_console.success(f"Created model: {chosen}")
                 model_name = chosen
-                ensure_vllm_deployed(model_name, gpu_type="A100")
+                ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
             except Exception as e2:
                 hud_console.error(f"Failed to create model: {e2}")
                 raise
@@ -281,7 +288,7 @@ def run_remote_training(
             return
 
         # Ensure vLLM is deployed
-        ensure_vllm_deployed(model_name, gpu_type="A100")
+        ensure_vllm_deployed(model_name, gpu_type="A100", gpu_count=vllm_gpu_count)
     except KeyboardInterrupt:
         hud_console.dim_info("Training cancelled", "")
         return
@@ -323,7 +330,7 @@ def run_remote_training(
     )
 
     if yes:
-        num_gpus = 2
+        num_gpus = 2  # Default to 2 GPUs in yes mode
        hud_console.info(f"Auto-selecting {num_gpus} GPU(s) (--yes mode)")
     else:
         num_gpus = hud_console.select(
@@ -425,10 +432,12 @@ def run_remote_training(
         # Load provided config
         hud_console.info(f"Loading configuration from: {config_file}")
         config = load_config(config_file)
-        config_dict = config.to_dict()
         gpu_choice = config.training.gpu_type
         num_gpus = config.training.num_gpus
 
+        config = adjust_config_for_ddp(config, int(num_gpus))
+        config_dict = config.to_dict()
+
         # Launch training
         try:
             # Little celebration before launching
hud/cli/rl/rl_api.py

@@ -61,12 +61,12 @@ def list_models() -> list[RLModelInfo]:
     ]
 
 
-def deploy_vllm(model_name: str, gpu_type: str = "A100") -> dict[str, Any]:
+def deploy_vllm(model_name: str, gpu_type: str = "A100", gpu_count: int = 1) -> dict[str, Any]:
     """Deploy a vLLM server for a model."""
     return make_request_sync(
         method="POST",
         url=f"{settings.hud_rl_url}/models/{model_name}/deploy",
-        json={"gpu_type": gpu_type},
+        json={"gpu_type": gpu_type, "gpu_count": gpu_count},
         api_key=settings.api_key,
     )
 
hud/cli/utils/environment.py

@@ -127,8 +127,4 @@ def is_environment_directory(path: str | Path) -> bool:
         return False
 
     # Must have pyproject.toml
-
-        hud_console.error("pyproject.toml not found")
-        return False
-
-    return True
+    return (dir_path / "pyproject.toml").exists()
hud/rl/config.py

@@ -13,6 +13,7 @@ SUPPORTED_MODELS = [
     "Qwen/Qwen2.5-VL-32B-Instruct",
     "Qwen/Qwen2.5-VL-72B-Instruct",
     "Qwen/Qwen2.5-7B-Instruct",
+    "Qwen/Qwen2.5-3B-Instruct",
 ]
 
 
@@ -39,9 +40,9 @@ class ModelConfig:
     """Model and LoRA configuration."""
 
     base_model: str = "Qwen/Qwen2.5-VL-3B-Instruct"
-    lora_r: int =
-    lora_alpha: int =
-    lora_dropout: float = 0.
+    lora_r: int = 16
+    lora_alpha: int = 32
+    lora_dropout: float = 0.1
     target_modules: tuple[str, ...] = (
         "q_proj",
         "k_proj",
@@ -61,6 +62,7 @@ class ModelConfig:
 @dataclass
 class TrainingConfig:
     """Training hyperparameters."""
+
     # GPU parameters
     gpu_type: str = "A100"
     num_gpus: int = 2
@@ -71,9 +73,9 @@ class TrainingConfig:
     save_every_batches: int = 1
 
     # Batching parameters
-    epochs: int =
-    batch_size: int =
-    group_size: int =
+    epochs: int = 1
+    batch_size: int = 16
+    group_size: int = 8
     mini_batch_size: int = 1
     update_after_group: bool = True  # Whether to update the policy after each task group
     accumulate_over_minibatches: bool = False  # Whether to accumulate over minibatches
@@ -84,7 +86,7 @@ class TrainingConfig:
     leave_one_out: bool = True
 
     # Replay buffer parameters
-    buffer_steps: int =
+    buffer_steps: int = 8
     select_strategy: Literal["recent", "variance", "random"] = "variance"
 
     # Aggregation parameters
@@ -92,8 +94,8 @@ class TrainingConfig:
     token_agg: Literal["mean", "sum"] = "mean"  # noqa: S105
 
     # Regularization parameters
-    kl_beta: float = 0.
-    entropy_beta: float = 0.
+    kl_beta: float = 0.001
+    entropy_beta: float = 0.001
     top_eps: float = 0.2
     bottom_eps: float = 0.1
 
@@ -143,6 +145,7 @@ class Config:
     job_id: str | None = None  # Use existing job ID if provided
     stats_interval: int = 1
     verbose: bool = False
+    very_verbose: bool = False
 
     # Paths
     out_dir: str = "./checkpoints"
@@ -166,6 +169,7 @@ class Config:
             job_id=d.get("job_id"),
             stats_interval=d.get("stats_interval", 1),
             verbose=d.get("verbose", False),
+            very_verbose=d.get("very_verbose", False),
             out_dir=d.get("out_dir", "./checkpoints"),
             adapter_prefix=d.get("adapter_prefix", "cua-grpo-step"),
             seed=d.get("seed", 1234),
@@ -181,6 +185,7 @@ class Config:
             "job_id": self.job_id,
             "stats_interval": self.stats_interval,
             "verbose": self.verbose,
+            "very_verbose": self.very_verbose,
             "out_dir": self.out_dir,
             "adapter_prefix": self.adapter_prefix,
             "seed": self.seed,
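Since from_dict "handles missing fields gracefully", configs saved by older versions load cleanly and the new flag simply defaults off. A minimal sketch, assuming Config.from_dict accepts a sparse dict as the comment in the diff states:

    from hud.rl.config import Config

    cfg = Config.from_dict({"verbose": True})      # no very_verbose key present
    assert cfg.very_verbose is False               # new field falls back to its default
    assert cfg.to_dict()["very_verbose"] is False  # and round-trips through to_dict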
hud/rl/distributed.py

@@ -66,7 +66,13 @@ def all_reduce_mean(tensor: torch.Tensor) -> torch.Tensor:
 
 
 def broadcast_object(obj: Any, src: int = 0) -> Any:
-    """Broadcast a Python object from src rank to all ranks.
+    """Broadcast a Python object from src rank to all ranks.
+
+    Args:
+        obj: Object to broadcast (used on src rank)
+        src: Source rank
+        device: Device for temporary tensor buffer during pickling transfer
+    """
     if not dist.is_initialized():
         return obj
 
@@ -75,6 +81,33 @@ def broadcast_object(obj: Any, src: int = 0) -> Any:
     return obj_list[0]
 
 
+def scatter_object(
+    obj_list: list[Any] | None,
+    src: int = 0,
+) -> Any:
+    """Scatter a list of Python objects from src so each rank receives one object.
+
+    Usage:
+        - On src rank: pass the full list (length == world_size)
+        - On non-src ranks: pass None
+
+    Returns:
+        The object intended for this rank.
+    """
+    if not dist.is_initialized():
+        # Single-process: return first element if provided, else None
+        if obj_list is None or len(obj_list) == 0:
+            return None
+        return obj_list[0]
+
+    out: list[Any] = [None]
+    if dist.get_rank() == src:
+        dist.scatter_object_list(out, obj_list, src=src)
+    else:
+        dist.scatter_object_list(out, None, src=src)
+    return out[0]
+
+
 def gather_tensors(tensor: torch.Tensor) -> list[torch.Tensor] | None:
     """Gather tensors from all ranks to rank 0.
 
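A sketch of how the new scatter_object helper might be used under torchrun to hand each rank its own shard of work; the workload and process-group setup here are illustrative, not from the package:

    import torch.distributed as dist
    from hud.rl.distributed import scatter_object

    dist.init_process_group("nccl")  # assumes torchrun-style env vars
    tasks = list(range(100))         # placeholder workload
    if dist.get_rank() == 0:
        world = dist.get_world_size()
        shards = [tasks[i::world] for i in range(world)]  # len(shards) == world_size
    else:
        shards = None                # non-src ranks pass None
    my_shard = scatter_object(shards, src=0)  # each rank receives exactly one shard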
hud/rl/learner.py

@@ -240,6 +240,8 @@ class GRPOLearner:
             if sample.inputs:
                 sample = sample.to_device(self.device)
                 sample.old_logprobs, _ = self.compute_logprobs(self.policy, sample.inputs)
+                # Free GPU memory for this sample immediately
+                sample.to_device(torch.device("cpu"))
 
         policy_module = self.policy.module if hasattr(self.policy, "module") else self.policy
         with policy_module.disable_adapter():
@@ -247,7 +249,10 @@ class GRPOLearner:
                 if is_main_process():
                     progress.update(f"Processing batch of traces... {i}/{len(batch)}")
                 if sample.inputs:
+                    # Move back to GPU for reference computation, then free
+                    sample = sample.to_device(self.device)
                     sample.ref_logprobs, _ = self.compute_logprobs(self.policy, sample.inputs)
+                    sample.to_device(torch.device("cpu"))
 
         hud_console.info_log("Creating mini-batches...")
         group_size = self.config.training.group_size
@@ -488,15 +493,21 @@ class GRPOLearner:
             out = model(**model_inputs)
 
         logits = out.logits / self.config.actor.temperature
-        log_probs = F.log_softmax(logits, dim=-1)
 
+        # Compute token log-probs via negative cross-entropy to avoid materializing full log_probs
         targets = inputs["input_ids"][:, 1:]
-
+        logits_slice = logits[:, :-1, :]
+        loss_flat = F.cross_entropy(
+            logits_slice.reshape(-1, logits_slice.size(-1)),
+            targets.reshape(-1),
+            reduction="none",
+        )
+        token_log_probs = (-loss_flat).reshape_as(targets)
 
         # Compute entropy only for assistant tokens to save memory
         assistant_mask = inputs["assistant_mask"]
         entropy = torch.zeros_like(token_log_probs)
-        if assistant_mask.any():
+        if assistant_mask.any() and getattr(self.config.training, "entropy_beta", 0.0) != 0.0:
             entropy[assistant_mask] = entropy_from_logits(logits[:, :-1][assistant_mask])
 
         return token_log_probs, entropy
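The rewrite is numerically equivalent because per-token cross-entropy is exactly the negative log-softmax at the target index, while never materializing the full [batch, seq, vocab] log-prob tensor. A standalone check of the identity (shapes are illustrative):

    import torch
    import torch.nn.functional as F

    logits = torch.randn(2, 5, 11)          # [batch, seq, vocab]
    targets = torch.randint(0, 11, (2, 4))  # next-token targets
    ls = logits[:, :-1, :]
    via_ce = -F.cross_entropy(
        ls.reshape(-1, ls.size(-1)), targets.reshape(-1), reduction="none"
    ).reshape_as(targets)
    via_lsm = F.log_softmax(ls, dim=-1).gather(-1, targets.unsqueeze(-1)).squeeze(-1)
    assert torch.allclose(via_ce, via_lsm, atol=1e-6)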
@@ -506,8 +517,20 @@ class GRPOLearner:
             # Return dummy values that match expected shapes
             seq_len = inputs["input_ids"].shape[1] - 1 if "input_ids" in inputs else 0
             batch_size = inputs["input_ids"].shape[0] if "input_ids" in inputs else 1
-
-
+            # Create dummy tensors that still participate in autograd so backward doesn't fail
+            try:
+                param_sum = torch.sum(
+                    next(self.policy.parameters())
+                )  # touch params to build a graph
+                base = param_sum * 0.0
+            except StopIteration:
+                base = torch.tensor(0.0, device=self.device)
+            dummy_logprobs = (
+                base + torch.zeros(batch_size, seq_len, device=self.device)
+            ).requires_grad_(True)
+            dummy_entropy = (
+                base + torch.zeros(batch_size, seq_len, device=self.device)
+            ).requires_grad_(True)
             return dummy_logprobs, dummy_entropy
 
     def save(self, path: str) -> None:
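The fallback's parameter-sum trick, in isolation: multiplying a parameter reduction by zero yields a tensor that is numerically zero yet still attached to the model's autograd graph, so a later backward() has a path to the weights instead of erroring out. A self-contained illustration (not the package's code):

    import torch

    w = torch.nn.Parameter(torch.randn(3))
    base = torch.sum(w) * 0.0           # zero-valued scalar, but grad_fn is set
    dummy = base + torch.zeros(2, 4)    # broadcasting keeps the graph alive
    dummy.sum().backward()              # flows back to w with zero gradients
    assert w.grad is not None and torch.all(w.grad == 0)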