hud-python 0.4.32__tar.gz → 0.4.34__tar.gz
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- {hud_python-0.4.32 → hud_python-0.4.34}/PKG-INFO +1 -1
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/agents/misc/response_agent.py +25 -9
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/__init__.py +4 -1
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/build.py +40 -25
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/flows/tasks.py +4 -2
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/rl/remote_runner.py +2 -4
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/rl/actor.py +7 -5
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/rl/buffer.py +108 -77
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/rl/tests/test_learner.py +20 -5
- hud_python-0.4.34/hud/samples/__init__.py +7 -0
- hud_python-0.4.34/hud/samples/browser.py +33 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/types.py +19 -6
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/utils/mcp.py +6 -1
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/utils/tests/test_version.py +1 -1
- hud_python-0.4.34/hud/utils/tool_shorthand.py +59 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/version.py +1 -1
- {hud_python-0.4.32 → hud_python-0.4.34}/pyproject.toml +1 -1
- {hud_python-0.4.32 → hud_python-0.4.34}/.gitignore +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/LICENSE +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/README.md +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/environments/README.md +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/environments/browser/README.md +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/environments/browser/apps/2048/README.md +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/environments/browser/apps/2048/backend/pyproject.toml +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/environments/browser/apps/README.md +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/environments/browser/apps/todo/README.md +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/environments/browser/apps/todo/backend/pyproject.toml +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/environments/browser/pyproject.toml +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/environments/remote_browser/README.md +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/environments/remote_browser/pyproject.toml +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/environments/text_2048/README.md +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/environments/text_2048/pyproject.toml +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/examples/README.md +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/__main__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/agents/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/agents/base.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/agents/claude.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/agents/grounded_openai.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/agents/langchain.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/agents/openai.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/agents/openai_chat_generic.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/agents/tests/test_claude.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/agents/tests/test_client.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/agents/tests/test_openai.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/__main__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/analyze.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/clone.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/debug.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/dev.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/eval.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/flows/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/get.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/init.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/list_func.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/pull.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/push.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/remove.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/rl/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/rl/config.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/rl/display.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/rl/gpu.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/rl/gpu_utils.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/rl/local_runner.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/rl/presets.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/rl/rl_api.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/rl/vllm.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/tests/test_analyze.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/tests/test_build.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/tests/test_clone.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/tests/test_cursor.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/tests/test_debug.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/tests/test_list_func.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/tests/test_mcp_server.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/tests/test_pull.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/tests/test_push.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/tests/test_registry.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/utils/cursor.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/utils/docker.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/utils/environment.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/utils/interactive.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/utils/metadata.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/utils/remote_runner.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/utils/runner.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/utils/server.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/clients/README.md +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/clients/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/clients/base.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/clients/fastmcp.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/clients/mcp_use.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/clients/tests/test_client_integration.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/clients/tests/test_fastmcp.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/clients/tests/test_mcp_use_retry.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/clients/tests/test_protocol.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/clients/utils/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/clients/utils/mcp_use_retry.py +3 -3
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/clients/utils/retry.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/clients/utils/retry_transport.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/datasets/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/datasets/parallel.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/datasets/runner.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/datasets/utils.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/misc/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/misc/claude_plays_pokemon.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/native/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/native/comparator.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/native/tests/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/native/tests/test_comparator.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/native/tests/test_native_init.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/otel/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/otel/collector.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/otel/config.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/otel/context.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/otel/exporters.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/otel/instrumentation.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/otel/processors.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/otel/tests/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/otel/tests/test_processors.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/py.typed +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/rl/README.md +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/rl/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/rl/chat_template.jinja +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/rl/config.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/rl/distributed.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/rl/learner.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/rl/tests/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/rl/train.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/rl/types.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/rl/utils/start_vllm_server.sh +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/rl/utils.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/rl/vllm_adapter.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/server/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/server/context.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/server/low_level.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/server/server.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/settings.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/shared/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/shared/exceptions.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/shared/hints.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/shared/requests.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/shared/tests/test_exceptions.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/telemetry/job.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/telemetry/replay.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/telemetry/tests/test_replay.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/telemetry/tests/test_trace.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/telemetry/trace.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/base.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/bash.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/computer/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/computer/anthropic.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/computer/hud.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/computer/openai.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/edit.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/executors/base.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/executors/tests/test_base_executor.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/grounding/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/grounding/config.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/grounding/grounded_tool.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/grounding/grounder.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/grounding/tests/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/playwright.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/response.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/submit.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/tests/test_bash.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/tests/test_bash_extended.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/tests/test_computer.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/tests/test_computer_actions.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/tests/test_edit.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/tests/test_tools_init.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/types.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/tools/utils.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/utils/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/utils/agent_factories.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/utils/async_utils.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/utils/group_eval.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/utils/hud_console.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/utils/pretty_errors.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/utils/progress.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/utils/tasks.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/utils/telemetry.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/utils/tests/test_async_utils.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/utils/tests/test_mcp.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/utils/tests/test_progress.py +0 -0
- {hud_python-0.4.32 → hud_python-0.4.34}/hud/utils/tests/test_telemetry.py +0 -0
|
@@ -16,7 +16,17 @@ class ResponseAgent:
|
|
|
16
16
|
based on the agent's final response message.
|
|
17
17
|
"""
|
|
18
18
|
|
|
19
|
-
def __init__(
|
|
19
|
+
def __init__(
|
|
20
|
+
self, api_key: str | None = None, model: str = "gpt-4o", system_prompt: str | None = None
|
|
21
|
+
) -> None:
|
|
22
|
+
"""
|
|
23
|
+
Initialize the ResponseAgent.
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
api_key: The API key to use for the OpenAI client
|
|
27
|
+
model: The model to use for the OpenAI client (default: "gpt-4o")
|
|
28
|
+
system_prompt: The system prompt to use for the OpenAI client
|
|
29
|
+
"""
|
|
20
30
|
self.api_key = api_key or settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
|
|
21
31
|
if not self.api_key:
|
|
22
32
|
raise ValueError(
|
|
@@ -26,23 +36,29 @@ class ResponseAgent:
|
|
|
26
36
|
self.client = AsyncOpenAI(api_key=self.api_key)
|
|
27
37
|
self.model = model
|
|
28
38
|
|
|
29
|
-
self.system_prompt =
|
|
39
|
+
self.system_prompt = (
|
|
40
|
+
system_prompt
|
|
41
|
+
or """
|
|
30
42
|
You are an assistant that helps determine the appropriate response to an agent's message.
|
|
31
43
|
|
|
32
44
|
You will receive messages from an agent that is performing tasks for a user.
|
|
33
45
|
Your job is to analyze these messages and respond with one of the following:
|
|
34
46
|
|
|
35
|
-
- STOP: If the agent indicates it has successfully completed a task
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
47
|
+
- STOP: If the agent indicates it has successfully completed a task or is stuck,
|
|
48
|
+
struggling or says it cannot complete the task, even if phrased as a question
|
|
49
|
+
like "I have entered the right values into this form. Would you like me to do
|
|
50
|
+
anything else?" or "Here is the website. Is there any other information you
|
|
51
|
+
need?" or if the agent has strongly determined it wants to stop the task like
|
|
52
|
+
"The task is infeasible. Can I help you with something else?"
|
|
53
|
+
|
|
40
54
|
- CONTINUE: If the agent is asking for clarification before proceeding with a task
|
|
41
55
|
like "I'm about to clear cookies from this website. Would you like me to proceed?"
|
|
42
|
-
or "I've entered the right values into this form. Would you like me to continue
|
|
56
|
+
or "I've entered the right values into this form. Would you like me to continue
|
|
57
|
+
with the rest of the task?"
|
|
43
58
|
|
|
44
59
|
Respond ONLY with one of these two options.
|
|
45
|
-
"""
|
|
60
|
+
"""
|
|
61
|
+
)
|
|
46
62
|
|
|
47
63
|
async def determine_response(self, agent_message: str) -> ResponseType:
|
|
48
64
|
"""
|
|
@@ -585,6 +585,9 @@ def build(
|
|
|
585
585
|
),
|
|
586
586
|
no_cache: bool = typer.Option(False, "--no-cache", help="Build without Docker cache"),
|
|
587
587
|
verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed output"),
|
|
588
|
+
platform: str | None = typer.Option(
|
|
589
|
+
None, "--platform", help="Set Docker target platform (e.g., linux/amd64)"
|
|
590
|
+
),
|
|
588
591
|
) -> None:
|
|
589
592
|
"""🏗️ Build a HUD environment and generate lock file.
|
|
590
593
|
|
|
@@ -635,7 +638,7 @@ def build(
|
|
|
635
638
|
else:
|
|
636
639
|
i += 1
|
|
637
640
|
|
|
638
|
-
build_command(directory, tag, no_cache, verbose, env_vars)
|
|
641
|
+
build_command(directory, tag, no_cache, verbose, env_vars, platform)
|
|
639
642
|
|
|
640
643
|
|
|
641
644
|
@app.command()
|
|
@@ -224,6 +224,7 @@ def build_docker_image(
|
|
|
224
224
|
no_cache: bool = False,
|
|
225
225
|
verbose: bool = False,
|
|
226
226
|
build_args: dict[str, str] | None = None,
|
|
227
|
+
platform: str | None = None,
|
|
227
228
|
) -> bool:
|
|
228
229
|
"""Build a Docker image from a directory."""
|
|
229
230
|
hud_console = HUDConsole()
|
|
@@ -236,7 +237,10 @@ def build_docker_image(
|
|
|
236
237
|
return False
|
|
237
238
|
|
|
238
239
|
# Build command
|
|
239
|
-
cmd = ["docker", "build"
|
|
240
|
+
cmd = ["docker", "build"]
|
|
241
|
+
if platform:
|
|
242
|
+
cmd.extend(["--platform", platform])
|
|
243
|
+
cmd.extend(["-t", tag])
|
|
240
244
|
if no_cache:
|
|
241
245
|
cmd.append("--no-cache")
|
|
242
246
|
|
|
@@ -264,6 +268,7 @@ def build_environment(
|
|
|
264
268
|
no_cache: bool = False,
|
|
265
269
|
verbose: bool = False,
|
|
266
270
|
env_vars: dict[str, str] | None = None,
|
|
271
|
+
platform: str | None = None,
|
|
267
272
|
) -> None:
|
|
268
273
|
"""Build a HUD environment and generate lock file."""
|
|
269
274
|
hud_console = HUDConsole()
|
|
@@ -294,9 +299,8 @@ def build_environment(
|
|
|
294
299
|
except Exception:
|
|
295
300
|
default_image = f"{env_dir.name}:dev"
|
|
296
301
|
|
|
297
|
-
#
|
|
298
|
-
if
|
|
299
|
-
tag = default_image
|
|
302
|
+
# Determine final image tag to use
|
|
303
|
+
image_tag: str = tag if tag else default_image
|
|
300
304
|
|
|
301
305
|
# Build temporary image first
|
|
302
306
|
temp_tag = f"hud-build-temp:{int(time.time())}"
|
|
@@ -304,7 +308,14 @@ def build_environment(
|
|
|
304
308
|
hud_console.progress_message(f"Building Docker image: {temp_tag}")
|
|
305
309
|
|
|
306
310
|
# Build the image (env vars are for runtime, not build time)
|
|
307
|
-
if not build_docker_image(
|
|
311
|
+
if not build_docker_image(
|
|
312
|
+
env_dir,
|
|
313
|
+
temp_tag,
|
|
314
|
+
no_cache,
|
|
315
|
+
verbose,
|
|
316
|
+
build_args=None,
|
|
317
|
+
platform=platform,
|
|
318
|
+
):
|
|
308
319
|
hud_console.error("Docker build failed")
|
|
309
320
|
raise typer.Exit(1)
|
|
310
321
|
|
|
@@ -422,21 +433,24 @@ def build_environment(
|
|
|
422
433
|
|
|
423
434
|
# Build final image with label (uses cache from first build)
|
|
424
435
|
# Also tag with version
|
|
425
|
-
base_name =
|
|
436
|
+
base_name = image_tag.split(":")[0] if ":" in image_tag else image_tag
|
|
426
437
|
version_tag = f"{base_name}:{new_version}"
|
|
427
438
|
|
|
428
|
-
label_cmd = [
|
|
429
|
-
|
|
430
|
-
"
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
439
|
+
label_cmd = ["docker", "build"]
|
|
440
|
+
if platform is not None:
|
|
441
|
+
label_cmd.extend(["--platform", platform])
|
|
442
|
+
label_cmd.extend(
|
|
443
|
+
[
|
|
444
|
+
"--label",
|
|
445
|
+
f"org.hud.manifest.head={lock_hash}:{lock_size}",
|
|
446
|
+
"--label",
|
|
447
|
+
f"org.hud.version={new_version}",
|
|
448
|
+
"-t",
|
|
449
|
+
image_tag,
|
|
450
|
+
"-t",
|
|
451
|
+
version_tag,
|
|
452
|
+
]
|
|
453
|
+
)
|
|
440
454
|
|
|
441
455
|
label_cmd.append(str(env_dir))
|
|
442
456
|
|
|
@@ -457,14 +471,14 @@ def build_environment(
|
|
|
457
471
|
hud_console.success("Built final image with lock file metadata")
|
|
458
472
|
|
|
459
473
|
# NOW get the image ID after the final build
|
|
460
|
-
image_id = get_docker_image_id(
|
|
474
|
+
image_id = get_docker_image_id(image_tag)
|
|
461
475
|
if image_id:
|
|
462
476
|
# For local builds, store the image ID
|
|
463
477
|
# Docker IDs come as sha256:hash, we want tag@sha256:hash
|
|
464
478
|
if image_id.startswith("sha256:"):
|
|
465
|
-
lock_content["image"] = f"{
|
|
479
|
+
lock_content["image"] = f"{image_tag}@{image_id}"
|
|
466
480
|
else:
|
|
467
|
-
lock_content["image"] = f"{
|
|
481
|
+
lock_content["image"] = f"{image_tag}@sha256:{image_id}"
|
|
468
482
|
|
|
469
483
|
# Update the lock file with the new image reference
|
|
470
484
|
with open(lock_path, "w") as f:
|
|
@@ -487,8 +501,8 @@ def build_environment(
|
|
|
487
501
|
|
|
488
502
|
# Show the version tag as primary since that's what will be pushed
|
|
489
503
|
hud_console.status_item("Built image", version_tag, primary=True)
|
|
490
|
-
if
|
|
491
|
-
hud_console.status_item("Also tagged",
|
|
504
|
+
if image_tag:
|
|
505
|
+
hud_console.status_item("Also tagged", image_tag)
|
|
492
506
|
hud_console.status_item("Version", new_version)
|
|
493
507
|
hud_console.status_item("Lock file", "hud.lock.yaml")
|
|
494
508
|
hud_console.status_item("Tools found", str(analysis["toolCount"]))
|
|
@@ -500,7 +514,7 @@ def build_environment(
|
|
|
500
514
|
hud_console.section_title("Next Steps")
|
|
501
515
|
hud_console.info("Test locally:")
|
|
502
516
|
hud_console.command_example("hud dev", "Hot-reload development")
|
|
503
|
-
hud_console.command_example(f"hud run {
|
|
517
|
+
hud_console.command_example(f"hud run {image_tag}", "Run the built image")
|
|
504
518
|
hud_console.info("")
|
|
505
519
|
hud_console.info("Publish to registry:")
|
|
506
520
|
hud_console.command_example("hud push", f"Push as {version_tag}")
|
|
@@ -517,6 +531,7 @@ def build_command(
|
|
|
517
531
|
no_cache: bool = typer.Option(False, "--no-cache", help="Build without Docker cache"),
|
|
518
532
|
verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed output"),
|
|
519
533
|
env_vars: dict[str, str] | None = None,
|
|
534
|
+
platform: str | None = None,
|
|
520
535
|
) -> None:
|
|
521
536
|
"""Build a HUD environment and generate lock file."""
|
|
522
|
-
build_environment(directory, tag, no_cache, verbose, env_vars)
|
|
537
|
+
build_environment(directory, tag, no_cache, verbose, env_vars, platform)
|
|
@@ -32,6 +32,7 @@ def _validate_tasks(tasks: list[Task]) -> bool:
|
|
|
32
32
|
A task is considered remote if any "url" field anywhere inside mcp_config
|
|
33
33
|
is a valid remote URL (e.g., https://mcp.hud.so/v3/mcp).
|
|
34
34
|
"""
|
|
35
|
+
|
|
35
36
|
def _has_remote_url(obj: Any) -> bool:
|
|
36
37
|
if isinstance(obj, dict):
|
|
37
38
|
for k, v in obj.items():
|
|
@@ -99,7 +100,8 @@ def _ensure_built(env_dir: Path) -> dict[str, Any]:
|
|
|
99
100
|
# Check Docker availability before attempting a build
|
|
100
101
|
require_docker_running()
|
|
101
102
|
# Run build (non-interactive). If Docker isn't running, this will raise and stop the flow.
|
|
102
|
-
|
|
103
|
+
# Force linux/amd64 platform to ensure compatibility during RL flows.
|
|
104
|
+
build_environment(str(env_dir), platform="linux/amd64")
|
|
103
105
|
|
|
104
106
|
# Load lock file
|
|
105
107
|
with open(lock_path) as f:
|
|
@@ -146,7 +148,7 @@ def _derive_remote_image(lock_data: dict[str, Any]) -> str:
|
|
|
146
148
|
# Base name always comes from lock_data.image to preserve org/repo
|
|
147
149
|
image_ref = str(lock_data.get("image", "")).strip()
|
|
148
150
|
if not image_ref:
|
|
149
|
-
raise typer.Exit(
|
|
151
|
+
raise typer.Exit(1)
|
|
150
152
|
name, tag = extract_name_and_tag(image_ref)
|
|
151
153
|
return f"{name}:{tag}"
|
|
152
154
|
|
|
@@ -9,8 +9,8 @@ from __future__ import annotations
|
|
|
9
9
|
import os
|
|
10
10
|
import subprocess
|
|
11
11
|
import time
|
|
12
|
-
from pathlib import Path
|
|
13
12
|
import uuid
|
|
13
|
+
from pathlib import Path
|
|
14
14
|
|
|
15
15
|
from rich.console import Console
|
|
16
16
|
|
|
@@ -51,9 +51,7 @@ def ensure_vllm_deployed(model_name: str, gpu_type: str = "A100", timeout: int =
|
|
|
51
51
|
hud_console.info("Waiting for vLLM server to be ready...")
|
|
52
52
|
start_time = time.time()
|
|
53
53
|
with hud_console.progress() as progress:
|
|
54
|
-
progress.update(
|
|
55
|
-
"Checking deployment status (see live status on https://app.hud.so/models)"
|
|
56
|
-
)
|
|
54
|
+
progress.update("Checking deployment status (see live status on https://app.hud.so/models)")
|
|
57
55
|
while True:
|
|
58
56
|
if time.time() - start_time > timeout:
|
|
59
57
|
hud_console.error("Timeout waiting for vLLM deployment")
|
|
@@ -85,18 +85,19 @@ class Actor:
|
|
|
85
85
|
)
|
|
86
86
|
except TimeoutError:
|
|
87
87
|
hud_console.warning_log(f"Episode timed out for task {t.id}")
|
|
88
|
-
|
|
88
|
+
# Attach task so buffer grouping has key
|
|
89
|
+
return Trace(isError=True, content="Episode timeout", task=t)
|
|
89
90
|
|
|
90
91
|
results = await asyncio.gather(
|
|
91
92
|
*[run_with_timeout(t) for t in batch],
|
|
92
93
|
return_exceptions=True,
|
|
93
94
|
)
|
|
94
95
|
|
|
95
|
-
# Normalize exceptions to error traces
|
|
96
|
-
for res in results:
|
|
96
|
+
# Normalize exceptions to error traces and ensure task is attached
|
|
97
|
+
for t, res in zip(batch, results, strict=False):
|
|
97
98
|
if isinstance(res, Exception):
|
|
98
99
|
hud_console.warning_log(f"Episode error: {res}")
|
|
99
|
-
traces.append(Trace(isError=True, content=str(res)))
|
|
100
|
+
traces.append(Trace(isError=True, content=str(res), task=t))
|
|
100
101
|
else:
|
|
101
102
|
traces.append(res)
|
|
102
103
|
|
|
@@ -113,7 +114,8 @@ class Actor:
|
|
|
113
114
|
|
|
114
115
|
except Exception:
|
|
115
116
|
logger.info("GOT EXCEPTION")
|
|
116
|
-
|
|
117
|
+
# Preserve task on exception for grouping
|
|
118
|
+
return Trace(isError=True, task=task)
|
|
117
119
|
|
|
118
120
|
result.info["tool_spec"] = agent.get_tool_schemas()
|
|
119
121
|
|
|
@@ -219,12 +219,93 @@ class ReplayBuffer(Buffer[Trace]):
|
|
|
219
219
|
else:
|
|
220
220
|
raise ValueError(f"Invalid select strategy: {self.select_strategy}")
|
|
221
221
|
|
|
222
|
+
def _extract_group_key(self, trace: Trace) -> tuple[str, str]:
|
|
223
|
+
"""Return a stable grouping key for a trace.
|
|
224
|
+
|
|
225
|
+
Preference order:
|
|
226
|
+
1) task.id when present (kind='id')
|
|
227
|
+
2) task.prompt exact string (kind='prompt') when id is None
|
|
228
|
+
3) 'NA' for missing/errored entries (kind='NA')
|
|
229
|
+
"""
|
|
230
|
+
if getattr(trace, "isError", False):
|
|
231
|
+
return ("NA", "NA")
|
|
232
|
+
|
|
233
|
+
task = getattr(trace, "task", None)
|
|
234
|
+
if task is None:
|
|
235
|
+
return ("NA", "NA")
|
|
236
|
+
|
|
237
|
+
tid = getattr(task, "id", None)
|
|
238
|
+
if tid is not None:
|
|
239
|
+
return ("id", str(tid))
|
|
240
|
+
|
|
241
|
+
prompt = getattr(task, "prompt", None)
|
|
242
|
+
if prompt:
|
|
243
|
+
return ("prompt", str(prompt))
|
|
244
|
+
|
|
245
|
+
return ("NA", "NA")
|
|
246
|
+
|
|
247
|
+
def _validate_and_split_groups(
|
|
248
|
+
self, recent_traces: list[Trace]
|
|
249
|
+
) -> tuple[list[list[Trace]], list[tuple[str, str]]]:
|
|
250
|
+
"""Validate and split recent traces into homogeneous groups by id or prompt.
|
|
251
|
+
|
|
252
|
+
- Uses id when present; otherwise falls back to prompt equality.
|
|
253
|
+
- Any NA/error traces are excluded and the group is filled by duplicating
|
|
254
|
+
existing valid members in that group.
|
|
255
|
+
- Always returns len == groups_per_batch groups of size == group_size.
|
|
256
|
+
"""
|
|
257
|
+
from collections import Counter
|
|
258
|
+
|
|
259
|
+
groups_per_batch = self.batch_size // self.group_size
|
|
260
|
+
|
|
261
|
+
window_keys = [self._extract_group_key(t) for t in recent_traces]
|
|
262
|
+
window_counter = Counter(k for k in window_keys if k[0] != "NA")
|
|
263
|
+
|
|
264
|
+
validated_groups: list[list[Trace]] = []
|
|
265
|
+
selected_keys: list[tuple[str, str]] = []
|
|
266
|
+
|
|
267
|
+
for g_idx in range(groups_per_batch):
|
|
268
|
+
start = g_idx * self.group_size
|
|
269
|
+
end = start + self.group_size
|
|
270
|
+
chunk = recent_traces[start:end]
|
|
271
|
+
|
|
272
|
+
key_counts = Counter()
|
|
273
|
+
per_item_keys: list[tuple[str, str]] = []
|
|
274
|
+
for tr in chunk:
|
|
275
|
+
k = self._extract_group_key(tr)
|
|
276
|
+
per_item_keys.append(k)
|
|
277
|
+
if k[0] != "NA":
|
|
278
|
+
key_counts[k] += 1
|
|
279
|
+
|
|
280
|
+
if key_counts:
|
|
281
|
+
best_key = key_counts.most_common(1)[0][0]
|
|
282
|
+
elif window_counter:
|
|
283
|
+
best_key = window_counter.most_common(1)[0][0]
|
|
284
|
+
else:
|
|
285
|
+
best_key = ("NA", "NA")
|
|
286
|
+
|
|
287
|
+
homogeneous = [tr for tr, k in zip(chunk, per_item_keys, strict=False) if k == best_key]
|
|
288
|
+
|
|
289
|
+
while len(homogeneous) < self.group_size:
|
|
290
|
+
if homogeneous:
|
|
291
|
+
homogeneous.append(homogeneous[-1])
|
|
292
|
+
else:
|
|
293
|
+
idx = next((i for i, wk in enumerate(window_keys) if wk[0] != "NA"), None)
|
|
294
|
+
if idx is not None:
|
|
295
|
+
homogeneous.append(recent_traces[idx])
|
|
296
|
+
elif chunk:
|
|
297
|
+
homogeneous.append(chunk[0])
|
|
298
|
+
else:
|
|
299
|
+
homogeneous.append(recent_traces[0])
|
|
300
|
+
|
|
301
|
+
validated_groups.append(homogeneous)
|
|
302
|
+
selected_keys.append(best_key)
|
|
303
|
+
|
|
304
|
+
return validated_groups, selected_keys
|
|
305
|
+
|
|
222
306
|
def _sample_high_variance_traces(self) -> list[Trace]:
|
|
223
307
|
from collections import Counter, defaultdict, deque
|
|
224
308
|
|
|
225
|
-
# Expect recent window to already be grouped by task id
|
|
226
|
-
|
|
227
|
-
# Build recent window and earlier lookup (short form)
|
|
228
309
|
buf_list = list(self.buffer)
|
|
229
310
|
if len(buf_list) < self.batch_size:
|
|
230
311
|
hud_console.warning(
|
|
@@ -234,81 +315,32 @@ class ReplayBuffer(Buffer[Trace]):
|
|
|
234
315
|
take = min(len(buf_list) or 1, self.batch_size - len(buf_list))
|
|
235
316
|
buf_list.extend(buf_list[:take])
|
|
236
317
|
recent_traces = buf_list[-self.batch_size :]
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
)
|
|
318
|
+
|
|
319
|
+
recent_keys = [self._extract_group_key(t) for t in recent_traces]
|
|
320
|
+
hud_console.info(f"[group-sampler] recent-window histogram: {Counter(recent_keys)}")
|
|
240
321
|
|
|
241
322
|
hud_console.info(
|
|
242
323
|
f"[group-sampler] Building earlier traces lookup, buffer size: {len(buf_list)}"
|
|
243
324
|
)
|
|
244
|
-
|
|
325
|
+
earlier_traces_by_key: dict[tuple[str, str], deque[Trace]] = defaultdict(deque)
|
|
245
326
|
for tr in buf_list[: -self.batch_size]:
|
|
246
|
-
|
|
327
|
+
k = self._extract_group_key(tr)
|
|
328
|
+
if k[0] != "NA":
|
|
329
|
+
earlier_traces_by_key[k].append(tr)
|
|
330
|
+
|
|
331
|
+
groups, group_keys = self._validate_and_split_groups(recent_traces)
|
|
247
332
|
|
|
248
|
-
# Chunk from the most-recent end
|
|
249
333
|
final_traces: list[Trace] = []
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
end = start + self.group_size
|
|
255
|
-
group = recent_traces[start:end]
|
|
256
|
-
|
|
257
|
-
# Assert homogeneity: every trace in a group must share the same task id
|
|
258
|
-
cnt = Counter(getattr(t.task, "id", "NA") for t in group)
|
|
259
|
-
if len(cnt) != 1:
|
|
260
|
-
raise RuntimeError(f"Group {g_idx} is not homogeneous: {dict(cnt)}")
|
|
261
|
-
target_tid = next(iter(cnt.keys()))
|
|
262
|
-
|
|
263
|
-
# Build homogeneous group of target_tid, filling from earlier traces to increase spread
|
|
264
|
-
homogeneous: list[Trace] = [
|
|
265
|
-
t for t in group if getattr(t.task, "id", "NA") == target_tid
|
|
266
|
-
]
|
|
267
|
-
needed = self.group_size - len(homogeneous)
|
|
268
|
-
|
|
269
|
-
# Greedy fill: choose earlier traces (same task-id) farthest from current mean reward
|
|
270
|
-
def current_mean(homogeneous: list[Trace]) -> float:
|
|
271
|
-
if not homogeneous:
|
|
334
|
+
for g_idx, (homogeneous, target_key) in enumerate(zip(groups, group_keys, strict=False)):
|
|
335
|
+
|
|
336
|
+
def current_mean(h: list[Trace]) -> float:
|
|
337
|
+
if not h:
|
|
272
338
|
return 0.0
|
|
273
|
-
vals = [float(getattr(t, "reward", 0.0) or 0.0) for t in
|
|
339
|
+
vals = [float(getattr(t, "reward", 0.0) or 0.0) for t in h]
|
|
274
340
|
return sum(vals) / len(vals)
|
|
275
341
|
|
|
276
|
-
|
|
277
|
-
pool = earlier_traces_by_task.get(target_tid, deque())
|
|
278
|
-
if pool:
|
|
279
|
-
mu = current_mean(homogeneous)
|
|
280
|
-
# pick element farthest from current mean
|
|
281
|
-
best_i = None
|
|
282
|
-
best_dist = -1.0
|
|
283
|
-
for i, tr in enumerate(list(pool)):
|
|
284
|
-
r = float(getattr(tr, "reward", 0.0) or 0.0)
|
|
285
|
-
dist = abs(r - mu)
|
|
286
|
-
if dist > best_dist:
|
|
287
|
-
best_dist = dist
|
|
288
|
-
best_i = i
|
|
289
|
-
# pop selected
|
|
290
|
-
chosen = list(pool)[best_i] # type: ignore[index]
|
|
291
|
-
# remove from deque efficiently by rotating
|
|
292
|
-
left = list(pool)
|
|
293
|
-
if best_i is not None:
|
|
294
|
-
left.pop(best_i) # O(n) but pool is small in practice
|
|
295
|
-
earlier_traces_by_task[target_tid] = deque(left)
|
|
296
|
-
homogeneous.append(chosen)
|
|
297
|
-
else:
|
|
298
|
-
# duplicate extreme within current homogeneous set
|
|
299
|
-
if not homogeneous:
|
|
300
|
-
raise RuntimeError(f"Group {g_idx} has no traces for target {target_tid}")
|
|
301
|
-
mu = current_mean(homogeneous)
|
|
302
|
-
extreme = max(
|
|
303
|
-
homogeneous, key=lambda t: abs(float(getattr(t, "reward", 0.0) or 0.0) - mu)
|
|
304
|
-
)
|
|
305
|
-
homogeneous.append(extreme)
|
|
306
|
-
needed -= 1
|
|
307
|
-
|
|
308
|
-
# Replacement step: swap in earlier traces to increase reward spread
|
|
309
|
-
pool = earlier_traces_by_task.get(target_tid, deque())
|
|
342
|
+
pool = earlier_traces_by_key.get(target_key, deque())
|
|
310
343
|
if pool:
|
|
311
|
-
# Log pool stats
|
|
312
344
|
pool_vals = [float(getattr(tr, "reward", 0.0) or 0.0) for tr in list(pool)]
|
|
313
345
|
if pool_vals:
|
|
314
346
|
pool_mean = sum(pool_vals) / len(pool_vals)
|
|
@@ -316,16 +348,15 @@ class ReplayBuffer(Buffer[Trace]):
|
|
|
316
348
|
pool_vals
|
|
317
349
|
)
|
|
318
350
|
hud_console.info(
|
|
319
|
-
f"[group-sampler] Group {g_idx}: earlier-pool size={len(pool_vals)}
|
|
351
|
+
f"[group-sampler] Group {g_idx}: earlier-pool size={len(pool_vals)} "
|
|
352
|
+
f"mean={pool_mean:.4f} std={(pool_var**0.5):.4f}"
|
|
320
353
|
)
|
|
321
354
|
|
|
322
|
-
# Decide how many to replace (up to 1/4 of group, at least 1)
|
|
323
355
|
replace_k = max(1, self.group_size // 4)
|
|
324
356
|
replace_k = min(replace_k, len(pool), self.group_size)
|
|
325
357
|
|
|
326
358
|
if replace_k > 0:
|
|
327
359
|
mu = current_mean(homogeneous)
|
|
328
|
-
# Select replacement candidates from pool farthest from current mean
|
|
329
360
|
pool_list = list(pool)
|
|
330
361
|
pool_indices = list(range(len(pool_list)))
|
|
331
362
|
pool_indices.sort(
|
|
@@ -337,12 +368,11 @@ class ReplayBuffer(Buffer[Trace]):
|
|
|
337
368
|
chosen_pool_idx = set(pool_indices[:replace_k])
|
|
338
369
|
replacements = [pool_list[i] for i in pool_indices[:replace_k]]
|
|
339
370
|
|
|
340
|
-
# Remove chosen from pool deque
|
|
341
371
|
remaining = [tr for i, tr in enumerate(pool_list) if i not in chosen_pool_idx]
|
|
342
|
-
|
|
372
|
+
earlier_traces_by_key[target_key] = deque(remaining)
|
|
343
373
|
|
|
344
|
-
# Select current group positions closest to mean to replace
|
|
345
374
|
group_indices = list(range(len(homogeneous)))
|
|
375
|
+
mu = current_mean(homogeneous)
|
|
346
376
|
group_indices.sort(
|
|
347
377
|
key=lambda i: abs(
|
|
348
378
|
(float(getattr(homogeneous[i], "reward", 0.0) or 0.0)) - mu
|
|
@@ -353,18 +383,19 @@ class ReplayBuffer(Buffer[Trace]):
|
|
|
353
383
|
for pos, new_tr in zip(target_positions, replacements, strict=False):
|
|
354
384
|
homogeneous[pos] = new_tr
|
|
355
385
|
|
|
356
|
-
|
|
357
|
-
if any(getattr(t.task, "id", "NA") != target_tid for t in homogeneous):
|
|
386
|
+
if any(self._extract_group_key(t) != target_key for t in homogeneous):
|
|
358
387
|
raise RuntimeError(f"Group {g_idx} is not homogeneous after sampling")
|
|
359
388
|
final_traces.extend(homogeneous)
|
|
360
389
|
|
|
361
390
|
for i in range(0, len(final_traces), self.group_size):
|
|
362
391
|
block = final_traces[i : i + self.group_size]
|
|
363
|
-
|
|
392
|
+
keys = {self._extract_group_key(t) for t in block}
|
|
393
|
+
if len(keys) != 1:
|
|
364
394
|
raise RuntimeError(f"Homogeneity validation failed for block starting at index {i}")
|
|
365
395
|
|
|
366
396
|
hud_console.info(
|
|
367
|
-
f"[group-sampler] final histogram:
|
|
397
|
+
f"[group-sampler] final histogram: "
|
|
398
|
+
f"{Counter(self._extract_group_key(t) for t in final_traces)}"
|
|
368
399
|
)
|
|
369
400
|
return final_traces
|
|
370
401
|
|
|
@@ -38,15 +38,20 @@ def make_sample(
|
|
|
38
38
|
ref_logp_tok: torch.Tensor,
|
|
39
39
|
advantage: float,
|
|
40
40
|
):
|
|
41
|
-
# Minimal object
|
|
42
|
-
#
|
|
41
|
+
# Minimal-but-correct object for GRPOLearner.compute_loss.
|
|
42
|
+
# Needs assistant_mask (T-1) and attention_mask (T) for sanity_check().
|
|
43
43
|
Tm1 = pol_logp_tok.size(-1)
|
|
44
|
-
inputs = {
|
|
44
|
+
inputs = {
|
|
45
|
+
"input_ids": torch.zeros(1, Tm1 + 1, dtype=torch.long),
|
|
46
|
+
"attention_mask": torch.ones(1, Tm1 + 1, dtype=torch.long),
|
|
47
|
+
"assistant_mask": torch.ones(1, Tm1, dtype=torch.bool),
|
|
48
|
+
}
|
|
45
49
|
return TrainingSample(
|
|
46
50
|
inputs=inputs,
|
|
47
51
|
old_logprobs=old_logp_tok,
|
|
48
52
|
ref_logprobs=ref_logp_tok,
|
|
49
|
-
advantage
|
|
53
|
+
# advantage must be 1D so .view(-1,1) works in compute_loss
|
|
54
|
+
advantage=torch.tensor([advantage], dtype=torch.float32),
|
|
50
55
|
)
|
|
51
56
|
|
|
52
57
|
|
|
@@ -155,6 +160,13 @@ def test_skip_update_when_zero_adv(monkeypatch, learner_stub: GRPOLearner):
|
|
|
155
160
|
|
|
156
161
|
monkeypatch.setattr(GRPOLearner, "prepare_groups", _stub_prepare_groups, raising=True)
|
|
157
162
|
|
|
163
|
+
# Return a zero scalar loss that *depends* on params so backward works,
|
|
164
|
+
# but has zero gradients (no update signal).
|
|
165
|
+
def _zero_loss(self, sample) -> torch.Tensor:
|
|
166
|
+
return sum(p.sum() for p in self.policy.parameters()) * 0.0
|
|
167
|
+
|
|
168
|
+
monkeypatch.setattr(GRPOLearner, "compute_loss", _zero_loss, raising=True)
|
|
169
|
+
|
|
158
170
|
# Count optimizer.step calls
|
|
159
171
|
steps = {"n": 0}
|
|
160
172
|
# orig_step = learner_stub.optimizer.step
|
|
@@ -168,4 +180,7 @@ def test_skip_update_when_zero_adv(monkeypatch, learner_stub: GRPOLearner):
|
|
|
168
180
|
assert any(p.requires_grad for p in learner_stub.policy.parameters())
|
|
169
181
|
|
|
170
182
|
learner_stub.update([])
|
|
171
|
-
|
|
183
|
+
# With the current learner implementation we still call optimizer.step()
|
|
184
|
+
# even if the per-minibatch "advantage" is zero (the step is a no-op
|
|
185
|
+
# because the gradients are zero). So we expect exactly one step here.
|
|
186
|
+
assert steps["n"] == 1
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Sample browser task factory."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from pydantic import Field
|
|
8
|
+
|
|
9
|
+
from hud.settings import settings
|
|
10
|
+
from hud.types import MCPToolCall, Task
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _default_browser_mcp_config() -> dict[str, Any]:
    """Build the default MCP config for the hosted remote-browser image."""
    # Evaluated each time the factory runs, so the API key is read from
    # ``settings`` at that moment rather than when this module is imported.
    headers = {
        "Authorization": f"Bearer {settings.api_key}",
        "Mcp-Image": "hudevals/hud-remote-browser:0.1.1",
    }
    return {"browser": {"url": "https://mcp.hud.so/v3/mcp", "headers": headers}}


def _default_browser_setup_tool() -> MCPToolCall:
    """Build the default setup call that navigates the browser to Google."""
    navigate = {"name": "navigate_to_url", "arguments": {"url": "https://www.google.com"}}
    return MCPToolCall(name="setup", arguments=navigate)


class BrowserTask(Task):
    """Task preconfigured for the remote browser, usable as ``BrowserTask(prompt=...)``.

    Every field has a default, so a bare ``BrowserTask()`` is also valid; any
    field can still be overridden by passing it explicitly.
    """

    prompt: str = "Open Google and be ready to search."
    mcp_config: dict[str, Any] = Field(default_factory=_default_browser_mcp_config)
    setup_tool: MCPToolCall | list[MCPToolCall] | None = Field(
        default_factory=_default_browser_setup_tool
    )
|