hud-python 0.4.35__tar.gz → 0.4.36__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- {hud_python-0.4.35 → hud_python-0.4.36}/.gitignore +0 -1
- {hud_python-0.4.35 → hud_python-0.4.36}/PKG-INFO +30 -12
- hud_python-0.4.36/environments/blank/README.md +92 -0
- hud_python-0.4.36/environments/blank/controller/README.md +16 -0
- hud_python-0.4.36/environments/blank/environment/README.md +16 -0
- hud_python-0.4.36/environments/blank/pyproject.toml +19 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/environments/browser/README.md +67 -88
- hud_python-0.4.36/environments/browser/environment/pyproject.toml +20 -0
- hud_python-0.4.36/environments/browser/pyproject.toml +22 -0
- hud_python-0.4.36/environments/deepresearch/pyproject.toml +19 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/agents/tests/test_claude.py +32 -7
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/agents/tests/test_openai.py +29 -6
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/__init__.py +209 -75
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/build.py +9 -4
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/dev.py +20 -39
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/eval.py +3 -2
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/flows/tasks.py +1 -0
- hud_python-0.4.36/hud/cli/init.py +270 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/pull.py +6 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/push.py +2 -1
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/rl/remote_runner.py +3 -1
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/tests/test_build.py +3 -27
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/tests/test_mcp_server.py +1 -12
- hud_python-0.4.36/hud/cli/utils/config.py +85 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/utils/docker.py +21 -39
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/utils/environment.py +4 -3
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/utils/interactive.py +2 -1
- hud_python-0.4.36/hud/cli/utils/local_runner.py +204 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/utils/metadata.py +3 -1
- hud_python-0.4.36/hud/cli/utils/package_runner.py +292 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/utils/remote_runner.py +4 -1
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/clients/mcp_use.py +30 -7
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/datasets/parallel.py +3 -1
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/datasets/runner.py +4 -1
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/otel/context.py +38 -4
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/rl/buffer.py +3 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/rl/tests/test_learner.py +1 -1
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/server/server.py +157 -1
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/settings.py +38 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/shared/hints.py +1 -1
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/tests/test_version.py +1 -1
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/version.py +1 -1
- {hud_python-0.4.35 → hud_python-0.4.36}/pyproject.toml +19 -22
- hud_python-0.4.35/environments/browser/pyproject.toml +0 -22
- hud_python-0.4.35/hud/cli/init.py +0 -677
- {hud_python-0.4.35 → hud_python-0.4.36}/LICENSE +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/README.md +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/environments/README.md +0 -0
- {hud_python-0.4.35/environments/browser/apps → hud_python-0.4.36/environments/browser/environment}/2048/README.md +0 -0
- {hud_python-0.4.35/environments/browser/apps → hud_python-0.4.36/environments/browser/environment}/2048/backend/pyproject.toml +0 -0
- {hud_python-0.4.35/environments/browser/apps → hud_python-0.4.36/environments/browser/environment}/README.md +0 -0
- {hud_python-0.4.35/environments/browser/apps → hud_python-0.4.36/environments/browser/environment}/todo/README.md +0 -0
- {hud_python-0.4.35/environments/browser/apps → hud_python-0.4.36/environments/browser/environment}/todo/backend/pyproject.toml +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/environments/remote_browser/README.md +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/environments/remote_browser/pyproject.toml +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/environments/text_2048/README.md +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/environments/text_2048/pyproject.toml +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/examples/README.md +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/__main__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/agents/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/agents/base.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/agents/claude.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/agents/grounded_openai.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/agents/langchain.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/agents/misc/response_agent.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/agents/openai.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/agents/openai_chat_generic.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/agents/tests/test_client.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/__main__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/analyze.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/clone.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/debug.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/flows/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/get.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/list_func.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/remove.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/rl/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/rl/config.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/rl/display.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/rl/gpu.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/rl/gpu_utils.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/rl/local_runner.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/rl/presets.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/rl/rl_api.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/rl/vllm.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/tests/test_analyze.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/tests/test_clone.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/tests/test_cursor.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/tests/test_debug.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/tests/test_list_func.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/tests/test_pull.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/tests/test_push.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/tests/test_registry.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/utils/cursor.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/utils/runner.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/utils/server.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/clients/README.md +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/clients/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/clients/base.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/clients/fastmcp.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/clients/tests/test_client_integration.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/clients/tests/test_fastmcp.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/clients/tests/test_mcp_use_retry.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/clients/tests/test_protocol.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/clients/utils/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/clients/utils/mcp_use_retry.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/clients/utils/retry.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/clients/utils/retry_transport.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/datasets/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/datasets/utils.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/misc/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/misc/claude_plays_pokemon.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/native/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/native/comparator.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/native/tests/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/native/tests/test_comparator.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/native/tests/test_native_init.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/otel/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/otel/collector.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/otel/config.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/otel/exporters.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/otel/instrumentation.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/otel/processors.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/otel/tests/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/otel/tests/test_processors.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/py.typed +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/rl/README.md +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/rl/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/rl/actor.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/rl/chat_template.jinja +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/rl/config.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/rl/distributed.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/rl/learner.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/rl/tests/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/rl/train.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/rl/types.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/rl/utils/start_vllm_server.sh +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/rl/utils.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/rl/vllm_adapter.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/samples/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/samples/browser.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/server/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/server/context.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/server/low_level.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/shared/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/shared/exceptions.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/shared/requests.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/shared/tests/test_exceptions.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/telemetry/job.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/telemetry/replay.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/telemetry/tests/test_replay.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/telemetry/tests/test_trace.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/telemetry/trace.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/base.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/bash.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/computer/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/computer/anthropic.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/computer/hud.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/computer/openai.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/edit.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/executors/base.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/executors/tests/test_base_executor.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/grounding/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/grounding/config.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/grounding/grounded_tool.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/grounding/grounder.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/grounding/tests/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/playwright.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/response.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/submit.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/tests/test_bash.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/tests/test_bash_extended.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/tests/test_computer.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/tests/test_computer_actions.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/tests/test_edit.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/tests/test_tools_init.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/types.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/tools/utils.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/types.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/agent_factories.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/async_utils.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/group_eval.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/hud_console.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/mcp.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/pretty_errors.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/progress.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/tasks.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/telemetry.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/tests/test_async_utils.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/tests/test_mcp.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/tests/test_progress.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/tests/test_telemetry.py +0 -0
- {hud_python-0.4.35 → hud_python-0.4.36}/hud/utils/tool_shorthand.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hud-python
|
|
3
|
-
Version: 0.4.35
|
|
3
|
+
Version: 0.4.36
|
|
4
4
|
Summary: SDK for the HUD platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hud-evals/hud-python
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
|
|
@@ -40,7 +40,7 @@ Requires-Dist: datasets>=2.14.0
|
|
|
40
40
|
Requires-Dist: httpx<1,>=0.23.0
|
|
41
41
|
Requires-Dist: hud-fastmcp-python-sdk>=0.1.2
|
|
42
42
|
Requires-Dist: hud-mcp-python-sdk>=3.13.2
|
|
43
|
-
Requires-Dist: hud-mcp-use-python-sdk
|
|
43
|
+
Requires-Dist: hud-mcp-use-python-sdk==2.3.19
|
|
44
44
|
Requires-Dist: numpy>=1.24.0
|
|
45
45
|
Requires-Dist: openai
|
|
46
46
|
Requires-Dist: opentelemetry-api>=1.34.1
|
|
@@ -50,8 +50,8 @@ Requires-Dist: opentelemetry-sdk>=1.34.1
|
|
|
50
50
|
Requires-Dist: pathspec>=0.12.1
|
|
51
51
|
Requires-Dist: pillow>=11.1.0
|
|
52
52
|
Requires-Dist: prompt-toolkit==3.0.51
|
|
53
|
-
Requires-Dist: pydantic-settings<3,>=2
|
|
54
|
-
Requires-Dist: pydantic<3,>=2
|
|
53
|
+
Requires-Dist: pydantic-settings<3,>=2.2
|
|
54
|
+
Requires-Dist: pydantic<3,>=2.6
|
|
55
55
|
Requires-Dist: questionary==2.1.0
|
|
56
56
|
Requires-Dist: rich>=13.0.0
|
|
57
57
|
Requires-Dist: toml>=0.10.2
|
|
@@ -59,7 +59,9 @@ Requires-Dist: typer>=0.9.0
|
|
|
59
59
|
Requires-Dist: watchfiles>=0.21.0
|
|
60
60
|
Requires-Dist: wrapt>=1.14.0
|
|
61
61
|
Provides-Extra: agent
|
|
62
|
+
Requires-Dist: aiodocker>=0.24.0; extra == 'agent'
|
|
62
63
|
Requires-Dist: dotenv>=0.9.9; extra == 'agent'
|
|
64
|
+
Requires-Dist: inspect-ai>=0.3.80; extra == 'agent'
|
|
63
65
|
Requires-Dist: ipykernel; extra == 'agent'
|
|
64
66
|
Requires-Dist: ipython<9; extra == 'agent'
|
|
65
67
|
Requires-Dist: jupyter-client; extra == 'agent'
|
|
@@ -67,8 +69,21 @@ Requires-Dist: jupyter-core; extra == 'agent'
|
|
|
67
69
|
Requires-Dist: langchain; extra == 'agent'
|
|
68
70
|
Requires-Dist: langchain-anthropic; extra == 'agent'
|
|
69
71
|
Requires-Dist: langchain-openai; extra == 'agent'
|
|
72
|
+
Requires-Dist: pillow>=11.1.0; extra == 'agent'
|
|
73
|
+
Requires-Dist: playwright; extra == 'agent'
|
|
74
|
+
Requires-Dist: pyautogui>=0.9.54; extra == 'agent'
|
|
75
|
+
Requires-Dist: pyright==1.1.401; extra == 'agent'
|
|
76
|
+
Requires-Dist: pytest-asyncio; extra == 'agent'
|
|
77
|
+
Requires-Dist: pytest-cov; extra == 'agent'
|
|
78
|
+
Requires-Dist: pytest-mock; extra == 'agent'
|
|
79
|
+
Requires-Dist: pytest<9,>=8.1.1; extra == 'agent'
|
|
80
|
+
Requires-Dist: ruff>=0.11.8; extra == 'agent'
|
|
81
|
+
Requires-Dist: setuptools; extra == 'agent'
|
|
82
|
+
Requires-Dist: textdistance<5,>=4.5.0; extra == 'agent'
|
|
70
83
|
Provides-Extra: agents
|
|
84
|
+
Requires-Dist: aiodocker>=0.24.0; extra == 'agents'
|
|
71
85
|
Requires-Dist: dotenv>=0.9.9; extra == 'agents'
|
|
86
|
+
Requires-Dist: inspect-ai>=0.3.80; extra == 'agents'
|
|
72
87
|
Requires-Dist: ipykernel; extra == 'agents'
|
|
73
88
|
Requires-Dist: ipython<9; extra == 'agents'
|
|
74
89
|
Requires-Dist: jupyter-client; extra == 'agents'
|
|
@@ -76,6 +91,17 @@ Requires-Dist: jupyter-core; extra == 'agents'
|
|
|
76
91
|
Requires-Dist: langchain; extra == 'agents'
|
|
77
92
|
Requires-Dist: langchain-anthropic; extra == 'agents'
|
|
78
93
|
Requires-Dist: langchain-openai; extra == 'agents'
|
|
94
|
+
Requires-Dist: pillow>=11.1.0; extra == 'agents'
|
|
95
|
+
Requires-Dist: playwright; extra == 'agents'
|
|
96
|
+
Requires-Dist: pyautogui>=0.9.54; extra == 'agents'
|
|
97
|
+
Requires-Dist: pyright==1.1.401; extra == 'agents'
|
|
98
|
+
Requires-Dist: pytest-asyncio; extra == 'agents'
|
|
99
|
+
Requires-Dist: pytest-cov; extra == 'agents'
|
|
100
|
+
Requires-Dist: pytest-mock; extra == 'agents'
|
|
101
|
+
Requires-Dist: pytest<9,>=8.1.1; extra == 'agents'
|
|
102
|
+
Requires-Dist: ruff>=0.11.8; extra == 'agents'
|
|
103
|
+
Requires-Dist: setuptools; extra == 'agents'
|
|
104
|
+
Requires-Dist: textdistance<5,>=4.5.0; extra == 'agents'
|
|
79
105
|
Provides-Extra: dev
|
|
80
106
|
Requires-Dist: aiodocker>=0.24.0; extra == 'dev'
|
|
81
107
|
Requires-Dist: dotenv>=0.9.9; extra == 'dev'
|
|
@@ -100,14 +126,6 @@ Requires-Dist: setuptools; extra == 'dev'
|
|
|
100
126
|
Requires-Dist: textdistance<5,>=4.5.0; extra == 'dev'
|
|
101
127
|
Provides-Extra: rl
|
|
102
128
|
Requires-Dist: bitsandbytes>=0.41.0; (sys_platform == 'linux') and extra == 'rl'
|
|
103
|
-
Requires-Dist: dotenv>=0.9.9; extra == 'rl'
|
|
104
|
-
Requires-Dist: ipykernel; extra == 'rl'
|
|
105
|
-
Requires-Dist: ipython<9; extra == 'rl'
|
|
106
|
-
Requires-Dist: jupyter-client; extra == 'rl'
|
|
107
|
-
Requires-Dist: jupyter-core; extra == 'rl'
|
|
108
|
-
Requires-Dist: langchain; extra == 'rl'
|
|
109
|
-
Requires-Dist: langchain-anthropic; extra == 'rl'
|
|
110
|
-
Requires-Dist: langchain-openai; extra == 'rl'
|
|
111
129
|
Requires-Dist: liger-kernel>=0.5.0; (sys_platform == 'linux') and extra == 'rl'
|
|
112
130
|
Requires-Dist: peft>=0.17.1; extra == 'rl'
|
|
113
131
|
Requires-Dist: vllm==0.10.1.1; extra == 'rl'
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# test-test
|
|
2
|
+
|
|
3
|
+
## Environment design pattern
|
|
4
|
+
- Controller (Think of this as a frontend in web development)
|
|
5
|
+
- Creates the UX and manages the lifecycle of an app (in this case for an agent)
|
|
6
|
+
- Define `mcp = MCPServer()` and register `@mcp.tool` as tools the agent can interact with
|
|
7
|
+
- Environment (Think of this as a backend in web development)
|
|
8
|
+
- Owns all long‑lived states of the environment and exposes the environment data structure
|
|
9
|
+
- Expose simple HTTP endpoints (`/health`, `/act`, `/reset`, `/state`)
|
|
10
|
+
|
|
11
|
+
IMPORTANT: Make sure all logs go to stderr instead of stdout, which is reserved for MCP communication
|
|
12
|
+
|
|
13
|
+
### Interactive Development
|
|
14
|
+
```bash
|
|
15
|
+
# 1. Configure your API keys (optional - only needed for evaluation)
|
|
16
|
+
# Edit .env file to add your HUD_API_KEY and ANTHROPIC_API_KEY
|
|
17
|
+
|
|
18
|
+
# 2. Start the environment (optional: with --inspector or --interactive)
|
|
19
|
+
hud dev --build --interactive
|
|
20
|
+
|
|
21
|
+
# 3. Choose your preferred way to test:
|
|
22
|
+
|
|
23
|
+
# Option A: Run the task with Claude (requires ANTHROPIC_API_KEY)
|
|
24
|
+
hud eval tasks.json --agent claude
|
|
25
|
+
|
|
26
|
+
# Option B: Interactive notebook test_env.ipynb (great for learning!)
|
|
27
|
+
# Requires installation:
|
|
28
|
+
pip install hud-python[agents]
|
|
29
|
+
|
|
30
|
+
# Option C: Simple Python script (runs all tasks from tasks.json)
|
|
31
|
+
python test_task.py
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Layout
|
|
35
|
+
```
|
|
36
|
+
controller/
|
|
37
|
+
__init__.py # mcp + shared HTTP client
|
|
38
|
+
__main__.py # python -m controller → mcp.run()
|
|
39
|
+
hooks.py # @mcp.initialize / @mcp.shutdown
|
|
40
|
+
tools.py # @mcp.tool act / setup / evaluate
|
|
41
|
+
|
|
42
|
+
./environment
|
|
43
|
+
├── __init__.py
|
|
44
|
+
└── server.py # FastAPI app: /health, /act, /reset, /state
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Publishing Your Environment
|
|
48
|
+
|
|
49
|
+
Once your environment is ready, you can share it with the community:
|
|
50
|
+
|
|
51
|
+
### 1. Push to Registry
|
|
52
|
+
```bash
|
|
53
|
+
# Build and push your environment (requires docker hub login and hud api key)
|
|
54
|
+
hud build
|
|
55
|
+
hud push
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### 2. Create a Dataset
|
|
59
|
+
|
|
60
|
+
Create a dataset on HuggingFace with your tasks:
|
|
61
|
+
|
|
62
|
+
**Option A: Upload manually**
|
|
63
|
+
1. Upload your `tasks.json` to HuggingFace
|
|
64
|
+
2. Make sure it's **public** to appear on leaderboards
|
|
65
|
+
|
|
66
|
+
**Option B: Use the SDK**
|
|
67
|
+
```python
|
|
68
|
+
from hud.datasets import save_tasks
|
|
69
|
+
import json
|
|
70
|
+
|
|
71
|
+
# Load your tasks
|
|
72
|
+
with open("tasks.json") as f:
|
|
73
|
+
tasks = json.load(f)
|
|
74
|
+
|
|
75
|
+
# Push to HuggingFace
|
|
76
|
+
save_tasks(tasks, repo_id="your-org/your-dataset")
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### 3. Run and Track Performance
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
# Run Claude on your benchmark
|
|
83
|
+
hud eval "your-org/your-dataset" --agent claude
|
|
84
|
+
|
|
85
|
+
# View results at:
|
|
86
|
+
# app.hud.so/leaderboards/your-org/your-dataset
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
**Note**: Only public HuggingFace datasets appear as leaderboards!
|
|
90
|
+
|
|
91
|
+
📚 Learn more: [Creating Benchmarks](https://docs.hud.so/evaluate-agents/create-benchmarks) | [Leaderboards](https://docs.hud.so/evaluate-agents/leaderboards)
|
|
92
|
+
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Controller
|
|
2
|
+
|
|
3
|
+
Frontend for the agent: defines tools, minimal state, calls the environment over HTTP.
|
|
4
|
+
|
|
5
|
+
What to implement
|
|
6
|
+
- Shared client in `__init__.py` (one `httpx.AsyncClient`)
|
|
7
|
+
- Lifecycle in `hooks.py` (`@mcp.initialize`/`@mcp.shutdown`)
|
|
8
|
+
- Tools in `tools.py` (`@mcp.tool`) — keep logic thin; docstrings = descriptions
|
|
9
|
+
|
|
10
|
+
Run
|
|
11
|
+
```bash
|
|
12
|
+
hud run controller --transport http --reload
|
|
13
|
+
# Helper endpoints: http://localhost:8765/hud and /hud/tools
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
Principle: the controller is UX, not state. Keep long‑lived state in the environment.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Environment
|
|
2
|
+
|
|
3
|
+
Backend service: owns state and exposes HTTP APIs the controller calls.
|
|
4
|
+
|
|
5
|
+
Endpoints (FastAPI)
|
|
6
|
+
- `GET /health` → {status: ok}
|
|
7
|
+
- `POST /act` → increments counter and returns {count}
|
|
8
|
+
- `POST /reset` → resets counter
|
|
9
|
+
- `GET /state` → returns {count}
|
|
10
|
+
|
|
11
|
+
Run (dev)
|
|
12
|
+
```bash
|
|
13
|
+
uv run uvicorn environment.server:app --reload --port 8005
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
Principle: treat like a backend. Keep long‑lived state here; add endpoints as tools need them.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "test_test"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "A minimal HUD environment"
|
|
5
|
+
requires-python = ">=3.11"
|
|
6
|
+
dependencies = [ "hud-python==0.4.36", "fastapi", "uvicorn[standard]", "httpx>=0.28.1",]
|
|
7
|
+
|
|
8
|
+
[build-system]
|
|
9
|
+
requires = [ "hatchling",]
|
|
10
|
+
build-backend = "hatchling.build"
|
|
11
|
+
|
|
12
|
+
[tool.hud]
|
|
13
|
+
image = "test_test:dev"
|
|
14
|
+
|
|
15
|
+
[tool.hatch.metadata]
|
|
16
|
+
allow-direct-references = true
|
|
17
|
+
|
|
18
|
+
[tool.hatch.build.targets.wheel]
|
|
19
|
+
packages = [ "controller", "environment",]
|
|
@@ -2,100 +2,99 @@
|
|
|
2
2
|
|
|
3
3
|
A browser automation environment for HUD that provides GUI access and web app interaction capabilities. This environment supports hot-reloading during development while maintaining persistent state.
|
|
4
4
|
|
|
5
|
-
##
|
|
5
|
+
## Quick Start
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
### Interactive Development
|
|
8
|
+
```bash
|
|
9
|
+
# 1. Configure your API keys (optional - only needed for evaluation)
|
|
10
|
+
# Edit .env file to add your HUD_API_KEY and ANTHROPIC_API_KEY
|
|
8
11
|
|
|
9
|
-
|
|
10
|
-
|
|
12
|
+
# 2. Start the environment (optional: with inspector)
|
|
13
|
+
hud dev --build --inspector
|
|
11
14
|
|
|
12
|
-
|
|
15
|
+
# 3. Choose your preferred way to test:
|
|
13
16
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
- **BaseHub Tools**: Setup and evaluate tools organized by app (2048, todo)
|
|
17
|
-
- **Multiprocessing Proxy**: Enables state sharing between processes
|
|
17
|
+
# Option A: Run the task with Claude (requires ANTHROPIC_API_KEY)
|
|
18
|
+
hud eval tasks.json --agent claude
|
|
18
19
|
|
|
19
|
-
|
|
20
|
+
# Option B: Interactive notebook test_env.ipynb (great for learning!)
|
|
21
|
+
# Requires installation:
|
|
22
|
+
pip install hud-python[agents]
|
|
20
23
|
|
|
21
|
-
|
|
24
|
+
# Option C: Simple Python script (runs all tasks from tasks.json)
|
|
25
|
+
python test_task.py
|
|
26
|
+
```
|
|
22
27
|
|
|
23
|
-
|
|
28
|
+
## How HUD Environments Work
|
|
24
29
|
|
|
25
|
-
|
|
30
|
+
The environment is split into two components:
|
|
26
31
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
@setup.tool("my_tool")
|
|
30
|
-
async def my_tool():
|
|
31
|
-
env = setup.env
|
|
32
|
-
result = await env.call_app_api("app", "/api/endpoint") # Returns coroutine
|
|
33
|
-
# The coroutine can't be serialized through the proxy!
|
|
34
|
-
```
|
|
32
|
+
- **`env.py`** - Stateful logic that persists across reloads
|
|
33
|
+
- **`server.py`** - MCP server with tools (reloads on file changes)
|
|
35
34
|
|
|
36
|
-
|
|
35
|
+
This separation is crucial for `hud dev` - it allows you to modify the MCP tools and see changes immediately without losing the environment state. The environment runs as a separate process and communicates via socket, while the server can be restarted freely.
|
|
37
36
|
|
|
38
|
-
|
|
39
|
-
# GOOD: Make HTTP calls directly
|
|
40
|
-
@setup.tool("my_tool")
|
|
41
|
-
async def my_tool():
|
|
42
|
-
import httpx
|
|
43
|
-
|
|
44
|
-
# Get the backend port from persistent context
|
|
45
|
-
persistent_ctx = setup.env
|
|
46
|
-
backend_port = persistent_ctx.get_app_backend_port("app")
|
|
47
|
-
|
|
48
|
-
# Make API call directly
|
|
49
|
-
url = f"http://localhost:{backend_port}/api/endpoint"
|
|
50
|
-
async with httpx.AsyncClient() as client:
|
|
51
|
-
response = await client.get(url)
|
|
52
|
-
response.raise_for_status()
|
|
53
|
-
result = response.json()
|
|
54
|
-
```
|
|
37
|
+
If you run into issues with the environment itself, run `hud dev --full-reload` to reload both the environment and the server.
|
|
55
38
|
|
|
56
|
-
|
|
39
|
+
## Publishing Your Environment
|
|
57
40
|
|
|
58
|
-
|
|
41
|
+
Once your environment is ready, you can share it with the community:
|
|
59
42
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
43
|
+
### 1. Push to Registry
|
|
44
|
+
```bash
|
|
45
|
+
# Build and push your environment (requires docker hub login and hud api key)
|
|
46
|
+
hud build
|
|
47
|
+
hud push
|
|
65
48
|
```
|
|
66
49
|
|
|
67
|
-
|
|
50
|
+
### 2. Create a Dataset
|
|
68
51
|
|
|
52
|
+
Create a dataset on HuggingFace with your tasks:
|
|
53
|
+
|
|
54
|
+
**Option A: Upload manually**
|
|
55
|
+
1. Upload your `tasks.json` to HuggingFace
|
|
56
|
+
2. Make sure the dataset is **public** so that it appears on leaderboards
|
|
57
|
+
|
|
58
|
+
**Option B: Use the SDK**
|
|
69
59
|
```python
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
self._running_apps.append(app_name)
|
|
60
|
+
from hud.datasets import save_tasks
|
|
61
|
+
import json
|
|
62
|
+
|
|
63
|
+
# Load your tasks
|
|
64
|
+
with open("tasks.json") as f:
|
|
65
|
+
tasks = json.load(f)
|
|
66
|
+
|
|
67
|
+
# Push to HuggingFace
|
|
68
|
+
save_tasks(tasks, repo_id="your-org/your-dataset")
|
|
80
69
|
```
|
|
81
70
|
|
|
82
|
-
###
|
|
71
|
+
### 3. Run and Track Performance
|
|
83
72
|
|
|
84
|
-
|
|
73
|
+
```bash
|
|
74
|
+
# Run Claude on your benchmark
|
|
75
|
+
hud eval "your-org/your-dataset" --agent claude
|
|
85
76
|
|
|
86
|
-
|
|
87
|
-
#
|
|
88
|
-
playwright_tool = env.playwright # May not work with proxy
|
|
77
|
+
# View results at:
|
|
78
|
+
# app.hud.so/leaderboards/your-org/your-dataset
|
|
89
79
|
```
|
|
90
80
|
|
|
91
|
-
|
|
81
|
+
**Note**: Only public HuggingFace datasets appear as leaderboards!
|
|
92
82
|
|
|
93
|
-
|
|
94
|
-
# GOOD: Use proxy-friendly getter methods
|
|
95
|
-
playwright_tool = persistent_ctx.get_playwright_tool()
|
|
96
|
-
```
|
|
83
|
+
📚 Learn more: [Creating Benchmarks](https://docs.hud.so/evaluate-agents/create-benchmarks) | [Leaderboards](https://docs.hud.so/evaluate-agents/leaderboards)
|
|
97
84
|
|
|
98
|
-
##
|
|
85
|
+
## Architecture Overview
|
|
86
|
+
|
|
87
|
+
The browser environment uses a two-process architecture:
|
|
88
|
+
|
|
89
|
+
1. **Context Server** (`context.py`): Long-running process that maintains persistent state
|
|
90
|
+
2. **MCP Server** (`server.py`): Hot-reloadable process that handles tool requests
|
|
91
|
+
|
|
92
|
+
### Key Components
|
|
93
|
+
|
|
94
|
+
- **BrowserContext**: Stores persistent state (running apps, ports, playwright instance)
|
|
95
|
+
- **ServiceManager**: Manages X11, VNC, and app processes
|
|
96
|
+
- **BaseHub Tools**: Setup and evaluate tools organized by app (2048, todo)
|
|
97
|
+
- **Multiprocessing Proxy**: Enables state sharing between processes
|
|
99
98
|
|
|
100
99
|
### 1. Tool Implementation Pattern
|
|
101
100
|
|
|
@@ -166,26 +165,6 @@ from . import setup
|
|
|
166
165
|
# Not inside functions
|
|
167
166
|
```
|
|
168
167
|
|
|
169
|
-
## Troubleshooting
|
|
170
|
-
|
|
171
|
-
### "Cannot pickle 'coroutine' object"
|
|
172
|
-
|
|
173
|
-
**Cause**: Trying to return an async function result through the proxy.
|
|
174
|
-
|
|
175
|
-
**Fix**: Don't use async methods on proxied objects. Make direct HTTP calls instead.
|
|
176
|
-
|
|
177
|
-
### "App not launched" errors
|
|
178
|
-
|
|
179
|
-
**Cause**: State synchronization issue between ServiceManager and persistent context.
|
|
180
|
-
|
|
181
|
-
**Fix**: Ensure `launch_app` stores app info in the persistent context, and setup/evaluate tools check the persistent context's app list.
|
|
182
|
-
|
|
183
|
-
### "Object has no attribute" on proxy objects
|
|
184
|
-
|
|
185
|
-
**Cause**: Direct attribute access on multiprocessing proxy objects.
|
|
186
|
-
|
|
187
|
-
**Fix**: Use getter/setter methods instead of direct attribute access.
|
|
188
|
-
|
|
189
168
|
## Development Workflow
|
|
190
169
|
|
|
191
170
|
1. **Start the environment**: `hud dev`
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "browser-environment"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Browser environment server for managing X11, VNC, and applications"
|
|
5
|
+
requires-python = ">=3.11"
|
|
6
|
+
dependencies = [
|
|
7
|
+
"fastapi>=0.104.1",
|
|
8
|
+
"uvicorn[standard]>=0.24.0",
|
|
9
|
+
"httpx>=0.25.2",
|
|
10
|
+
"pydantic>=2.6,<3",
|
|
11
|
+
"pydantic-settings>=2.2,<3",
|
|
12
|
+
"python-multipart>=0.0.6",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
[build-system]
|
|
16
|
+
requires = ["hatchling"]
|
|
17
|
+
build-backend = "hatchling.build"
|
|
18
|
+
|
|
19
|
+
[tool.hatch.build.targets.wheel]
|
|
20
|
+
packages = ["controller", "environment"]
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "hud-browser-controller"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "HUD Browser Controller - MCP interface for browser environments"
|
|
5
|
+
requires-python = ">=3.11,<3.14"
|
|
6
|
+
dependencies = [ "pydantic>=2.6,<3", "pydantic-settings>=2.2,<3", "hud-python@git+https://github.com/hud-evals/hud-python@env-cli-improvements", "playwright", "pyautogui", "httpx", "typer", "fastapi", "uvicorn",]
|
|
7
|
+
|
|
8
|
+
[build-system]
|
|
9
|
+
requires = [ "hatchling",]
|
|
10
|
+
build-backend = "hatchling.build"
|
|
11
|
+
|
|
12
|
+
[project.scripts]
|
|
13
|
+
hud-browser-controller = "controller.__main__:main"
|
|
14
|
+
|
|
15
|
+
[tool.hud]
|
|
16
|
+
image = "hud-browser:dev"
|
|
17
|
+
|
|
18
|
+
[tool.hatch.metadata]
|
|
19
|
+
allow-direct-references = true
|
|
20
|
+
|
|
21
|
+
[tool.hatch.build.targets.wheel]
|
|
22
|
+
packages = [ "controller", "problems",]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "deepresearch"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "DeepResearch HUD environment with HTTP backend (EXA on server)"
|
|
5
|
+
requires-python = ">=3.11"
|
|
6
|
+
dependencies = [ "hud-python==0.4.36", "fastapi>=0.104.1", "uvicorn[standard]>=0.24.0", "httpx>=0.24.0",]
|
|
7
|
+
|
|
8
|
+
[build-system]
|
|
9
|
+
requires = [ "hatchling",]
|
|
10
|
+
build-backend = "hatchling.build"
|
|
11
|
+
|
|
12
|
+
[tool.hud]
|
|
13
|
+
image = "deepresearch:dev"
|
|
14
|
+
|
|
15
|
+
[tool.hatch.metadata]
|
|
16
|
+
allow-direct-references = true
|
|
17
|
+
|
|
18
|
+
[tool.hatch.build.targets.wheel]
|
|
19
|
+
packages = [ "controller", "environment",]
|
|
@@ -86,6 +86,7 @@ class TestClaudeAgent:
|
|
|
86
86
|
model_client=mock_model_client,
|
|
87
87
|
model="claude-3-opus-20240229",
|
|
88
88
|
max_tokens=1000,
|
|
89
|
+
validate_api_key=False, # Skip validation in tests
|
|
89
90
|
)
|
|
90
91
|
|
|
91
92
|
assert agent.model_name == "claude-3-opus-20240229"
|
|
@@ -93,10 +94,14 @@ class TestClaudeAgent:
|
|
|
93
94
|
assert agent.anthropic_client == mock_model_client
|
|
94
95
|
|
|
95
96
|
@pytest.mark.asyncio
|
|
96
|
-
async def test_init_without_model_client(self, mock_mcp_client):
|
|
97
|
+
async def test_init_without_model_client(self, mock_mcp_client, mock_anthropic):
|
|
97
98
|
"""Test agent initialization without model client."""
|
|
98
99
|
with patch("hud.settings.settings.anthropic_api_key", "test_key"):
|
|
99
|
-
agent = ClaudeAgent(
|
|
100
|
+
agent = ClaudeAgent(
|
|
101
|
+
mcp_client=mock_mcp_client,
|
|
102
|
+
model="claude-3-opus-20240229",
|
|
103
|
+
validate_api_key=False, # Skip validation in tests
|
|
104
|
+
)
|
|
100
105
|
|
|
101
106
|
assert agent.model_name == "claude-3-opus-20240229"
|
|
102
107
|
assert agent.anthropic_client is not None
|
|
@@ -105,7 +110,11 @@ class TestClaudeAgent:
|
|
|
105
110
|
async def test_format_blocks(self, mock_mcp_client):
|
|
106
111
|
"""Test formatting content blocks into Claude messages."""
|
|
107
112
|
mock_model_client = MagicMock()
|
|
108
|
-
agent = ClaudeAgent(
|
|
113
|
+
agent = ClaudeAgent(
|
|
114
|
+
mcp_client=mock_mcp_client,
|
|
115
|
+
model_client=mock_model_client,
|
|
116
|
+
validate_api_key=False, # Skip validation in tests
|
|
117
|
+
)
|
|
109
118
|
|
|
110
119
|
# Test with text only
|
|
111
120
|
text_blocks: list[types.ContentBlock] = [
|
|
@@ -141,7 +150,11 @@ class TestClaudeAgent:
|
|
|
141
150
|
async def test_format_tool_results_method(self, mock_mcp_client):
|
|
142
151
|
"""Test the agent's format_tool_results method."""
|
|
143
152
|
mock_model_client = MagicMock()
|
|
144
|
-
agent = ClaudeAgent(
|
|
153
|
+
agent = ClaudeAgent(
|
|
154
|
+
mcp_client=mock_mcp_client,
|
|
155
|
+
model_client=mock_model_client,
|
|
156
|
+
validate_api_key=False, # Skip validation in tests
|
|
157
|
+
)
|
|
145
158
|
|
|
146
159
|
tool_calls = [
|
|
147
160
|
MCPToolCall(name="test_tool", arguments={}, id="id1"),
|
|
@@ -171,7 +184,11 @@ class TestClaudeAgent:
|
|
|
171
184
|
"""Test getting model response from Claude API."""
|
|
172
185
|
# Disable telemetry for this test to avoid backend configuration issues
|
|
173
186
|
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
174
|
-
agent = ClaudeAgent(
|
|
187
|
+
agent = ClaudeAgent(
|
|
188
|
+
mcp_client=mock_mcp_client,
|
|
189
|
+
model_client=mock_anthropic,
|
|
190
|
+
validate_api_key=False, # Skip validation in tests
|
|
191
|
+
)
|
|
175
192
|
|
|
176
193
|
# Mock the API response
|
|
177
194
|
mock_response = MagicMock()
|
|
@@ -215,7 +232,11 @@ class TestClaudeAgent:
|
|
|
215
232
|
"""Test getting text-only response."""
|
|
216
233
|
# Disable telemetry for this test to avoid backend configuration issues
|
|
217
234
|
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
218
|
-
agent = ClaudeAgent(
|
|
235
|
+
agent = ClaudeAgent(
|
|
236
|
+
mcp_client=mock_mcp_client,
|
|
237
|
+
model_client=mock_anthropic,
|
|
238
|
+
validate_api_key=False, # Skip validation in tests
|
|
239
|
+
)
|
|
219
240
|
|
|
220
241
|
mock_response = MagicMock()
|
|
221
242
|
# Create text block
|
|
@@ -242,7 +263,11 @@ class TestClaudeAgent:
|
|
|
242
263
|
"""Test handling API errors."""
|
|
243
264
|
# Disable telemetry for this test to avoid backend configuration issues
|
|
244
265
|
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
245
|
-
agent = ClaudeAgent(
|
|
266
|
+
agent = ClaudeAgent(
|
|
267
|
+
mcp_client=mock_mcp_client,
|
|
268
|
+
model_client=mock_anthropic,
|
|
269
|
+
validate_api_key=False, # Skip validation in tests
|
|
270
|
+
)
|
|
246
271
|
|
|
247
272
|
# Mock API error
|
|
248
273
|
mock_anthropic.beta.messages.create = AsyncMock(
|
|
@@ -44,7 +44,10 @@ class TestOperatorAgent:
|
|
|
44
44
|
"""Test agent initialization."""
|
|
45
45
|
mock_model_client = MagicMock()
|
|
46
46
|
agent = OperatorAgent(
|
|
47
|
-
mcp_client=mock_mcp_client,
|
|
47
|
+
mcp_client=mock_mcp_client,
|
|
48
|
+
model_client=mock_model_client,
|
|
49
|
+
model="gpt-4",
|
|
50
|
+
validate_api_key=False, # Skip validation in tests
|
|
48
51
|
)
|
|
49
52
|
|
|
50
53
|
assert agent.model_name == "openai-gpt-4"
|
|
@@ -55,7 +58,11 @@ class TestOperatorAgent:
|
|
|
55
58
|
async def test_format_blocks(self, mock_mcp_client):
|
|
56
59
|
"""Test formatting content blocks."""
|
|
57
60
|
mock_model_client = MagicMock()
|
|
58
|
-
agent = OperatorAgent(
|
|
61
|
+
agent = OperatorAgent(
|
|
62
|
+
mcp_client=mock_mcp_client,
|
|
63
|
+
model_client=mock_model_client,
|
|
64
|
+
validate_api_key=False, # Skip validation in tests
|
|
65
|
+
)
|
|
59
66
|
|
|
60
67
|
# Test with text blocks
|
|
61
68
|
blocks: list[types.ContentBlock] = [
|
|
@@ -85,7 +92,11 @@ class TestOperatorAgent:
|
|
|
85
92
|
@pytest.mark.asyncio
|
|
86
93
|
async def test_format_tool_results(self, mock_mcp_client, mock_openai):
|
|
87
94
|
"""Test formatting tool results."""
|
|
88
|
-
agent = OperatorAgent(
|
|
95
|
+
agent = OperatorAgent(
|
|
96
|
+
mcp_client=mock_mcp_client,
|
|
97
|
+
model_client=mock_openai,
|
|
98
|
+
validate_api_key=False, # Skip validation in tests
|
|
99
|
+
)
|
|
89
100
|
|
|
90
101
|
tool_calls = [
|
|
91
102
|
MCPToolCall(name="test_tool", arguments={}, id="call_123"), # type: ignore
|
|
@@ -111,7 +122,11 @@ class TestOperatorAgent:
|
|
|
111
122
|
@pytest.mark.asyncio
|
|
112
123
|
async def test_format_tool_results_with_error(self, mock_mcp_client, mock_openai):
|
|
113
124
|
"""Test formatting tool results with errors."""
|
|
114
|
-
agent = OperatorAgent(
|
|
125
|
+
agent = OperatorAgent(
|
|
126
|
+
mcp_client=mock_mcp_client,
|
|
127
|
+
model_client=mock_openai,
|
|
128
|
+
validate_api_key=False, # Skip validation in tests
|
|
129
|
+
)
|
|
115
130
|
|
|
116
131
|
tool_calls = [
|
|
117
132
|
MCPToolCall(name="failing_tool", arguments={}, id="call_error"), # type: ignore
|
|
@@ -131,7 +146,11 @@ class TestOperatorAgent:
|
|
|
131
146
|
@pytest.mark.asyncio
|
|
132
147
|
async def test_get_model_response(self, mock_mcp_client, mock_openai):
|
|
133
148
|
"""Test getting model response from OpenAI API."""
|
|
134
|
-
agent = OperatorAgent(
|
|
149
|
+
agent = OperatorAgent(
|
|
150
|
+
mcp_client=mock_mcp_client,
|
|
151
|
+
model_client=mock_openai,
|
|
152
|
+
validate_api_key=False, # Skip validation in tests
|
|
153
|
+
)
|
|
135
154
|
|
|
136
155
|
# Set up available tools so agent doesn't return "No computer use tools available"
|
|
137
156
|
agent._available_tools = [
|
|
@@ -162,7 +181,11 @@ class TestOperatorAgent:
|
|
|
162
181
|
@pytest.mark.asyncio
|
|
163
182
|
async def test_handle_empty_response(self, mock_mcp_client, mock_openai):
|
|
164
183
|
"""Test handling empty response from API."""
|
|
165
|
-
agent = OperatorAgent(
|
|
184
|
+
agent = OperatorAgent(
|
|
185
|
+
mcp_client=mock_mcp_client,
|
|
186
|
+
model_client=mock_openai,
|
|
187
|
+
validate_api_key=False, # Skip validation in tests
|
|
188
|
+
)
|
|
166
189
|
|
|
167
190
|
# Set up available tools
|
|
168
191
|
agent._available_tools = [
|