hud-python 0.4.53__tar.gz → 0.4.54__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- {hud_python-0.4.53 → hud_python-0.4.54}/PKG-INFO +8 -7
- {hud_python-0.4.53 → hud_python-0.4.54}/README.md +6 -6
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/README.md +2 -2
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/blank/server/pyproject.toml +1 -1
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/environment/todo/README.md +2 -2
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/server/pyproject.toml +1 -1
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/deepresearch/server/pyproject.toml +1 -1
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/__init__.py +14 -9
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/dev.py +2 -2
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/eval.py +24 -23
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_eval.py +6 -6
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/types.py +10 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_version.py +1 -1
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/version.py +1 -1
- {hud_python-0.4.53 → hud_python-0.4.54}/pyproject.toml +2 -1
- {hud_python-0.4.53 → hud_python-0.4.54}/.gitignore +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/LICENSE +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/blank/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/blank/environment/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/blank/environment/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/blank/server/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/environment/2048/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/environment/2048/backend/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/environment/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/environment/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/environment/todo/backend/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/deepresearch/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/deepresearch/environment/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/deepresearch/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/remote_browser/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/remote_browser/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/text_2048/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/environments/text_2048/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/examples/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/__main__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/base.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/claude.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/grounded_openai.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/langchain.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/lite_llm.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/misc/integration_test_agent.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/misc/response_agent.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/openai.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/openai_chat_generic.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/tests/test_base_runtime.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/tests/test_claude.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/tests/test_client.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/agents/tests/test_openai.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/__main__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/analyze.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/build.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/clone.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/debug.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/flows/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/flows/tasks.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/get.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/init.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/list_func.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/pull.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/push.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/remove.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/celebrate.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/config.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/display.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/gpu.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/gpu_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/local_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/presets.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/remote_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/rl_api.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/viewer.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/vllm.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/rl/wait_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_analyze.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_analyze_module.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_build.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_build_failure.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_build_module.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_cli_root.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_clone.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_cursor.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_debug.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_list_func.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_mcp_server.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_pull.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_push.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_push_happy.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_push_wrapper.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_registry.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/config.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/cursor.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/docker.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/env_check.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/environment.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/interactive.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/local_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/metadata.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/package_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/remote_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/server.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/source_hash.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_config.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_docker.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_docker_hints.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_env_check.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_environment.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_interactive_module.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_local_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_logging_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_metadata.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_package_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_registry_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_remote_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_runner_modules.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_source_hash.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/tests/test_tasks.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/cli/utils/version_check.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/base.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/fastmcp.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/mcp_use.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/tests/test_client_integration.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/tests/test_fastmcp.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/tests/test_mcp_use_retry.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/tests/test_protocol.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/utils/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/utils/mcp_use_retry.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/utils/retry.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/clients/utils/retry_transport.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/datasets/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/datasets/parallel.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/datasets/runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/datasets/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/datasets/tests/test_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/datasets/tests/test_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/datasets/utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/misc/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/misc/claude_plays_pokemon.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/native/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/native/comparator.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/native/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/native/tests/test_comparator.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/native/tests/test_native_init.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/collector.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/config.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/context.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/exporters.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/instrumentation.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/processors.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/tests/test_instrumentation.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/otel/tests/test_processors.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/py.typed +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/actor.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/buffer.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/chat_template.jinja +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/config.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/distributed.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/learner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/tests/test_learner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/train.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/types.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/utils/start_vllm_server.sh +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/rl/vllm_adapter.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/samples/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/samples/browser.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/context.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/low_level.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/router.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/server.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/test_add_tool.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/test_context.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/test_mcp_server_handlers.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/test_mcp_server_integration.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/test_mcp_server_more.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/test_run_wrapper.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/test_server_extra.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/server/tests/test_sigterm_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/settings.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/shared/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/shared/exceptions.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/shared/hints.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/shared/requests.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/shared/tests/test_exceptions.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/shared/tests/test_hints.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/async_context.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/job.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/replay.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/tests/test_async_context.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/tests/test_instrument.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/tests/test_job.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/tests/test_replay.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/tests/test_trace.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/telemetry/trace.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/base.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/bash.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/computer/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/computer/anthropic.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/computer/hud.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/computer/openai.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/computer/qwen.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/edit.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/executors/base.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/executors/tests/test_base_executor.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/grounding/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/grounding/config.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/grounding/grounded_tool.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/grounding/grounder.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/grounding/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/playwright.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/response.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/submit.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_bash.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_bash_extended.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_computer.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_computer_actions.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_edit.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_submit.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_tools_init.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_types.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/types.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/tools/utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/agent_factories.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/async_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/group_eval.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/hud_console.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/mcp.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/pretty_errors.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/progress.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/task_tracking.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tasks.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/telemetry.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_agent_factories.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_async_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_mcp.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_pretty_errors.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_progress.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_tasks.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_telemetry.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tests/test_tool_shorthand.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.54}/hud/utils/tool_shorthand.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hud-python
|
|
3
|
-
Version: 0.4.53
|
|
3
|
+
Version: 0.4.54
|
|
4
4
|
Summary: SDK for the HUD platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hud-evals/hud-python
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
|
|
@@ -42,6 +42,7 @@ Requires-Dist: httpx<1,>=0.23.0
|
|
|
42
42
|
Requires-Dist: hud-fastmcp-python-sdk>=0.1.2
|
|
43
43
|
Requires-Dist: hud-mcp-python-sdk>=3.13.2
|
|
44
44
|
Requires-Dist: hud-mcp-use-python-sdk==2.3.20
|
|
45
|
+
Requires-Dist: langchain==0.3.27
|
|
45
46
|
Requires-Dist: numpy>=1.24.0
|
|
46
47
|
Requires-Dist: openai
|
|
47
48
|
Requires-Dist: opentelemetry-api>=1.34.1
|
|
@@ -247,8 +248,8 @@ The above example let's the agent play 2048 ([See replay](https://hud.so/trace/6
|
|
|
247
248
|
RL using GRPO a Qwen2.5-VL model on any hud dataset:
|
|
248
249
|
|
|
249
250
|
```bash
|
|
250
|
-
hud get hud-evals/basic
|
|
251
|
-
hud rl basic
|
|
251
|
+
hud get hud-evals/2048-basic # from HF
|
|
252
|
+
hud rl 2048-basic.json
|
|
252
253
|
```
|
|
253
254
|
|
|
254
255
|
> See [agent training docs](https://docs.hud.so/train-agents/quickstart)
|
|
@@ -439,14 +440,14 @@ Train with the new interactive `hud rl` flow:
|
|
|
439
440
|
uv tool install hud-python
|
|
440
441
|
|
|
441
442
|
# Option A: Run directly from a HuggingFace dataset
|
|
442
|
-
hud rl hud-evals/basic
|
|
443
|
+
hud rl hud-evals/2048-basic
|
|
443
444
|
|
|
444
445
|
# Option B: Download first, modify, then train
|
|
445
|
-
hud get hud-evals/basic
|
|
446
|
-
hud rl basic
|
|
446
|
+
hud get hud-evals/2048-basic
|
|
447
|
+
hud rl 2048-basic.json
|
|
447
448
|
|
|
448
449
|
# Optional: baseline evaluation
|
|
449
|
-
hud eval basic
|
|
450
|
+
hud eval 2048-basic.json
|
|
450
451
|
```
|
|
451
452
|
|
|
452
453
|
Supports multi‑turn RL for both:
|
|
@@ -109,8 +109,8 @@ The above example let's the agent play 2048 ([See replay](https://hud.so/trace/6
|
|
|
109
109
|
RL using GRPO a Qwen2.5-VL model on any hud dataset:
|
|
110
110
|
|
|
111
111
|
```bash
|
|
112
|
-
hud get hud-evals/basic
|
|
113
|
-
hud rl basic
|
|
112
|
+
hud get hud-evals/2048-basic # from HF
|
|
113
|
+
hud rl 2048-basic.json
|
|
114
114
|
```
|
|
115
115
|
|
|
116
116
|
> See [agent training docs](https://docs.hud.so/train-agents/quickstart)
|
|
@@ -301,14 +301,14 @@ Train with the new interactive `hud rl` flow:
|
|
|
301
301
|
uv tool install hud-python
|
|
302
302
|
|
|
303
303
|
# Option A: Run directly from a HuggingFace dataset
|
|
304
|
-
hud rl hud-evals/basic
|
|
304
|
+
hud rl hud-evals/2048-basic
|
|
305
305
|
|
|
306
306
|
# Option B: Download first, modify, then train
|
|
307
|
-
hud get hud-evals/basic
|
|
308
|
-
hud rl basic
|
|
307
|
+
hud get hud-evals/2048-basic
|
|
308
|
+
hud rl 2048-basic.json
|
|
309
309
|
|
|
310
310
|
# Optional: baseline evaluation
|
|
311
|
-
hud eval basic
|
|
311
|
+
hud eval 2048-basic.json
|
|
312
312
|
```
|
|
313
313
|
|
|
314
314
|
Supports multi‑turn RL for both:
|
|
@@ -804,9 +804,9 @@ class TodoCompleted:
|
|
|
804
804
|
@problem("todo_basic", description="Complete two todo items", difficulty="easy")
|
|
805
805
|
class TodoBasic:
|
|
806
806
|
def get_setup(self):
|
|
807
|
-
return {"
|
|
807
|
+
return {"name": "todo_seed", "arguments": {"num_items": 5}}
|
|
808
808
|
def get_evaluation(self):
|
|
809
|
-
return {"
|
|
809
|
+
return {"name": "todo_completed", "arguments": {"expected_count": 2}}
|
|
810
810
|
```
|
|
811
811
|
|
|
812
812
|
Decorators keep registration *next to the implementation* and avoid manual bookkeeping. The server simply exposes the combined metadata through an MCP **resource**. Follow `environments/browser/src/hud_controller/problems/registry.py` as a template and expose the JSON with `@mcp.resource("problems://registry")`.
|
|
@@ -47,8 +47,8 @@ await setup({"name": "todo_basic_usage"})
|
|
|
47
47
|
await evaluate({"name": "todo_basic_usage"})
|
|
48
48
|
|
|
49
49
|
# Direct function calls
|
|
50
|
-
await setup({"
|
|
51
|
-
await evaluate({"
|
|
50
|
+
await setup({"name": "todo_reset", "arguments": {}})
|
|
51
|
+
await evaluate({"name": "todo_completion_rate", "arguments": {"min_rate": 0.5}})
|
|
52
52
|
|
|
53
53
|
# MCP resource discovery
|
|
54
54
|
todo_evaluators = await client.read_resource("evaluators://todo")
|
|
@@ -12,6 +12,8 @@ from rich.console import Console
|
|
|
12
12
|
from rich.panel import Panel
|
|
13
13
|
from rich.table import Table
|
|
14
14
|
|
|
15
|
+
from hud.types import AgentType
|
|
16
|
+
|
|
15
17
|
from . import list_func as list_module
|
|
16
18
|
from .analyze import (
|
|
17
19
|
analyze_environment,
|
|
@@ -847,7 +849,7 @@ def eval(
|
|
|
847
849
|
hud_console = HUDConsole()
|
|
848
850
|
|
|
849
851
|
if integration_test:
|
|
850
|
-
agent =
|
|
852
|
+
agent = AgentType.INTEGRATION_TEST
|
|
851
853
|
|
|
852
854
|
# If no source provided, reuse RL helper to find a tasks file interactively
|
|
853
855
|
if source is None:
|
|
@@ -894,17 +896,17 @@ def eval(
|
|
|
894
896
|
# Add standard agent choices
|
|
895
897
|
choices.extend(
|
|
896
898
|
[
|
|
897
|
-
{"name": "Claude 4 Sonnet", "value":
|
|
898
|
-
{"name": "OpenAI Computer Use", "value":
|
|
899
|
-
{"name": "vLLM (Local Server)", "value":
|
|
900
|
-
{"name": "LiteLLM (Multi-provider)", "value":
|
|
899
|
+
{"name": "Claude 4 Sonnet", "value": AgentType.CLAUDE},
|
|
900
|
+
{"name": "OpenAI Computer Use", "value": AgentType.OPENAI},
|
|
901
|
+
{"name": "vLLM (Local Server)", "value": AgentType.VLLM},
|
|
902
|
+
{"name": "LiteLLM (Multi-provider)", "value": AgentType.LITELLM},
|
|
901
903
|
]
|
|
902
904
|
)
|
|
903
905
|
|
|
904
906
|
agent = hud_console.select("Select an agent to use:", choices=choices, default=0)
|
|
905
907
|
|
|
906
908
|
# Handle HUD model selection
|
|
907
|
-
if agent and agent not in [
|
|
909
|
+
if agent and agent not in [e.value for e in AgentType]:
|
|
908
910
|
# Find remote model name
|
|
909
911
|
model = agent
|
|
910
912
|
if not vllm_base_url:
|
|
@@ -921,20 +923,23 @@ def eval(
|
|
|
921
923
|
hud_console.error(f"Model {model} not found")
|
|
922
924
|
raise typer.Exit(1)
|
|
923
925
|
model = base_model
|
|
924
|
-
agent =
|
|
926
|
+
agent = AgentType.VLLM # Use vLLM backend for HUD models
|
|
925
927
|
hud_console.info(f"Using HUD model: {model} (trained on {base_model})")
|
|
926
928
|
|
|
927
929
|
# Validate agent choice
|
|
928
|
-
valid_agents = [
|
|
930
|
+
valid_agents = [e.value for e in AgentType]
|
|
929
931
|
if agent not in valid_agents:
|
|
930
932
|
hud_console.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
|
|
931
933
|
raise typer.Exit(1)
|
|
932
934
|
|
|
935
|
+
# Type narrowing: agent is now guaranteed to be an AgentType value after validation
|
|
936
|
+
agent = AgentType(agent)
|
|
937
|
+
|
|
933
938
|
# Run the command
|
|
934
939
|
eval_command(
|
|
935
940
|
source=source,
|
|
936
941
|
full=full,
|
|
937
|
-
agent=agent,
|
|
942
|
+
agent=agent,
|
|
938
943
|
model=model,
|
|
939
944
|
allowed_tools=allowed_tools,
|
|
940
945
|
max_concurrent=max_concurrent,
|
|
@@ -238,9 +238,9 @@ async def run_mcp_module(
|
|
|
238
238
|
if env_dir.exists() and (env_dir / "server.py").exists():
|
|
239
239
|
hud_console.info("")
|
|
240
240
|
hud_console.info(
|
|
241
|
-
f"{hud_console.sym.FLOW} Don't forget to start the environment backend:"
|
|
241
|
+
f"{hud_console.sym.FLOW} Don't forget to start the environment backend in another terminal:"
|
|
242
242
|
)
|
|
243
|
-
hud_console.info(" cd
|
|
243
|
+
hud_console.info(" cd environment && uv run python uvicorn server:app --reload")
|
|
244
244
|
|
|
245
245
|
# Launch inspector if requested (first run only)
|
|
246
246
|
if inspector and transport == "http":
|
|
@@ -5,13 +5,14 @@ from __future__ import annotations
|
|
|
5
5
|
import asyncio
|
|
6
6
|
import logging
|
|
7
7
|
from pathlib import Path
|
|
8
|
-
from typing import TYPE_CHECKING, Any
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
9
|
|
|
10
10
|
import typer
|
|
11
11
|
|
|
12
12
|
import hud
|
|
13
13
|
from hud.cli.utils.env_check import ensure_built, find_environment_dir
|
|
14
14
|
from hud.settings import settings
|
|
15
|
+
from hud.types import AgentType
|
|
15
16
|
from hud.utils.group_eval import display_group_statistics, run_tasks_grouped
|
|
16
17
|
from hud.utils.hud_console import HUDConsole
|
|
17
18
|
|
|
@@ -113,7 +114,7 @@ def _build_vllm_config(
|
|
|
113
114
|
|
|
114
115
|
|
|
115
116
|
def build_agent(
|
|
116
|
-
agent_type:
|
|
117
|
+
agent_type: AgentType,
|
|
117
118
|
*,
|
|
118
119
|
model: str | None = None,
|
|
119
120
|
allowed_tools: list[str] | None = None,
|
|
@@ -123,11 +124,11 @@ def build_agent(
|
|
|
123
124
|
"""Create and return the requested agent type."""
|
|
124
125
|
|
|
125
126
|
# Import agents lazily to avoid dependency issues
|
|
126
|
-
if agent_type ==
|
|
127
|
+
if agent_type == AgentType.INTEGRATION_TEST:
|
|
127
128
|
from hud.agents.misc.integration_test_agent import IntegrationTestRunner
|
|
128
129
|
|
|
129
130
|
return IntegrationTestRunner(verbose=verbose)
|
|
130
|
-
elif agent_type ==
|
|
131
|
+
elif agent_type == AgentType.VLLM:
|
|
131
132
|
# Create a generic OpenAI agent for vLLM server
|
|
132
133
|
try:
|
|
133
134
|
from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
|
|
@@ -147,7 +148,7 @@ def build_agent(
|
|
|
147
148
|
)
|
|
148
149
|
return GenericOpenAIChatAgent(**config)
|
|
149
150
|
|
|
150
|
-
elif agent_type ==
|
|
151
|
+
elif agent_type == AgentType.OPENAI:
|
|
151
152
|
try:
|
|
152
153
|
from hud.agents import OperatorAgent
|
|
153
154
|
except ImportError as e:
|
|
@@ -165,7 +166,7 @@ def build_agent(
|
|
|
165
166
|
else:
|
|
166
167
|
return OperatorAgent(verbose=verbose)
|
|
167
168
|
|
|
168
|
-
elif agent_type ==
|
|
169
|
+
elif agent_type == AgentType.LITELLM:
|
|
169
170
|
try:
|
|
170
171
|
from hud.agents.lite_llm import LiteAgent
|
|
171
172
|
except ImportError as e:
|
|
@@ -209,7 +210,7 @@ def build_agent(
|
|
|
209
210
|
async def run_single_task(
|
|
210
211
|
source: str,
|
|
211
212
|
*,
|
|
212
|
-
agent_type:
|
|
213
|
+
agent_type: AgentType = AgentType.CLAUDE,
|
|
213
214
|
model: str | None = None,
|
|
214
215
|
allowed_tools: list[str] | None = None,
|
|
215
216
|
max_steps: int = 10,
|
|
@@ -268,14 +269,14 @@ async def run_single_task(
|
|
|
268
269
|
|
|
269
270
|
# Use grouped evaluation if group_size > 1
|
|
270
271
|
agent_config: dict[str, Any] = {}
|
|
271
|
-
if agent_type ==
|
|
272
|
+
if agent_type == AgentType.INTEGRATION_TEST:
|
|
272
273
|
from hud.agents.misc.integration_test_agent import IntegrationTestRunner
|
|
273
274
|
|
|
274
275
|
agent_class = IntegrationTestRunner
|
|
275
276
|
agent_config = {"verbose": verbose}
|
|
276
277
|
if allowed_tools:
|
|
277
278
|
agent_config["allowed_tools"] = allowed_tools
|
|
278
|
-
elif agent_type ==
|
|
279
|
+
elif agent_type == AgentType.VLLM:
|
|
279
280
|
# Special handling for vLLM
|
|
280
281
|
from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
|
|
281
282
|
|
|
@@ -288,14 +289,14 @@ async def run_single_task(
|
|
|
288
289
|
allowed_tools=allowed_tools,
|
|
289
290
|
verbose=verbose,
|
|
290
291
|
)
|
|
291
|
-
elif agent_type ==
|
|
292
|
+
elif agent_type == AgentType.OPENAI:
|
|
292
293
|
from hud.agents import OperatorAgent
|
|
293
294
|
|
|
294
295
|
agent_class = OperatorAgent
|
|
295
296
|
agent_config = {"verbose": verbose}
|
|
296
297
|
if allowed_tools:
|
|
297
298
|
agent_config["allowed_tools"] = allowed_tools
|
|
298
|
-
elif agent_type ==
|
|
299
|
+
elif agent_type == AgentType.LITELLM:
|
|
299
300
|
from hud.agents.lite_llm import LiteAgent
|
|
300
301
|
|
|
301
302
|
agent_class = LiteAgent
|
|
@@ -305,7 +306,7 @@ async def run_single_task(
|
|
|
305
306
|
}
|
|
306
307
|
if allowed_tools:
|
|
307
308
|
agent_config["allowed_tools"] = allowed_tools
|
|
308
|
-
elif agent_type ==
|
|
309
|
+
elif agent_type == AgentType.CLAUDE:
|
|
309
310
|
from hud.agents import ClaudeAgent
|
|
310
311
|
|
|
311
312
|
agent_class = ClaudeAgent
|
|
@@ -353,7 +354,7 @@ async def run_single_task(
|
|
|
353
354
|
async def run_full_dataset(
|
|
354
355
|
source: str,
|
|
355
356
|
*,
|
|
356
|
-
agent_type:
|
|
357
|
+
agent_type: AgentType = AgentType.CLAUDE,
|
|
357
358
|
model: str | None = None,
|
|
358
359
|
allowed_tools: list[str] | None = None,
|
|
359
360
|
max_concurrent: int = 30,
|
|
@@ -395,12 +396,12 @@ async def run_full_dataset(
|
|
|
395
396
|
|
|
396
397
|
# Build agent class + config for run_dataset
|
|
397
398
|
agent_config: dict[str, Any]
|
|
398
|
-
if agent_type ==
|
|
399
|
+
if agent_type == AgentType.INTEGRATION_TEST: # --integration-test mode
|
|
399
400
|
from hud.agents.misc.integration_test_agent import IntegrationTestRunner
|
|
400
401
|
|
|
401
402
|
agent_class = IntegrationTestRunner
|
|
402
403
|
agent_config = {"verbose": verbose}
|
|
403
|
-
elif agent_type ==
|
|
404
|
+
elif agent_type == AgentType.VLLM:
|
|
404
405
|
try:
|
|
405
406
|
from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
|
|
406
407
|
|
|
@@ -419,7 +420,7 @@ async def run_full_dataset(
|
|
|
419
420
|
allowed_tools=allowed_tools,
|
|
420
421
|
verbose=verbose,
|
|
421
422
|
)
|
|
422
|
-
elif agent_type ==
|
|
423
|
+
elif agent_type == AgentType.OPENAI:
|
|
423
424
|
try:
|
|
424
425
|
from hud.agents import OperatorAgent
|
|
425
426
|
|
|
@@ -435,7 +436,7 @@ async def run_full_dataset(
|
|
|
435
436
|
if allowed_tools:
|
|
436
437
|
agent_config["allowed_tools"] = allowed_tools
|
|
437
438
|
|
|
438
|
-
elif agent_type ==
|
|
439
|
+
elif agent_type == AgentType.LITELLM:
|
|
439
440
|
try:
|
|
440
441
|
from hud.agents.lite_llm import LiteAgent
|
|
441
442
|
|
|
@@ -539,8 +540,8 @@ def eval_command(
|
|
|
539
540
|
"--full",
|
|
540
541
|
help="Run the entire dataset (omit for single-task debug mode)",
|
|
541
542
|
),
|
|
542
|
-
agent:
|
|
543
|
-
|
|
543
|
+
agent: AgentType = typer.Option( # noqa: B008
|
|
544
|
+
AgentType.CLAUDE,
|
|
544
545
|
"--agent",
|
|
545
546
|
help="Agent backend to use (claude, openai, vllm for local server, or litellm)",
|
|
546
547
|
),
|
|
@@ -648,21 +649,21 @@ def eval_command(
|
|
|
648
649
|
|
|
649
650
|
# We pass integration_test as the agent_type
|
|
650
651
|
if integration_test:
|
|
651
|
-
agent =
|
|
652
|
+
agent = AgentType.INTEGRATION_TEST
|
|
652
653
|
|
|
653
654
|
# Check for required API keys
|
|
654
|
-
if agent ==
|
|
655
|
+
if agent == AgentType.CLAUDE:
|
|
655
656
|
if not settings.anthropic_api_key:
|
|
656
657
|
hud_console.error("ANTHROPIC_API_KEY is required for Claude agent")
|
|
657
658
|
hud_console.info(
|
|
658
659
|
"Set it in your environment or run: hud set ANTHROPIC_API_KEY=your-key-here"
|
|
659
660
|
)
|
|
660
661
|
raise typer.Exit(1)
|
|
661
|
-
elif agent ==
|
|
662
|
+
elif agent == AgentType.OPENAI and not settings.openai_api_key:
|
|
662
663
|
hud_console.error("OPENAI_API_KEY is required for OpenAI agent")
|
|
663
664
|
hud_console.info("Set it in your environment or run: hud set OPENAI_API_KEY=your-key-here")
|
|
664
665
|
raise typer.Exit(1)
|
|
665
|
-
elif agent ==
|
|
666
|
+
elif agent == AgentType.VLLM:
|
|
666
667
|
if model:
|
|
667
668
|
hud_console.info(f"Using vLLM with model: {model}")
|
|
668
669
|
else:
|
|
@@ -11,7 +11,7 @@ from hud.cli.eval import (
|
|
|
11
11
|
build_agent,
|
|
12
12
|
run_single_task,
|
|
13
13
|
)
|
|
14
|
-
from hud.types import Task, Trace
|
|
14
|
+
from hud.types import AgentType, Task, Trace
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class TestBuildAgent:
|
|
@@ -26,7 +26,7 @@ class TestBuildAgent:
|
|
|
26
26
|
mock_runner.return_value = mock_instance
|
|
27
27
|
|
|
28
28
|
# Test with verbose=False
|
|
29
|
-
result = build_agent(
|
|
29
|
+
result = build_agent(AgentType.INTEGRATION_TEST, verbose=False)
|
|
30
30
|
|
|
31
31
|
mock_runner.assert_called_once_with(verbose=False)
|
|
32
32
|
assert result == mock_instance
|
|
@@ -40,7 +40,7 @@ class TestBuildAgent:
|
|
|
40
40
|
mock_runner.return_value = mock_instance
|
|
41
41
|
|
|
42
42
|
# Test with verbose=False
|
|
43
|
-
result = build_agent(
|
|
43
|
+
result = build_agent(AgentType.CLAUDE, verbose=False)
|
|
44
44
|
|
|
45
45
|
mock_runner.assert_called_once_with(model="claude-sonnet-4-20250514", verbose=False)
|
|
46
46
|
assert result == mock_instance
|
|
@@ -55,7 +55,7 @@ class TestBuildAgent:
|
|
|
55
55
|
|
|
56
56
|
# Test with verbose=False
|
|
57
57
|
result = build_agent(
|
|
58
|
-
|
|
58
|
+
AgentType.CLAUDE,
|
|
59
59
|
model="claude-sonnet-4-20250514",
|
|
60
60
|
allowed_tools=["act"],
|
|
61
61
|
verbose=True,
|
|
@@ -97,7 +97,7 @@ class TestRunSingleTask:
|
|
|
97
97
|
patch("hud.cli.eval.find_environment_dir", return_value=None),
|
|
98
98
|
patch("hud.cli.eval.hud.trace"),
|
|
99
99
|
):
|
|
100
|
-
await run_single_task("test.json", agent_type=
|
|
100
|
+
await run_single_task("test.json", agent_type=AgentType.INTEGRATION_TEST, max_steps=10)
|
|
101
101
|
|
|
102
102
|
# Verify agent.run was called with the task containing agent_config
|
|
103
103
|
mock_agent.run.assert_called_once()
|
|
@@ -119,7 +119,7 @@ class TestRunSingleTask:
|
|
|
119
119
|
mock_grouped.return_value = [{"task": mock_task, "rewards": [1.0, 0.5]}]
|
|
120
120
|
|
|
121
121
|
await run_single_task(
|
|
122
|
-
"test.json", agent_type=
|
|
122
|
+
"test.json", agent_type=AgentType.INTEGRATION_TEST, group_size=3, max_steps=10
|
|
123
123
|
)
|
|
124
124
|
|
|
125
125
|
# Verify run_tasks_grouped was called with correct group_size
|
|
@@ -5,6 +5,7 @@ import json
|
|
|
5
5
|
import logging
|
|
6
6
|
import uuid
|
|
7
7
|
from collections import defaultdict
|
|
8
|
+
from enum import Enum
|
|
8
9
|
from string import Template
|
|
9
10
|
from typing import Any, Literal
|
|
10
11
|
|
|
@@ -21,6 +22,14 @@ logger = logging.getLogger(__name__)
|
|
|
21
22
|
_missing_api_key_error_logged: bool = False
|
|
22
23
|
|
|
23
24
|
|
|
25
|
+
class AgentType(str, Enum):
|
|
26
|
+
CLAUDE = "claude"
|
|
27
|
+
OPENAI = "openai"
|
|
28
|
+
VLLM = "vllm"
|
|
29
|
+
LITELLM = "litellm"
|
|
30
|
+
INTEGRATION_TEST = "integration_test"
|
|
31
|
+
|
|
32
|
+
|
|
24
33
|
class Task(BaseModel):
|
|
25
34
|
"""
|
|
26
35
|
A task configuration that can be used to create a task.
|
|
@@ -325,6 +334,7 @@ class Trace(BaseModel):
|
|
|
325
334
|
|
|
326
335
|
__all__ = [
|
|
327
336
|
"AgentResponse",
|
|
337
|
+
"AgentType",
|
|
328
338
|
"MCPToolCall",
|
|
329
339
|
"MCPToolResult",
|
|
330
340
|
"Trace",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "hud-python"
|
|
3
|
-
version = "0.4.
|
|
3
|
+
version = "0.4.54"
|
|
4
4
|
description = "SDK for the HUD platform."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.11, <3.13"
|
|
@@ -18,6 +18,7 @@ dependencies = [
|
|
|
18
18
|
"hud-mcp-python-sdk>=3.13.2",
|
|
19
19
|
"hud-fastmcp-python-sdk>=0.1.2",
|
|
20
20
|
"hud-mcp-use-python-sdk==2.3.20",
|
|
21
|
+
"langchain==0.3.27",
|
|
21
22
|
"pathspec>=0.12.1",
|
|
22
23
|
"wrapt>=1.14.0",
|
|
23
24
|
# CLI dependencies
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/environment/2048/backend/pyproject.toml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hud_python-0.4.53 → hud_python-0.4.54}/environments/browser/environment/todo/backend/pyproject.toml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hud_python-0.4.53 → hud_python-0.4.54}/environments/deepresearch/environment/pyproject.toml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|