hud-python 0.5.8__tar.gz → 0.5.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hud_python-0.5.8 → hud_python-0.5.10}/PKG-INFO +1 -1
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/__init__.py +5 -9
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/base.py +2 -13
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/claude.py +3 -18
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/gemini.py +3 -22
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/gemini_cua.py +3 -17
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/openai.py +4 -25
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/openai_chat.py +3 -19
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/operator.py +3 -17
- hud_python-0.5.10/hud/agents/types.py +148 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/eval.py +1 -1
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/clients/mcp_use.py +6 -1
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/datasets/loader.py +4 -8
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/datasets/tests/test_loader.py +14 -14
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/connection.py +4 -2
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/connectors/mcp_config.py +29 -1
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/environment.py +11 -4
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/tests/test_connectors.py +10 -23
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/tests/test_environment.py +248 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/tests/test_local_connectors.py +81 -40
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/eval/tests/test_task.py +107 -0
- hud_python-0.5.10/hud/patches/mcp_patches.py +308 -0
- hud_python-0.5.10/hud/tools/computer/__init__.py +48 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/types.py +50 -27
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/utils/tests/test_version.py +1 -1
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/version.py +1 -1
- {hud_python-0.5.8 → hud_python-0.5.10}/pyproject.toml +1 -1
- hud_python-0.5.8/hud/patches/mcp_patches.py +0 -151
- hud_python-0.5.8/hud/tools/computer/__init__.py +0 -19
- {hud_python-0.5.8 → hud_python-0.5.10}/.gitignore +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/LICENSE +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/README.md +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/examples/README.md +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/__main__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/gateway.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/grounded_openai.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/misc/integration_test_agent.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/misc/response_agent.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/resolver.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/tests/conftest.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/tests/test_base_runtime.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/tests/test_claude.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/tests/test_client.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/tests/test_gemini.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/tests/test_openai.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/tests/test_operator.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/tests/test_resolver.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/agents/tests/test_run_eval.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/__main__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/analyze.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/build.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/clone.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/debug.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/dev.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/flows/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/flows/dev.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/flows/init.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/flows/tasks.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/flows/templates.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/flows/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/flows/tests/test_dev.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/get.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/init.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/list_func.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/pull.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/push.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/remove.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/rft.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/rft_status.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_analyze.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_analyze_module.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_build.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_build_failure.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_build_module.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_cli_root.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_clone.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_convert.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_cursor.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_debug.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_dev.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_eval.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_eval_bedrock.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_init.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_list_func.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_mcp_server.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_pull.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_push.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_push_happy.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_push_wrapper.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_registry.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/celebrate.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/config.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/cursor.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/docker.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/env_check.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/environment.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/git.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/interactive.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/local_runner.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/metadata.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/package_runner.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/remote_runner.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/runner.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/server.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/source_hash.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tests/test_config.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tests/test_docker.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tests/test_docker_hints.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tests/test_env_check.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tests/test_environment.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tests/test_git.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tests/test_interactive_module.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tests/test_local_runner.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tests/test_logging_utils.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tests/test_metadata.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tests/test_package_runner.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tests/test_registry_utils.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tests/test_remote_runner.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tests/test_runner_modules.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tests/test_source_hash.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/tests/test_tasks.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/version_check.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/cli/utils/viewer.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/clients/README.md +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/clients/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/clients/base.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/clients/environment.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/clients/fastmcp.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/clients/tests/test_analyze_scenarios.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/clients/tests/test_client_integration.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/clients/tests/test_fastmcp.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/clients/tests/test_mcp_use_retry.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/clients/tests/test_protocol.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/clients/utils/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/clients/utils/mcp_use_retry.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/clients/utils/retry.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/clients/utils/retry_transport.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/datasets/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/datasets/runner.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/datasets/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/datasets/tests/test_utils.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/datasets/utils.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/connectors/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/connectors/base.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/connectors/local.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/connectors/openai.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/connectors/remote.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/integrations/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/integrations/adk.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/integrations/anthropic.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/integrations/gemini.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/integrations/langchain.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/integrations/llamaindex.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/integrations/openai.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/mock.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/router.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/scenarios.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/tests/test_connection.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/tests/test_integrations.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/tests/test_scenarios.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/tests/test_tools.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/types.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/utils/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/utils/formats.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/utils/schema.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/environment/utils/tool_wrappers.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/eval/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/eval/context.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/eval/display.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/eval/instrument.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/eval/manager.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/eval/parallel.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/eval/task.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/eval/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/eval/tests/test_context.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/eval/tests/test_eval.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/eval/tests/test_manager.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/eval/tests/test_parallel.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/eval/types.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/eval/utils.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/native/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/native/comparator.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/native/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/native/tests/test_comparator.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/native/tests/test_native_init.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/patches/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/patches/warnings.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/py.typed +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/samples/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/samples/browser.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/server/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/server/context.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/server/low_level.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/server/router.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/server/server.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/server/tests/test_add_tool.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/server/tests/test_context.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/server/tests/test_mcp_server_handlers.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/server/tests/test_mcp_server_integration.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/server/tests/test_mcp_server_more.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/server/tests/test_run_wrapper.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/server/tests/test_server_extra.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/server/tests/test_sigterm_runner.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/settings.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/shared/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/shared/exceptions.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/shared/hints.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/shared/requests.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/shared/tests/test_exceptions.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/shared/tests/test_hints.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/telemetry/exporter.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/telemetry/tests/test_eval_telemetry.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/telemetry/tests/test_exporter.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/telemetry/tests/test_instrument.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/agent.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/apply_patch.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/base.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/bash.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/computer/anthropic.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/computer/gemini.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/computer/hud.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/computer/openai.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/computer/qwen.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/edit.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/executors/base.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/executors/tests/test_base_executor.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/grounding/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/grounding/config.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/grounding/grounded_tool.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/grounding/grounder.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/grounding/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/jupyter.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/playwright.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/response.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/shell.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/submit.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_agent_tool.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_apply_patch.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_bash.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_bash_extended.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_computer.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_computer_actions.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_edit.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_jupyter_tool.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_shell.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_submit.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_tools_init.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_types.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/types.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/tools/utils.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/utils/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/utils/env.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/utils/hud_console.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/utils/mcp.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/utils/pretty_errors.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/utils/strict_schema.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/utils/telemetry.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/utils/tests/test_mcp.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/utils/tests/test_pretty_errors.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/utils/tests/test_telemetry.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/utils/tests/test_tool_shorthand.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/utils/tool_shorthand.py +0 -0
- {hud_python-0.5.8 → hud_python-0.5.10}/hud/utils/types.py +0 -0
|
@@ -56,15 +56,11 @@ def create_agent(model: str, **kwargs: Any) -> MCPAgent:
|
|
|
56
56
|
if gateway_info:
|
|
57
57
|
provider = gateway_info.get("provider") or "openai"
|
|
58
58
|
else:
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
ClaudeAgent: "anthropic",
|
|
65
|
-
GeminiAgent: "google",
|
|
66
|
-
}
|
|
67
|
-
provider = _AGENT_TO_PROVIDER.get(agent_cls, "openai")
|
|
59
|
+
provider = "openai"
|
|
60
|
+
if agent_cls.__name__ == "ClaudeAgent":
|
|
61
|
+
provider = "anthropic"
|
|
62
|
+
elif agent_cls.__name__ in ("GeminiAgent", "GeminiCUAAgent"):
|
|
63
|
+
provider = "gemini"
|
|
68
64
|
|
|
69
65
|
client = build_gateway_client(provider)
|
|
70
66
|
|
|
@@ -9,11 +9,12 @@ from abc import ABC, abstractmethod
|
|
|
9
9
|
from typing import TYPE_CHECKING, Any, ClassVar, Literal
|
|
10
10
|
|
|
11
11
|
import mcp.types as types
|
|
12
|
-
from pydantic import BaseModel, ConfigDict
|
|
13
12
|
|
|
14
13
|
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
|
|
15
14
|
from hud.utils.hud_console import HUDConsole
|
|
16
15
|
|
|
16
|
+
from .types import BaseCreateParams
|
|
17
|
+
|
|
17
18
|
if TYPE_CHECKING:
|
|
18
19
|
from hud.environment import Environment
|
|
19
20
|
from hud.eval.context import EvalContext
|
|
@@ -22,18 +23,6 @@ if TYPE_CHECKING:
|
|
|
22
23
|
logger = logging.getLogger(__name__)
|
|
23
24
|
|
|
24
25
|
|
|
25
|
-
class BaseCreateParams(BaseModel):
|
|
26
|
-
"""Runtime parameters for agent creation."""
|
|
27
|
-
|
|
28
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
29
|
-
|
|
30
|
-
# Primary way to bind agent to execution context (v5)
|
|
31
|
-
ctx: Any | None = None # EvalContext or Environment - agent uses this for tool calls
|
|
32
|
-
|
|
33
|
-
auto_respond: bool = False
|
|
34
|
-
verbose: bool = False
|
|
35
|
-
|
|
36
|
-
|
|
37
26
|
class MCPAgent(ABC):
|
|
38
27
|
"""
|
|
39
28
|
Base class for MCP-enabled agents.
|
|
@@ -25,7 +25,6 @@ from anthropic.types.beta import (
|
|
|
25
25
|
BetaToolTextEditor20250728Param,
|
|
26
26
|
BetaToolUnionParam,
|
|
27
27
|
)
|
|
28
|
-
from pydantic import ConfigDict
|
|
29
28
|
|
|
30
29
|
from hud.settings import settings
|
|
31
30
|
from hud.tools.computer.settings import computer_settings
|
|
@@ -33,7 +32,8 @@ from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
|
|
|
33
32
|
from hud.utils.hud_console import HUDConsole
|
|
34
33
|
from hud.utils.types import with_signature
|
|
35
34
|
|
|
36
|
-
from .base import
|
|
35
|
+
from .base import MCPAgent
|
|
36
|
+
from .types import ClaudeConfig, ClaudeCreateParams
|
|
37
37
|
|
|
38
38
|
if TYPE_CHECKING:
|
|
39
39
|
from collections.abc import Sequence
|
|
@@ -41,21 +41,6 @@ if TYPE_CHECKING:
|
|
|
41
41
|
logger = logging.getLogger(__name__)
|
|
42
42
|
|
|
43
43
|
|
|
44
|
-
class ClaudeConfig(BaseAgentConfig):
|
|
45
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
46
|
-
|
|
47
|
-
model_name: str = "Claude"
|
|
48
|
-
model: str = "claude-sonnet-4-5"
|
|
49
|
-
model_client: AsyncAnthropic | AsyncAnthropicBedrock | None = None
|
|
50
|
-
max_tokens: int = 16384
|
|
51
|
-
use_computer_beta: bool = True
|
|
52
|
-
validate_api_key: bool = True
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
class ClaudeCreateParams(BaseCreateParams, ClaudeConfig):
|
|
56
|
-
pass
|
|
57
|
-
|
|
58
|
-
|
|
59
44
|
class ClaudeAgent(MCPAgent):
|
|
60
45
|
"""
|
|
61
46
|
Claude agent that uses MCP servers for tool execution.
|
|
@@ -94,7 +79,7 @@ class ClaudeAgent(MCPAgent):
|
|
|
94
79
|
"or ANTHROPIC_API_KEY for direct Anthropic access."
|
|
95
80
|
)
|
|
96
81
|
|
|
97
|
-
self.anthropic_client = model_client
|
|
82
|
+
self.anthropic_client: AsyncAnthropic | AsyncAnthropicBedrock = model_client
|
|
98
83
|
self.max_tokens = self.config.max_tokens
|
|
99
84
|
self.use_computer_beta = self.config.use_computer_beta
|
|
100
85
|
self.hud_console = HUDConsole(logger=logger)
|
|
@@ -8,37 +8,18 @@ from typing import Any, ClassVar, cast
|
|
|
8
8
|
import mcp.types as types
|
|
9
9
|
from google import genai
|
|
10
10
|
from google.genai import types as genai_types
|
|
11
|
-
from pydantic import ConfigDict
|
|
12
11
|
|
|
13
12
|
from hud.settings import settings
|
|
14
13
|
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
|
|
15
14
|
from hud.utils.hud_console import HUDConsole
|
|
16
15
|
from hud.utils.types import with_signature
|
|
17
16
|
|
|
18
|
-
from .base import
|
|
17
|
+
from .base import MCPAgent
|
|
18
|
+
from .types import GeminiConfig, GeminiCreateParams
|
|
19
19
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
class GeminiConfig(BaseAgentConfig):
|
|
24
|
-
"""Configuration for `GeminiAgent`."""
|
|
25
|
-
|
|
26
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
27
|
-
|
|
28
|
-
model_name: str = "Gemini"
|
|
29
|
-
model: str = "gemini-3-pro-preview"
|
|
30
|
-
model_client: genai.Client | None = None
|
|
31
|
-
temperature: float = 1.0
|
|
32
|
-
top_p: float = 0.95
|
|
33
|
-
top_k: int = 40
|
|
34
|
-
max_output_tokens: int = 8192
|
|
35
|
-
validate_api_key: bool = True
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class GeminiCreateParams(BaseCreateParams, GeminiConfig):
|
|
39
|
-
pass
|
|
40
|
-
|
|
41
|
-
|
|
42
23
|
class GeminiAgent(MCPAgent):
|
|
43
24
|
"""
|
|
44
25
|
Gemini agent that uses MCP servers for tool execution.
|
|
@@ -80,7 +61,7 @@ class GeminiAgent(MCPAgent):
|
|
|
80
61
|
except Exception as e:
|
|
81
62
|
raise ValueError(f"Gemini API key is invalid: {e}") from e
|
|
82
63
|
|
|
83
|
-
self.gemini_client = model_client
|
|
64
|
+
self.gemini_client: genai.Client = model_client
|
|
84
65
|
self.temperature = self.config.temperature
|
|
85
66
|
self.top_p = self.config.top_p
|
|
86
67
|
self.top_k = self.config.top_k
|
|
@@ -7,14 +7,14 @@ from typing import Any, ClassVar
|
|
|
7
7
|
|
|
8
8
|
import mcp.types as types
|
|
9
9
|
from google.genai import types as genai_types
|
|
10
|
-
from pydantic import ConfigDict, Field
|
|
11
10
|
|
|
12
11
|
from hud.tools.computer.settings import computer_settings
|
|
13
12
|
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
|
|
14
13
|
from hud.utils.types import with_signature
|
|
15
14
|
|
|
16
|
-
from .base import
|
|
17
|
-
from .gemini import GeminiAgent
|
|
15
|
+
from .base import MCPAgent
|
|
16
|
+
from .gemini import GeminiAgent
|
|
17
|
+
from .types import GeminiCUAConfig, GeminiCUACreateParams
|
|
18
18
|
|
|
19
19
|
logger = logging.getLogger(__name__)
|
|
20
20
|
|
|
@@ -56,20 +56,6 @@ what they asked.
|
|
|
56
56
|
""".strip()
|
|
57
57
|
|
|
58
58
|
|
|
59
|
-
class GeminiCUAConfig(GeminiConfig):
|
|
60
|
-
"""Configuration for `GeminiCUAAgent`."""
|
|
61
|
-
|
|
62
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
63
|
-
|
|
64
|
-
model_name: str = "GeminiCUA"
|
|
65
|
-
model: str = "gemini-2.5-computer-use-preview-10-2025"
|
|
66
|
-
excluded_predefined_functions: list[str] = Field(default_factory=list)
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
class GeminiCUACreateParams(BaseCreateParams, GeminiCUAConfig):
|
|
70
|
-
pass
|
|
71
|
-
|
|
72
|
-
|
|
73
59
|
class GeminiCUAAgent(GeminiAgent):
|
|
74
60
|
"""
|
|
75
61
|
Gemini Computer Use Agent that extends GeminiAgent with computer use capabilities.
|
|
@@ -29,39 +29,18 @@ from openai.types.responses import (
|
|
|
29
29
|
from openai.types.responses.response_create_params import ToolChoice # noqa: TC002
|
|
30
30
|
from openai.types.responses.response_input_param import FunctionCallOutput, Message
|
|
31
31
|
from openai.types.shared_params.reasoning import Reasoning # noqa: TC002
|
|
32
|
-
from pydantic import ConfigDict
|
|
33
32
|
|
|
34
33
|
from hud.settings import settings
|
|
35
34
|
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
|
|
36
35
|
from hud.utils.strict_schema import ensure_strict_json_schema
|
|
37
36
|
from hud.utils.types import with_signature
|
|
38
37
|
|
|
39
|
-
from .base import
|
|
38
|
+
from .base import MCPAgent
|
|
39
|
+
from .types import OpenAIConfig, OpenAICreateParams
|
|
40
40
|
|
|
41
41
|
logger = logging.getLogger(__name__)
|
|
42
42
|
|
|
43
43
|
|
|
44
|
-
class OpenAIConfig(BaseAgentConfig):
|
|
45
|
-
"""Configuration model for `OpenAIAgent`."""
|
|
46
|
-
|
|
47
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
48
|
-
|
|
49
|
-
model_name: str = "OpenAI"
|
|
50
|
-
model: str = "gpt-5.1"
|
|
51
|
-
model_client: AsyncOpenAI | None = None
|
|
52
|
-
max_output_tokens: int | None = None
|
|
53
|
-
temperature: float | None = None
|
|
54
|
-
reasoning: Reasoning | None = None
|
|
55
|
-
tool_choice: ToolChoice | None = None
|
|
56
|
-
truncation: Literal["auto", "disabled"] | None = None
|
|
57
|
-
parallel_tool_calls: bool | None = None
|
|
58
|
-
validate_api_key: bool = True
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
class OpenAICreateParams(BaseCreateParams, OpenAIConfig):
|
|
62
|
-
pass
|
|
63
|
-
|
|
64
|
-
|
|
65
44
|
class OpenAIAgent(MCPAgent):
|
|
66
45
|
"""Generic OpenAI agent that can execute MCP tools through the Responses API."""
|
|
67
46
|
|
|
@@ -98,11 +77,11 @@ class OpenAIAgent(MCPAgent):
|
|
|
98
77
|
except Exception as exc: # pragma: no cover - network validation
|
|
99
78
|
raise ValueError(f"OpenAI API key is invalid: {exc}") from exc
|
|
100
79
|
|
|
101
|
-
self.openai_client = model_client
|
|
80
|
+
self.openai_client: AsyncOpenAI = model_client
|
|
102
81
|
self._model = self.config.model
|
|
103
82
|
self.max_output_tokens = self.config.max_output_tokens
|
|
104
83
|
self.temperature = self.config.temperature
|
|
105
|
-
self.reasoning = self.config.reasoning
|
|
84
|
+
self.reasoning: Reasoning | None = self.config.reasoning
|
|
106
85
|
self.tool_choice: ToolChoice | None = self.config.tool_choice
|
|
107
86
|
self.parallel_tool_calls = self.config.parallel_tool_calls
|
|
108
87
|
self.truncation: Literal["auto", "disabled"] | None = self.config.truncation
|
|
@@ -22,14 +22,14 @@ from typing import TYPE_CHECKING, Any, ClassVar, cast
|
|
|
22
22
|
|
|
23
23
|
import mcp.types as types
|
|
24
24
|
from openai import AsyncOpenAI
|
|
25
|
-
from pydantic import ConfigDict, Field
|
|
26
25
|
|
|
27
26
|
from hud.settings import settings
|
|
28
27
|
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
|
|
29
28
|
from hud.utils.hud_console import HUDConsole
|
|
30
29
|
from hud.utils.types import with_signature
|
|
31
30
|
|
|
32
|
-
from .base import
|
|
31
|
+
from .base import MCPAgent
|
|
32
|
+
from .types import OpenAIChatConfig, OpenAIChatCreateParams
|
|
33
33
|
|
|
34
34
|
if TYPE_CHECKING:
|
|
35
35
|
from openai.types.chat import ChatCompletionToolParam
|
|
@@ -38,23 +38,6 @@ if TYPE_CHECKING:
|
|
|
38
38
|
logger = logging.getLogger(__name__)
|
|
39
39
|
|
|
40
40
|
|
|
41
|
-
class OpenAIChatConfig(BaseAgentConfig):
|
|
42
|
-
"""Configuration for `OpenAIChatAgent`."""
|
|
43
|
-
|
|
44
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
45
|
-
|
|
46
|
-
model_name: str = "OpenAI Chat"
|
|
47
|
-
model: str = "gpt-5-mini"
|
|
48
|
-
openai_client: AsyncOpenAI | None = None
|
|
49
|
-
api_key: str | None = None
|
|
50
|
-
base_url: str | None = None
|
|
51
|
-
completion_kwargs: dict[str, Any] = Field(default_factory=dict)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
class OpenAIChatCreateParams(BaseCreateParams, OpenAIChatConfig):
|
|
55
|
-
pass
|
|
56
|
-
|
|
57
|
-
|
|
58
41
|
class OpenAIChatAgent(MCPAgent):
|
|
59
42
|
"""MCP-enabled agent that speaks the OpenAI *chat.completions* protocol."""
|
|
60
43
|
|
|
@@ -82,6 +65,7 @@ class OpenAIChatAgent(MCPAgent):
|
|
|
82
65
|
"Use HUD_API_KEY for gateway auth and BYOK headers for provider keys."
|
|
83
66
|
)
|
|
84
67
|
|
|
68
|
+
self.oai: AsyncOpenAI
|
|
85
69
|
if self.config.openai_client is not None:
|
|
86
70
|
self.oai = self.config.openai_client
|
|
87
71
|
elif self.config.api_key is not None or self.config.base_url is not None:
|
|
@@ -17,14 +17,14 @@ from openai.types.responses.response_input_param import (
|
|
|
17
17
|
FunctionCallOutput,
|
|
18
18
|
)
|
|
19
19
|
from openai.types.shared_params.reasoning import Reasoning
|
|
20
|
-
from pydantic import ConfigDict
|
|
21
20
|
|
|
22
21
|
from hud.tools.computer.settings import computer_settings
|
|
23
22
|
from hud.types import BaseAgentConfig, MCPToolCall, MCPToolResult
|
|
24
23
|
from hud.utils.types import with_signature
|
|
25
24
|
|
|
26
|
-
from .base import
|
|
27
|
-
from .openai import OpenAIAgent
|
|
25
|
+
from .base import MCPAgent
|
|
26
|
+
from .openai import OpenAIAgent
|
|
27
|
+
from .types import OperatorConfig, OperatorCreateParams
|
|
28
28
|
|
|
29
29
|
if TYPE_CHECKING:
|
|
30
30
|
from openai.types.responses.response_computer_tool_call import PendingSafetyCheck
|
|
@@ -50,20 +50,6 @@ what they asked.
|
|
|
50
50
|
""".strip()
|
|
51
51
|
|
|
52
52
|
|
|
53
|
-
class OperatorConfig(OpenAIConfig):
|
|
54
|
-
"""Configuration model for `OperatorAgent`."""
|
|
55
|
-
|
|
56
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
57
|
-
|
|
58
|
-
model_name: str = "Operator"
|
|
59
|
-
model: str = "computer-use-preview"
|
|
60
|
-
environment: Literal["windows", "mac", "linux", "ubuntu", "browser"] = "linux"
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
class OperatorCreateParams(BaseCreateParams, OperatorConfig):
|
|
64
|
-
pass
|
|
65
|
-
|
|
66
|
-
|
|
67
53
|
class OperatorAgent(OpenAIAgent):
|
|
68
54
|
"""
|
|
69
55
|
Backwards-compatible Operator agent built on top of OpenAIAgent.
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Agent configuration types.
|
|
2
|
+
|
|
3
|
+
Config classes are defined here separately from agent implementations
|
|
4
|
+
to allow importing them without requiring SDK dependencies (anthropic, google-genai).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Any, Literal
|
|
10
|
+
|
|
11
|
+
from pydantic import AliasChoices, BaseModel, ConfigDict, Field
|
|
12
|
+
|
|
13
|
+
from hud.types import BaseAgentConfig
|
|
14
|
+
|
|
15
|
+
# Alias to accept both 'model' and 'checkpoint_name' (backwards compat)
|
|
16
|
+
_model_alias = AliasChoices("model", "checkpoint_name")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class BaseCreateParams(BaseModel):
|
|
20
|
+
"""Runtime parameters for agent creation."""
|
|
21
|
+
|
|
22
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
23
|
+
|
|
24
|
+
ctx: Any = None # EvalContext or Environment
|
|
25
|
+
auto_respond: bool = False
|
|
26
|
+
verbose: bool = False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# -----------------------------------------------------------------------------
|
|
30
|
+
# Claude
|
|
31
|
+
# -----------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ClaudeConfig(BaseAgentConfig):
|
|
35
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
36
|
+
|
|
37
|
+
model_name: str = "Claude"
|
|
38
|
+
model: str = Field(default="claude-sonnet-4-5", validation_alias=_model_alias)
|
|
39
|
+
model_client: Any = None # AsyncAnthropic | AsyncAnthropicBedrock
|
|
40
|
+
max_tokens: int = 16384
|
|
41
|
+
use_computer_beta: bool = True
|
|
42
|
+
validate_api_key: bool = True
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class ClaudeCreateParams(BaseCreateParams, ClaudeConfig):
|
|
46
|
+
pass
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# -----------------------------------------------------------------------------
|
|
50
|
+
# Gemini
|
|
51
|
+
# -----------------------------------------------------------------------------
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class GeminiConfig(BaseAgentConfig):
|
|
55
|
+
"""Configuration for GeminiAgent."""
|
|
56
|
+
|
|
57
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
58
|
+
|
|
59
|
+
model_name: str = "Gemini"
|
|
60
|
+
model: str = Field(default="gemini-3-pro-preview", validation_alias=_model_alias)
|
|
61
|
+
model_client: Any = None # genai.Client
|
|
62
|
+
temperature: float = 1.0
|
|
63
|
+
top_p: float = 0.95
|
|
64
|
+
top_k: int = 40
|
|
65
|
+
max_output_tokens: int = 8192
|
|
66
|
+
validate_api_key: bool = True
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class GeminiCreateParams(BaseCreateParams, GeminiConfig):
|
|
70
|
+
pass
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class GeminiCUAConfig(GeminiConfig):
|
|
74
|
+
"""Configuration for GeminiCUAAgent."""
|
|
75
|
+
|
|
76
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
77
|
+
|
|
78
|
+
model_name: str = "GeminiCUA"
|
|
79
|
+
model: str = Field(
|
|
80
|
+
default="gemini-2.5-computer-use-preview-10-2025", validation_alias=_model_alias
|
|
81
|
+
)
|
|
82
|
+
excluded_predefined_functions: list[str] = Field(default_factory=list)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class GeminiCUACreateParams(BaseCreateParams, GeminiCUAConfig):
|
|
86
|
+
pass
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# -----------------------------------------------------------------------------
|
|
90
|
+
# OpenAI
|
|
91
|
+
# -----------------------------------------------------------------------------
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class OpenAIConfig(BaseAgentConfig):
|
|
95
|
+
"""Configuration for OpenAIAgent."""
|
|
96
|
+
|
|
97
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
98
|
+
|
|
99
|
+
model_name: str = "OpenAI"
|
|
100
|
+
model: str = Field(default="gpt-5.1", validation_alias=_model_alias)
|
|
101
|
+
model_client: Any = None # AsyncOpenAI
|
|
102
|
+
max_output_tokens: int | None = None
|
|
103
|
+
temperature: float | None = None
|
|
104
|
+
reasoning: Any = None # openai Reasoning
|
|
105
|
+
tool_choice: Any = None # openai ToolChoice
|
|
106
|
+
truncation: Literal["auto", "disabled"] | None = None
|
|
107
|
+
parallel_tool_calls: bool | None = None
|
|
108
|
+
validate_api_key: bool = True
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class OpenAICreateParams(BaseCreateParams, OpenAIConfig):
|
|
112
|
+
pass
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class OpenAIChatConfig(BaseAgentConfig):
|
|
116
|
+
"""Configuration for OpenAIChatAgent."""
|
|
117
|
+
|
|
118
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
119
|
+
|
|
120
|
+
model_name: str = "OpenAI Chat"
|
|
121
|
+
model: str = Field(default="gpt-5-mini", validation_alias=_model_alias)
|
|
122
|
+
openai_client: Any = None # AsyncOpenAI
|
|
123
|
+
api_key: str | None = None
|
|
124
|
+
base_url: str | None = None
|
|
125
|
+
completion_kwargs: dict[str, Any] = Field(default_factory=dict)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class OpenAIChatCreateParams(BaseCreateParams, OpenAIChatConfig):
|
|
129
|
+
pass
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# -----------------------------------------------------------------------------
|
|
133
|
+
# Operator
|
|
134
|
+
# -----------------------------------------------------------------------------
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class OperatorConfig(OpenAIConfig):
|
|
138
|
+
"""Configuration for OperatorAgent."""
|
|
139
|
+
|
|
140
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
141
|
+
|
|
142
|
+
model_name: str = "Operator"
|
|
143
|
+
model: str = Field(default="computer-use-preview", validation_alias=_model_alias)
|
|
144
|
+
environment: Literal["windows", "mac", "linux", "ubuntu", "browser"] = "linux"
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class OperatorCreateParams(BaseCreateParams, OperatorConfig):
|
|
148
|
+
pass
|
|
@@ -564,7 +564,7 @@ class EvalConfig(BaseModel):
|
|
|
564
564
|
table.add_row("", "")
|
|
565
565
|
table.add_row(f"[dim]{self.agent_type.value} config[/dim]", "")
|
|
566
566
|
|
|
567
|
-
config_cls = self.agent_type.
|
|
567
|
+
config_cls = self.agent_type.config_cls
|
|
568
568
|
defaults = config_cls()
|
|
569
569
|
overrides = self.agent_config.get(self.agent_type.value, {})
|
|
570
570
|
skip = {
|
|
@@ -64,9 +64,14 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
64
64
|
return
|
|
65
65
|
|
|
66
66
|
# Use configurable timeout for SSE read operations to support long-running tool calls.
|
|
67
|
+
max_request_timeout = 840
|
|
67
68
|
for server_cfg in mcp_config.values():
|
|
68
69
|
if "sse_read_timeout" not in server_cfg:
|
|
69
|
-
server_cfg["sse_read_timeout"] =
|
|
70
|
+
server_cfg["sse_read_timeout"] = (
|
|
71
|
+
min(settings.client_timeout, max_request_timeout)
|
|
72
|
+
if settings.client_timeout > 0
|
|
73
|
+
else max_request_timeout
|
|
74
|
+
)
|
|
70
75
|
|
|
71
76
|
# If a server target matches HUD's MCP host and no auth is provided,
|
|
72
77
|
# inject the HUD API key as a Bearer token to avoid OAuth browser flow.
|
|
@@ -14,6 +14,10 @@ import warnings
|
|
|
14
14
|
from pathlib import Path
|
|
15
15
|
from typing import TYPE_CHECKING, Any, overload
|
|
16
16
|
|
|
17
|
+
import httpx
|
|
18
|
+
|
|
19
|
+
from hud.settings import settings
|
|
20
|
+
|
|
17
21
|
if TYPE_CHECKING:
|
|
18
22
|
from hud.eval.task import Task
|
|
19
23
|
|
|
@@ -106,10 +110,6 @@ def _load_from_huggingface(dataset_name: str) -> list[Task]:
|
|
|
106
110
|
|
|
107
111
|
def _load_raw_from_api(dataset_name: str) -> list[dict[str, Any]]:
|
|
108
112
|
"""Load raw task dicts from HUD API."""
|
|
109
|
-
import httpx
|
|
110
|
-
|
|
111
|
-
from hud.settings import settings
|
|
112
|
-
|
|
113
113
|
headers = {}
|
|
114
114
|
if settings.api_key:
|
|
115
115
|
headers["Authorization"] = f"Bearer {settings.api_key}"
|
|
@@ -271,10 +271,6 @@ def save_tasks(
|
|
|
271
271
|
TypeError: If any task is not a v5 Task object (must have 'scenario')
|
|
272
272
|
ValueError: If API key is not set or save fails
|
|
273
273
|
"""
|
|
274
|
-
import httpx
|
|
275
|
-
|
|
276
|
-
from hud.settings import settings
|
|
277
|
-
|
|
278
274
|
if not settings.api_key:
|
|
279
275
|
raise ValueError("HUD_API_KEY is required to save tasks")
|
|
280
276
|
|
|
@@ -12,8 +12,8 @@ from hud.datasets.loader import load_tasks
|
|
|
12
12
|
class TestLoadTasks:
|
|
13
13
|
"""Tests for load_tasks() function."""
|
|
14
14
|
|
|
15
|
-
@patch("httpx.Client")
|
|
16
|
-
@patch("hud.
|
|
15
|
+
@patch("hud.datasets.loader.httpx.Client")
|
|
16
|
+
@patch("hud.datasets.loader.settings")
|
|
17
17
|
def test_load_tasks_success(
|
|
18
18
|
self, mock_settings: MagicMock, mock_client_class: MagicMock
|
|
19
19
|
) -> None:
|
|
@@ -62,8 +62,8 @@ class TestLoadTasks:
|
|
|
62
62
|
params={"all": "true"},
|
|
63
63
|
)
|
|
64
64
|
|
|
65
|
-
@patch("httpx.Client")
|
|
66
|
-
@patch("hud.
|
|
65
|
+
@patch("hud.datasets.loader.httpx.Client")
|
|
66
|
+
@patch("hud.datasets.loader.settings")
|
|
67
67
|
def test_load_tasks_single_task(
|
|
68
68
|
self, mock_settings: MagicMock, mock_client_class: MagicMock
|
|
69
69
|
) -> None:
|
|
@@ -97,8 +97,8 @@ class TestLoadTasks:
|
|
|
97
97
|
assert tasks[0].scenario == "checkout"
|
|
98
98
|
assert tasks[0].id == "task-1"
|
|
99
99
|
|
|
100
|
-
@patch("httpx.Client")
|
|
101
|
-
@patch("hud.
|
|
100
|
+
@patch("hud.datasets.loader.httpx.Client")
|
|
101
|
+
@patch("hud.datasets.loader.settings")
|
|
102
102
|
def test_load_tasks_no_api_key(
|
|
103
103
|
self, mock_settings: MagicMock, mock_client_class: MagicMock
|
|
104
104
|
) -> None:
|
|
@@ -129,8 +129,8 @@ class TestLoadTasks:
|
|
|
129
129
|
params={"all": "true"},
|
|
130
130
|
)
|
|
131
131
|
|
|
132
|
-
@patch("httpx.Client")
|
|
133
|
-
@patch("hud.
|
|
132
|
+
@patch("hud.datasets.loader.httpx.Client")
|
|
133
|
+
@patch("hud.datasets.loader.settings")
|
|
134
134
|
def test_load_tasks_http_error(
|
|
135
135
|
self, mock_settings: MagicMock, mock_client_class: MagicMock
|
|
136
136
|
) -> None:
|
|
@@ -149,8 +149,8 @@ class TestLoadTasks:
|
|
|
149
149
|
with pytest.raises(ValueError, match="Failed to load tasks"):
|
|
150
150
|
load_tasks("test-org/test-dataset")
|
|
151
151
|
|
|
152
|
-
@patch("httpx.Client")
|
|
153
|
-
@patch("hud.
|
|
152
|
+
@patch("hud.datasets.loader.httpx.Client")
|
|
153
|
+
@patch("hud.datasets.loader.settings")
|
|
154
154
|
def test_load_tasks_json_error(
|
|
155
155
|
self, mock_settings: MagicMock, mock_client_class: MagicMock
|
|
156
156
|
) -> None:
|
|
@@ -171,8 +171,8 @@ class TestLoadTasks:
|
|
|
171
171
|
with pytest.raises(ValueError, match="Failed to load tasks"):
|
|
172
172
|
load_tasks("test-org/test-dataset")
|
|
173
173
|
|
|
174
|
-
@patch("httpx.Client")
|
|
175
|
-
@patch("hud.
|
|
174
|
+
@patch("hud.datasets.loader.httpx.Client")
|
|
175
|
+
@patch("hud.datasets.loader.settings")
|
|
176
176
|
def test_load_tasks_empty(self, mock_settings: MagicMock, mock_client_class: MagicMock) -> None:
|
|
177
177
|
"""load_tasks() handles empty dataset."""
|
|
178
178
|
mock_settings.hud_api_url = "https://api.hud.ai"
|
|
@@ -192,8 +192,8 @@ class TestLoadTasks:
|
|
|
192
192
|
|
|
193
193
|
assert len(tasks) == 0
|
|
194
194
|
|
|
195
|
-
@patch("httpx.Client")
|
|
196
|
-
@patch("hud.
|
|
195
|
+
@patch("hud.datasets.loader.httpx.Client")
|
|
196
|
+
@patch("hud.datasets.loader.settings")
|
|
197
197
|
def test_load_tasks_missing_fields(
|
|
198
198
|
self, mock_settings: MagicMock, mock_client_class: MagicMock
|
|
199
199
|
) -> None:
|
|
@@ -120,8 +120,10 @@ class Connector:
|
|
|
120
120
|
"""
|
|
121
121
|
from fastmcp.client import Client as FastMCPClient
|
|
122
122
|
|
|
123
|
-
|
|
124
|
-
|
|
123
|
+
self.client = FastMCPClient(
|
|
124
|
+
transport=self._transport,
|
|
125
|
+
auth=self._auth,
|
|
126
|
+
)
|
|
125
127
|
await self.client.__aenter__()
|
|
126
128
|
|
|
127
129
|
async def disconnect(self) -> None:
|