hud-python 0.5.9__tar.gz → 0.5.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hud_python-0.5.9 → hud_python-0.5.11}/PKG-INFO +1 -1
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/__init__.py +5 -9
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/base.py +2 -13
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/claude.py +3 -18
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/gemini.py +3 -22
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/gemini_cua.py +3 -17
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/openai.py +4 -25
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/openai_chat.py +3 -19
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/operator.py +3 -17
- hud_python-0.5.11/hud/agents/types.py +148 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/eval.py +1 -1
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/eval/task.py +13 -1
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/eval/tests/test_task.py +33 -1
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/patches/mcp_patches.py +114 -0
- hud_python-0.5.11/hud/tools/computer/__init__.py +48 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/types.py +50 -27
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/utils/tests/test_version.py +1 -1
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/version.py +1 -1
- {hud_python-0.5.9 → hud_python-0.5.11}/pyproject.toml +1 -1
- hud_python-0.5.9/hud/tools/computer/__init__.py +0 -19
- {hud_python-0.5.9 → hud_python-0.5.11}/.gitignore +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/LICENSE +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/README.md +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/examples/README.md +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/__main__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/gateway.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/grounded_openai.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/misc/integration_test_agent.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/misc/response_agent.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/resolver.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/tests/conftest.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/tests/test_base_runtime.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/tests/test_claude.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/tests/test_client.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/tests/test_gemini.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/tests/test_openai.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/tests/test_operator.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/tests/test_resolver.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/agents/tests/test_run_eval.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/__main__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/analyze.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/build.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/clone.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/debug.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/dev.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/flows/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/flows/dev.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/flows/init.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/flows/tasks.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/flows/templates.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/flows/tests/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/flows/tests/test_dev.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/get.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/init.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/list_func.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/pull.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/push.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/remove.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/rft.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/rft_status.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_analyze.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_analyze_module.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_build.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_build_failure.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_build_module.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_cli_root.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_clone.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_convert.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_cursor.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_debug.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_dev.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_eval.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_eval_bedrock.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_init.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_list_func.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_mcp_server.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_pull.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_push.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_push_happy.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_push_wrapper.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_registry.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/celebrate.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/config.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/cursor.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/docker.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/env_check.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/environment.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/git.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/interactive.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/local_runner.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/metadata.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/package_runner.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/remote_runner.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/runner.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/server.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/source_hash.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tests/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tests/test_config.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tests/test_docker.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tests/test_docker_hints.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tests/test_env_check.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tests/test_environment.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tests/test_git.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tests/test_interactive_module.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tests/test_local_runner.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tests/test_logging_utils.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tests/test_metadata.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tests/test_package_runner.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tests/test_registry_utils.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tests/test_remote_runner.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tests/test_runner_modules.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tests/test_source_hash.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/tests/test_tasks.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/version_check.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/cli/utils/viewer.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/clients/README.md +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/clients/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/clients/base.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/clients/environment.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/clients/fastmcp.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/clients/mcp_use.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/clients/tests/test_analyze_scenarios.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/clients/tests/test_client_integration.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/clients/tests/test_fastmcp.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/clients/tests/test_mcp_use_retry.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/clients/tests/test_protocol.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/clients/utils/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/clients/utils/mcp_use_retry.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/clients/utils/retry.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/clients/utils/retry_transport.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/datasets/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/datasets/loader.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/datasets/runner.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/datasets/tests/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/datasets/tests/test_loader.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/datasets/tests/test_utils.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/datasets/utils.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/connection.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/connectors/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/connectors/base.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/connectors/local.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/connectors/mcp_config.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/connectors/openai.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/connectors/remote.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/environment.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/integrations/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/integrations/adk.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/integrations/anthropic.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/integrations/gemini.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/integrations/langchain.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/integrations/llamaindex.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/integrations/openai.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/mock.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/router.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/scenarios.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/tests/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/tests/test_connection.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/tests/test_connectors.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/tests/test_environment.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/tests/test_integrations.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/tests/test_local_connectors.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/tests/test_scenarios.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/tests/test_tools.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/types.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/utils/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/utils/formats.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/utils/schema.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/environment/utils/tool_wrappers.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/eval/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/eval/context.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/eval/display.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/eval/instrument.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/eval/manager.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/eval/parallel.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/eval/tests/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/eval/tests/test_context.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/eval/tests/test_eval.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/eval/tests/test_manager.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/eval/tests/test_parallel.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/eval/types.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/eval/utils.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/native/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/native/comparator.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/native/tests/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/native/tests/test_comparator.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/native/tests/test_native_init.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/patches/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/patches/warnings.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/py.typed +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/samples/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/samples/browser.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/server/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/server/context.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/server/low_level.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/server/router.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/server/server.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/server/tests/test_add_tool.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/server/tests/test_context.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/server/tests/test_mcp_server_handlers.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/server/tests/test_mcp_server_integration.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/server/tests/test_mcp_server_more.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/server/tests/test_run_wrapper.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/server/tests/test_server_extra.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/server/tests/test_sigterm_runner.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/settings.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/shared/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/shared/exceptions.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/shared/hints.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/shared/requests.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/shared/tests/test_exceptions.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/shared/tests/test_hints.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/telemetry/exporter.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/telemetry/tests/test_eval_telemetry.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/telemetry/tests/test_exporter.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/telemetry/tests/test_instrument.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/agent.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/apply_patch.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/base.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/bash.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/computer/anthropic.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/computer/gemini.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/computer/hud.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/computer/openai.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/computer/qwen.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/edit.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/executors/base.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/executors/tests/test_base_executor.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/grounding/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/grounding/config.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/grounding/grounded_tool.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/grounding/grounder.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/grounding/tests/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/jupyter.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/playwright.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/response.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/shell.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/submit.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_agent_tool.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_apply_patch.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_bash.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_bash_extended.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_computer.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_computer_actions.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_edit.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_jupyter_tool.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_shell.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_submit.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_tools_init.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_types.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/types.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/tools/utils.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/utils/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/utils/env.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/utils/hud_console.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/utils/mcp.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/utils/pretty_errors.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/utils/strict_schema.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/utils/telemetry.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/utils/tests/test_mcp.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/utils/tests/test_pretty_errors.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/utils/tests/test_telemetry.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/utils/tests/test_tool_shorthand.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/utils/tool_shorthand.py +0 -0
- {hud_python-0.5.9 → hud_python-0.5.11}/hud/utils/types.py +0 -0
|
@@ -56,15 +56,11 @@ def create_agent(model: str, **kwargs: Any) -> MCPAgent:
|
|
|
56
56
|
if gateway_info:
|
|
57
57
|
provider = gateway_info.get("provider") or "openai"
|
|
58
58
|
else:
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
ClaudeAgent: "anthropic",
|
|
65
|
-
GeminiAgent: "google",
|
|
66
|
-
}
|
|
67
|
-
provider = _AGENT_TO_PROVIDER.get(agent_cls, "openai")
|
|
59
|
+
provider = "openai"
|
|
60
|
+
if agent_cls.__name__ == "ClaudeAgent":
|
|
61
|
+
provider = "anthropic"
|
|
62
|
+
elif agent_cls.__name__ in ("GeminiAgent", "GeminiCUAAgent"):
|
|
63
|
+
provider = "gemini"
|
|
68
64
|
|
|
69
65
|
client = build_gateway_client(provider)
|
|
70
66
|
|
|
@@ -9,11 +9,12 @@ from abc import ABC, abstractmethod
|
|
|
9
9
|
from typing import TYPE_CHECKING, Any, ClassVar, Literal
|
|
10
10
|
|
|
11
11
|
import mcp.types as types
|
|
12
|
-
from pydantic import BaseModel, ConfigDict
|
|
13
12
|
|
|
14
13
|
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
|
|
15
14
|
from hud.utils.hud_console import HUDConsole
|
|
16
15
|
|
|
16
|
+
from .types import BaseCreateParams
|
|
17
|
+
|
|
17
18
|
if TYPE_CHECKING:
|
|
18
19
|
from hud.environment import Environment
|
|
19
20
|
from hud.eval.context import EvalContext
|
|
@@ -22,18 +23,6 @@ if TYPE_CHECKING:
|
|
|
22
23
|
logger = logging.getLogger(__name__)
|
|
23
24
|
|
|
24
25
|
|
|
25
|
-
class BaseCreateParams(BaseModel):
|
|
26
|
-
"""Runtime parameters for agent creation."""
|
|
27
|
-
|
|
28
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
29
|
-
|
|
30
|
-
# Primary way to bind agent to execution context (v5)
|
|
31
|
-
ctx: Any | None = None # EvalContext or Environment - agent uses this for tool calls
|
|
32
|
-
|
|
33
|
-
auto_respond: bool = False
|
|
34
|
-
verbose: bool = False
|
|
35
|
-
|
|
36
|
-
|
|
37
26
|
class MCPAgent(ABC):
|
|
38
27
|
"""
|
|
39
28
|
Base class for MCP-enabled agents.
|
|
@@ -25,7 +25,6 @@ from anthropic.types.beta import (
|
|
|
25
25
|
BetaToolTextEditor20250728Param,
|
|
26
26
|
BetaToolUnionParam,
|
|
27
27
|
)
|
|
28
|
-
from pydantic import ConfigDict
|
|
29
28
|
|
|
30
29
|
from hud.settings import settings
|
|
31
30
|
from hud.tools.computer.settings import computer_settings
|
|
@@ -33,7 +32,8 @@ from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
|
|
|
33
32
|
from hud.utils.hud_console import HUDConsole
|
|
34
33
|
from hud.utils.types import with_signature
|
|
35
34
|
|
|
36
|
-
from .base import
|
|
35
|
+
from .base import MCPAgent
|
|
36
|
+
from .types import ClaudeConfig, ClaudeCreateParams
|
|
37
37
|
|
|
38
38
|
if TYPE_CHECKING:
|
|
39
39
|
from collections.abc import Sequence
|
|
@@ -41,21 +41,6 @@ if TYPE_CHECKING:
|
|
|
41
41
|
logger = logging.getLogger(__name__)
|
|
42
42
|
|
|
43
43
|
|
|
44
|
-
class ClaudeConfig(BaseAgentConfig):
|
|
45
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
46
|
-
|
|
47
|
-
model_name: str = "Claude"
|
|
48
|
-
model: str = "claude-sonnet-4-5"
|
|
49
|
-
model_client: AsyncAnthropic | AsyncAnthropicBedrock | None = None
|
|
50
|
-
max_tokens: int = 16384
|
|
51
|
-
use_computer_beta: bool = True
|
|
52
|
-
validate_api_key: bool = True
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
class ClaudeCreateParams(BaseCreateParams, ClaudeConfig):
|
|
56
|
-
pass
|
|
57
|
-
|
|
58
|
-
|
|
59
44
|
class ClaudeAgent(MCPAgent):
|
|
60
45
|
"""
|
|
61
46
|
Claude agent that uses MCP servers for tool execution.
|
|
@@ -94,7 +79,7 @@ class ClaudeAgent(MCPAgent):
|
|
|
94
79
|
"or ANTHROPIC_API_KEY for direct Anthropic access."
|
|
95
80
|
)
|
|
96
81
|
|
|
97
|
-
self.anthropic_client = model_client
|
|
82
|
+
self.anthropic_client: AsyncAnthropic | AsyncAnthropicBedrock = model_client
|
|
98
83
|
self.max_tokens = self.config.max_tokens
|
|
99
84
|
self.use_computer_beta = self.config.use_computer_beta
|
|
100
85
|
self.hud_console = HUDConsole(logger=logger)
|
|
@@ -8,37 +8,18 @@ from typing import Any, ClassVar, cast
|
|
|
8
8
|
import mcp.types as types
|
|
9
9
|
from google import genai
|
|
10
10
|
from google.genai import types as genai_types
|
|
11
|
-
from pydantic import ConfigDict
|
|
12
11
|
|
|
13
12
|
from hud.settings import settings
|
|
14
13
|
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
|
|
15
14
|
from hud.utils.hud_console import HUDConsole
|
|
16
15
|
from hud.utils.types import with_signature
|
|
17
16
|
|
|
18
|
-
from .base import
|
|
17
|
+
from .base import MCPAgent
|
|
18
|
+
from .types import GeminiConfig, GeminiCreateParams
|
|
19
19
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
class GeminiConfig(BaseAgentConfig):
|
|
24
|
-
"""Configuration for `GeminiAgent`."""
|
|
25
|
-
|
|
26
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
27
|
-
|
|
28
|
-
model_name: str = "Gemini"
|
|
29
|
-
model: str = "gemini-3-pro-preview"
|
|
30
|
-
model_client: genai.Client | None = None
|
|
31
|
-
temperature: float = 1.0
|
|
32
|
-
top_p: float = 0.95
|
|
33
|
-
top_k: int = 40
|
|
34
|
-
max_output_tokens: int = 8192
|
|
35
|
-
validate_api_key: bool = True
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class GeminiCreateParams(BaseCreateParams, GeminiConfig):
|
|
39
|
-
pass
|
|
40
|
-
|
|
41
|
-
|
|
42
23
|
class GeminiAgent(MCPAgent):
|
|
43
24
|
"""
|
|
44
25
|
Gemini agent that uses MCP servers for tool execution.
|
|
@@ -80,7 +61,7 @@ class GeminiAgent(MCPAgent):
|
|
|
80
61
|
except Exception as e:
|
|
81
62
|
raise ValueError(f"Gemini API key is invalid: {e}") from e
|
|
82
63
|
|
|
83
|
-
self.gemini_client = model_client
|
|
64
|
+
self.gemini_client: genai.Client = model_client
|
|
84
65
|
self.temperature = self.config.temperature
|
|
85
66
|
self.top_p = self.config.top_p
|
|
86
67
|
self.top_k = self.config.top_k
|
|
@@ -7,14 +7,14 @@ from typing import Any, ClassVar
|
|
|
7
7
|
|
|
8
8
|
import mcp.types as types
|
|
9
9
|
from google.genai import types as genai_types
|
|
10
|
-
from pydantic import ConfigDict, Field
|
|
11
10
|
|
|
12
11
|
from hud.tools.computer.settings import computer_settings
|
|
13
12
|
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
|
|
14
13
|
from hud.utils.types import with_signature
|
|
15
14
|
|
|
16
|
-
from .base import
|
|
17
|
-
from .gemini import GeminiAgent
|
|
15
|
+
from .base import MCPAgent
|
|
16
|
+
from .gemini import GeminiAgent
|
|
17
|
+
from .types import GeminiCUAConfig, GeminiCUACreateParams
|
|
18
18
|
|
|
19
19
|
logger = logging.getLogger(__name__)
|
|
20
20
|
|
|
@@ -56,20 +56,6 @@ what they asked.
|
|
|
56
56
|
""".strip()
|
|
57
57
|
|
|
58
58
|
|
|
59
|
-
class GeminiCUAConfig(GeminiConfig):
|
|
60
|
-
"""Configuration for `GeminiCUAAgent`."""
|
|
61
|
-
|
|
62
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
63
|
-
|
|
64
|
-
model_name: str = "GeminiCUA"
|
|
65
|
-
model: str = "gemini-2.5-computer-use-preview-10-2025"
|
|
66
|
-
excluded_predefined_functions: list[str] = Field(default_factory=list)
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
class GeminiCUACreateParams(BaseCreateParams, GeminiCUAConfig):
|
|
70
|
-
pass
|
|
71
|
-
|
|
72
|
-
|
|
73
59
|
class GeminiCUAAgent(GeminiAgent):
|
|
74
60
|
"""
|
|
75
61
|
Gemini Computer Use Agent that extends GeminiAgent with computer use capabilities.
|
|
@@ -29,39 +29,18 @@ from openai.types.responses import (
|
|
|
29
29
|
from openai.types.responses.response_create_params import ToolChoice # noqa: TC002
|
|
30
30
|
from openai.types.responses.response_input_param import FunctionCallOutput, Message
|
|
31
31
|
from openai.types.shared_params.reasoning import Reasoning # noqa: TC002
|
|
32
|
-
from pydantic import ConfigDict
|
|
33
32
|
|
|
34
33
|
from hud.settings import settings
|
|
35
34
|
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
|
|
36
35
|
from hud.utils.strict_schema import ensure_strict_json_schema
|
|
37
36
|
from hud.utils.types import with_signature
|
|
38
37
|
|
|
39
|
-
from .base import
|
|
38
|
+
from .base import MCPAgent
|
|
39
|
+
from .types import OpenAIConfig, OpenAICreateParams
|
|
40
40
|
|
|
41
41
|
logger = logging.getLogger(__name__)
|
|
42
42
|
|
|
43
43
|
|
|
44
|
-
class OpenAIConfig(BaseAgentConfig):
|
|
45
|
-
"""Configuration model for `OpenAIAgent`."""
|
|
46
|
-
|
|
47
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
48
|
-
|
|
49
|
-
model_name: str = "OpenAI"
|
|
50
|
-
model: str = "gpt-5.1"
|
|
51
|
-
model_client: AsyncOpenAI | None = None
|
|
52
|
-
max_output_tokens: int | None = None
|
|
53
|
-
temperature: float | None = None
|
|
54
|
-
reasoning: Reasoning | None = None
|
|
55
|
-
tool_choice: ToolChoice | None = None
|
|
56
|
-
truncation: Literal["auto", "disabled"] | None = None
|
|
57
|
-
parallel_tool_calls: bool | None = None
|
|
58
|
-
validate_api_key: bool = True
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
class OpenAICreateParams(BaseCreateParams, OpenAIConfig):
|
|
62
|
-
pass
|
|
63
|
-
|
|
64
|
-
|
|
65
44
|
class OpenAIAgent(MCPAgent):
|
|
66
45
|
"""Generic OpenAI agent that can execute MCP tools through the Responses API."""
|
|
67
46
|
|
|
@@ -98,11 +77,11 @@ class OpenAIAgent(MCPAgent):
|
|
|
98
77
|
except Exception as exc: # pragma: no cover - network validation
|
|
99
78
|
raise ValueError(f"OpenAI API key is invalid: {exc}") from exc
|
|
100
79
|
|
|
101
|
-
self.openai_client = model_client
|
|
80
|
+
self.openai_client: AsyncOpenAI = model_client
|
|
102
81
|
self._model = self.config.model
|
|
103
82
|
self.max_output_tokens = self.config.max_output_tokens
|
|
104
83
|
self.temperature = self.config.temperature
|
|
105
|
-
self.reasoning = self.config.reasoning
|
|
84
|
+
self.reasoning: Reasoning | None = self.config.reasoning
|
|
106
85
|
self.tool_choice: ToolChoice | None = self.config.tool_choice
|
|
107
86
|
self.parallel_tool_calls = self.config.parallel_tool_calls
|
|
108
87
|
self.truncation: Literal["auto", "disabled"] | None = self.config.truncation
|
|
@@ -22,14 +22,14 @@ from typing import TYPE_CHECKING, Any, ClassVar, cast
|
|
|
22
22
|
|
|
23
23
|
import mcp.types as types
|
|
24
24
|
from openai import AsyncOpenAI
|
|
25
|
-
from pydantic import ConfigDict, Field
|
|
26
25
|
|
|
27
26
|
from hud.settings import settings
|
|
28
27
|
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
|
|
29
28
|
from hud.utils.hud_console import HUDConsole
|
|
30
29
|
from hud.utils.types import with_signature
|
|
31
30
|
|
|
32
|
-
from .base import
|
|
31
|
+
from .base import MCPAgent
|
|
32
|
+
from .types import OpenAIChatConfig, OpenAIChatCreateParams
|
|
33
33
|
|
|
34
34
|
if TYPE_CHECKING:
|
|
35
35
|
from openai.types.chat import ChatCompletionToolParam
|
|
@@ -38,23 +38,6 @@ if TYPE_CHECKING:
|
|
|
38
38
|
logger = logging.getLogger(__name__)
|
|
39
39
|
|
|
40
40
|
|
|
41
|
-
class OpenAIChatConfig(BaseAgentConfig):
|
|
42
|
-
"""Configuration for `OpenAIChatAgent`."""
|
|
43
|
-
|
|
44
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
45
|
-
|
|
46
|
-
model_name: str = "OpenAI Chat"
|
|
47
|
-
model: str = "gpt-5-mini"
|
|
48
|
-
openai_client: AsyncOpenAI | None = None
|
|
49
|
-
api_key: str | None = None
|
|
50
|
-
base_url: str | None = None
|
|
51
|
-
completion_kwargs: dict[str, Any] = Field(default_factory=dict)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
class OpenAIChatCreateParams(BaseCreateParams, OpenAIChatConfig):
|
|
55
|
-
pass
|
|
56
|
-
|
|
57
|
-
|
|
58
41
|
class OpenAIChatAgent(MCPAgent):
|
|
59
42
|
"""MCP-enabled agent that speaks the OpenAI *chat.completions* protocol."""
|
|
60
43
|
|
|
@@ -82,6 +65,7 @@ class OpenAIChatAgent(MCPAgent):
|
|
|
82
65
|
"Use HUD_API_KEY for gateway auth and BYOK headers for provider keys."
|
|
83
66
|
)
|
|
84
67
|
|
|
68
|
+
self.oai: AsyncOpenAI
|
|
85
69
|
if self.config.openai_client is not None:
|
|
86
70
|
self.oai = self.config.openai_client
|
|
87
71
|
elif self.config.api_key is not None or self.config.base_url is not None:
|
|
@@ -17,14 +17,14 @@ from openai.types.responses.response_input_param import (
|
|
|
17
17
|
FunctionCallOutput,
|
|
18
18
|
)
|
|
19
19
|
from openai.types.shared_params.reasoning import Reasoning
|
|
20
|
-
from pydantic import ConfigDict
|
|
21
20
|
|
|
22
21
|
from hud.tools.computer.settings import computer_settings
|
|
23
22
|
from hud.types import BaseAgentConfig, MCPToolCall, MCPToolResult
|
|
24
23
|
from hud.utils.types import with_signature
|
|
25
24
|
|
|
26
|
-
from .base import
|
|
27
|
-
from .openai import OpenAIAgent
|
|
25
|
+
from .base import MCPAgent
|
|
26
|
+
from .openai import OpenAIAgent
|
|
27
|
+
from .types import OperatorConfig, OperatorCreateParams
|
|
28
28
|
|
|
29
29
|
if TYPE_CHECKING:
|
|
30
30
|
from openai.types.responses.response_computer_tool_call import PendingSafetyCheck
|
|
@@ -50,20 +50,6 @@ what they asked.
|
|
|
50
50
|
""".strip()
|
|
51
51
|
|
|
52
52
|
|
|
53
|
-
class OperatorConfig(OpenAIConfig):
|
|
54
|
-
"""Configuration model for `OperatorAgent`."""
|
|
55
|
-
|
|
56
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
57
|
-
|
|
58
|
-
model_name: str = "Operator"
|
|
59
|
-
model: str = "computer-use-preview"
|
|
60
|
-
environment: Literal["windows", "mac", "linux", "ubuntu", "browser"] = "linux"
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
class OperatorCreateParams(BaseCreateParams, OperatorConfig):
|
|
64
|
-
pass
|
|
65
|
-
|
|
66
|
-
|
|
67
53
|
class OperatorAgent(OpenAIAgent):
|
|
68
54
|
"""
|
|
69
55
|
Backwards-compatible Operator agent built on top of OpenAIAgent.
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Agent configuration types.
|
|
2
|
+
|
|
3
|
+
Config classes are defined here separately from agent implementations
|
|
4
|
+
to allow importing them without requiring SDK dependencies (anthropic, google-genai).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Any, Literal
|
|
10
|
+
|
|
11
|
+
from pydantic import AliasChoices, BaseModel, ConfigDict, Field
|
|
12
|
+
|
|
13
|
+
from hud.types import BaseAgentConfig
|
|
14
|
+
|
|
15
|
+
# Alias to accept both 'model' and 'checkpoint_name' (backwards compat)
|
|
16
|
+
_model_alias = AliasChoices("model", "checkpoint_name")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class BaseCreateParams(BaseModel):
|
|
20
|
+
"""Runtime parameters for agent creation."""
|
|
21
|
+
|
|
22
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
23
|
+
|
|
24
|
+
ctx: Any = None # EvalContext or Environment
|
|
25
|
+
auto_respond: bool = False
|
|
26
|
+
verbose: bool = False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# -----------------------------------------------------------------------------
|
|
30
|
+
# Claude
|
|
31
|
+
# -----------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ClaudeConfig(BaseAgentConfig):
|
|
35
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
36
|
+
|
|
37
|
+
model_name: str = "Claude"
|
|
38
|
+
model: str = Field(default="claude-sonnet-4-5", validation_alias=_model_alias)
|
|
39
|
+
model_client: Any = None # AsyncAnthropic | AsyncAnthropicBedrock
|
|
40
|
+
max_tokens: int = 16384
|
|
41
|
+
use_computer_beta: bool = True
|
|
42
|
+
validate_api_key: bool = True
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class ClaudeCreateParams(BaseCreateParams, ClaudeConfig):
|
|
46
|
+
pass
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# -----------------------------------------------------------------------------
|
|
50
|
+
# Gemini
|
|
51
|
+
# -----------------------------------------------------------------------------
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class GeminiConfig(BaseAgentConfig):
|
|
55
|
+
"""Configuration for GeminiAgent."""
|
|
56
|
+
|
|
57
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
58
|
+
|
|
59
|
+
model_name: str = "Gemini"
|
|
60
|
+
model: str = Field(default="gemini-3-pro-preview", validation_alias=_model_alias)
|
|
61
|
+
model_client: Any = None # genai.Client
|
|
62
|
+
temperature: float = 1.0
|
|
63
|
+
top_p: float = 0.95
|
|
64
|
+
top_k: int = 40
|
|
65
|
+
max_output_tokens: int = 8192
|
|
66
|
+
validate_api_key: bool = True
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class GeminiCreateParams(BaseCreateParams, GeminiConfig):
|
|
70
|
+
pass
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class GeminiCUAConfig(GeminiConfig):
|
|
74
|
+
"""Configuration for GeminiCUAAgent."""
|
|
75
|
+
|
|
76
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
77
|
+
|
|
78
|
+
model_name: str = "GeminiCUA"
|
|
79
|
+
model: str = Field(
|
|
80
|
+
default="gemini-2.5-computer-use-preview-10-2025", validation_alias=_model_alias
|
|
81
|
+
)
|
|
82
|
+
excluded_predefined_functions: list[str] = Field(default_factory=list)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class GeminiCUACreateParams(BaseCreateParams, GeminiCUAConfig):
|
|
86
|
+
pass
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# -----------------------------------------------------------------------------
|
|
90
|
+
# OpenAI
|
|
91
|
+
# -----------------------------------------------------------------------------
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class OpenAIConfig(BaseAgentConfig):
|
|
95
|
+
"""Configuration for OpenAIAgent."""
|
|
96
|
+
|
|
97
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
98
|
+
|
|
99
|
+
model_name: str = "OpenAI"
|
|
100
|
+
model: str = Field(default="gpt-5.1", validation_alias=_model_alias)
|
|
101
|
+
model_client: Any = None # AsyncOpenAI
|
|
102
|
+
max_output_tokens: int | None = None
|
|
103
|
+
temperature: float | None = None
|
|
104
|
+
reasoning: Any = None # openai Reasoning
|
|
105
|
+
tool_choice: Any = None # openai ToolChoice
|
|
106
|
+
truncation: Literal["auto", "disabled"] | None = None
|
|
107
|
+
parallel_tool_calls: bool | None = None
|
|
108
|
+
validate_api_key: bool = True
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class OpenAICreateParams(BaseCreateParams, OpenAIConfig):
|
|
112
|
+
pass
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class OpenAIChatConfig(BaseAgentConfig):
|
|
116
|
+
"""Configuration for OpenAIChatAgent."""
|
|
117
|
+
|
|
118
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
119
|
+
|
|
120
|
+
model_name: str = "OpenAI Chat"
|
|
121
|
+
model: str = Field(default="gpt-5-mini", validation_alias=_model_alias)
|
|
122
|
+
openai_client: Any = None # AsyncOpenAI
|
|
123
|
+
api_key: str | None = None
|
|
124
|
+
base_url: str | None = None
|
|
125
|
+
completion_kwargs: dict[str, Any] = Field(default_factory=dict)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class OpenAIChatCreateParams(BaseCreateParams, OpenAIChatConfig):
|
|
129
|
+
pass
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# -----------------------------------------------------------------------------
|
|
133
|
+
# Operator
|
|
134
|
+
# -----------------------------------------------------------------------------
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class OperatorConfig(OpenAIConfig):
|
|
138
|
+
"""Configuration for OperatorAgent."""
|
|
139
|
+
|
|
140
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
141
|
+
|
|
142
|
+
model_name: str = "Operator"
|
|
143
|
+
model: str = Field(default="computer-use-preview", validation_alias=_model_alias)
|
|
144
|
+
environment: Literal["windows", "mac", "linux", "ubuntu", "browser"] = "linux"
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class OperatorCreateParams(BaseCreateParams, OperatorConfig):
|
|
148
|
+
pass
|
|
@@ -564,7 +564,7 @@ class EvalConfig(BaseModel):
|
|
|
564
564
|
table.add_row("", "")
|
|
565
565
|
table.add_row(f"[dim]{self.agent_type.value} config[/dim]", "")
|
|
566
566
|
|
|
567
|
-
config_cls = self.agent_type.
|
|
567
|
+
config_cls = self.agent_type.config_cls
|
|
568
568
|
defaults = config_cls()
|
|
569
569
|
overrides = self.agent_config.get(self.agent_type.value, {})
|
|
570
570
|
skip = {
|
|
@@ -287,8 +287,20 @@ class Task(BaseModel):
|
|
|
287
287
|
]
|
|
288
288
|
|
|
289
289
|
# Preserve agent_config
|
|
290
|
+
agent_config: dict[str, Any] = {}
|
|
290
291
|
if data.get("agent_config"):
|
|
291
|
-
|
|
292
|
+
agent_config.update(data["agent_config"])
|
|
293
|
+
# Restore tool filters from Environment (they were extracted during v4 conversion)
|
|
294
|
+
if self.env is not None:
|
|
295
|
+
if getattr(self.env, "_agent_include", None) is not None:
|
|
296
|
+
agent_config["allowed_tools"] = self.env._agent_include
|
|
297
|
+
elif "allowed_tools" not in agent_config:
|
|
298
|
+
# ["*"] was converted to None, restore it for serialization
|
|
299
|
+
agent_config["allowed_tools"] = ["*"]
|
|
300
|
+
if getattr(self.env, "_agent_exclude", None) is not None:
|
|
301
|
+
agent_config["disallowed_tools"] = self.env._agent_exclude
|
|
302
|
+
if agent_config:
|
|
303
|
+
result["agent_config"] = agent_config
|
|
292
304
|
|
|
293
305
|
# Preserve metadata
|
|
294
306
|
if data.get("metadata"):
|
|
@@ -85,7 +85,11 @@ class TestTaskSerialization:
|
|
|
85
85
|
task = Task.from_v4(v4_dict)
|
|
86
86
|
data = task.model_dump(mode="json")
|
|
87
87
|
|
|
88
|
-
|
|
88
|
+
# agent_config should preserve system_prompt and include default allowed_tools
|
|
89
|
+
assert data.get("agent_config") == {
|
|
90
|
+
"system_prompt": "Custom system prompt",
|
|
91
|
+
"allowed_tools": ["*"], # Default when no allowed_tools specified
|
|
92
|
+
}
|
|
89
93
|
|
|
90
94
|
# Roundtrip
|
|
91
95
|
task2 = Task(**data)
|
|
@@ -250,3 +254,31 @@ class TestV4AgentConfigToolFilters:
|
|
|
250
254
|
|
|
251
255
|
assert "my_setup_tool" not in tool_names
|
|
252
256
|
assert "run_query" in tool_names
|
|
257
|
+
|
|
258
|
+
def test_v4_tool_filters_preserved_in_serialization(self) -> None:
|
|
259
|
+
"""v4 tool filters are preserved when serializing for remote execution."""
|
|
260
|
+
v4_dict = {
|
|
261
|
+
"prompt": "Test prompt",
|
|
262
|
+
"mcp_config": {"server": {"url": "http://localhost"}},
|
|
263
|
+
"evaluate_tool": {"name": "check", "arguments": {}},
|
|
264
|
+
"agent_config": {
|
|
265
|
+
"allowed_tools": ["*"],
|
|
266
|
+
"disallowed_tools": ["*setup*", "*evaluate*", "*grade*"],
|
|
267
|
+
},
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
task = Task.from_v4(v4_dict)
|
|
271
|
+
|
|
272
|
+
# Serialize (this is what gets sent to remote execution)
|
|
273
|
+
data = task.model_dump(mode="json")
|
|
274
|
+
|
|
275
|
+
# agent_config must include the tool filters for remote execution
|
|
276
|
+
assert "agent_config" in data
|
|
277
|
+
assert data["agent_config"]["allowed_tools"] == ["*"]
|
|
278
|
+
assert data["agent_config"]["disallowed_tools"] == ["*setup*", "*evaluate*", "*grade*"]
|
|
279
|
+
|
|
280
|
+
# Verify roundtrip works (remote worker will deserialize this)
|
|
281
|
+
task2 = Task(**data)
|
|
282
|
+
assert task2.env is not None
|
|
283
|
+
assert task2.env._agent_include is None # ["*"] → None
|
|
284
|
+
assert task2.env._agent_exclude == ["*setup*", "*evaluate*", "*grade*"]
|