hud-python 0.5.31__tar.gz → 0.5.33__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hud_python-0.5.31 → hud_python-0.5.33}/PKG-INFO +1 -1
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/claude.py +11 -3
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_build.py +2 -2
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/datasets/loader.py +9 -10
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/datasets/tests/test_loader.py +9 -9
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/connection.py +3 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/connectors/mcp_config.py +23 -12
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/test_connection.py +29 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/test_connectors.py +43 -1
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/context.py +40 -14
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/telemetry/instrument.py +50 -6
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/bash.py +28 -15
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/edit.py +6 -6
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/tests/test_bash.py +13 -20
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/tests/test_bash_extended.py +28 -2
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/anthropic.py +69 -38
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/gemini.py +0 -23
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/hud.py +20 -1
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/openai.py +0 -21
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/settings.py +5 -0
- hud_python-0.5.33/hud/tools/computer/tests/test_compression.py +164 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_native_types.py +1 -1
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/types.py +2 -1
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/tests/test_version.py +1 -1
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/version.py +1 -1
- {hud_python-0.5.31 → hud_python-0.5.33}/pyproject.toml +1 -1
- {hud_python-0.5.31 → hud_python-0.5.33}/.gitignore +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/LICENSE +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/README.md +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/examples/README.md +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/__main__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/base.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/gateway.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/gemini.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/gemini_cua.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/grounded_openai.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/misc/integration_test_agent.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/misc/response_agent.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/openai.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/openai_chat.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/operator.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/resolver.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/conftest.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_base_runtime.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_claude.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_gemini.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_integration_test_agent.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_openai.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_operator.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_resolver.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/tests/test_run_eval.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/agents/types.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/__main__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/analyze.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/build.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/cancel.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/convert/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/convert/base.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/convert/harbor.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/convert/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/convert/tests/conftest.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/convert/tests/test_harbor.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/debug.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/deploy.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/dev.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/eval.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/flows/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/flows/dev.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/flows/init.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/flows/tasks.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/flows/templates.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/flows/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/flows/tests/test_dev.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/init.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/link.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/models.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/push.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/rft.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/rft_status.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_analyze.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_analyze_module.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_build_failure.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_build_module.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_cli_root.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_convert.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_debug.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_debug_directory_mode.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_deploy.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_dev.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_eval.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_eval_bedrock.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_init.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_mcp_server.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_push.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_push_happy.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_push_wrapper.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/api.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/args.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/build_display.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/build_logs.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/config.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/context.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/docker.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/env_check.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/environment.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/git.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/interactive.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/lockfile.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/mcp.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/metadata.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/server.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/source_hash.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_config.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_docker.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_docker_hints.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_env_check.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_environment.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_git.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_interactive_module.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_logging_utils.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_metadata.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_source_hash.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/tests/test_tasks.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/validation.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/version_check.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/cli/utils/viewer.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/datasets/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/datasets/runner.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/datasets/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/datasets/tests/test_utils.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/datasets/utils.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/connectors/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/connectors/base.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/connectors/local.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/connectors/openai.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/connectors/remote.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/environment.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/integrations/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/integrations/adk.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/integrations/anthropic.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/integrations/gemini.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/integrations/langchain.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/integrations/llamaindex.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/integrations/openai.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/mock.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/router.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/scenarios.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/test_environment.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/test_integrations.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/test_local_connectors.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/test_scenarios.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/test_session_id.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/tests/test_tools.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/types.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/utils/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/utils/formats.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/utils/schema.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/environment/utils/tool_wrappers.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/display.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/instrument.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/manager.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/parallel.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/task.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/tests/test_context.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/tests/test_eval.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/tests/test_manager.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/tests/test_parallel.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/tests/test_task.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/types.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/eval/utils.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/native/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/native/chat.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/patches/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/patches/mcp_patches.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/patches/warnings.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/py.typed +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/context.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/low_level.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/router.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/server.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_add_tool.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_context.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_mcp_server_handlers.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_mcp_server_integration.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_mcp_server_more.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_prefix_naming.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_run_wrapper.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_server_extra.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/server/tests/test_sigterm_runner.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/services/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/services/chat.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/services/chat_service.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/services/reply_metadata.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/services/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/services/tests/test_chat.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/services/tests/test_chat_service.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/settings.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/shared/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/shared/exceptions.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/shared/hints.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/shared/requests.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/shared/tests/test_exceptions.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/shared/tests/test_hints.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/telemetry/exporter.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/telemetry/tests/test_eval_telemetry.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/telemetry/tests/test_exporter.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/telemetry/tests/test_instrument.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/agent.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/base.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/apply_patch.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/gemini_edit.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/gemini_shell.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/session.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/shell.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/tests/test_apply_patch.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/tests/test_bash_integration.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/tests/test_edit.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/tests/test_gemini_tools.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/tests/test_shell.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/coding/utils.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/glm.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/qwen.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/tests/test_computer.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/tests/test_computer_actions.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/computer/tests/test_glm_computer.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/elicitation.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/executors/base.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/executors/tests/test_base_executor.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/base.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/gemini.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/glob.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/grep.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/list.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/read.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/tests/test_glob.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/tests/test_grep.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/tests/test_list.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/filesystem/tests/test_read.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/grounding/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/grounding/config.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/grounding/grounded_tool.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/grounding/grounder.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/grounding/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/hosted/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/hosted/base.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/hosted/code_execution.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/hosted/google_search.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/hosted/tool_search.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/hosted/url_context.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/hosted/web_fetch.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/hosted/web_search.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/jupyter.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/base.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/claude.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/gemini.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/session.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/tests/test_claude.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/tests/test_gemini.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/memory/tests/test_session.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/native_types.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/playwright.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/response.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/submit.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_agent_tool.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_elicitation.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_jupyter_tool.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_native_tool_e2e.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_submit.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_tools_init.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_types.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/tools/utils.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/types.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/env.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/hud_console.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/mcp.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/pretty_errors.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/strict_schema.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/tests/test_pretty_errors.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/tests/test_tool_shorthand.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/tool_shorthand.py +0 -0
- {hud_python-0.5.31 → hud_python-0.5.33}/hud/utils/types.py +0 -0
|
@@ -445,7 +445,9 @@ class ClaudeAgent(MCPAgent):
|
|
|
445
445
|
text_document_block(content.text, title=tool_call.name)
|
|
446
446
|
)
|
|
447
447
|
elif isinstance(content, types.ImageContent):
|
|
448
|
-
claude_blocks.append(
|
|
448
|
+
claude_blocks.append(
|
|
449
|
+
base64_to_content_block(content.data, content.mimeType)
|
|
450
|
+
)
|
|
449
451
|
elif isinstance(content, types.EmbeddedResource):
|
|
450
452
|
resource = content.resource
|
|
451
453
|
if (
|
|
@@ -683,13 +685,19 @@ class ClaudeAgent(MCPAgent):
|
|
|
683
685
|
return messages_cached
|
|
684
686
|
|
|
685
687
|
|
|
686
|
-
def base64_to_content_block(
|
|
688
|
+
def base64_to_content_block(
|
|
689
|
+
base64: str,
|
|
690
|
+
media_type: str = "image/png",
|
|
691
|
+
) -> BetaImageBlockParam:
|
|
687
692
|
"""Convert base64 image to Claude content block."""
|
|
688
693
|
return BetaImageBlockParam(
|
|
689
694
|
type="image",
|
|
690
695
|
source=BetaBase64ImageSourceParam(
|
|
691
696
|
type="base64",
|
|
692
|
-
media_type=
|
|
697
|
+
media_type=cast(
|
|
698
|
+
"Literal['image/jpeg', 'image/png', 'image/gif', 'image/webp']",
|
|
699
|
+
media_type,
|
|
700
|
+
),
|
|
693
701
|
data=base64,
|
|
694
702
|
),
|
|
695
703
|
)
|
|
@@ -61,12 +61,12 @@ class TestIncrementVersion:
|
|
|
61
61
|
def test_increment_minor(self):
|
|
62
62
|
"""Test incrementing minor version."""
|
|
63
63
|
assert increment_version("1.2.3", "minor") == "1.3.0"
|
|
64
|
-
assert increment_version("0.5.
|
|
64
|
+
assert increment_version("0.5.33", "minor") == "0.6.0"
|
|
65
65
|
|
|
66
66
|
def test_increment_major(self):
|
|
67
67
|
"""Test incrementing major version."""
|
|
68
68
|
assert increment_version("1.2.3", "major") == "2.0.0"
|
|
69
|
-
assert increment_version("0.5.
|
|
69
|
+
assert increment_version("0.5.33", "major") == "1.0.0"
|
|
70
70
|
|
|
71
71
|
def test_increment_with_v_prefix(self):
|
|
72
72
|
"""Test incrementing version with v prefix."""
|
|
@@ -70,15 +70,15 @@ def _load_from_file(path: Path) -> list[Task]:
|
|
|
70
70
|
return [Task(**{**item, "args": item.get("args") or {}}) for item in raw_items]
|
|
71
71
|
|
|
72
72
|
|
|
73
|
-
def resolve_taskset_id(
|
|
74
|
-
"""Resolve a taskset
|
|
73
|
+
def resolve_taskset_id(name: str) -> str:
|
|
74
|
+
"""Resolve a taskset name to its UUID via the HUD API."""
|
|
75
75
|
headers = {}
|
|
76
76
|
if settings.api_key:
|
|
77
77
|
headers["Authorization"] = f"Bearer {settings.api_key}"
|
|
78
78
|
|
|
79
79
|
with httpx.Client() as client:
|
|
80
80
|
response = client.get(
|
|
81
|
-
f"{settings.hud_api_url}/tasks/evalset/{
|
|
81
|
+
f"{settings.hud_api_url}/tasks/evalset/{name}",
|
|
82
82
|
headers=headers,
|
|
83
83
|
)
|
|
84
84
|
response.raise_for_status()
|
|
@@ -86,7 +86,7 @@ def resolve_taskset_id(slug: str) -> str:
|
|
|
86
86
|
|
|
87
87
|
evalset_id = data.get("evalset_id")
|
|
88
88
|
if not evalset_id:
|
|
89
|
-
raise ValueError(f"Could not resolve taskset '{
|
|
89
|
+
raise ValueError(f"Could not resolve taskset '{name}' — not found or no access")
|
|
90
90
|
return evalset_id
|
|
91
91
|
|
|
92
92
|
|
|
@@ -146,14 +146,14 @@ def load_tasks(source: str, *, raw: bool = False) -> list[Task] | list[dict[str,
|
|
|
146
146
|
|
|
147
147
|
Supports multiple sources with auto-detection:
|
|
148
148
|
- Local file path (JSON or JSONL)
|
|
149
|
-
- HUD API
|
|
149
|
+
- HUD API evalset name (e.g., "SheetBench-50")
|
|
150
150
|
|
|
151
151
|
Automatically detects and converts v4 LegacyTask format to v5 Task.
|
|
152
152
|
|
|
153
153
|
Args:
|
|
154
154
|
source: Task source. Can be:
|
|
155
155
|
- Path to a local JSON/JSONL file
|
|
156
|
-
- HUD API
|
|
156
|
+
- HUD API evalset name (e.g., "SheetBench-50")
|
|
157
157
|
raw: If True, return raw dicts without validation or env var substitution.
|
|
158
158
|
Useful for preserving template strings like "${HUD_API_KEY}".
|
|
159
159
|
|
|
@@ -193,8 +193,7 @@ def save_tasks(
|
|
|
193
193
|
Creates or updates a taskset with the given tasks.
|
|
194
194
|
|
|
195
195
|
Args:
|
|
196
|
-
name:
|
|
197
|
-
If no org prefix, uses user's default org.
|
|
196
|
+
name: Evalset name (e.g., "benchmark-v1").
|
|
198
197
|
tasks: List of Task objects (v5 format) to save.
|
|
199
198
|
|
|
200
199
|
Returns:
|
|
@@ -214,10 +213,10 @@ def save_tasks(
|
|
|
214
213
|
]
|
|
215
214
|
|
|
216
215
|
# Save to HUD API
|
|
217
|
-
taskset_id = save_tasks("
|
|
216
|
+
taskset_id = save_tasks("benchmark-v1", tasks)
|
|
218
217
|
|
|
219
218
|
# Later, load them back
|
|
220
|
-
loaded = load_tasks("
|
|
219
|
+
loaded = load_tasks("benchmark-v1")
|
|
221
220
|
```
|
|
222
221
|
|
|
223
222
|
Raises:
|
|
@@ -50,7 +50,7 @@ class TestLoadTasks:
|
|
|
50
50
|
mock_client.__exit__.return_value = None
|
|
51
51
|
mock_client_class.return_value = mock_client
|
|
52
52
|
|
|
53
|
-
tasks = load_tasks("test-
|
|
53
|
+
tasks = load_tasks("test-dataset")
|
|
54
54
|
|
|
55
55
|
assert len(tasks) == 2
|
|
56
56
|
# Tasks are keyed by ID in dict, order may vary
|
|
@@ -61,7 +61,7 @@ class TestLoadTasks:
|
|
|
61
61
|
# Platform IDs are internal and should not be inferred from dict keys
|
|
62
62
|
assert all(t.id is None for t in tasks)
|
|
63
63
|
mock_client.get.assert_called_once_with(
|
|
64
|
-
"https://api.hud.ai/tasks/evalset/test-
|
|
64
|
+
"https://api.hud.ai/tasks/evalset/test-dataset",
|
|
65
65
|
headers={"Authorization": "Bearer test_key"},
|
|
66
66
|
params={"all": "true"},
|
|
67
67
|
)
|
|
@@ -96,7 +96,7 @@ class TestLoadTasks:
|
|
|
96
96
|
mock_client.__exit__.return_value = None
|
|
97
97
|
mock_client_class.return_value = mock_client
|
|
98
98
|
|
|
99
|
-
tasks = load_tasks("test-
|
|
99
|
+
tasks = load_tasks("test-dataset")
|
|
100
100
|
|
|
101
101
|
assert len(tasks) == 1
|
|
102
102
|
assert tasks[0].scenario == "checkout"
|
|
@@ -126,11 +126,11 @@ class TestLoadTasks:
|
|
|
126
126
|
mock_client.__exit__.return_value = None
|
|
127
127
|
mock_client_class.return_value = mock_client
|
|
128
128
|
|
|
129
|
-
tasks = load_tasks("test-
|
|
129
|
+
tasks = load_tasks("test-dataset")
|
|
130
130
|
|
|
131
131
|
assert len(tasks) == 0
|
|
132
132
|
mock_client.get.assert_called_once_with(
|
|
133
|
-
"https://api.hud.ai/tasks/evalset/test-
|
|
133
|
+
"https://api.hud.ai/tasks/evalset/test-dataset",
|
|
134
134
|
headers={},
|
|
135
135
|
params={"all": "true"},
|
|
136
136
|
)
|
|
@@ -198,7 +198,7 @@ class TestLoadTasks:
|
|
|
198
198
|
mock_client.__exit__.return_value = None
|
|
199
199
|
mock_client_class.return_value = mock_client
|
|
200
200
|
|
|
201
|
-
tasks = load_tasks("test-
|
|
201
|
+
tasks = load_tasks("test-dataset")
|
|
202
202
|
|
|
203
203
|
assert len(tasks) == 0
|
|
204
204
|
|
|
@@ -223,7 +223,7 @@ class TestLoadTasks:
|
|
|
223
223
|
mock_client.__exit__.return_value = None
|
|
224
224
|
mock_client_class.return_value = mock_client
|
|
225
225
|
|
|
226
|
-
tasks = load_tasks("test-
|
|
226
|
+
tasks = load_tasks("test-dataset")
|
|
227
227
|
|
|
228
228
|
assert len(tasks) == 1
|
|
229
229
|
assert tasks[0].scenario == "test"
|
|
@@ -259,7 +259,7 @@ class TestSaveTasks:
|
|
|
259
259
|
mock_client_class.return_value = mock_client
|
|
260
260
|
|
|
261
261
|
taskset_id = save_tasks(
|
|
262
|
-
"test-
|
|
262
|
+
"test-dataset",
|
|
263
263
|
[
|
|
264
264
|
Task(
|
|
265
265
|
env={"name": "test-env"},
|
|
@@ -276,6 +276,6 @@ class TestSaveTasks:
|
|
|
276
276
|
call_args = mock_client.post.call_args
|
|
277
277
|
assert call_args.args[0] == "https://api.hud.ai/tasks/upload"
|
|
278
278
|
payload = call_args.kwargs["json"]
|
|
279
|
-
assert payload["name"] == "test-
|
|
279
|
+
assert payload["name"] == "test-dataset"
|
|
280
280
|
assert payload["tasks"][0]["slug"] == "checkout-smoke"
|
|
281
281
|
assert "id" not in payload["tasks"][0]
|
|
@@ -159,6 +159,9 @@ class Connector:
|
|
|
159
159
|
"transport": self._transport,
|
|
160
160
|
"auth": self._auth,
|
|
161
161
|
}
|
|
162
|
+
client_timeout = getattr(self._transport, "_hud_client_timeout", None)
|
|
163
|
+
if client_timeout is not None:
|
|
164
|
+
client_kwargs["timeout"] = client_timeout
|
|
162
165
|
if self._elicitation_handler is not None:
|
|
163
166
|
client_kwargs["elicitation_handler"] = self._elicitation_handler
|
|
164
167
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from typing import TYPE_CHECKING, Any
|
|
5
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
6
6
|
|
|
7
7
|
from hud.environment.connectors.base import BaseConnectorMixin
|
|
8
8
|
|
|
@@ -66,8 +66,7 @@ class MCPConfigConnectorMixin(BaseConnectorMixin):
|
|
|
66
66
|
if settings.client_timeout > 0
|
|
67
67
|
else min(request_timeout, settings.__class__.model_fields["client_timeout"].default)
|
|
68
68
|
)
|
|
69
|
-
server_config
|
|
70
|
-
transport = _build_transport(server_config)
|
|
69
|
+
transport = _build_transport(server_config, timeout=timeout)
|
|
71
70
|
|
|
72
71
|
return self._add_connection(
|
|
73
72
|
name,
|
|
@@ -121,17 +120,29 @@ class MCPConfigConnectorMixin(BaseConnectorMixin):
|
|
|
121
120
|
return self
|
|
122
121
|
|
|
123
122
|
|
|
124
|
-
def _build_transport(server_config: dict[str, Any]) -> Any:
|
|
123
|
+
def _build_transport(server_config: dict[str, Any], *, timeout: float | None = None) -> Any:
|
|
125
124
|
from fastmcp.client.transports import SSETransport, StreamableHttpTransport
|
|
126
125
|
from fastmcp.mcp_config import infer_transport_type_from_url
|
|
127
126
|
|
|
128
127
|
url = server_config["url"]
|
|
129
128
|
transport_type = server_config.get("transport") or infer_transport_type_from_url(url)
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
129
|
+
transport_timeout = timeout if timeout is not None else server_config.get("sse_read_timeout")
|
|
130
|
+
transport_kwargs = {
|
|
131
|
+
"url": url,
|
|
132
|
+
"headers": server_config.get("headers"),
|
|
133
|
+
"auth": server_config.get("auth"),
|
|
134
|
+
"httpx_client_factory": server_config.get("httpx_client_factory"),
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
if transport_type == "sse":
|
|
138
|
+
return SSETransport(
|
|
139
|
+
**transport_kwargs,
|
|
140
|
+
sse_read_timeout=transport_timeout,
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
transport = StreamableHttpTransport(**transport_kwargs)
|
|
144
|
+
if transport_timeout is not None:
|
|
145
|
+
# FastMCP 3.x wants streamable HTTP timeouts on the client/session,
|
|
146
|
+
# not on the transport constructor.
|
|
147
|
+
cast("Any", transport)._hud_client_timeout = transport_timeout
|
|
148
|
+
return transport
|
|
@@ -140,6 +140,35 @@ class TestConnector:
|
|
|
140
140
|
# Client is now set
|
|
141
141
|
assert connector.client is mock_client
|
|
142
142
|
|
|
143
|
+
@pytest.mark.asyncio
|
|
144
|
+
async def test_connect_passes_transport_timeout_to_client(self) -> None:
|
|
145
|
+
"""connect() forwards transport timeout to FastMCP client session kwargs."""
|
|
146
|
+
|
|
147
|
+
class Transport:
|
|
148
|
+
_hud_client_timeout = 300
|
|
149
|
+
|
|
150
|
+
transport = Transport()
|
|
151
|
+
connector = Connector(
|
|
152
|
+
transport=transport,
|
|
153
|
+
config=ConnectionConfig(),
|
|
154
|
+
name="test",
|
|
155
|
+
connection_type=ConnectionType.REMOTE,
|
|
156
|
+
auth="test-token",
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
mock_client = MagicMock()
|
|
160
|
+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
|
161
|
+
mock_client.is_connected = MagicMock(return_value=True)
|
|
162
|
+
|
|
163
|
+
with patch("fastmcp.client.Client", return_value=mock_client) as mock_cls:
|
|
164
|
+
await connector.connect()
|
|
165
|
+
|
|
166
|
+
mock_cls.assert_called_once_with(
|
|
167
|
+
transport=transport,
|
|
168
|
+
auth="test-token",
|
|
169
|
+
timeout=300,
|
|
170
|
+
)
|
|
171
|
+
|
|
143
172
|
@pytest.mark.asyncio
|
|
144
173
|
async def test_disconnect_clears_client(self) -> None:
|
|
145
174
|
"""disconnect() closes client and clears state."""
|
|
@@ -197,7 +197,7 @@ class TestRemoteConnectorMixin:
|
|
|
197
197
|
env = TestEnv()
|
|
198
198
|
with patch("hud.settings.settings", spec=Settings) as mock_settings:
|
|
199
199
|
mock_settings.hud_mcp_url = "https://mcp.hud.ai"
|
|
200
|
-
mock_settings.client_timeout = 300 # Used in connect_mcp
|
|
200
|
+
mock_settings.client_timeout = 300 # Used in connect_mcp transport timeout logic
|
|
201
201
|
|
|
202
202
|
env.connect_hub("browser")
|
|
203
203
|
|
|
@@ -205,3 +205,45 @@ class TestRemoteConnectorMixin:
|
|
|
205
205
|
assert "hud" in env._connections
|
|
206
206
|
# Verify hub config is stored for serialization
|
|
207
207
|
assert env._hub_config == {"name": "browser"}
|
|
208
|
+
|
|
209
|
+
def test_connect_mcp_streamable_transport_uses_client_timeout(self) -> None:
|
|
210
|
+
"""Streamable HTTP uses FastMCP client timeout instead of deprecated transport arg."""
|
|
211
|
+
from fastmcp.client.transports import StreamableHttpTransport
|
|
212
|
+
|
|
213
|
+
from hud.environment.connectors.mcp_config import MCPConfigConnectorMixin
|
|
214
|
+
from hud.settings import Settings
|
|
215
|
+
|
|
216
|
+
class TestEnv(MCPConfigConnectorMixin):
|
|
217
|
+
def __init__(self) -> None:
|
|
218
|
+
self._connections: dict[str, Connector] = {}
|
|
219
|
+
|
|
220
|
+
env = TestEnv()
|
|
221
|
+
with patch("hud.settings.settings", spec=Settings) as mock_settings:
|
|
222
|
+
mock_settings.client_timeout = 300
|
|
223
|
+
env.connect_mcp({"browser": {"url": "https://mcp.hud.ai/browser"}})
|
|
224
|
+
|
|
225
|
+
transport = env._connections["browser"]._transport
|
|
226
|
+
assert isinstance(transport, StreamableHttpTransport)
|
|
227
|
+
assert transport.sse_read_timeout is None
|
|
228
|
+
assert getattr(transport, "_hud_client_timeout", None) == 300
|
|
229
|
+
|
|
230
|
+
def test_connect_mcp_sse_transport_keeps_sse_timeout(self) -> None:
|
|
231
|
+
"""SSE transports should continue to receive sse_read_timeout directly."""
|
|
232
|
+
from fastmcp.client.transports import SSETransport
|
|
233
|
+
|
|
234
|
+
from hud.environment.connectors.mcp_config import MCPConfigConnectorMixin
|
|
235
|
+
from hud.settings import Settings
|
|
236
|
+
|
|
237
|
+
class TestEnv(MCPConfigConnectorMixin):
|
|
238
|
+
def __init__(self) -> None:
|
|
239
|
+
self._connections: dict[str, Connector] = {}
|
|
240
|
+
|
|
241
|
+
env = TestEnv()
|
|
242
|
+
with patch("hud.settings.settings", spec=Settings) as mock_settings:
|
|
243
|
+
mock_settings.client_timeout = 300
|
|
244
|
+
env.connect_mcp({"browser": {"url": "https://mcp.hud.ai/browser", "transport": "sse"}})
|
|
245
|
+
|
|
246
|
+
transport = env._connections["browser"]._transport
|
|
247
|
+
assert isinstance(transport, SSETransport)
|
|
248
|
+
assert transport.sse_read_timeout is not None
|
|
249
|
+
assert transport.sse_read_timeout.total_seconds() == 300
|
|
@@ -723,21 +723,18 @@ class EvalContext(Environment):
|
|
|
723
723
|
return False
|
|
724
724
|
|
|
725
725
|
# =========================================================================
|
|
726
|
-
#
|
|
726
|
+
# MCP Telemetry Instrumentation
|
|
727
727
|
# =========================================================================
|
|
728
728
|
|
|
729
|
-
|
|
730
|
-
"""
|
|
729
|
+
def _should_instrument(self) -> bool:
|
|
730
|
+
"""Whether local MCP instrumentation should be applied.
|
|
731
731
|
|
|
732
|
-
|
|
733
|
-
Instrumentation is disabled when connected to a remote HUD server (telemetry is
|
|
734
|
-
recorded server-side in that case).
|
|
732
|
+
Returns False when telemetry is handled server-side (remote hub or HUD MCP).
|
|
735
733
|
"""
|
|
736
|
-
|
|
734
|
+
if not self._trace_enabled:
|
|
735
|
+
return False
|
|
737
736
|
if self._hub_config is not None:
|
|
738
|
-
return
|
|
739
|
-
|
|
740
|
-
# Skip instrumentation for v4 tasks with HUD MCP config (remote server)
|
|
737
|
+
return False
|
|
741
738
|
if self._mcp_config is not None:
|
|
742
739
|
from hud.utils.mcp import _is_hud_server
|
|
743
740
|
|
|
@@ -745,18 +742,47 @@ class EvalContext(Environment):
|
|
|
745
742
|
if isinstance(server_cfg, dict):
|
|
746
743
|
url = server_cfg.get("url", "")
|
|
747
744
|
if url and _is_hud_server(url):
|
|
748
|
-
return
|
|
745
|
+
return False
|
|
746
|
+
return True
|
|
749
747
|
|
|
750
|
-
|
|
748
|
+
async def _execute_tool(self, name: str, arguments: dict[str, Any]) -> MCPToolResult:
|
|
749
|
+
if not self._should_instrument():
|
|
750
|
+
return await super()._execute_tool(name, arguments)
|
|
751
751
|
return await self._execute_tool_instrumented(name, arguments)
|
|
752
752
|
|
|
753
|
-
@instrument(
|
|
753
|
+
@instrument(method="tools/call")
|
|
754
754
|
async def _execute_tool_instrumented(
|
|
755
755
|
self, name: str, arguments: dict[str, Any]
|
|
756
756
|
) -> MCPToolResult:
|
|
757
|
-
"""Instrumented version of _execute_tool for local environments."""
|
|
758
757
|
return await super()._execute_tool(name, arguments)
|
|
759
758
|
|
|
759
|
+
async def run_scenario_setup(
|
|
760
|
+
self,
|
|
761
|
+
scenario_name: str,
|
|
762
|
+
args: dict[str, Any],
|
|
763
|
+
session_id: str | None = None,
|
|
764
|
+
) -> str | None:
|
|
765
|
+
if not self._should_instrument():
|
|
766
|
+
return await super().run_scenario_setup(scenario_name, args, session_id)
|
|
767
|
+
return await self._run_setup_instrumented(scenario_name, args)
|
|
768
|
+
|
|
769
|
+
@instrument(method="prompts/get")
|
|
770
|
+
async def _run_setup_instrumented(self, name: str, arguments: dict[str, Any]) -> str | None:
|
|
771
|
+
return await super().run_scenario_setup(name, arguments)
|
|
772
|
+
|
|
773
|
+
async def run_scenario_evaluate(
|
|
774
|
+
self,
|
|
775
|
+
scenario_name: str,
|
|
776
|
+
session_id: str | None = None,
|
|
777
|
+
) -> EvaluationResult:
|
|
778
|
+
if not self._should_instrument():
|
|
779
|
+
return await super().run_scenario_evaluate(scenario_name, session_id)
|
|
780
|
+
return await self._run_evaluate_instrumented(scenario_name)
|
|
781
|
+
|
|
782
|
+
@instrument(method="resources/read")
|
|
783
|
+
async def _run_evaluate_instrumented(self, uri: str) -> EvaluationResult:
|
|
784
|
+
return await super().run_scenario_evaluate(uri)
|
|
785
|
+
|
|
760
786
|
def __repr__(self) -> str:
|
|
761
787
|
return f"EvalContext({self.trace_id[:8]}..., name={self.eval_name!r}, reward={self.reward})"
|
|
762
788
|
|
|
@@ -100,6 +100,7 @@ def instrument(
|
|
|
100
100
|
name: str | None = None,
|
|
101
101
|
category: str = "function",
|
|
102
102
|
span_type: str | None = None,
|
|
103
|
+
method: str | None = None,
|
|
103
104
|
internal_type: str | None = None,
|
|
104
105
|
record_args: bool = True,
|
|
105
106
|
record_result: bool = True,
|
|
@@ -113,6 +114,7 @@ def instrument(
|
|
|
113
114
|
name: str | None = None,
|
|
114
115
|
category: str = "function",
|
|
115
116
|
span_type: str | None = None,
|
|
117
|
+
method: str | None = None,
|
|
116
118
|
internal_type: str | None = None,
|
|
117
119
|
record_args: bool = True,
|
|
118
120
|
record_result: bool = True,
|
|
@@ -126,6 +128,7 @@ def instrument(
|
|
|
126
128
|
name: str | None = None,
|
|
127
129
|
category: str = "function",
|
|
128
130
|
span_type: str | None = None,
|
|
131
|
+
method: str | None = None,
|
|
129
132
|
internal_type: str | None = None,
|
|
130
133
|
record_args: bool = True,
|
|
131
134
|
record_result: bool = True,
|
|
@@ -138,6 +141,7 @@ def instrument(
|
|
|
138
141
|
name: str | None = None,
|
|
139
142
|
category: str = "function",
|
|
140
143
|
span_type: str | None = None,
|
|
144
|
+
method: str | None = None,
|
|
141
145
|
internal_type: str | None = None,
|
|
142
146
|
record_args: bool = True,
|
|
143
147
|
record_result: bool = True,
|
|
@@ -151,6 +155,10 @@ def instrument(
|
|
|
151
155
|
name: Custom span name (defaults to module.function)
|
|
152
156
|
category: Span category (e.g., "agent", "tool", "function", "mcp")
|
|
153
157
|
span_type: Alias for category (deprecated, use category instead)
|
|
158
|
+
method: MCP method name (e.g., "tools/call", "resources/read").
|
|
159
|
+
When set, produces MCP spans: name becomes "{method}.mcp",
|
|
160
|
+
type becomes "SERVER", and request is structured as
|
|
161
|
+
{"method": ..., "params": ...}.
|
|
154
162
|
internal_type: Internal span type (e.g., "user-message")
|
|
155
163
|
record_args: Whether to record function arguments
|
|
156
164
|
record_result: Whether to record function result
|
|
@@ -168,6 +176,7 @@ def instrument(
|
|
|
168
176
|
return await model.generate(messages)
|
|
169
177
|
"""
|
|
170
178
|
effective_category = span_type if span_type is not None else category
|
|
179
|
+
effective_method = method
|
|
171
180
|
|
|
172
181
|
def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
|
|
173
182
|
if hasattr(func, "_hud_instrumented"):
|
|
@@ -193,13 +202,19 @@ def instrument(
|
|
|
193
202
|
error: str | None = None,
|
|
194
203
|
) -> dict[str, Any]:
|
|
195
204
|
"""Build a HudSpan-compatible span record."""
|
|
196
|
-
|
|
205
|
+
is_mcp = effective_method is not None
|
|
206
|
+
|
|
207
|
+
extra_attrs: dict[str, Any] = {}
|
|
208
|
+
if is_mcp:
|
|
209
|
+
extra_attrs["method_name"] = effective_method
|
|
210
|
+
|
|
197
211
|
attributes = TraceStep(
|
|
198
212
|
task_run_id=task_run_id,
|
|
199
|
-
category=effective_category,
|
|
200
|
-
type="CLIENT",
|
|
213
|
+
category="mcp" if is_mcp else effective_category,
|
|
214
|
+
type="SERVER" if is_mcp else "CLIENT",
|
|
201
215
|
start_timestamp=start_time,
|
|
202
216
|
end_timestamp=end_time,
|
|
217
|
+
**extra_attrs,
|
|
203
218
|
)
|
|
204
219
|
|
|
205
220
|
# Record arguments as request
|
|
@@ -213,21 +228,50 @@ def instrument(
|
|
|
213
228
|
if k not in ("self", "cls")
|
|
214
229
|
}
|
|
215
230
|
if args_dict:
|
|
216
|
-
|
|
231
|
+
if is_mcp:
|
|
232
|
+
attributes.request = {
|
|
233
|
+
"method": effective_method,
|
|
234
|
+
"params": args_dict,
|
|
235
|
+
}
|
|
236
|
+
else:
|
|
237
|
+
attributes.request = args_dict
|
|
217
238
|
except Exception as e:
|
|
218
239
|
logger.debug("Failed to serialize args: %s", e)
|
|
219
240
|
|
|
220
241
|
# Record result
|
|
221
242
|
if record_result and result is not None and error is None:
|
|
222
243
|
try:
|
|
223
|
-
|
|
244
|
+
serialized = _serialize_value(result)
|
|
245
|
+
if is_mcp and effective_method == "prompts/get":
|
|
246
|
+
if isinstance(serialized, str):
|
|
247
|
+
serialized = {
|
|
248
|
+
"messages": [
|
|
249
|
+
{
|
|
250
|
+
"role": "user",
|
|
251
|
+
"content": {
|
|
252
|
+
"type": "text",
|
|
253
|
+
"text": serialized,
|
|
254
|
+
},
|
|
255
|
+
}
|
|
256
|
+
]
|
|
257
|
+
}
|
|
258
|
+
elif is_mcp and effective_method == "resources/read":
|
|
259
|
+
if isinstance(serialized, list):
|
|
260
|
+
serialized = {"contents": serialized}
|
|
261
|
+
elif isinstance(serialized, dict) and "reward" in serialized:
|
|
262
|
+
uri = args_dict.get("uri", "") if args_dict else ""
|
|
263
|
+
serialized = {
|
|
264
|
+
"contents": [{"uri": uri, "text": json.dumps(serialized)}]
|
|
265
|
+
}
|
|
266
|
+
attributes.result = serialized
|
|
224
267
|
except Exception as e:
|
|
225
268
|
logger.debug("Failed to serialize result: %s", e)
|
|
226
269
|
|
|
227
270
|
# Build span
|
|
228
271
|
span_id = uuid.uuid4().hex[:16]
|
|
272
|
+
effective_name = f"{effective_method}.mcp" if is_mcp else span_name
|
|
229
273
|
span: dict[str, Any] = {
|
|
230
|
-
"name":
|
|
274
|
+
"name": effective_name,
|
|
231
275
|
"trace_id": _normalize_trace_id(task_run_id),
|
|
232
276
|
"span_id": span_id,
|
|
233
277
|
"parent_span_id": None,
|