hud-python 0.5.1__tar.gz → 0.5.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hud_python-0.5.1 → hud_python-0.5.3}/PKG-INFO +1 -1
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/__init__.py +1 -1
- hud_python-0.5.3/hud/agents/__init__.py +82 -0
- hud_python-0.5.3/hud/agents/gateway.py +42 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/misc/response_agent.py +7 -0
- hud_python-0.5.3/hud/agents/resolver.py +70 -0
- hud_python-0.5.3/hud/agents/tests/test_resolver.py +192 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/eval.py +17 -37
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/flows/init.py +1 -1
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/pull.py +1 -1
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/push.py +9 -2
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_push.py +1 -1
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/metadata.py +1 -1
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_metadata.py +1 -1
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/datasets/loader.py +13 -10
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/datasets/runner.py +9 -10
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/datasets/tests/test_loader.py +1 -1
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/environment.py +37 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/scenarios.py +53 -4
- hud_python-0.5.3/hud/environment/tests/test_environment.py +329 -0
- hud_python-0.5.3/hud/environment/tests/test_scenarios.py +749 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/context.py +11 -1
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/instrument.py +4 -2
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/task.py +5 -2
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/tests/test_eval.py +1 -1
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/telemetry/instrument.py +8 -1
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/telemetry/tests/test_eval_telemetry.py +8 -8
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/__init__.py +2 -0
- hud_python-0.5.3/hud/tools/agent.py +216 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/shell.py +3 -3
- hud_python-0.5.3/hud/tools/tests/test_agent_tool.py +355 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/types.py +5 -3
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/strict_schema.py +1 -1
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/tests/test_version.py +1 -1
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/version.py +1 -1
- {hud_python-0.5.1 → hud_python-0.5.3}/pyproject.toml +1 -1
- hud_python-0.5.1/hud/agents/__init__.py +0 -19
- hud_python-0.5.1/hud/environment/tests/test_environment.py +0 -161
- hud_python-0.5.1/hud/environment/tests/test_scenarios.py +0 -280
- {hud_python-0.5.1 → hud_python-0.5.3}/.gitignore +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/LICENSE +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/README.md +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/examples/README.md +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/__main__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/base.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/claude.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/gemini.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/gemini_cua.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/grounded_openai.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/misc/integration_test_agent.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/openai.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/openai_chat.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/operator.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/conftest.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_base_runtime.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_claude.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_client.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_gemini.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_openai.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_operator.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/agents/tests/test_run_eval.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/__main__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/analyze.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/build.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/clone.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/debug.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/dev.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/flows/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/flows/dev.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/flows/tasks.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/flows/templates.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/flows/tests/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/flows/tests/test_dev.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/get.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/init.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/list_func.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/remove.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/rft.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/rft_status.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_analyze.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_analyze_module.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_build.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_build_failure.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_build_module.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_cli_root.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_clone.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_convert.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_cursor.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_debug.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_dev.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_eval.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_eval_bedrock.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_init.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_list_func.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_mcp_server.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_pull.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_push_happy.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_push_wrapper.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_registry.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/celebrate.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/config.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/cursor.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/docker.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/env_check.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/environment.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/git.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/interactive.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/local_runner.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/package_runner.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/remote_runner.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/runner.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/server.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/source_hash.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_config.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_docker.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_docker_hints.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_env_check.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_environment.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_git.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_interactive_module.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_local_runner.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_logging_utils.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_package_runner.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_registry_utils.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_remote_runner.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_runner_modules.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_source_hash.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/tests/test_tasks.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/version_check.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/cli/utils/viewer.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/README.md +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/base.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/environment.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/fastmcp.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/mcp_use.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/tests/test_analyze_scenarios.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/tests/test_client_integration.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/tests/test_fastmcp.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/tests/test_mcp_use_retry.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/tests/test_protocol.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/utils/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/utils/mcp_use_retry.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/utils/retry.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/clients/utils/retry_transport.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/datasets/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/datasets/tests/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/datasets/tests/test_utils.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/datasets/utils.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/connection.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/connectors/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/connectors/base.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/connectors/local.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/connectors/mcp_config.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/connectors/openai.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/connectors/remote.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/integrations/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/integrations/adk.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/integrations/anthropic.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/integrations/gemini.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/integrations/langchain.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/integrations/llamaindex.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/integrations/openai.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/mock.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/router.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/tests/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/tests/test_connection.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/tests/test_connectors.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/tests/test_integrations.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/tests/test_local_connectors.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/tests/test_tools.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/types.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/utils/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/utils/formats.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/utils/schema.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/environment/utils/tool_wrappers.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/display.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/manager.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/parallel.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/tests/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/tests/test_context.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/tests/test_manager.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/tests/test_parallel.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/tests/test_task.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/types.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/eval/utils.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/native/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/native/comparator.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/native/tests/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/native/tests/test_comparator.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/native/tests/test_native_init.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/patches/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/patches/mcp_patches.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/patches/warnings.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/py.typed +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/samples/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/samples/browser.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/context.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/low_level.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/router.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/server.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/test_add_tool.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/test_context.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/test_mcp_server_handlers.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/test_mcp_server_integration.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/test_mcp_server_more.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/test_run_wrapper.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/test_server_extra.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/server/tests/test_sigterm_runner.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/settings.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/shared/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/shared/exceptions.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/shared/hints.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/shared/requests.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/shared/tests/test_exceptions.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/shared/tests/test_hints.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/telemetry/exporter.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/telemetry/tests/test_exporter.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/telemetry/tests/test_instrument.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/apply_patch.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/base.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/bash.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/computer/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/computer/anthropic.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/computer/gemini.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/computer/hud.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/computer/openai.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/computer/qwen.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/edit.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/executors/base.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/executors/tests/test_base_executor.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/grounding/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/grounding/config.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/grounding/grounded_tool.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/grounding/grounder.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/grounding/tests/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/jupyter.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/playwright.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/response.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/submit.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_apply_patch.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_bash.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_bash_extended.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_computer.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_computer_actions.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_edit.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_jupyter_tool.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_shell.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_submit.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_tools_init.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_types.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/types.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/tools/utils.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/env.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/hud_console.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/mcp.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/pretty_errors.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/telemetry.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/tests/test_mcp.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/tests/test_pretty_errors.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/tests/test_telemetry.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/tests/test_tool_shorthand.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/tool_shorthand.py +0 -0
- {hud_python-0.5.1 → hud_python-0.5.3}/hud/utils/types.py +0 -0
|
@@ -18,7 +18,7 @@ from .telemetry.instrument import instrument
|
|
|
18
18
|
def trace(*args: object, **kwargs: object) -> EvalContext:
|
|
19
19
|
"""Deprecated: Use hud.eval() instead.
|
|
20
20
|
|
|
21
|
-
.. deprecated:: 0.5.
|
|
21
|
+
.. deprecated:: 0.5.2
|
|
22
22
|
hud.trace() is deprecated. Use hud.eval() or env.eval() instead.
|
|
23
23
|
"""
|
|
24
24
|
warnings.warn(
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from .base import MCPAgent
|
|
6
|
+
from .openai import OpenAIAgent
|
|
7
|
+
from .openai_chat import OpenAIChatAgent
|
|
8
|
+
from .operator import OperatorAgent
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"MCPAgent",
|
|
12
|
+
"OpenAIAgent",
|
|
13
|
+
"OpenAIChatAgent",
|
|
14
|
+
"OperatorAgent",
|
|
15
|
+
"create_agent",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def create_agent(model: str, **kwargs: Any) -> MCPAgent:
    """Create an agent for a gateway model.

    This routes ALL requests through the HUD gateway. For direct API access
    (using your own API keys), use the agent classes directly.

    Args:
        model: Model name (e.g., "gpt-4o", "claude-sonnet-4-5").
        **kwargs: Additional params passed to agent.create(). Values supplied
            by the caller win over the defaults filled in here (``model``,
            the client, ``validate_api_key``).

    Returns:
        Configured MCPAgent instance with gateway routing.

    Raises:
        ValueError: Propagated from ``resolve_cls`` when ``model`` is neither
            a known agent type nor a model listed by the gateway.

    Example:
        ```python
        # Gateway routing (recommended)
        agent = create_agent("gpt-4o")
        agent = create_agent("claude-sonnet-4-5", temperature=0.7)

        # Direct API access (use agent classes)
        from hud.agents.claude import ClaudeAgent

        agent = ClaudeAgent.create(model="claude-sonnet-4-5")
        ```
    """
    from hud.agents.gateway import build_gateway_client
    from hud.agents.resolver import resolve_cls

    # Resolve class and gateway info
    agent_cls, gateway_info = resolve_cls(model)

    if gateway_info:
        # Gateway entry found: prefer its canonical model id and provider.
        model_id = gateway_info.get("model") or gateway_info.get("id") or model
        provider = gateway_info.get("provider") or "openai"
    else:
        # No gateway entry (model was an agent-type name): infer the provider
        # from the resolved agent class.
        from hud.agents.claude import ClaudeAgent
        from hud.agents.gemini import GeminiAgent

        # The provider strings must match what build_gateway_client()
        # recognises ("anthropic" / "gemini"); any other value yields an
        # OpenAI-compatible client, which is wrong for GeminiAgent.
        agent_to_provider = {
            ClaudeAgent: "anthropic",
            GeminiAgent: "gemini",
        }
        model_id = model
        provider = agent_to_provider.get(agent_cls, "openai")

    client = build_gateway_client(provider)

    # Set up kwargs without overriding anything the caller passed explicitly.
    kwargs.setdefault("model", model_id)

    # Use correct client key based on agent type (subclasses of
    # OpenAIChatAgent take the same keyword as the base class).
    if issubclass(agent_cls, OpenAIChatAgent):
        kwargs.setdefault("openai_client", client)
    else:
        # Claude and other agents use model_client and validate_api_key
        kwargs.setdefault("model_client", client)
        kwargs.setdefault("validate_api_key", False)

    return agent_cls.create(**kwargs)
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Gateway client utilities for HUD inference gateway."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def build_gateway_client(provider: str) -> Any:
    """Build a client configured for HUD gateway routing.

    Every client is pointed at ``settings.hud_gateway_url`` and authenticates
    with ``settings.api_key``.

    Args:
        provider: Provider name, case-insensitive ("anthropic", "openai",
            "gemini", etc.). "google" is accepted as an alias for "gemini".
            Any unrecognised name yields an OpenAI-compatible client.

    Returns:
        Configured client for the provider: ``AsyncAnthropic`` for
        "anthropic", ``genai.Client`` for "gemini"/"google", otherwise
        ``AsyncOpenAI``.
    """
    from hud.settings import settings

    provider = provider.lower()

    if provider == "anthropic":
        from anthropic import AsyncAnthropic

        return AsyncAnthropic(api_key=settings.api_key, base_url=settings.hud_gateway_url)

    # "google" is treated the same as "gemini" so that a Gemini agent never
    # silently falls through to the OpenAI-compatible client below.
    if provider in ("gemini", "google"):
        from google import genai
        from google.genai.types import HttpOptions

        return genai.Client(
            # The api_key argument is a placeholder; the HUD key is sent via
            # the Authorization header instead.
            api_key="PLACEHOLDER",
            http_options=HttpOptions(
                api_version="v1beta",
                base_url=settings.hud_gateway_url,
                headers={"Authorization": f"Bearer {settings.api_key}"},
            ),
        )

    # OpenAI-compatible (openai, azure, together, groq, fireworks, etc.)
    from openai import AsyncOpenAI

    return AsyncOpenAI(api_key=settings.api_key, base_url=settings.hud_gateway_url)
|
|
@@ -6,6 +6,7 @@ from typing import Literal
|
|
|
6
6
|
from openai import AsyncOpenAI
|
|
7
7
|
|
|
8
8
|
from hud.settings import settings
|
|
9
|
+
from hud.telemetry import instrument
|
|
9
10
|
|
|
10
11
|
logger = logging.getLogger(__name__)
|
|
11
12
|
|
|
@@ -64,6 +65,11 @@ class ResponseAgent:
|
|
|
64
65
|
self.model = model
|
|
65
66
|
self.system_prompt = system_prompt or DEFAULT_SYSTEM_PROMPT
|
|
66
67
|
|
|
68
|
+
@instrument(
|
|
69
|
+
category="agent",
|
|
70
|
+
name="response_agent",
|
|
71
|
+
internal_type="user-message",
|
|
72
|
+
)
|
|
67
73
|
async def determine_response(self, agent_message: str) -> ResponseType:
|
|
68
74
|
"""
|
|
69
75
|
Determine whether the agent should stop or continue based on its message.
|
|
@@ -86,6 +92,7 @@ class ResponseAgent:
|
|
|
86
92
|
],
|
|
87
93
|
temperature=0.1,
|
|
88
94
|
max_tokens=5,
|
|
95
|
+
extra_headers={"Trace-Id": ""},
|
|
89
96
|
)
|
|
90
97
|
|
|
91
98
|
response_text = response.choices[0].message.content
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Model resolution - maps model strings to agent classes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from hud.agents.base import MCPAgent
|
|
9
|
+
|
|
10
|
+
__all__ = ["resolve_cls"]
|
|
11
|
+
|
|
12
|
+
_models_cache: list[dict[str, Any]] | None = None
|
|
13
|
+
|
|
14
|
+
# Provider name → AgentType value (only anthropic differs)
|
|
15
|
+
_PROVIDER_TO_AGENT = {"anthropic": "claude"}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _fetch_gateway_models() -> list[dict[str, Any]]:
    """Fetch available models from HUD gateway (cached).

    The first successful response is stored in the module-level
    ``_models_cache`` and reused by later calls. Failures are not cached, so
    a later call retries the request.

    Returns:
        The list of model entries reported by ``{hud_gateway_url}/models``.
        An empty list is returned when no API key is configured, when the
        request or JSON decoding fails, or when the payload is not a list of
        model entries.
    """
    global _models_cache
    if _models_cache is not None:
        return _models_cache

    import httpx

    from hud.settings import settings

    if not settings.api_key:
        return []

    try:
        resp = httpx.get(
            f"{settings.hud_gateway_url}/models",
            headers={"Authorization": f"Bearer {settings.api_key}"},
            timeout=10.0,
        )
        resp.raise_for_status()
        payload = resp.json()
    except Exception:
        # Deliberately best-effort: model discovery must never crash agent
        # resolution, so any transport/decoding error means "no models".
        return []

    # Accept either {"data": [...]} or a bare list.
    models = payload.get("data", payload) if isinstance(payload, dict) else payload
    if not isinstance(models, list):
        # Unexpected shape (e.g. a dict without "data"): do not cache it,
        # otherwise callers would iterate over dict keys instead of entries.
        return []

    # Keep only mapping entries; callers use .get() on each one.
    _models_cache = [entry for entry in models if isinstance(entry, dict)]
    return _models_cache
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def resolve_cls(model: str) -> tuple[type[MCPAgent], dict[str, Any] | None]:
    """Map a model string to its agent class plus optional gateway metadata.

    Resolution order:
        1. ``model`` is itself a valid ``AgentType`` value -> that type's
           class, with no gateway info.
        2. ``model`` matches the ``id``, ``name`` or ``model`` field of a
           gateway entry -> the class for that entry's provider (falling back
           to the OpenAI-compatible class), together with the entry.

    Returns:
        (agent_class, None) for known AgentTypes
        (agent_class, gateway_model_info) for gateway models

    Raises:
        ValueError: If neither lookup finds ``model``.
    """
    from hud.types import AgentType

    # Step 1: direct AgentType name.
    try:
        known_cls = AgentType(model).cls
    except ValueError:
        pass
    else:
        return known_cls, None

    # Step 2: scan the gateway catalogue.
    for entry in _fetch_gateway_models():
        aliases = (entry.get("id"), entry.get("name"), entry.get("model"))
        if model not in aliases:
            continue

        raw_provider = entry.get("provider") or "openai_compatible"
        provider_key = raw_provider.lower()
        # Translate provider names that differ from their AgentType value.
        agent_name = _PROVIDER_TO_AGENT.get(provider_key, provider_key)
        try:
            resolved_cls = AgentType(agent_name).cls
        except ValueError:
            # Provider has no dedicated agent type.
            resolved_cls = AgentType.OPENAI_COMPATIBLE.cls
        return resolved_cls, entry

    raise ValueError(f"Model '{model}' not found")
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""Tests for model resolution and create_agent."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from unittest.mock import MagicMock, patch
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from hud.agents import create_agent
|
|
10
|
+
from hud.agents.resolver import resolve_cls
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TestResolveCls:
    """Checks resolve_cls against AgentType names and mocked gateway listings."""

    def test_resolves_known_agent_type(self) -> None:
        """'claude' resolves to ClaudeAgent with no gateway info attached."""
        from hud.agents.claude import ClaudeAgent

        agent_cls, gateway_info = resolve_cls("claude")

        assert agent_cls == ClaudeAgent
        assert gateway_info is None

    def test_resolves_openai(self) -> None:
        """'openai' resolves to OpenAIAgent."""
        from hud.agents import OpenAIAgent

        agent_cls, _gateway_info = resolve_cls("openai")

        assert agent_cls == OpenAIAgent

    def test_resolves_gemini(self) -> None:
        """'gemini' resolves to GeminiAgent."""
        from hud.agents.gemini import GeminiAgent

        agent_cls, _gateway_info = resolve_cls("gemini")

        assert agent_cls == GeminiAgent

    def test_unknown_model_without_gateway_raises(self) -> None:
        """An unrecognised model with an empty gateway listing raises ValueError."""
        empty_gateway = patch("hud.agents.resolver._fetch_gateway_models", return_value=[])

        with empty_gateway, pytest.raises(ValueError, match="not found"):
            resolve_cls("unknown-model-xyz")

    def test_resolves_gateway_model(self) -> None:
        """A model listed by the gateway resolves and returns its listing entry."""
        from hud.agents import OpenAIAgent

        listing = [{"id": "gpt-4o", "model": "gpt-4o", "provider": "openai"}]
        gateway = patch("hud.agents.resolver._fetch_gateway_models", return_value=listing)

        with gateway:
            agent_cls, info = resolve_cls("gpt-4o")

        assert agent_cls == OpenAIAgent
        assert info is not None
        assert info["id"] == "gpt-4o"

    def test_resolves_anthropic_provider_to_claude(self) -> None:
        """A gateway entry whose provider is 'anthropic' yields ClaudeAgent."""
        from hud.agents.claude import ClaudeAgent

        listing = [
            {"id": "claude-sonnet", "model": "claude-3-sonnet", "provider": "anthropic"},
        ]
        gateway = patch("hud.agents.resolver._fetch_gateway_models", return_value=listing)

        with gateway:
            agent_cls, _info = resolve_cls("claude-sonnet")

        assert agent_cls == ClaudeAgent

    def test_resolves_unknown_provider_to_openai_compatible(self) -> None:
        """A gateway entry with an unrecognised provider yields OpenAIChatAgent."""
        from hud.agents.openai_chat import OpenAIChatAgent

        listing = [
            {"id": "custom-model", "model": "custom", "provider": "custom-provider"},
        ]
        gateway = patch("hud.agents.resolver._fetch_gateway_models", return_value=listing)

        with gateway:
            agent_cls, _info = resolve_cls("custom-model")

        assert agent_cls == OpenAIChatAgent
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class TestCreateAgent:
    """Checks that create_agent forwards a gateway client and kwargs to ``create``."""

    def test_creates_with_gateway_client(self) -> None:
        """create_agent passes the model name and a model_client to ``create``."""
        from hud.agents import OpenAIAgent

        listing = [{"id": "gpt-4o", "model": "gpt-4o", "provider": "openai"}]
        fake_client = MagicMock()
        fake_agent = MagicMock()

        with (
            patch("hud.agents.resolver._fetch_gateway_models", return_value=listing),
            patch.object(OpenAIAgent, "create", return_value=fake_agent) as create_mock,
            patch("hud.agents.gateway.build_gateway_client", return_value=fake_client),
        ):
            built = create_agent("gpt-4o")

            # The agent factory must receive both the model name and a client.
            forwarded = create_mock.call_args.kwargs
            assert forwarded["model"] == "gpt-4o"
            assert "model_client" in forwarded
            assert built == fake_agent

    def test_passes_kwargs_to_create(self) -> None:
        """Additional keyword arguments reach ``create`` unchanged."""
        from hud.agents import OpenAIAgent

        listing = [{"id": "gpt-4o", "model": "gpt-4o", "provider": "openai"}]

        with (
            patch("hud.agents.resolver._fetch_gateway_models", return_value=listing),
            patch.object(OpenAIAgent, "create", return_value=MagicMock()) as create_mock,
            patch("hud.agents.gateway.build_gateway_client"),
        ):
            create_agent("gpt-4o", temperature=0.5, max_tokens=1000)

            forwarded = create_mock.call_args.kwargs
            assert forwarded["temperature"] == 0.5
            assert forwarded["max_tokens"] == 1000

    def test_known_agent_type_also_uses_gateway(self) -> None:
        """The plain 'claude' name still builds a gateway client."""
        from hud.agents.claude import ClaudeAgent

        with (
            patch.object(ClaudeAgent, "create", return_value=MagicMock()) as create_mock,
            patch("hud.agents.gateway.build_gateway_client") as build_mock,
        ):
            build_mock.return_value = MagicMock()

            create_agent("claude")

            # A gateway client is built exactly once and handed to ``create``.
            build_mock.assert_called_once()
            forwarded = create_mock.call_args.kwargs
            assert "model_client" in forwarded
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class TestBuildGatewayClient:
    """Checks which SDK client class build_gateway_client instantiates per provider."""

    def test_builds_anthropic_client(self) -> None:
        """The 'anthropic' provider instantiates AsyncAnthropic once."""
        from hud.agents.gateway import build_gateway_client

        settings_patch = patch(
            "hud.settings.settings",
            api_key="test-key",
            hud_gateway_url="https://gateway.hud.ai",
        )
        with settings_patch, patch("anthropic.AsyncAnthropic") as client_cls:
            build_gateway_client("anthropic")
            client_cls.assert_called_once()

    def test_builds_openai_client_for_openai(self) -> None:
        """The 'openai' provider instantiates AsyncOpenAI once."""
        from hud.agents.gateway import build_gateway_client

        settings_patch = patch(
            "hud.settings.settings",
            api_key="test-key",
            hud_gateway_url="https://gateway.hud.ai",
        )
        with settings_patch, patch("openai.AsyncOpenAI") as client_cls:
            build_gateway_client("openai")
            client_cls.assert_called_once()

    def test_builds_openai_client_for_unknown(self) -> None:
        """An unrecognised provider ('together') also instantiates AsyncOpenAI once."""
        from hud.agents.gateway import build_gateway_client

        settings_patch = patch(
            "hud.settings.settings",
            api_key="test-key",
            hud_gateway_url="https://gateway.hud.ai",
        )
        with settings_patch, patch("openai.AsyncOpenAI") as client_cls:
            build_gateway_client("together")
            client_cls.assert_called_once()
|
|
@@ -338,47 +338,27 @@ class EvalConfig(BaseModel):
|
|
|
338
338
|
|
|
339
339
|
# Configure gateway mode - route LLM API calls through HUD gateway
|
|
340
340
|
if self.gateway:
|
|
341
|
-
|
|
342
|
-
if not hud_api_key:
|
|
341
|
+
if not settings.api_key:
|
|
343
342
|
raise typer.Exit(1) # Already validated in validate_api_keys()
|
|
344
343
|
|
|
345
|
-
|
|
346
|
-
from anthropic import AsyncAnthropic
|
|
347
|
-
|
|
348
|
-
kwargs["model_client"] = AsyncAnthropic(
|
|
349
|
-
api_key=hud_api_key,
|
|
350
|
-
base_url=settings.hud_gateway_url,
|
|
351
|
-
)
|
|
352
|
-
hud_console.info("🌐 Using HUD Gateway for Claude API")
|
|
353
|
-
elif self.agent_type in (AgentType.OPENAI, AgentType.OPERATOR):
|
|
354
|
-
from openai import AsyncOpenAI
|
|
344
|
+
from hud.agents.gateway import build_gateway_client
|
|
355
345
|
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
346
|
+
# Map AgentType to provider
|
|
347
|
+
agent_to_provider = {
|
|
348
|
+
AgentType.CLAUDE: "anthropic",
|
|
349
|
+
AgentType.OPENAI: "openai",
|
|
350
|
+
AgentType.OPERATOR: "openai",
|
|
351
|
+
AgentType.GEMINI: "gemini",
|
|
352
|
+
AgentType.GEMINI_CUA: "gemini",
|
|
353
|
+
AgentType.OPENAI_COMPATIBLE: "openai",
|
|
354
|
+
}
|
|
355
|
+
provider = agent_to_provider.get(self.agent_type, "openai")
|
|
356
|
+
client = build_gateway_client(provider)
|
|
363
357
|
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
hud_console.info("🌐 Using HUD Gateway for OpenAI-compatible API")
|
|
369
|
-
elif self.agent_type in (AgentType.GEMINI, AgentType.GEMINI_CUA):
|
|
370
|
-
from google import genai
|
|
371
|
-
from google.genai.types import HttpOptions
|
|
372
|
-
|
|
373
|
-
kwargs["model_client"] = genai.Client(
|
|
374
|
-
api_key="PLACEHOLDER",
|
|
375
|
-
http_options=HttpOptions(
|
|
376
|
-
api_version="v1beta",
|
|
377
|
-
base_url=settings.hud_gateway_url,
|
|
378
|
-
headers={"Authorization": f"Bearer {hud_api_key}"},
|
|
379
|
-
),
|
|
380
|
-
)
|
|
381
|
-
hud_console.info("🌐 Using HUD Gateway for Gemini API")
|
|
358
|
+
# OpenAI-compatible uses openai_client key
|
|
359
|
+
is_oai_compat = self.agent_type == AgentType.OPENAI_COMPATIBLE
|
|
360
|
+
kwargs["openai_client" if is_oai_compat else "model_client"] = client
|
|
361
|
+
hud_console.info(f"🌐 Using HUD Gateway for {provider} API")
|
|
382
362
|
|
|
383
363
|
return kwargs
|
|
384
364
|
|
|
@@ -102,7 +102,7 @@ def smart_init(
|
|
|
102
102
|
hud_console.info(" hud set HUD_API_KEY=your-key-here")
|
|
103
103
|
hud_console.info(" Or: export HUD_API_KEY=your-key")
|
|
104
104
|
hud_console.info("")
|
|
105
|
-
hud_console.info("Get your key at: https://hud.ai/
|
|
105
|
+
hud_console.info("Get your key at: https://hud.ai/project/api-keys")
|
|
106
106
|
return
|
|
107
107
|
|
|
108
108
|
target = Path(directory).resolve()
|
|
@@ -63,7 +63,7 @@ def fetch_lock_from_registry(reference: str) -> dict | None:
|
|
|
63
63
|
|
|
64
64
|
# URL-encode the path segments to handle special characters in tags
|
|
65
65
|
url_safe_path = "/".join(quote(part, safe="") for part in reference.split("/"))
|
|
66
|
-
registry_url = f"{settings.
|
|
66
|
+
registry_url = f"{settings.hud_api_url.rstrip('/')}/registry/envs/{url_safe_path}"
|
|
67
67
|
|
|
68
68
|
headers = {}
|
|
69
69
|
if settings.api_key:
|
|
@@ -420,13 +420,20 @@ def push_environment(
|
|
|
420
420
|
|
|
421
421
|
# URL-encode the path segments to handle special characters in tags
|
|
422
422
|
url_safe_path = "/".join(quote(part, safe="") for part in name_with_tag.split("/"))
|
|
423
|
-
registry_url = f"{settings.
|
|
423
|
+
registry_url = f"{settings.hud_api_url.rstrip('/')}/registry/envs/{url_safe_path}"
|
|
424
|
+
|
|
425
|
+
# Detect git remote URL for matching existing GitHub-connected registries
|
|
426
|
+
from hud.cli.utils.git import get_git_remote_url
|
|
427
|
+
|
|
428
|
+
github_url = get_git_remote_url(Path(directory))
|
|
424
429
|
|
|
425
430
|
# Prepare the payload
|
|
426
|
-
payload = {
|
|
431
|
+
payload: dict[str, str | None] = {
|
|
427
432
|
"lock": yaml.dump(lock_data, default_flow_style=False, sort_keys=False),
|
|
428
433
|
"digest": pushed_digest.split("@")[-1] if "@" in pushed_digest else None,
|
|
429
434
|
}
|
|
435
|
+
if github_url:
|
|
436
|
+
payload["github_url"] = github_url
|
|
430
437
|
|
|
431
438
|
headers = {"Authorization": f"Bearer {settings.api_key}"}
|
|
432
439
|
|
|
@@ -160,7 +160,7 @@ class TestPushEnvironment:
|
|
|
160
160
|
mock_hud_console = mock.Mock()
|
|
161
161
|
mock_hud_console_class.return_value = mock_hud_console
|
|
162
162
|
mock_settings.api_key = "test-key"
|
|
163
|
-
mock_settings.
|
|
163
|
+
mock_settings.hud_api_url = "https://api.hud.test"
|
|
164
164
|
mock_get_username.return_value = "testuser"
|
|
165
165
|
|
|
166
166
|
# Create lock file
|
|
@@ -32,7 +32,7 @@ def fetch_lock_from_registry(reference: str) -> dict | None:
|
|
|
32
32
|
|
|
33
33
|
# URL-encode the path segments to handle special characters in tags
|
|
34
34
|
url_safe_path = "/".join(quote(part, safe="") for part in reference.split("/"))
|
|
35
|
-
registry_url = f"{settings.
|
|
35
|
+
registry_url = f"{settings.hud_api_url.rstrip('/')}/registry/envs/{url_safe_path}"
|
|
36
36
|
|
|
37
37
|
headers = {}
|
|
38
38
|
if settings.api_key:
|
|
@@ -18,7 +18,7 @@ if TYPE_CHECKING:
|
|
|
18
18
|
@patch("hud.cli.utils.metadata.settings")
|
|
19
19
|
@patch("requests.get")
|
|
20
20
|
def test_fetch_lock_from_registry_success(mock_get, mock_settings):
|
|
21
|
-
mock_settings.
|
|
21
|
+
mock_settings.hud_api_url = "https://api.example.com"
|
|
22
22
|
mock_settings.api_key = None
|
|
23
23
|
resp = MagicMock(status_code=200)
|
|
24
24
|
resp.json.return_value = {"lock": "image: img\n"}
|
|
@@ -63,7 +63,8 @@ def _load_from_file(path: Path) -> list[Task]:
|
|
|
63
63
|
from hud.eval.task import Task
|
|
64
64
|
|
|
65
65
|
raw_items = _load_raw_from_file(path)
|
|
66
|
-
|
|
66
|
+
# Default args to {} for runnable tasks (None = template)
|
|
67
|
+
return [Task(**{**item, "args": item.get("args") or {}}) for item in raw_items]
|
|
67
68
|
|
|
68
69
|
|
|
69
70
|
def _load_raw_from_huggingface(dataset_name: str) -> list[dict[str, Any]]:
|
|
@@ -99,7 +100,8 @@ def _load_from_huggingface(dataset_name: str) -> list[Task]:
|
|
|
99
100
|
raw_items = _load_raw_from_huggingface(dataset_name)
|
|
100
101
|
from hud.eval.task import Task
|
|
101
102
|
|
|
102
|
-
|
|
103
|
+
# Default args to {} for runnable tasks (None = template)
|
|
104
|
+
return [Task(**{**item, "args": item.get("args") or {}}) for item in raw_items]
|
|
103
105
|
|
|
104
106
|
|
|
105
107
|
def _load_raw_from_api(dataset_name: str) -> list[dict[str, Any]]:
|
|
@@ -138,7 +140,8 @@ def _load_from_api(dataset_name: str) -> list[Task]:
|
|
|
138
140
|
from hud.eval.task import Task
|
|
139
141
|
|
|
140
142
|
raw_items = _load_raw_from_api(dataset_name)
|
|
141
|
-
|
|
143
|
+
# Default args to {} for runnable tasks (None = template)
|
|
144
|
+
return [Task(**{**item, "args": item.get("args") or {}}) for item in raw_items]
|
|
142
145
|
|
|
143
146
|
|
|
144
147
|
@overload
|
|
@@ -234,15 +237,15 @@ def save_tasks(
|
|
|
234
237
|
) -> str:
|
|
235
238
|
"""Save tasks to the HUD API.
|
|
236
239
|
|
|
237
|
-
Creates or updates
|
|
240
|
+
Creates or updates a taskset with the given tasks.
|
|
238
241
|
|
|
239
242
|
Args:
|
|
240
|
-
name:
|
|
243
|
+
name: Taskset name/slug (e.g., "my-evals/benchmark-v1").
|
|
241
244
|
If no org prefix, uses user's default org.
|
|
242
245
|
tasks: List of Task objects (v5 format) to save.
|
|
243
246
|
|
|
244
247
|
Returns:
|
|
245
|
-
The
|
|
248
|
+
The taskset ID of the created/updated taskset.
|
|
246
249
|
|
|
247
250
|
Example:
|
|
248
251
|
```python
|
|
@@ -258,7 +261,7 @@ def save_tasks(
|
|
|
258
261
|
]
|
|
259
262
|
|
|
260
263
|
# Save to HUD API
|
|
261
|
-
|
|
264
|
+
taskset_id = save_tasks("my-evals/benchmark-v1", tasks)
|
|
262
265
|
|
|
263
266
|
# Later, load them back
|
|
264
267
|
loaded = load_tasks("my-evals/benchmark-v1")
|
|
@@ -303,9 +306,9 @@ def save_tasks(
|
|
|
303
306
|
)
|
|
304
307
|
response.raise_for_status()
|
|
305
308
|
data = response.json()
|
|
306
|
-
|
|
307
|
-
logger.info("Saved %d tasks to
|
|
308
|
-
return
|
|
309
|
+
taskset_id = data.get("evalset_id") or data.get("id") or name
|
|
310
|
+
logger.info("Saved %d tasks to taskset: %s", len(tasks), taskset_id)
|
|
311
|
+
return taskset_id
|
|
309
312
|
except httpx.HTTPStatusError as e:
|
|
310
313
|
raise ValueError(f"Failed to save tasks: {e.response.text}") from e
|
|
311
314
|
except Exception as e:
|