hud-python 0.5.24__tar.gz → 0.5.25__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hud_python-0.5.24 → hud_python-0.5.25}/PKG-INFO +1 -1
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/openai_chat.py +8 -0
- hud_python-0.5.25/hud/agents/tests/test_integration_test_agent.py +42 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/types.py +7 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/dev.py +22 -20
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/flows/dev.py +5 -3
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/list_func.py +11 -10
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/remove.py +1 -1
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_build.py +2 -2
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/context.py +6 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/tests/test_context.py +138 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/session.py +8 -6
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/tests/test_bash.py +81 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/tests/test_shell.py +123 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/tests/test_version.py +1 -1
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/version.py +1 -1
- {hud_python-0.5.24 → hud_python-0.5.25}/pyproject.toml +1 -1
- {hud_python-0.5.24 → hud_python-0.5.25}/.gitignore +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/LICENSE +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/README.md +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/examples/README.md +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/__main__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/base.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/claude.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/gateway.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/gemini.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/gemini_cua.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/grounded_openai.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/misc/integration_test_agent.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/misc/response_agent.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/openai.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/operator.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/resolver.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/conftest.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_base_runtime.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_claude.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_gemini.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_openai.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_operator.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_resolver.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/agents/tests/test_run_eval.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/__main__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/analyze.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/build.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/clone.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/convert/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/convert/base.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/convert/harbor.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/convert/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/convert/tests/conftest.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/convert/tests/test_harbor.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/debug.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/deploy.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/eval.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/flows/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/flows/init.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/flows/tasks.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/flows/templates.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/flows/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/flows/tests/test_dev.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/get.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/init.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/link.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/pull.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/push.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/rft.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/rft_status.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_analyze.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_analyze_module.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_build_failure.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_build_module.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_cli_root.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_clone.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_convert.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_cursor.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_debug.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_debug_directory_mode.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_deploy.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_dev.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_eval.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_eval_bedrock.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_init.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_list_func.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_mcp_server.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_pull.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_push.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_push_happy.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_push_wrapper.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_registry.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/build_display.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/build_logs.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/celebrate.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/config.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/context.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/cursor.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/docker.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/env_check.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/environment.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/git.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/interactive.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/local_runner.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/mcp.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/metadata.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/package_runner.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/remote_runner.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/runner.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/server.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/source_hash.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_config.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_docker.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_docker_hints.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_env_check.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_environment.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_git.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_interactive_module.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_local_runner.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_logging_utils.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_metadata.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_package_runner.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_registry_utils.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_remote_runner.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_runner_modules.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_source_hash.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/tests/test_tasks.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/validation.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/version_check.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/cli/utils/viewer.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/datasets/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/datasets/loader.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/datasets/runner.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/datasets/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/datasets/tests/test_loader.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/datasets/tests/test_utils.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/datasets/utils.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/connection.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/connectors/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/connectors/base.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/connectors/local.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/connectors/mcp_config.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/connectors/openai.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/connectors/remote.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/environment.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/integrations/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/integrations/adk.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/integrations/anthropic.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/integrations/gemini.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/integrations/langchain.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/integrations/llamaindex.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/integrations/openai.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/mock.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/router.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/scenarios.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/tests/test_connection.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/tests/test_connectors.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/tests/test_environment.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/tests/test_integrations.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/tests/test_local_connectors.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/tests/test_scenarios.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/tests/test_tools.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/types.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/utils/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/utils/formats.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/utils/schema.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/environment/utils/tool_wrappers.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/display.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/instrument.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/manager.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/parallel.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/task.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/tests/test_eval.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/tests/test_manager.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/tests/test_parallel.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/tests/test_task.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/types.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/eval/utils.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/native/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/native/comparator.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/native/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/native/tests/test_comparator.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/native/tests/test_native_init.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/patches/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/patches/mcp_patches.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/patches/warnings.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/py.typed +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/samples/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/samples/browser.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/context.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/low_level.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/router.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/server.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/test_add_tool.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/test_context.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/test_mcp_server_handlers.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/test_mcp_server_integration.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/test_mcp_server_more.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/test_run_wrapper.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/test_server_extra.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/server/tests/test_sigterm_runner.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/settings.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/shared/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/shared/exceptions.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/shared/hints.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/shared/requests.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/shared/tests/test_exceptions.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/shared/tests/test_hints.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/telemetry/exporter.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/telemetry/tests/test_eval_telemetry.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/telemetry/tests/test_exporter.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/telemetry/tests/test_instrument.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/agent.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/base.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/apply_patch.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/bash.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/edit.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/gemini_edit.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/gemini_shell.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/shell.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/tests/test_apply_patch.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/tests/test_bash_extended.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/tests/test_bash_integration.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/tests/test_edit.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/tests/test_gemini_tools.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/coding/utils.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/anthropic.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/gemini.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/glm.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/hud.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/openai.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/qwen.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/tests/test_computer.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/tests/test_computer_actions.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/computer/tests/test_glm_computer.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/executors/base.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/executors/tests/test_base_executor.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/base.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/gemini.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/glob.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/grep.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/list.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/read.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/tests/test_glob.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/tests/test_grep.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/tests/test_list.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/filesystem/tests/test_read.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/grounding/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/grounding/config.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/grounding/grounded_tool.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/grounding/grounder.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/grounding/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/hosted/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/hosted/base.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/hosted/code_execution.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/hosted/google_search.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/hosted/url_context.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/hosted/web_fetch.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/hosted/web_search.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/jupyter.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/base.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/claude.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/gemini.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/session.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/tests/test_claude.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/tests/test_gemini.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/memory/tests/test_session.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/native_types.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/playwright.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/response.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/submit.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_agent_tool.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_jupyter_tool.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_native_tool_e2e.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_native_types.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_submit.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_tools_init.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_types.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/types.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/tools/utils.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/types.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/env.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/hud_console.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/mcp.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/pretty_errors.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/strict_schema.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/telemetry.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/tests/test_mcp.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/tests/test_pretty_errors.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/tests/test_telemetry.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/tests/test_tool_shorthand.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/tool_shorthand.py +0 -0
- {hud_python-0.5.24 → hud_python-0.5.25}/hud/utils/types.py +0 -0
|
@@ -88,6 +88,14 @@ class OpenAIChatAgent(MCPAgent):
|
|
|
88
88
|
)
|
|
89
89
|
|
|
90
90
|
self.completion_kwargs = dict(self.config.completion_kwargs)
|
|
91
|
+
|
|
92
|
+
# If a specific checkpoint is requested, inject it into extra_body
|
|
93
|
+
# so the HUD gateway routes to the exact checkpoint for inference.
|
|
94
|
+
if self.config.checkpoint:
|
|
95
|
+
extra_body = self.completion_kwargs.get("extra_body") or {}
|
|
96
|
+
extra_body["checkpoint"] = self.config.checkpoint
|
|
97
|
+
self.completion_kwargs["extra_body"] = extra_body
|
|
98
|
+
|
|
91
99
|
self.mcp_schemas: list[ChatCompletionToolParam] = []
|
|
92
100
|
self.hud_console = HUDConsole(logger=logger)
|
|
93
101
|
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Tests for IntegrationTestRunner."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from hud.agents.misc import IntegrationTestRunner
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_runs_all_integration_test_calls(mock_eval_context) -> None:
|
|
13
|
+
"""Runner executes each configured integration test call in order."""
|
|
14
|
+
|
|
15
|
+
async def _run() -> None:
|
|
16
|
+
mock_eval_context._integration_test_calls = [
|
|
17
|
+
("tool_a", {"x": 1}),
|
|
18
|
+
("tool_b", {"y": "ok"}),
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
runner = IntegrationTestRunner.create()
|
|
22
|
+
result = await runner.run(mock_eval_context)
|
|
23
|
+
|
|
24
|
+
assert result.done is True
|
|
25
|
+
assert mock_eval_context.tool_calls == [
|
|
26
|
+
("tool_a", {"x": 1}),
|
|
27
|
+
("tool_b", {"y": "ok"}),
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
asyncio.run(_run())
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_raises_when_no_integration_test_calls(mock_eval_context) -> None:
|
|
34
|
+
"""Runner fails fast when no integration calls are configured."""
|
|
35
|
+
|
|
36
|
+
async def _run() -> None:
|
|
37
|
+
runner = IntegrationTestRunner.create()
|
|
38
|
+
|
|
39
|
+
with pytest.raises(ValueError, match="integration_test_tool"):
|
|
40
|
+
await runner.run(mock_eval_context)
|
|
41
|
+
|
|
42
|
+
asyncio.run(_run())
|
|
@@ -119,6 +119,13 @@ class OpenAIChatConfig(BaseAgentConfig):
|
|
|
119
119
|
|
|
120
120
|
model_name: str = "OpenAI Chat"
|
|
121
121
|
model: str = Field(default="gpt-5-mini", validation_alias=_model_alias)
|
|
122
|
+
checkpoint: str | None = Field(
|
|
123
|
+
default=None,
|
|
124
|
+
description="Specific checkpoint name for inference routing. "
|
|
125
|
+
"When set, the HUD gateway routes to this exact checkpoint rather than "
|
|
126
|
+
"the model's current active checkpoint. Passed as 'checkpoint' in the "
|
|
127
|
+
"request body's extra_body.",
|
|
128
|
+
)
|
|
122
129
|
openai_client: Any = None # AsyncOpenAI
|
|
123
130
|
api_key: str | None = None
|
|
124
131
|
base_url: str | None = None
|
|
@@ -15,6 +15,7 @@ from pathlib import Path
|
|
|
15
15
|
from typing import Any
|
|
16
16
|
|
|
17
17
|
import typer
|
|
18
|
+
from rich.markup import escape
|
|
18
19
|
|
|
19
20
|
from hud.utils.hud_console import HUDConsole
|
|
20
21
|
|
|
@@ -49,41 +50,42 @@ def show_dev_server_info(
|
|
|
49
50
|
|
|
50
51
|
# Server section
|
|
51
52
|
hud_console.section_title("Server")
|
|
52
|
-
hud_console.
|
|
53
|
+
hud_console.print(f"{hud_console.sym.ITEM} {escape(server_name)}")
|
|
53
54
|
if transport == "http":
|
|
54
|
-
hud_console.
|
|
55
|
+
hud_console.print(f"{hud_console.sym.ITEM} http://localhost:{port}/mcp")
|
|
55
56
|
else:
|
|
56
|
-
hud_console.
|
|
57
|
+
hud_console.print(f"{hud_console.sym.ITEM} (stdio)")
|
|
57
58
|
|
|
58
59
|
# Quick Links (only for HTTP mode)
|
|
59
60
|
if transport == "http":
|
|
60
61
|
hud_console.section_title("Quick Links")
|
|
61
|
-
hud_console.
|
|
62
|
-
hud_console.
|
|
62
|
+
hud_console.print(f"{hud_console.sym.ITEM} Docs: http://localhost:{port}/docs")
|
|
63
|
+
hud_console.print(f"{hud_console.sym.ITEM} Cursor:")
|
|
63
64
|
# Display the Cursor link on its own line to prevent wrapping
|
|
64
65
|
hud_console.link(cursor_deeplink)
|
|
65
66
|
|
|
66
67
|
# Show eval endpoint if in Docker mode
|
|
67
68
|
if docker_mode:
|
|
68
|
-
hud_console.
|
|
69
|
+
hud_console.print(
|
|
69
70
|
f"{hud_console.sym.ITEM} Eval API: http://localhost:{port}/eval (POST)"
|
|
70
71
|
)
|
|
71
72
|
|
|
72
73
|
# Show debugging URLs from telemetry
|
|
73
74
|
if telemetry:
|
|
74
75
|
if "live_url" in telemetry:
|
|
75
|
-
|
|
76
|
+
url = escape(telemetry["live_url"])
|
|
77
|
+
hud_console.print(f"{hud_console.sym.ITEM} Live URL: {url}")
|
|
76
78
|
if "vnc_url" in telemetry:
|
|
77
|
-
hud_console.
|
|
79
|
+
hud_console.print(f"{hud_console.sym.ITEM} VNC URL: {escape(telemetry['vnc_url'])}")
|
|
78
80
|
if "cdp_url" in telemetry:
|
|
79
|
-
hud_console.
|
|
81
|
+
hud_console.print(f"{hud_console.sym.ITEM} CDP URL: {escape(telemetry['cdp_url'])}")
|
|
80
82
|
|
|
81
83
|
# Check for VNC (browser environment)
|
|
82
84
|
if env_dir and (env_dir / "environment" / "server.py").exists():
|
|
83
85
|
try:
|
|
84
86
|
content = (env_dir / "environment" / "server.py").read_text()
|
|
85
87
|
if "x11vnc" in content.lower() or "vnc" in content.lower():
|
|
86
|
-
hud_console.
|
|
88
|
+
hud_console.print(f"{hud_console.sym.ITEM} VNC: http://localhost:8080/vnc.html")
|
|
87
89
|
except Exception: # noqa: S110
|
|
88
90
|
pass
|
|
89
91
|
|
|
@@ -91,13 +93,13 @@ def show_dev_server_info(
|
|
|
91
93
|
if inspector or interactive:
|
|
92
94
|
hud_console.info("")
|
|
93
95
|
if inspector:
|
|
94
|
-
hud_console.
|
|
96
|
+
hud_console.print(f"{hud_console.sym.SUCCESS} Inspector launching...")
|
|
95
97
|
if interactive:
|
|
96
|
-
hud_console.
|
|
98
|
+
hud_console.print(f"{hud_console.sym.SUCCESS} Interactive mode enabled")
|
|
97
99
|
|
|
98
100
|
hud_console.info("")
|
|
99
101
|
if hot_reload_enabled:
|
|
100
|
-
hud_console.
|
|
102
|
+
hud_console.print(f"{hud_console.sym.SUCCESS} Hot-reload enabled")
|
|
101
103
|
else:
|
|
102
104
|
hud_console.info("Hot-reload disabled")
|
|
103
105
|
hud_console.dim_info("Tip", "Pass --watch/-w to enable hot-reload")
|
|
@@ -230,7 +232,7 @@ async def run_mcp_module(
|
|
|
230
232
|
hud_console.error(f"Failed to import module '{module_name}'")
|
|
231
233
|
hud_console.info(f"Error: {e}")
|
|
232
234
|
hud_console.info("")
|
|
233
|
-
hud_console.
|
|
235
|
+
hud_console.print("[bold cyan]Troubleshooting:[/bold cyan]")
|
|
234
236
|
hud_console.info(" • Verify module exists and is importable")
|
|
235
237
|
hud_console.info(" • Check for __init__.py in module directory")
|
|
236
238
|
hud_console.info(" • Check for import errors in the module")
|
|
@@ -238,7 +240,7 @@ async def run_mcp_module(
|
|
|
238
240
|
import traceback
|
|
239
241
|
|
|
240
242
|
hud_console.info("")
|
|
241
|
-
hud_console.
|
|
243
|
+
hud_console.print("[bold cyan]Full traceback:[/bold cyan]")
|
|
242
244
|
hud_console.info(traceback.format_exc())
|
|
243
245
|
sys.exit(1)
|
|
244
246
|
|
|
@@ -271,14 +273,14 @@ async def run_mcp_module(
|
|
|
271
273
|
available = [k for k in dir(module) if not k.startswith("_")]
|
|
272
274
|
hud_console.info(f"Available in module: {available}")
|
|
273
275
|
hud_console.info("")
|
|
274
|
-
hud_console.
|
|
276
|
+
hud_console.print("[bold cyan]Expected structure:[/bold cyan]")
|
|
275
277
|
hud_console.info(" from hud.environment import Environment")
|
|
276
278
|
hud_console.info(" env = Environment('my-env') # or mcp = ...")
|
|
277
279
|
raise AttributeError(f"Module '{module_name}' must define 'mcp', 'env', or 'environment'")
|
|
278
280
|
|
|
279
281
|
# Only show full header on first run, brief message on reload
|
|
280
282
|
if is_reload:
|
|
281
|
-
hud_console.
|
|
283
|
+
hud_console.print(f"{hud_console.sym.SUCCESS} Reloaded")
|
|
282
284
|
# Run server without showing full UI
|
|
283
285
|
else:
|
|
284
286
|
# Show full header on first run
|
|
@@ -344,7 +346,7 @@ async def run_mcp_module(
|
|
|
344
346
|
env_dir = cwd.parent / "environment"
|
|
345
347
|
if env_dir.exists() and (env_dir / "server.py").exists():
|
|
346
348
|
hud_console.info("")
|
|
347
|
-
hud_console.
|
|
349
|
+
hud_console.print(
|
|
348
350
|
f"{hud_console.sym.FLOW} Don't forget to start the environment "
|
|
349
351
|
"backend in another terminal:"
|
|
350
352
|
)
|
|
@@ -976,11 +978,11 @@ def run_mcp_dev_server(
|
|
|
976
978
|
if module is None:
|
|
977
979
|
hud_console.error("Could not auto-detect module in current directory")
|
|
978
980
|
hud_console.info("")
|
|
979
|
-
hud_console.
|
|
981
|
+
hud_console.print("[bold cyan]Expected:[/bold cyan]")
|
|
980
982
|
hud_console.info(" • __init__.py file in current directory")
|
|
981
983
|
hud_console.info(" • Module must define 'mcp' or 'env' variable")
|
|
982
984
|
hud_console.info("")
|
|
983
|
-
hud_console.
|
|
985
|
+
hud_console.print("[bold cyan]Examples:[/bold cyan]")
|
|
984
986
|
hud_console.info(" hud dev controller")
|
|
985
987
|
hud_console.info(" cd controller && hud dev")
|
|
986
988
|
hud_console.info(" hud dev --docker # For Docker-based environments")
|
|
@@ -6,6 +6,8 @@ import json
|
|
|
6
6
|
import logging
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
+
from rich.markup import escape
|
|
10
|
+
|
|
9
11
|
from hud.settings import settings
|
|
10
12
|
from hud.shared.requests import make_request
|
|
11
13
|
from hud.utils.hud_console import hud_console
|
|
@@ -136,13 +138,13 @@ def show_dev_ui(
|
|
|
136
138
|
# Show other info below
|
|
137
139
|
label = "Base image" if is_docker else "Server"
|
|
138
140
|
hud_console.info("")
|
|
139
|
-
hud_console.
|
|
140
|
-
hud_console.
|
|
141
|
+
hud_console.print(f"{hud_console.sym.ITEM} {escape(label)}: {escape(server_name)}")
|
|
142
|
+
hud_console.print(f"{hud_console.sym.ITEM} Cursor:")
|
|
141
143
|
# Display the Cursor link on its own line to prevent wrapping
|
|
142
144
|
hud_console.link(cursor_deeplink)
|
|
143
145
|
hud_console.info("")
|
|
144
146
|
if hot_reload_enabled:
|
|
145
|
-
hud_console.
|
|
147
|
+
hud_console.print(f"{hud_console.sym.SUCCESS} Hot-reload enabled")
|
|
146
148
|
else:
|
|
147
149
|
hud_console.info("Hot-reload disabled")
|
|
148
150
|
hud_console.dim_info("Tip", "Pass --watch/-w to enable hot-reload")
|
|
@@ -6,6 +6,7 @@ from datetime import datetime
|
|
|
6
6
|
|
|
7
7
|
import typer
|
|
8
8
|
import yaml
|
|
9
|
+
from rich.markup import escape
|
|
9
10
|
from rich.table import Table
|
|
10
11
|
|
|
11
12
|
from hud.utils.hud_console import HUDConsole
|
|
@@ -59,8 +60,8 @@ def list_environments(
|
|
|
59
60
|
else:
|
|
60
61
|
hud_console.info("No environments found in local registry.")
|
|
61
62
|
hud_console.info("")
|
|
62
|
-
hud_console.
|
|
63
|
-
hud_console.
|
|
63
|
+
hud_console.print("Pull environments with: [cyan]hud pull <org/name:tag>[/cyan]")
|
|
64
|
+
hud_console.print("Build environments with: [cyan]hud build[/cyan]")
|
|
64
65
|
return
|
|
65
66
|
|
|
66
67
|
# Collect all environments using the registry helper
|
|
@@ -131,8 +132,8 @@ def list_environments(
|
|
|
131
132
|
if not environments:
|
|
132
133
|
hud_console.info("No environments found matching criteria.")
|
|
133
134
|
hud_console.info("")
|
|
134
|
-
hud_console.
|
|
135
|
-
hud_console.
|
|
135
|
+
hud_console.print("Pull environments with: [cyan]hud pull <org/name:tag>[/cyan]")
|
|
136
|
+
hud_console.print("Build environments with: [cyan]hud build[/cyan]")
|
|
136
137
|
return
|
|
137
138
|
|
|
138
139
|
# Create table
|
|
@@ -179,16 +180,16 @@ def list_environments(
|
|
|
179
180
|
example_env = environments[0]
|
|
180
181
|
example_ref = f"{example_env['name']}:{example_env['tag']}"
|
|
181
182
|
|
|
182
|
-
hud_console.
|
|
183
|
-
hud_console.
|
|
184
|
-
hud_console.
|
|
183
|
+
hud_console.print(f"Run an environment: [cyan]hud run {escape(example_ref)}[/cyan]")
|
|
184
|
+
hud_console.print(f"Analyze tools: [cyan]hud analyze {escape(example_ref)}[/cyan]")
|
|
185
|
+
hud_console.print(f"Debug server: [cyan]hud debug {escape(example_ref)}[/cyan]")
|
|
185
186
|
|
|
186
|
-
hud_console.
|
|
187
|
-
hud_console.
|
|
187
|
+
hud_console.print("Pull more environments: [cyan]hud pull <org/name:tag>[/cyan]")
|
|
188
|
+
hud_console.print("Build new environments: [cyan]hud build[/cyan]")
|
|
188
189
|
|
|
189
190
|
if verbose:
|
|
190
191
|
hud_console.info("")
|
|
191
|
-
hud_console.
|
|
192
|
+
hud_console.print(f"[dim]Registry location: {escape(str(env_dir))}[/dim]")
|
|
192
193
|
|
|
193
194
|
|
|
194
195
|
def list_command(
|
|
@@ -162,7 +162,7 @@ def remove_all_environments(
|
|
|
162
162
|
|
|
163
163
|
hud_console.info("")
|
|
164
164
|
hud_console.info("Note: Docker images may still exist locally.")
|
|
165
|
-
hud_console.
|
|
165
|
+
hud_console.print("To remove them, use: [cyan]docker image prune[/cyan]")
|
|
166
166
|
|
|
167
167
|
|
|
168
168
|
def remove_command(
|
|
@@ -60,12 +60,12 @@ class TestIncrementVersion:
|
|
|
60
60
|
def test_increment_minor(self):
|
|
61
61
|
"""Test incrementing minor version."""
|
|
62
62
|
assert increment_version("1.2.3", "minor") == "1.3.0"
|
|
63
|
-
assert increment_version("0.5.
|
|
63
|
+
assert increment_version("0.5.25", "minor") == "0.6.0"
|
|
64
64
|
|
|
65
65
|
def test_increment_major(self):
|
|
66
66
|
"""Test incrementing major version."""
|
|
67
67
|
assert increment_version("1.2.3", "major") == "2.0.0"
|
|
68
|
-
assert increment_version("0.5.
|
|
68
|
+
assert increment_version("0.5.25", "major") == "1.0.0"
|
|
69
69
|
|
|
70
70
|
def test_increment_with_v_prefix(self):
|
|
71
71
|
"""Test incrementing version with v prefix."""
|
|
@@ -356,6 +356,12 @@ class EvalContext(Environment):
|
|
|
356
356
|
quiet=quiet,
|
|
357
357
|
)
|
|
358
358
|
|
|
359
|
+
# v5 validation overrides any environment-level integration calls.
|
|
360
|
+
if task.validation is not None:
|
|
361
|
+
ctx._integration_test_calls = [
|
|
362
|
+
(call.name, call.arguments or {}) for call in task.validation
|
|
363
|
+
]
|
|
364
|
+
|
|
359
365
|
# Store task info for scenario execution
|
|
360
366
|
ctx._task = task
|
|
361
367
|
|
|
@@ -187,3 +187,141 @@ class TestEvalContextFromEnvironment:
|
|
|
187
187
|
assert ctx.variants == {"model": "gpt-4o"}
|
|
188
188
|
assert ctx.group_id == "group-123"
|
|
189
189
|
assert ctx.index == 5
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class TestEvalContextFromTask:
|
|
193
|
+
"""Tests for EvalContext.from_task factory."""
|
|
194
|
+
|
|
195
|
+
def test_v5_validation_populates_integration_calls(self) -> None:
|
|
196
|
+
"""Task.validation is mapped to integration test calls for replay."""
|
|
197
|
+
from hud.environment import Environment
|
|
198
|
+
from hud.eval.task import Task
|
|
199
|
+
from hud.types import MCPToolCall
|
|
200
|
+
|
|
201
|
+
env = Environment("test-env")
|
|
202
|
+
validation_calls = [
|
|
203
|
+
MCPToolCall(name="tool_a", arguments={"x": 1}),
|
|
204
|
+
MCPToolCall(name="tool_b", arguments={"y": "ok"}),
|
|
205
|
+
]
|
|
206
|
+
task = Task(
|
|
207
|
+
env=env,
|
|
208
|
+
scenario="demo",
|
|
209
|
+
args={},
|
|
210
|
+
validation=validation_calls,
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
ctx = EvalContext.from_task(task)
|
|
214
|
+
assert ctx._integration_test_calls == [
|
|
215
|
+
("tool_a", {"x": 1}),
|
|
216
|
+
("tool_b", {"y": "ok"}),
|
|
217
|
+
]
|
|
218
|
+
|
|
219
|
+
def test_v5_validation_overrides_environment_integration_calls(self) -> None:
|
|
220
|
+
"""Task.validation takes precedence over env-level integration calls."""
|
|
221
|
+
from hud.environment import Environment
|
|
222
|
+
from hud.eval.task import Task
|
|
223
|
+
from hud.types import MCPToolCall
|
|
224
|
+
|
|
225
|
+
env = Environment("test-env")
|
|
226
|
+
env._integration_test_calls = [("old_tool", {"stale": True})]
|
|
227
|
+
|
|
228
|
+
task = Task(
|
|
229
|
+
env=env,
|
|
230
|
+
scenario="demo",
|
|
231
|
+
args={},
|
|
232
|
+
validation=[MCPToolCall(name="new_tool", arguments={"fresh": True})],
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
ctx = EvalContext.from_task(task)
|
|
236
|
+
assert ctx._integration_test_calls == [("new_tool", {"fresh": True})]
|
|
237
|
+
|
|
238
|
+
def test_v5_empty_validation_clears_environment_integration_calls(self) -> None:
|
|
239
|
+
"""Task.validation=[] still overrides env-level integration calls."""
|
|
240
|
+
from hud.environment import Environment
|
|
241
|
+
from hud.eval.task import Task
|
|
242
|
+
|
|
243
|
+
env = Environment("test-env")
|
|
244
|
+
env._integration_test_calls = [("old_tool", {"stale": True})]
|
|
245
|
+
|
|
246
|
+
task = Task(
|
|
247
|
+
env=env,
|
|
248
|
+
scenario="demo",
|
|
249
|
+
args={},
|
|
250
|
+
validation=[],
|
|
251
|
+
)
|
|
252
|
+
|
|
253
|
+
ctx = EvalContext.from_task(task)
|
|
254
|
+
|
|
255
|
+
assert ctx._integration_test_calls == []
|
|
256
|
+
|
|
257
|
+
def test_v4_integration_test_tool_remains_supported(self) -> None:
|
|
258
|
+
"""Legacy integration_test_tool still populates integration calls."""
|
|
259
|
+
from hud.eval.task import Task
|
|
260
|
+
|
|
261
|
+
task = Task.from_v4(
|
|
262
|
+
{
|
|
263
|
+
"prompt": "test",
|
|
264
|
+
"mcp_config": {"server": {"url": "http://localhost"}},
|
|
265
|
+
"evaluate_tool": {"name": "check", "arguments": {}},
|
|
266
|
+
"integration_test_tool": [
|
|
267
|
+
{"name": "legacy_tool", "arguments": {"v": 1}},
|
|
268
|
+
],
|
|
269
|
+
}
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
ctx = EvalContext.from_task(task)
|
|
273
|
+
assert ctx._integration_test_calls == [("legacy_tool", {"v": 1})]
|
|
274
|
+
|
|
275
|
+
def test_v5_validation_replays_with_integration_runner(self) -> None:
|
|
276
|
+
"""IntegrationTestRunner executes v5 Task.validation calls via EvalContext.from_task."""
|
|
277
|
+
import asyncio
|
|
278
|
+
|
|
279
|
+
from mcp import types as mcp_types
|
|
280
|
+
|
|
281
|
+
from hud.agents.misc import IntegrationTestRunner
|
|
282
|
+
from hud.environment import Environment
|
|
283
|
+
from hud.eval.task import Task
|
|
284
|
+
from hud.types import MCPToolCall, MCPToolResult
|
|
285
|
+
|
|
286
|
+
executed_calls: list[tuple[str, dict[str, object]]] = []
|
|
287
|
+
|
|
288
|
+
async def _run() -> None:
|
|
289
|
+
env = Environment("test-env")
|
|
290
|
+
validation_calls = [
|
|
291
|
+
MCPToolCall(name="tool_a", arguments={"x": 1}),
|
|
292
|
+
MCPToolCall(name="tool_b", arguments={"y": "ok"}),
|
|
293
|
+
]
|
|
294
|
+
task = Task(
|
|
295
|
+
env=env,
|
|
296
|
+
scenario="demo",
|
|
297
|
+
args={},
|
|
298
|
+
validation=validation_calls,
|
|
299
|
+
)
|
|
300
|
+
|
|
301
|
+
ctx = EvalContext.from_task(task)
|
|
302
|
+
|
|
303
|
+
async def fake_call_tool(call, /, **kwargs):
|
|
304
|
+
if isinstance(call, tuple):
|
|
305
|
+
name = str(call[0])
|
|
306
|
+
arguments = dict(call[1]) if len(call) > 1 else {}
|
|
307
|
+
else:
|
|
308
|
+
name = str(call)
|
|
309
|
+
arguments = {}
|
|
310
|
+
executed_calls.append((name, arguments))
|
|
311
|
+
return MCPToolResult(
|
|
312
|
+
content=[mcp_types.TextContent(type="text", text="ok")],
|
|
313
|
+
isError=False,
|
|
314
|
+
)
|
|
315
|
+
|
|
316
|
+
ctx.call_tool = fake_call_tool # type: ignore[method-assign]
|
|
317
|
+
|
|
318
|
+
runner = IntegrationTestRunner.create()
|
|
319
|
+
result = await runner.run(ctx)
|
|
320
|
+
assert result.done is True
|
|
321
|
+
|
|
322
|
+
asyncio.run(_run())
|
|
323
|
+
|
|
324
|
+
assert executed_calls == [
|
|
325
|
+
("tool_a", {"x": 1}),
|
|
326
|
+
("tool_b", {"y": "ok"}),
|
|
327
|
+
]
|
|
@@ -153,18 +153,20 @@ class BashSession:
|
|
|
153
153
|
assert self._process.stdout
|
|
154
154
|
assert self._process.stderr
|
|
155
155
|
|
|
156
|
-
# Send command with sentinel for exit code capture
|
|
157
|
-
#
|
|
156
|
+
# Send command with sentinel for exit code capture.
|
|
157
|
+
# Use a newline before the sentinel echo (not ";" or "&") so that:
|
|
158
|
+
# 1. Heredoc delimiters aren't corrupted (e.g. EOF; echo '...' wouldn't match EOF)
|
|
159
|
+
# 2. The echo is a standalone command, avoiding syntax errors from leading ";"
|
|
158
160
|
if sys.platform == "win32":
|
|
159
161
|
if capture_exit_code:
|
|
160
|
-
cmd_line = f"{command}
|
|
162
|
+
cmd_line = f"{command}\necho {self._sentinel}%errorlevel%\n"
|
|
161
163
|
else:
|
|
162
|
-
cmd_line = f"{command}
|
|
164
|
+
cmd_line = f"{command}\necho {self._sentinel}\n"
|
|
163
165
|
else:
|
|
164
166
|
if capture_exit_code:
|
|
165
|
-
cmd_line = f"{command}
|
|
167
|
+
cmd_line = f"{command}\necho '{self._sentinel}'$?\n"
|
|
166
168
|
else:
|
|
167
|
-
cmd_line = f"{command}
|
|
169
|
+
cmd_line = f"{command}\necho '{self._sentinel}'\n"
|
|
168
170
|
|
|
169
171
|
self._process.stdin.write(cmd_line.encode())
|
|
170
172
|
await self._process.stdin.drain()
|
|
@@ -73,6 +73,87 @@ class TestBashSession:
|
|
|
73
73
|
assert result.error == ""
|
|
74
74
|
|
|
75
75
|
|
|
76
|
+
class TestBashSessionHeredoc:
|
|
77
|
+
"""Tests for heredoc handling in ClaudeBashSession."""
|
|
78
|
+
|
|
79
|
+
@pytest.mark.asyncio
|
|
80
|
+
async def test_sentinel_on_own_line_after_heredoc(self):
|
|
81
|
+
"""Sentinel echo must be on its own line so heredoc terminators aren't corrupted."""
|
|
82
|
+
session = _BashSession()
|
|
83
|
+
session._started = True
|
|
84
|
+
|
|
85
|
+
mock_process = MagicMock()
|
|
86
|
+
mock_process.returncode = None
|
|
87
|
+
mock_process.stdin = MagicMock()
|
|
88
|
+
mock_process.stdin.write = MagicMock()
|
|
89
|
+
mock_process.stdin.drain = AsyncMock()
|
|
90
|
+
mock_process.stdout = MagicMock()
|
|
91
|
+
mock_process.stdout.readuntil = AsyncMock(return_value=b"hello\n<<exit>>\n")
|
|
92
|
+
mock_process.stderr = MagicMock()
|
|
93
|
+
mock_process.stderr.read = AsyncMock(return_value=b"")
|
|
94
|
+
|
|
95
|
+
session._process = mock_process
|
|
96
|
+
|
|
97
|
+
heredoc_cmd = "python3 << 'EOF'\nprint('hello')\nEOF"
|
|
98
|
+
await session.run(heredoc_cmd)
|
|
99
|
+
|
|
100
|
+
written = mock_process.stdin.write.call_args[0][0].decode()
|
|
101
|
+
|
|
102
|
+
# EOF must be followed by newline, then the echo — never "EOF;" or "EOF echo"
|
|
103
|
+
assert "EOF\necho '<<exit>>'\n" in written
|
|
104
|
+
assert "EOF;" not in written
|
|
105
|
+
assert "EOF echo" not in written
|
|
106
|
+
|
|
107
|
+
@pytest.mark.asyncio
|
|
108
|
+
async def test_heredoc_integration(self):
|
|
109
|
+
"""Integration test: a real heredoc command completes without hanging."""
|
|
110
|
+
from hud.tools.coding.bash import ClaudeBashSession
|
|
111
|
+
|
|
112
|
+
session = ClaudeBashSession()
|
|
113
|
+
session._timeout = 5.0 # fail fast if sentinel is broken
|
|
114
|
+
await session.start()
|
|
115
|
+
try:
|
|
116
|
+
result = await session.run("cat << 'EOF'\nhello from heredoc\nEOF")
|
|
117
|
+
assert result.output is not None
|
|
118
|
+
assert "hello from heredoc" in result.output
|
|
119
|
+
finally:
|
|
120
|
+
session.stop()
|
|
121
|
+
|
|
122
|
+
@pytest.mark.asyncio
|
|
123
|
+
async def test_heredoc_with_python_integration(self):
|
|
124
|
+
"""Integration test: python heredoc executes and returns output."""
|
|
125
|
+
from hud.tools.coding.bash import ClaudeBashSession
|
|
126
|
+
|
|
127
|
+
session = ClaudeBashSession()
|
|
128
|
+
session._timeout = 5.0
|
|
129
|
+
await session.start()
|
|
130
|
+
try:
|
|
131
|
+
result = await session.run("python3 << 'PYEOF'\nprint('result:', 2 + 2)\nPYEOF")
|
|
132
|
+
assert result.output is not None
|
|
133
|
+
assert "result: 4" in result.output
|
|
134
|
+
finally:
|
|
135
|
+
session.stop()
|
|
136
|
+
|
|
137
|
+
@pytest.mark.asyncio
|
|
138
|
+
async def test_command_after_heredoc_still_works(self):
|
|
139
|
+
"""Integration test: session is usable for further commands after a heredoc."""
|
|
140
|
+
from hud.tools.coding.bash import ClaudeBashSession
|
|
141
|
+
|
|
142
|
+
session = ClaudeBashSession()
|
|
143
|
+
session._timeout = 5.0
|
|
144
|
+
await session.start()
|
|
145
|
+
try:
|
|
146
|
+
r1 = await session.run("cat << 'EOF'\nfirst\nEOF")
|
|
147
|
+
assert r1.output is not None
|
|
148
|
+
assert "first" in r1.output
|
|
149
|
+
|
|
150
|
+
r2 = await session.run("echo second")
|
|
151
|
+
assert r2.output is not None
|
|
152
|
+
assert "second" in r2.output
|
|
153
|
+
finally:
|
|
154
|
+
session.stop()
|
|
155
|
+
|
|
156
|
+
|
|
76
157
|
class TestBashTool:
|
|
77
158
|
"""Tests for BashTool."""
|
|
78
159
|
|