hud-python 0.5.33__tar.gz → 0.5.34__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hud_python-0.5.33 → hud_python-0.5.34}/PKG-INFO +68 -64
- {hud_python-0.5.33 → hud_python-0.5.34}/README.md +67 -63
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/openai.py +2 -3
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/__init__.py +12 -14
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/deploy.py +108 -57
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/eval.py +35 -9
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/flows/init.py +74 -41
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/flows/templates.py +2 -2
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/init.py +2 -3
- hud_python-0.5.34/hud/cli/link.py +38 -0
- hud_python-0.5.34/hud/cli/scenario.py +187 -0
- hud_python-0.5.34/hud/cli/sync.py +785 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_build.py +2 -2
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_deploy.py +4 -5
- hud_python-0.5.34/hud/cli/tests/test_scenario.py +283 -0
- hud_python-0.5.34/hud/cli/tests/test_sync.py +1161 -0
- hud_python-0.5.34/hud/cli/utils/collect.py +155 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/environment.py +6 -28
- hud_python-0.5.34/hud/cli/utils/evalset.py +83 -0
- hud_python-0.5.34/hud/cli/utils/name_check.py +140 -0
- hud_python-0.5.34/hud/cli/utils/project_config.py +106 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/eval/task.py +34 -1
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/services/chat.py +6 -5
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/services/tests/test_chat.py +6 -23
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/coding/edit.py +6 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/coding/utils.py +11 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/utils/hud_console.py +11 -8
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/utils/tests/test_version.py +1 -1
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/version.py +1 -1
- {hud_python-0.5.33 → hud_python-0.5.34}/pyproject.toml +2 -2
- hud_python-0.5.33/hud/cli/link.py +0 -199
- {hud_python-0.5.33 → hud_python-0.5.34}/.gitignore +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/LICENSE +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/examples/README.md +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/__main__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/claude.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/gateway.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/gemini.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/gemini_cua.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/grounded_openai.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/misc/integration_test_agent.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/misc/response_agent.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/openai_chat.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/operator.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/resolver.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/tests/conftest.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/tests/test_base_runtime.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/tests/test_claude.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/tests/test_gemini.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/tests/test_integration_test_agent.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/tests/test_openai.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/tests/test_operator.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/tests/test_resolver.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/tests/test_run_eval.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/agents/types.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/__main__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/analyze.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/build.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/cancel.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/convert/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/convert/base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/convert/harbor.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/convert/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/convert/tests/conftest.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/convert/tests/test_harbor.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/debug.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/dev.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/flows/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/flows/dev.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/flows/tasks.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/flows/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/flows/tests/test_dev.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/models.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/push.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/rft.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/rft_status.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_analyze.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_analyze_module.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_build_failure.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_build_module.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_cli_root.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_convert.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_debug.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_debug_directory_mode.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_dev.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_eval.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_eval_bedrock.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_init.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_mcp_server.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_push.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_push_happy.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_push_wrapper.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/api.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/args.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/build_display.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/build_logs.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/config.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/context.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/docker.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/env_check.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/git.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/interactive.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/lockfile.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/mcp.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/metadata.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/server.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/source_hash.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/tests/test_config.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/tests/test_docker.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/tests/test_docker_hints.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/tests/test_env_check.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/tests/test_environment.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/tests/test_git.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/tests/test_interactive_module.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/tests/test_logging_utils.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/tests/test_metadata.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/tests/test_source_hash.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/tests/test_tasks.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/validation.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/version_check.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/cli/utils/viewer.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/datasets/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/datasets/loader.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/datasets/runner.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/datasets/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/datasets/tests/test_loader.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/datasets/tests/test_utils.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/datasets/utils.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/connection.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/connectors/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/connectors/base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/connectors/local.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/connectors/mcp_config.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/connectors/openai.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/connectors/remote.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/environment.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/integrations/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/integrations/adk.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/integrations/anthropic.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/integrations/gemini.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/integrations/langchain.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/integrations/llamaindex.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/integrations/openai.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/mock.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/router.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/scenarios.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/tests/test_connection.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/tests/test_connectors.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/tests/test_environment.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/tests/test_integrations.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/tests/test_local_connectors.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/tests/test_scenarios.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/tests/test_session_id.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/tests/test_tools.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/types.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/utils/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/utils/formats.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/utils/schema.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/environment/utils/tool_wrappers.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/eval/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/eval/context.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/eval/display.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/eval/instrument.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/eval/manager.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/eval/parallel.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/eval/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/eval/tests/test_context.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/eval/tests/test_eval.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/eval/tests/test_manager.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/eval/tests/test_parallel.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/eval/tests/test_task.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/eval/types.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/eval/utils.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/native/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/native/chat.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/patches/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/patches/mcp_patches.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/patches/warnings.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/py.typed +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/server/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/server/context.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/server/low_level.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/server/router.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/server/server.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/server/tests/test_add_tool.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/server/tests/test_context.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/server/tests/test_mcp_server_handlers.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/server/tests/test_mcp_server_integration.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/server/tests/test_mcp_server_more.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/server/tests/test_prefix_naming.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/server/tests/test_run_wrapper.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/server/tests/test_server_extra.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/server/tests/test_sigterm_runner.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/services/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/services/chat_service.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/services/reply_metadata.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/services/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/services/tests/test_chat_service.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/settings.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/shared/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/shared/exceptions.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/shared/hints.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/shared/requests.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/shared/tests/test_exceptions.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/shared/tests/test_hints.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/telemetry/exporter.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/telemetry/tests/test_eval_telemetry.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/telemetry/tests/test_exporter.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/telemetry/tests/test_instrument.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/agent.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/coding/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/coding/apply_patch.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/coding/bash.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/coding/gemini_edit.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/coding/gemini_shell.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/coding/session.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/coding/shell.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/coding/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/coding/tests/test_apply_patch.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/coding/tests/test_bash.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/coding/tests/test_bash_extended.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/coding/tests/test_bash_integration.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/coding/tests/test_edit.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/coding/tests/test_gemini_tools.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/coding/tests/test_shell.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/computer/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/computer/anthropic.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/computer/gemini.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/computer/glm.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/computer/hud.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/computer/openai.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/computer/qwen.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/computer/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/computer/tests/test_compression.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/computer/tests/test_computer.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/computer/tests/test_computer_actions.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/computer/tests/test_glm_computer.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/elicitation.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/executors/base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/executors/tests/test_base_executor.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/filesystem/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/filesystem/base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/filesystem/gemini.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/filesystem/glob.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/filesystem/grep.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/filesystem/list.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/filesystem/read.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/filesystem/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/filesystem/tests/test_glob.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/filesystem/tests/test_grep.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/filesystem/tests/test_list.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/filesystem/tests/test_read.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/grounding/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/grounding/config.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/grounding/grounded_tool.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/grounding/grounder.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/grounding/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/hosted/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/hosted/base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/hosted/code_execution.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/hosted/google_search.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/hosted/tool_search.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/hosted/url_context.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/hosted/web_fetch.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/hosted/web_search.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/jupyter.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/memory/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/memory/base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/memory/claude.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/memory/gemini.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/memory/session.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/memory/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/memory/tests/test_claude.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/memory/tests/test_gemini.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/memory/tests/test_session.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/native_types.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/playwright.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/response.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/submit.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/tests/test_agent_tool.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/tests/test_elicitation.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/tests/test_jupyter_tool.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/tests/test_native_tool_e2e.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/tests/test_native_types.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/tests/test_submit.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/tests/test_tools_init.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/tests/test_types.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/types.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/tools/utils.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/types.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/utils/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/utils/env.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/utils/mcp.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/utils/pretty_errors.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/utils/strict_schema.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/utils/tests/test_pretty_errors.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/utils/tests/test_tool_shorthand.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/utils/tool_shorthand.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.34}/hud/utils/types.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hud-python
|
|
3
|
-
Version: 0.5.33
|
|
3
|
+
Version: 0.5.34
|
|
4
4
|
Summary: SDK for the HUD platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hud-evals/hud-python
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
|
|
@@ -101,7 +101,7 @@ Description-Content-Type: text/markdown
|
|
|
101
101
|
</picture>
|
|
102
102
|
</div>
|
|
103
103
|
|
|
104
|
-
|
|
104
|
+
HUD is a platform for building RL environments for AI agents. Define agent-callable tools, write evaluation scenarios, run evals at scale, and train models on the results.
|
|
105
105
|
|
|
106
106
|
To learn more, check out our [Documentation](https://docs.hud.ai) and [API Reference](https://docs.hud.ai/reference).
|
|
107
107
|
|
|
@@ -110,15 +110,14 @@ To learn more, check out our [Documentation](https://docs.hud.ai) and [API Refer
|
|
|
110
110
|
[](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
|
|
111
111
|
[](https://discord.gg/wkjtmHYYjm)
|
|
112
112
|
[](https://x.com/intent/user?screen_name=hud_evals)
|
|
113
|
-
[](https://shop.hud.ai)
|
|
114
113
|
[](https://scarf.sh)
|
|
115
114
|
[](https://docs.hud.ai)
|
|
116
115
|
|
|
117
116
|
## Install
|
|
118
117
|
|
|
119
118
|
```bash
|
|
120
|
-
|
|
121
|
-
|
|
119
|
+
# Install CLI (recommended)
|
|
120
|
+
uv tool install hud-python --python 3.12
|
|
122
121
|
|
|
123
122
|
Get your API key at [hud.ai](https://hud.ai) and set it:
|
|
124
123
|
|
|
@@ -126,65 +125,88 @@ Get your API key at [hud.ai](https://hud.ai) and set it:
|
|
|
126
125
|
export HUD_API_KEY=your-key-here
|
|
127
126
|
```
|
|
128
127
|
|
|
129
|
-
|
|
128
|
+
Get your API key at [hud.ai/project/api-keys](https://hud.ai/project/api-keys).
|
|
129
|
+
|
|
130
|
+
> Or install as a library: `pip install hud-python`
|
|
130
131
|
|
|
131
132
|

|
|
132
133
|
|
|
133
|
-
##
|
|
134
|
+
## Environments
|
|
134
135
|
|
|
135
|
-
|
|
136
|
+
An environment is the harness an agent operates in. It packages tools (functions agents can call) and scenarios (how agents are evaluated) into a single deployable unit. Each environment spins up fresh and isolated for every evaluation.
|
|
136
137
|
|
|
137
|
-
|
|
138
|
+
```python
|
|
139
|
+
from hud import Environment
|
|
140
|
+
|
|
141
|
+
env = Environment("my-env")
|
|
142
|
+
|
|
143
|
+
@env.scenario("count")
|
|
144
|
+
async def count(word: str, letter: str):
|
|
145
|
+
# PROMPT — send a question to the agent.
|
|
146
|
+
# The agent runs its reasoning loop and returns an answer.
|
|
147
|
+
answer = yield f"How many '{letter}' in '{word}'?"
|
|
148
|
+
|
|
149
|
+
# SCORE — check the agent's answer against the correct count.
|
|
150
|
+
# Return a reward: 1.0 for correct, 0.0 for wrong.
|
|
151
|
+
correct = str(word.lower().count(letter.lower()))
|
|
152
|
+
yield 1.0 if answer and correct in answer else 0.0
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
A scenario has two yields. The first sends a prompt — the agent runs between the yields, calling tools and reasoning. The second checks the result and returns a reward (0.0 to 1.0). → [Core Concepts](https://docs.hud.ai/concepts)
|
|
156
|
+
|
|
157
|
+
## Run an Agent
|
|
138
158
|
|
|
139
159
|
```python
|
|
140
|
-
|
|
141
|
-
import os
|
|
160
|
+
import hud
|
|
161
|
+
from hud.agents import create_agent
|
|
142
162
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
api_key=os.environ["HUD_API_KEY"]
|
|
146
|
-
)
|
|
163
|
+
task = env("count", word="strawberry", letter="r")
|
|
164
|
+
agent = create_agent("claude-sonnet-4-5")
|
|
147
165
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
)
|
|
166
|
+
async with hud.eval(task) as ctx:
|
|
167
|
+
result = await agent.run(ctx)
|
|
168
|
+
|
|
169
|
+
print(f"Reward: {result.reward}") # 1.0 if agent answers "3"
|
|
152
170
|
```
|
|
153
171
|
|
|
154
|
-
|
|
172
|
+
`create_agent()` picks the right agent class and native tools for each model. → [Environments](https://docs.hud.ai/quick-links/environments)
|
|
155
173
|
|
|
156
|
-
|
|
174
|
+
## Workflow
|
|
157
175
|
|
|
158
|
-
|
|
176
|
+
```bash
|
|
177
|
+
hud init my-env # Scaffold environment
|
|
178
|
+
cd my-env
|
|
179
|
+
hud dev env:env -w env.py # Run locally with hot-reload
|
|
180
|
+
hud eval tasks.py claude # Run evals locally
|
|
181
|
+
hud deploy # Deploy to platform
|
|
182
|
+
hud sync tasks my-taskset # Sync tasks to platform
|
|
183
|
+
```
|
|
159
184
|
|
|
160
|
-
|
|
161
|
-
from hud import Environment
|
|
185
|
+
Once deployed, run evals at scale from the CLI or the [platform UI](https://hud.ai):
|
|
162
186
|
|
|
163
|
-
|
|
187
|
+
```bash
|
|
188
|
+
hud eval my-taskset claude --remote --full
|
|
189
|
+
```
|
|
164
190
|
|
|
165
|
-
|
|
166
|
-
def add(a: int, b: int) -> int:
|
|
167
|
-
"""Add two numbers."""
|
|
168
|
-
return a + b
|
|
191
|
+
→ [Deploy](https://docs.hud.ai/quick-links/deploy) · [Testing & Evaluation](https://docs.hud.ai/advanced/testing-environments)
|
|
169
192
|
|
|
170
|
-
|
|
171
|
-
async def solve_math(problem: str, answer: int):
|
|
172
|
-
response = yield problem # Prompt
|
|
173
|
-
yield 1.0 if str(answer) in response else 0.0 # Reward
|
|
193
|
+
## Pre-built Tools
|
|
174
194
|
|
|
175
|
-
|
|
176
|
-
# Your agent logic here - call tools, get response
|
|
177
|
-
result = await ctx.call_tool("add", a=2, b=2)
|
|
178
|
-
await ctx.submit(f"The answer is {result}")
|
|
195
|
+
HUD ships tools for computer control, shell execution, file editing, browser automation, and web search. Add them to any environment:
|
|
179
196
|
|
|
180
|
-
|
|
197
|
+
```python
|
|
198
|
+
from hud.tools import AnthropicComputerTool, BashTool, EditTool
|
|
199
|
+
|
|
200
|
+
env.add_tool(AnthropicComputerTool()) # Mouse, keyboard, screenshots
|
|
201
|
+
env.add_tool(BashTool()) # Persistent bash shell
|
|
202
|
+
env.add_tool(EditTool()) # File viewing and editing
|
|
181
203
|
```
|
|
182
204
|
|
|
183
|
-
|
|
205
|
+
HUD adapts each tool to the model's native format — Claude gets `computer_20250124`, OpenAI gets `computer_use_preview`, Gemini gets `ComputerUse`. → [Tools Reference](https://docs.hud.ai/tools/computer)
|
|
184
206
|
|
|
185
|
-
|
|
207
|
+
## Model Gateway
|
|
186
208
|
|
|
187
|
-
|
|
209
|
+
Use Claude, GPT, Gemini, or Grok through one OpenAI-compatible endpoint:
|
|
188
210
|
|
|
189
211
|
```python
|
|
190
212
|
from openai import AsyncOpenAI
|
|
@@ -195,31 +217,13 @@ client = AsyncOpenAI(
|
|
|
195
217
|
api_key=os.environ["HUD_API_KEY"]
|
|
196
218
|
)
|
|
197
219
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
messages=[{"role": "user", "content": ctx.prompt}],
|
|
203
|
-
tools=ctx.tools # Environment tools available to the model
|
|
204
|
-
)
|
|
205
|
-
await ctx.submit(response.choices[0].message.content)
|
|
206
|
-
```
|
|
207
|
-
|
|
208
|
-
**Variants** test configurations. **Groups** repeat for distribution. Results stream to [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/evals)
|
|
209
|
-
|
|
210
|
-
### Deploy & Train
|
|
211
|
-
|
|
212
|
-
Push to GitHub, connect on hud.ai, run at scale:
|
|
213
|
-
|
|
214
|
-
```bash
|
|
215
|
-
hud init # Scaffold environment
|
|
216
|
-
git push # Push to GitHub
|
|
217
|
-
# Connect on hud.ai → New → Environment
|
|
218
|
-
hud eval my-eval --model gpt-4o --group-size 100
|
|
219
|
-
# Or create and run tasks on the platform
|
|
220
|
+
response = await client.chat.completions.create(
|
|
221
|
+
model="claude-sonnet-4-5", # or gpt-4o, gemini-2.5-pro (https://hud.ai/models)
|
|
222
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
223
|
+
)
|
|
220
224
|
```
|
|
221
225
|
|
|
222
|
-
Every
|
|
226
|
+
Every call is traced at [hud.ai](https://hud.ai). → [Models](https://docs.hud.ai/quick-links/models)
|
|
223
227
|
|
|
224
228
|
## Links
|
|
225
229
|
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
</picture>
|
|
7
7
|
</div>
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
HUD is a platform for building RL environments for AI agents. Define agent-callable tools, write evaluation scenarios, run evals at scale, and train models on the results.
|
|
10
10
|
|
|
11
11
|
To learn more, check out our [Documentation](https://docs.hud.ai) and [API Reference](https://docs.hud.ai/reference).
|
|
12
12
|
|
|
@@ -15,15 +15,14 @@ To learn more, check out our [Documentation](https://docs.hud.ai) and [API Refer
|
|
|
15
15
|
[](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
|
|
16
16
|
[](https://discord.gg/wkjtmHYYjm)
|
|
17
17
|
[](https://x.com/intent/user?screen_name=hud_evals)
|
|
18
|
-
[](https://shop.hud.ai)
|
|
19
18
|
[](https://scarf.sh)
|
|
20
19
|
[](https://docs.hud.ai)
|
|
21
20
|
|
|
22
21
|
## Install
|
|
23
22
|
|
|
24
23
|
```bash
|
|
25
|
-
|
|
26
|
-
|
|
24
|
+
# Install CLI (recommended)
|
|
25
|
+
uv tool install hud-python --python 3.12
|
|
27
26
|
|
|
28
27
|
Get your API key at [hud.ai](https://hud.ai) and set it:
|
|
29
28
|
|
|
@@ -31,65 +30,88 @@ Get your API key at [hud.ai](https://hud.ai) and set it:
|
|
|
31
30
|
export HUD_API_KEY=your-key-here
|
|
32
31
|
```
|
|
33
32
|
|
|
34
|
-
|
|
33
|
+
The direct link to your API keys is [hud.ai/project/api-keys](https://hud.ai/project/api-keys).
|
|
34
|
+
|
|
35
|
+
> Or install as a library: `pip install hud-python`
|
|
35
36
|
|
|
36
37
|

|
|
37
38
|
|
|
38
|
-
##
|
|
39
|
+
## Environments
|
|
39
40
|
|
|
40
|
-
|
|
41
|
+
An environment is the harness an agent operates in. It packages tools (functions agents can call) and scenarios (how agents are evaluated) into a single deployable unit. Each environment spins up fresh and isolated for every evaluation.
|
|
41
42
|
|
|
42
|
-
|
|
43
|
+
```python
|
|
44
|
+
from hud import Environment
|
|
45
|
+
|
|
46
|
+
env = Environment("my-env")
|
|
47
|
+
|
|
48
|
+
@env.scenario("count")
|
|
49
|
+
async def count(word: str, letter: str):
|
|
50
|
+
# PROMPT — send a question to the agent.
|
|
51
|
+
# The agent runs its reasoning loop and returns an answer.
|
|
52
|
+
answer = yield f"How many '{letter}' in '{word}'?"
|
|
53
|
+
|
|
54
|
+
# SCORE — check the agent's answer against the correct count.
|
|
55
|
+
# Yield a reward: 1.0 for correct, 0.0 for wrong.
|
|
56
|
+
correct = str(word.lower().count(letter.lower()))
|
|
57
|
+
yield 1.0 if answer and correct in answer else 0.0
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
A scenario has two yields. The first sends a prompt — the agent runs between the yields, calling tools and reasoning. The second scores the result and yields a reward (0.0 to 1.0). → [Core Concepts](https://docs.hud.ai/concepts)
|
|
61
|
+
|
|
62
|
+
## Run an Agent
|
|
43
63
|
|
|
44
64
|
```python
|
|
45
|
-
|
|
46
|
-
import
|
|
65
|
+
import hud
|
|
66
|
+
from hud.agents import create_agent
|
|
47
67
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
api_key=os.environ["HUD_API_KEY"]
|
|
51
|
-
)
|
|
68
|
+
task = env("count", word="strawberry", letter="r")
|
|
69
|
+
agent = create_agent("claude-sonnet-4-5")
|
|
52
70
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
)
|
|
71
|
+
async with hud.eval(task) as ctx:
|
|
72
|
+
result = await agent.run(ctx)
|
|
73
|
+
|
|
74
|
+
print(f"Reward: {result.reward}") # 1.0 if agent answers "3"
|
|
57
75
|
```
|
|
58
76
|
|
|
59
|
-
|
|
77
|
+
`create_agent()` picks the right agent class and native tools for each model. → [Environments](https://docs.hud.ai/quick-links/environments)
|
|
60
78
|
|
|
61
|
-
|
|
79
|
+
## Workflow
|
|
62
80
|
|
|
63
|
-
|
|
81
|
+
```bash
|
|
82
|
+
hud init my-env # Scaffold environment
|
|
83
|
+
cd my-env
|
|
84
|
+
hud dev env:env -w env.py # Run locally with hot-reload
|
|
85
|
+
hud eval tasks.py claude # Run evals locally
|
|
86
|
+
hud deploy # Deploy to platform
|
|
87
|
+
hud sync tasks my-taskset # Sync tasks to platform
|
|
88
|
+
```
|
|
64
89
|
|
|
65
|
-
|
|
66
|
-
from hud import Environment
|
|
90
|
+
Once deployed, run evals at scale from the CLI or the [platform UI](https://hud.ai):
|
|
67
91
|
|
|
68
|
-
|
|
92
|
+
```bash
|
|
93
|
+
hud eval my-taskset claude --remote --full
|
|
94
|
+
```
|
|
69
95
|
|
|
70
|
-
|
|
71
|
-
def add(a: int, b: int) -> int:
|
|
72
|
-
"""Add two numbers."""
|
|
73
|
-
return a + b
|
|
96
|
+
→ [Deploy](https://docs.hud.ai/quick-links/deploy) · [Testing & Evaluation](https://docs.hud.ai/advanced/testing-environments)
|
|
74
97
|
|
|
75
|
-
|
|
76
|
-
async def solve_math(problem: str, answer: int):
|
|
77
|
-
response = yield problem # Prompt
|
|
78
|
-
yield 1.0 if str(answer) in response else 0.0 # Reward
|
|
98
|
+
## Pre-built Tools
|
|
79
99
|
|
|
80
|
-
|
|
81
|
-
# Your agent logic here - call tools, get response
|
|
82
|
-
result = await ctx.call_tool("add", a=2, b=2)
|
|
83
|
-
await ctx.submit(f"The answer is {result}")
|
|
100
|
+
HUD ships tools for computer control, shell execution, file editing, browser automation, and web search. Add them to any environment:
|
|
84
101
|
|
|
85
|
-
|
|
102
|
+
```python
|
|
103
|
+
from hud.tools import AnthropicComputerTool, BashTool, EditTool
|
|
104
|
+
|
|
105
|
+
env.add_tool(AnthropicComputerTool()) # Mouse, keyboard, screenshots
|
|
106
|
+
env.add_tool(BashTool()) # Persistent bash shell
|
|
107
|
+
env.add_tool(EditTool()) # File viewing and editing
|
|
86
108
|
```
|
|
87
109
|
|
|
88
|
-
|
|
110
|
+
HUD adapts each tool to the model's native format — Claude gets `computer_20250124`, OpenAI gets `computer_use_preview`, Gemini gets `ComputerUse`. → [Tools Reference](https://docs.hud.ai/tools/computer)
|
|
89
111
|
|
|
90
|
-
|
|
112
|
+
## Model Gateway
|
|
91
113
|
|
|
92
|
-
|
|
114
|
+
Use Claude, GPT, Gemini, or Grok through one OpenAI-compatible endpoint:
|
|
93
115
|
|
|
94
116
|
```python
|
|
95
117
|
from openai import AsyncOpenAI
|
|
@@ -100,31 +122,13 @@ client = AsyncOpenAI(
|
|
|
100
122
|
api_key=os.environ["HUD_API_KEY"]
|
|
101
123
|
)
|
|
102
124
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
messages=[{"role": "user", "content": ctx.prompt}],
|
|
108
|
-
tools=ctx.tools # Environment tools available to the model
|
|
109
|
-
)
|
|
110
|
-
await ctx.submit(response.choices[0].message.content)
|
|
111
|
-
```
|
|
112
|
-
|
|
113
|
-
**Variants** test configurations. **Groups** repeat for distribution. Results stream to [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/evals)
|
|
114
|
-
|
|
115
|
-
### Deploy & Train
|
|
116
|
-
|
|
117
|
-
Push to GitHub, connect on hud.ai, run at scale:
|
|
118
|
-
|
|
119
|
-
```bash
|
|
120
|
-
hud init # Scaffold environment
|
|
121
|
-
git push # Push to GitHub
|
|
122
|
-
# Connect on hud.ai → New → Environment
|
|
123
|
-
hud eval my-eval --model gpt-4o --group-size 100
|
|
124
|
-
# Or create and run tasks on the platform
|
|
125
|
+
response = await client.chat.completions.create(
|
|
126
|
+
model="claude-sonnet-4-5", # or gpt-4o, gemini-2.5-pro (https://hud.ai/models)
|
|
127
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
128
|
+
)
|
|
125
129
|
```
|
|
126
130
|
|
|
127
|
-
Every
|
|
131
|
+
Every call is traced at [hud.ai](https://hud.ai). → [Models](https://docs.hud.ai/quick-links/models)
|
|
128
132
|
|
|
129
133
|
## Links
|
|
130
134
|
|
|
@@ -485,10 +485,9 @@ class OpenAIAgent(MCPAgent):
|
|
|
485
485
|
type="computer_screenshot",
|
|
486
486
|
image_url=f"data:image/png;base64,{screenshot}",
|
|
487
487
|
),
|
|
488
|
-
acknowledged_safety_checks=(
|
|
489
|
-
acknowledged_checks if acknowledged_checks else None
|
|
490
|
-
),
|
|
491
488
|
)
|
|
489
|
+
if acknowledged_checks:
|
|
490
|
+
output_payload["acknowledged_safety_checks"] = acknowledged_checks
|
|
492
491
|
computer_outputs.append(output_payload)
|
|
493
492
|
self.pending_call_id = None
|
|
494
493
|
self.pending_safety_checks = []
|
|
@@ -11,7 +11,7 @@ from rich.panel import Panel
|
|
|
11
11
|
# Create the main Typer app
|
|
12
12
|
app = typer.Typer(
|
|
13
13
|
name="hud",
|
|
14
|
-
help="
|
|
14
|
+
help="HUD CLI - build, test, and deploy evaluation environments",
|
|
15
15
|
add_completion=False,
|
|
16
16
|
rich_markup_mode="rich",
|
|
17
17
|
pretty_exceptions_enable=False,
|
|
@@ -40,8 +40,8 @@ from .init import init_command # noqa: E402
|
|
|
40
40
|
from .link import link_command # noqa: E402
|
|
41
41
|
from .models import models_command # noqa: E402
|
|
42
42
|
from .push import push_command # noqa: E402
|
|
43
|
-
from .
|
|
44
|
-
from .
|
|
43
|
+
from .scenario import scenario_app # noqa: E402
|
|
44
|
+
from .sync import sync_app # noqa: E402
|
|
45
45
|
|
|
46
46
|
_EXTRA_ARGS = {"allow_extra_args": True, "ignore_unknown_options": True}
|
|
47
47
|
|
|
@@ -50,7 +50,7 @@ app.command(name="debug", context_settings=_EXTRA_ARGS)(debug_command)
|
|
|
50
50
|
app.command(name="dev", context_settings=_EXTRA_ARGS)(dev_command)
|
|
51
51
|
app.command(name="build", context_settings=_EXTRA_ARGS)(build_command)
|
|
52
52
|
app.command(name="deploy")(deploy_command)
|
|
53
|
-
app.command(name="link")(link_command)
|
|
53
|
+
app.command(name="link", hidden=True)(link_command)
|
|
54
54
|
app.command(name="eval")(eval_command)
|
|
55
55
|
app.command(name="push", hidden=True)(push_command)
|
|
56
56
|
app.command(name="init")(init_command)
|
|
@@ -108,11 +108,11 @@ def version() -> None:
|
|
|
108
108
|
console.print("HUD CLI version: [cyan]unknown[/cyan]")
|
|
109
109
|
|
|
110
110
|
|
|
111
|
-
#
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
app.add_typer(
|
|
111
|
+
# Scenario subcommand group
|
|
112
|
+
app.add_typer(scenario_app, name="scenario")
|
|
113
|
+
|
|
114
|
+
# Sync subcommand group
|
|
115
|
+
app.add_typer(sync_app, name="sync")
|
|
116
116
|
|
|
117
117
|
|
|
118
118
|
# ---------------------------------------------------------------------------
|
|
@@ -140,7 +140,7 @@ def main() -> None:
|
|
|
140
140
|
if len(sys.argv) == 1 or (len(sys.argv) == 2 and sys.argv[1] in ["--help", "-h"]):
|
|
141
141
|
console.print(
|
|
142
142
|
Panel.fit(
|
|
143
|
-
"[bold cyan]
|
|
143
|
+
"[bold cyan]HUD CLI[/bold cyan]\nBuild, test, and deploy environments",
|
|
144
144
|
border_style="cyan",
|
|
145
145
|
)
|
|
146
146
|
)
|
|
@@ -150,10 +150,8 @@ def main() -> None:
|
|
|
150
150
|
)
|
|
151
151
|
console.print(" 2. Start dev server: [cyan]hud dev[/cyan]")
|
|
152
152
|
console.print(" 3. Deploy to HUD platform: [cyan]hud deploy[/cyan]")
|
|
153
|
-
console.print(" 4.
|
|
154
|
-
console.print("
|
|
155
|
-
console.print(" [cyan]hud rft run tasks.jsonl[/cyan] Launch an RFT training job")
|
|
156
|
-
console.print(" [cyan]hud rft status <model-id>[/cyan] Check training status\n")
|
|
153
|
+
console.print(" 4. Sync tasks: [cyan]hud sync tasks my-taskset[/cyan]")
|
|
154
|
+
console.print(" 5. Run evaluations: [cyan]hud eval tasks.py claude[/cyan]\n")
|
|
157
155
|
|
|
158
156
|
app()
|
|
159
157
|
except typer.Exit as e:
|