hud-python 0.5.33__tar.gz → 0.5.35__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hud_python-0.5.33 → hud_python-0.5.35}/PKG-INFO +68 -64
- {hud_python-0.5.33 → hud_python-0.5.35}/README.md +67 -63
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/base.py +14 -15
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/claude.py +6 -2
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/gemini.py +6 -2
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/openai.py +8 -5
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/__init__.py +19 -14
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/analyze.py +36 -17
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/build.py +226 -462
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/debug.py +5 -3
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/deploy.py +131 -61
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/eval.py +35 -9
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/flows/init.py +74 -41
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/flows/templates.py +2 -2
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/init.py +2 -3
- hud_python-0.5.35/hud/cli/link.py +38 -0
- hud_python-0.5.35/hud/cli/rl.py +372 -0
- hud_python-0.5.35/hud/cli/scenario.py +187 -0
- hud_python-0.5.35/hud/cli/sync.py +969 -0
- hud_python-0.5.35/hud/cli/tests/test_analysis_utils.py +38 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_analyze.py +8 -8
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_analyze_module.py +4 -4
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_build.py +70 -27
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_build_failure.py +2 -2
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_deploy.py +4 -5
- hud_python-0.5.35/hud/cli/tests/test_lockfile_utils.py +72 -0
- hud_python-0.5.35/hud/cli/tests/test_rl.py +154 -0
- hud_python-0.5.35/hud/cli/tests/test_scenario.py +283 -0
- hud_python-0.5.35/hud/cli/tests/test_sync.py +1433 -0
- hud_python-0.5.33/hud/cli/utils/mcp.py → hud_python-0.5.35/hud/cli/utils/analysis.py +57 -15
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/build_display.py +1 -3
- hud_python-0.5.35/hud/cli/utils/collect.py +292 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/environment.py +6 -28
- hud_python-0.5.35/hud/cli/utils/lockfile.py +169 -0
- hud_python-0.5.35/hud/cli/utils/name_check.py +140 -0
- hud_python-0.5.35/hud/cli/utils/project_config.py +106 -0
- hud_python-0.5.35/hud/cli/utils/taskset.py +83 -0
- hud_python-0.5.35/hud/cli/utils/tests/test_collect.py +283 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/connectors/mcp_config.py +51 -8
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/test_connectors.py +76 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/task.py +43 -1
- hud_python-0.5.35/hud/native/__init__.py +36 -0
- hud_python-0.5.35/hud/native/graders.py +581 -0
- hud_python-0.5.35/hud/native/permissions.py +170 -0
- hud_python-0.5.35/hud/native/skills.py +127 -0
- hud_python-0.5.35/hud/native/tests/__init__.py +1 -0
- hud_python-0.5.35/hud/native/tests/test_graders.py +233 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/patches/mcp_patches.py +1 -1
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/services/chat.py +6 -5
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/services/tests/test_chat.py +6 -23
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/telemetry/instrument.py +2 -5
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/edit.py +6 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/utils.py +11 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/base.py +2 -2
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/claude.py +2 -3
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/gemini.py +2 -3
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/session.py +1 -3
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/hud_console.py +96 -8
- hud_python-0.5.35/hud/utils/serialization.py +26 -0
- hud_python-0.5.35/hud/utils/tests/test_serialization.py +31 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/tests/test_version.py +1 -1
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/version.py +1 -1
- {hud_python-0.5.33 → hud_python-0.5.35}/pyproject.toml +2 -2
- hud_python-0.5.33/hud/cli/link.py +0 -199
- hud_python-0.5.33/hud/cli/rft.py +0 -350
- hud_python-0.5.33/hud/cli/rft_status.py +0 -162
- hud_python-0.5.33/hud/cli/utils/lockfile.py +0 -36
- hud_python-0.5.33/hud/native/__init__.py +0 -1
- {hud_python-0.5.33 → hud_python-0.5.35}/.gitignore +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/LICENSE +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/examples/README.md +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/__main__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/gateway.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/gemini_cua.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/grounded_openai.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/misc/integration_test_agent.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/misc/response_agent.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/openai_chat.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/operator.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/resolver.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/conftest.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_base_runtime.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_claude.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_gemini.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_integration_test_agent.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_openai.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_operator.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_resolver.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/tests/test_run_eval.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/agents/types.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/__main__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/cancel.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/convert/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/convert/base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/convert/harbor.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/convert/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/convert/tests/conftest.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/convert/tests/test_harbor.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/dev.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/flows/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/flows/dev.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/flows/tasks.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/flows/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/flows/tests/test_dev.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/models.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/push.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_build_module.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_cli_root.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_convert.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_debug.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_debug_directory_mode.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_dev.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_eval.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_eval_bedrock.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_init.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_mcp_server.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_push.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_push_happy.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_push_wrapper.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/api.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/args.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/build_logs.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/config.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/context.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/docker.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/env_check.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/git.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/interactive.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/metadata.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/server.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/source_hash.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_config.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_docker.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_docker_hints.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_env_check.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_environment.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_git.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_interactive_module.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_logging_utils.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_metadata.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_source_hash.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/tests/test_tasks.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/validation.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/version_check.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/cli/utils/viewer.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/datasets/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/datasets/loader.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/datasets/runner.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/datasets/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/datasets/tests/test_loader.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/datasets/tests/test_utils.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/datasets/utils.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/connection.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/connectors/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/connectors/base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/connectors/local.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/connectors/openai.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/connectors/remote.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/environment.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/integrations/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/integrations/adk.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/integrations/anthropic.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/integrations/gemini.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/integrations/langchain.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/integrations/llamaindex.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/integrations/openai.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/mock.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/router.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/scenarios.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/test_connection.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/test_environment.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/test_integrations.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/test_local_connectors.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/test_scenarios.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/test_session_id.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/tests/test_tools.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/types.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/utils/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/utils/formats.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/utils/schema.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/environment/utils/tool_wrappers.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/context.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/display.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/instrument.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/manager.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/parallel.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/tests/test_context.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/tests/test_eval.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/tests/test_manager.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/tests/test_parallel.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/tests/test_task.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/types.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/eval/utils.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/native/chat.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/patches/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/patches/warnings.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/py.typed +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/context.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/low_level.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/router.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/server.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_add_tool.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_context.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_mcp_server_handlers.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_mcp_server_integration.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_mcp_server_more.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_prefix_naming.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_run_wrapper.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_server_extra.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/server/tests/test_sigterm_runner.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/services/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/services/chat_service.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/services/reply_metadata.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/services/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/services/tests/test_chat_service.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/settings.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/shared/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/shared/exceptions.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/shared/hints.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/shared/requests.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/shared/tests/test_exceptions.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/shared/tests/test_hints.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/telemetry/exporter.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/telemetry/tests/test_eval_telemetry.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/telemetry/tests/test_exporter.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/telemetry/tests/test_instrument.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/agent.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/apply_patch.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/bash.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/gemini_edit.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/gemini_shell.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/session.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/shell.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/tests/test_apply_patch.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/tests/test_bash.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/tests/test_bash_extended.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/tests/test_bash_integration.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/tests/test_edit.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/tests/test_gemini_tools.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/coding/tests/test_shell.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/anthropic.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/gemini.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/glm.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/hud.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/openai.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/qwen.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/tests/test_compression.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/tests/test_computer.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/tests/test_computer_actions.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/computer/tests/test_glm_computer.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/elicitation.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/executors/base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/executors/tests/test_base_executor.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/gemini.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/glob.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/grep.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/list.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/read.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/tests/test_glob.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/tests/test_grep.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/tests/test_list.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/filesystem/tests/test_read.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/grounding/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/grounding/config.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/grounding/grounded_tool.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/grounding/grounder.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/grounding/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/hosted/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/hosted/base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/hosted/code_execution.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/hosted/google_search.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/hosted/tool_search.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/hosted/url_context.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/hosted/web_fetch.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/hosted/web_search.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/jupyter.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/tests/test_claude.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/tests/test_gemini.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/memory/tests/test_session.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/native_types.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/playwright.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/response.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/submit.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_agent_tool.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_elicitation.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_jupyter_tool.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_native_tool_e2e.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_native_types.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_submit.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_tools_init.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_types.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/types.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/tools/utils.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/types.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/env.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/mcp.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/pretty_errors.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/strict_schema.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/tests/test_pretty_errors.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/tests/test_tool_shorthand.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/tool_shorthand.py +0 -0
- {hud_python-0.5.33 → hud_python-0.5.35}/hud/utils/types.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hud-python
|
|
3
|
-
Version: 0.5.33
|
|
3
|
+
Version: 0.5.35
|
|
4
4
|
Summary: SDK for the HUD platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hud-evals/hud-python
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
|
|
@@ -101,7 +101,7 @@ Description-Content-Type: text/markdown
|
|
|
101
101
|
</picture>
|
|
102
102
|
</div>
|
|
103
103
|
|
|
104
|
-
|
|
104
|
+
HUD is a platform for building RL environments for AI agents. Define agent-callable tools, write evaluation scenarios, run evals at scale, and train models on the results.
|
|
105
105
|
|
|
106
106
|
To learn more, check out our [Documentation](https://docs.hud.ai) and [API Reference](https://docs.hud.ai/reference).
|
|
107
107
|
|
|
@@ -110,15 +110,14 @@ To learn more, check out our [Documentation](https://docs.hud.ai) and [API Refer
|
|
|
110
110
|
[](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
|
|
111
111
|
[](https://discord.gg/wkjtmHYYjm)
|
|
112
112
|
[](https://x.com/intent/user?screen_name=hud_evals)
|
|
113
|
-
[](https://shop.hud.ai)
|
|
114
113
|
[](https://scarf.sh)
|
|
115
114
|
[](https://docs.hud.ai)
|
|
116
115
|
|
|
117
116
|
## Install
|
|
118
117
|
|
|
119
118
|
```bash
|
|
120
|
-
|
|
121
|
-
|
|
119
|
+
# Install CLI (recommended)
|
|
120
|
+
uv tool install hud-python --python 3.12
|
|
122
121
|
|
|
123
122
|
Get your API key at [hud.ai](https://hud.ai) and set it:
|
|
124
123
|
|
|
@@ -126,65 +125,88 @@ Get your API key at [hud.ai](https://hud.ai) and set it:
|
|
|
126
125
|
export HUD_API_KEY=your-key-here
|
|
127
126
|
```
|
|
128
127
|
|
|
129
|
-
|
|
128
|
+
Get your API key at [hud.ai/project/api-keys](https://hud.ai/project/api-keys).
|
|
129
|
+
|
|
130
|
+
> Or install as a library: `pip install hud-python`
|
|
130
131
|
|
|
131
132
|

|
|
132
133
|
|
|
133
|
-
##
|
|
134
|
+
## Environments
|
|
134
135
|
|
|
135
|
-
|
|
136
|
+
An environment is the harness an agent operates in. It packages tools (functions agents can call) and scenarios (how agents are evaluated) into a single deployable unit. Each environment spins up fresh and isolated for every evaluation.
|
|
136
137
|
|
|
137
|
-
|
|
138
|
+
```python
|
|
139
|
+
from hud import Environment
|
|
140
|
+
|
|
141
|
+
env = Environment("my-env")
|
|
142
|
+
|
|
143
|
+
@env.scenario("count")
|
|
144
|
+
async def count(word: str, letter: str):
|
|
145
|
+
# PROMPT — send a question to the agent.
|
|
146
|
+
# The agent runs its reasoning loop and returns an answer.
|
|
147
|
+
answer = yield f"How many '{letter}' in '{word}'?"
|
|
148
|
+
|
|
149
|
+
# SCORE — check the agent's answer against the correct count.
|
|
150
|
+
# Return a reward: 1.0 for correct, 0.0 for wrong.
|
|
151
|
+
correct = str(word.lower().count(letter.lower()))
|
|
152
|
+
yield 1.0 if answer and correct in answer else 0.0
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
A scenario has two yields. The first sends a prompt — the agent runs between the yields, calling tools and reasoning. The second checks the result and returns a reward (0.0 to 1.0). → [Core Concepts](https://docs.hud.ai/concepts)
|
|
156
|
+
|
|
157
|
+
## Run an Agent
|
|
138
158
|
|
|
139
159
|
```python
|
|
140
|
-
|
|
141
|
-
import os
|
|
160
|
+
import hud
|
|
161
|
+
from hud.agents import create_agent
|
|
142
162
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
api_key=os.environ["HUD_API_KEY"]
|
|
146
|
-
)
|
|
163
|
+
task = env("count", word="strawberry", letter="r")
|
|
164
|
+
agent = create_agent("claude-sonnet-4-5")
|
|
147
165
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
)
|
|
166
|
+
async with hud.eval(task) as ctx:
|
|
167
|
+
result = await agent.run(ctx)
|
|
168
|
+
|
|
169
|
+
print(f"Reward: {result.reward}") # 1.0 if agent answers "3"
|
|
152
170
|
```
|
|
153
171
|
|
|
154
|
-
|
|
172
|
+
`create_agent()` picks the right agent class and native tools for each model. → [Environments](https://docs.hud.ai/quick-links/environments)
|
|
155
173
|
|
|
156
|
-
|
|
174
|
+
## Workflow
|
|
157
175
|
|
|
158
|
-
|
|
176
|
+
```bash
|
|
177
|
+
hud init my-env # Scaffold environment
|
|
178
|
+
cd my-env
|
|
179
|
+
hud dev env:env -w env.py # Run locally with hot-reload
|
|
180
|
+
hud eval tasks.py claude # Run evals locally
|
|
181
|
+
hud deploy # Deploy to platform
|
|
182
|
+
hud sync tasks my-taskset # Sync tasks to platform
|
|
183
|
+
```
|
|
159
184
|
|
|
160
|
-
|
|
161
|
-
from hud import Environment
|
|
185
|
+
Once deployed, run evals at scale from the CLI or the [platform UI](https://hud.ai):
|
|
162
186
|
|
|
163
|
-
|
|
187
|
+
```bash
|
|
188
|
+
hud eval my-taskset claude --remote --full
|
|
189
|
+
```
|
|
164
190
|
|
|
165
|
-
|
|
166
|
-
def add(a: int, b: int) -> int:
|
|
167
|
-
"""Add two numbers."""
|
|
168
|
-
return a + b
|
|
191
|
+
→ [Deploy](https://docs.hud.ai/quick-links/deploy) · [Testing & Evaluation](https://docs.hud.ai/advanced/testing-environments)
|
|
169
192
|
|
|
170
|
-
|
|
171
|
-
async def solve_math(problem: str, answer: int):
|
|
172
|
-
response = yield problem # Prompt
|
|
173
|
-
yield 1.0 if str(answer) in response else 0.0 # Reward
|
|
193
|
+
## Pre-built Tools
|
|
174
194
|
|
|
175
|
-
|
|
176
|
-
# Your agent logic here - call tools, get response
|
|
177
|
-
result = await ctx.call_tool("add", a=2, b=2)
|
|
178
|
-
await ctx.submit(f"The answer is {result}")
|
|
195
|
+
HUD ships tools for computer control, shell execution, file editing, browser automation, and web search. Add them to any environment:
|
|
179
196
|
|
|
180
|
-
|
|
197
|
+
```python
|
|
198
|
+
from hud.tools import AnthropicComputerTool, BashTool, EditTool
|
|
199
|
+
|
|
200
|
+
env.add_tool(AnthropicComputerTool()) # Mouse, keyboard, screenshots
|
|
201
|
+
env.add_tool(BashTool()) # Persistent bash shell
|
|
202
|
+
env.add_tool(EditTool()) # File viewing and editing
|
|
181
203
|
```
|
|
182
204
|
|
|
183
|
-
|
|
205
|
+
HUD adapts each tool to the model's native format — Claude gets `computer_20250124`, OpenAI gets `computer_use_preview`, Gemini gets `ComputerUse`. → [Tools Reference](https://docs.hud.ai/tools/computer)
|
|
184
206
|
|
|
185
|
-
|
|
207
|
+
## Model Gateway
|
|
186
208
|
|
|
187
|
-
|
|
209
|
+
Use Claude, GPT, Gemini, or Grok through one OpenAI-compatible endpoint:
|
|
188
210
|
|
|
189
211
|
```python
|
|
190
212
|
from openai import AsyncOpenAI
|
|
@@ -195,31 +217,13 @@ client = AsyncOpenAI(
|
|
|
195
217
|
api_key=os.environ["HUD_API_KEY"]
|
|
196
218
|
)
|
|
197
219
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
messages=[{"role": "user", "content": ctx.prompt}],
|
|
203
|
-
tools=ctx.tools # Environment tools available to the model
|
|
204
|
-
)
|
|
205
|
-
await ctx.submit(response.choices[0].message.content)
|
|
206
|
-
```
|
|
207
|
-
|
|
208
|
-
**Variants** test configurations. **Groups** repeat for distribution. Results stream to [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/evals)
|
|
209
|
-
|
|
210
|
-
### Deploy & Train
|
|
211
|
-
|
|
212
|
-
Push to GitHub, connect on hud.ai, run at scale:
|
|
213
|
-
|
|
214
|
-
```bash
|
|
215
|
-
hud init # Scaffold environment
|
|
216
|
-
git push # Push to GitHub
|
|
217
|
-
# Connect on hud.ai → New → Environment
|
|
218
|
-
hud eval my-eval --model gpt-4o --group-size 100
|
|
219
|
-
# Or create and run tasks on the platform
|
|
220
|
+
response = await client.chat.completions.create(
|
|
221
|
+
model="claude-sonnet-4-5", # or gpt-4o, gemini-2.5-pro (https://hud.ai/models)
|
|
222
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
223
|
+
)
|
|
220
224
|
```
|
|
221
225
|
|
|
222
|
-
Every
|
|
226
|
+
Every call is traced at [hud.ai](https://hud.ai). → [Models](https://docs.hud.ai/quick-links/models)
|
|
223
227
|
|
|
224
228
|
## Links
|
|
225
229
|
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
</picture>
|
|
7
7
|
</div>
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
HUD is a platform for building RL environments for AI agents. Define agent-callable tools, write evaluation scenarios, run evals at scale, and train models on the results.
|
|
10
10
|
|
|
11
11
|
To learn more, check out our [Documentation](https://docs.hud.ai) and [API Reference](https://docs.hud.ai/reference).
|
|
12
12
|
|
|
@@ -15,15 +15,14 @@ To learn more, check out our [Documentation](https://docs.hud.ai) and [API Refer
|
|
|
15
15
|
[](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
|
|
16
16
|
[](https://discord.gg/wkjtmHYYjm)
|
|
17
17
|
[](https://x.com/intent/user?screen_name=hud_evals)
|
|
18
|
-
[](https://shop.hud.ai)
|
|
19
18
|
[](https://scarf.sh)
|
|
20
19
|
[](https://docs.hud.ai)
|
|
21
20
|
|
|
22
21
|
## Install
|
|
23
22
|
|
|
24
23
|
```bash
|
|
25
|
-
|
|
26
|
-
|
|
24
|
+
# Install CLI (recommended)
|
|
25
|
+
uv tool install hud-python --python 3.12
|
|
27
26
|
|
|
28
27
|
Get your API key at [hud.ai](https://hud.ai) and set it:
|
|
29
28
|
|
|
@@ -31,65 +30,88 @@ Get your API key at [hud.ai](https://hud.ai) and set it:
|
|
|
31
30
|
export HUD_API_KEY=your-key-here
|
|
32
31
|
```
|
|
33
32
|
|
|
34
|
-
|
|
33
|
+
Get your API key at [hud.ai/project/api-keys](https://hud.ai/project/api-keys).
|
|
34
|
+
|
|
35
|
+
> Or install as a library: `pip install hud-python`
|
|
35
36
|
|
|
36
37
|

|
|
37
38
|
|
|
38
|
-
##
|
|
39
|
+
## Environments
|
|
39
40
|
|
|
40
|
-
|
|
41
|
+
An environment is the harness an agent operates in. It packages tools (functions agents can call) and scenarios (how agents are evaluated) into a single deployable unit. Each environment spins up fresh and isolated for every evaluation.
|
|
41
42
|
|
|
42
|
-
|
|
43
|
+
```python
|
|
44
|
+
from hud import Environment
|
|
45
|
+
|
|
46
|
+
env = Environment("my-env")
|
|
47
|
+
|
|
48
|
+
@env.scenario("count")
|
|
49
|
+
async def count(word: str, letter: str):
|
|
50
|
+
# PROMPT — send a question to the agent.
|
|
51
|
+
# The agent runs its reasoning loop and returns an answer.
|
|
52
|
+
answer = yield f"How many '{letter}' in '{word}'?"
|
|
53
|
+
|
|
54
|
+
# SCORE — check the agent's answer against the correct count.
|
|
55
|
+
# Return a reward: 1.0 for correct, 0.0 for wrong.
|
|
56
|
+
correct = str(word.lower().count(letter.lower()))
|
|
57
|
+
yield 1.0 if answer and correct in answer else 0.0
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
A scenario has two yields. The first sends a prompt — the agent runs between the yields, calling tools and reasoning. The second checks the result and returns a reward (0.0 to 1.0). → [Core Concepts](https://docs.hud.ai/concepts)
|
|
61
|
+
|
|
62
|
+
## Run an Agent
|
|
43
63
|
|
|
44
64
|
```python
|
|
45
|
-
|
|
46
|
-
import
|
|
65
|
+
import hud
|
|
66
|
+
from hud.agents import create_agent
|
|
47
67
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
api_key=os.environ["HUD_API_KEY"]
|
|
51
|
-
)
|
|
68
|
+
task = env("count", word="strawberry", letter="r")
|
|
69
|
+
agent = create_agent("claude-sonnet-4-5")
|
|
52
70
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
)
|
|
71
|
+
async with hud.eval(task) as ctx:
|
|
72
|
+
result = await agent.run(ctx)
|
|
73
|
+
|
|
74
|
+
print(f"Reward: {result.reward}") # 1.0 if agent answers "3"
|
|
57
75
|
```
|
|
58
76
|
|
|
59
|
-
|
|
77
|
+
`create_agent()` picks the right agent class and native tools for each model. → [Environments](https://docs.hud.ai/quick-links/environments)
|
|
60
78
|
|
|
61
|
-
|
|
79
|
+
## Workflow
|
|
62
80
|
|
|
63
|
-
|
|
81
|
+
```bash
|
|
82
|
+
hud init my-env # Scaffold environment
|
|
83
|
+
cd my-env
|
|
84
|
+
hud dev env:env -w env.py # Run locally with hot-reload
|
|
85
|
+
hud eval tasks.py claude # Run evals locally
|
|
86
|
+
hud deploy # Deploy to platform
|
|
87
|
+
hud sync tasks my-taskset # Sync tasks to platform
|
|
88
|
+
```
|
|
64
89
|
|
|
65
|
-
|
|
66
|
-
from hud import Environment
|
|
90
|
+
Once deployed, run evals at scale from the CLI or the [platform UI](https://hud.ai):
|
|
67
91
|
|
|
68
|
-
|
|
92
|
+
```bash
|
|
93
|
+
hud eval my-taskset claude --remote --full
|
|
94
|
+
```
|
|
69
95
|
|
|
70
|
-
|
|
71
|
-
def add(a: int, b: int) -> int:
|
|
72
|
-
"""Add two numbers."""
|
|
73
|
-
return a + b
|
|
96
|
+
→ [Deploy](https://docs.hud.ai/quick-links/deploy) · [Testing & Evaluation](https://docs.hud.ai/advanced/testing-environments)
|
|
74
97
|
|
|
75
|
-
|
|
76
|
-
async def solve_math(problem: str, answer: int):
|
|
77
|
-
response = yield problem # Prompt
|
|
78
|
-
yield 1.0 if str(answer) in response else 0.0 # Reward
|
|
98
|
+
## Pre-built Tools
|
|
79
99
|
|
|
80
|
-
|
|
81
|
-
# Your agent logic here - call tools, get response
|
|
82
|
-
result = await ctx.call_tool("add", a=2, b=2)
|
|
83
|
-
await ctx.submit(f"The answer is {result}")
|
|
100
|
+
HUD ships tools for computer control, shell execution, file editing, browser automation, and web search. Add them to any environment:
|
|
84
101
|
|
|
85
|
-
|
|
102
|
+
```python
|
|
103
|
+
from hud.tools import AnthropicComputerTool, BashTool, EditTool
|
|
104
|
+
|
|
105
|
+
env.add_tool(AnthropicComputerTool()) # Mouse, keyboard, screenshots
|
|
106
|
+
env.add_tool(BashTool()) # Persistent bash shell
|
|
107
|
+
env.add_tool(EditTool()) # File viewing and editing
|
|
86
108
|
```
|
|
87
109
|
|
|
88
|
-
|
|
110
|
+
HUD adapts each tool to the model's native format — Claude gets `computer_20250124`, OpenAI gets `computer_use_preview`, Gemini gets `ComputerUse`. → [Tools Reference](https://docs.hud.ai/tools/computer)
|
|
89
111
|
|
|
90
|
-
|
|
112
|
+
## Model Gateway
|
|
91
113
|
|
|
92
|
-
|
|
114
|
+
Use Claude, GPT, Gemini, or Grok through one OpenAI-compatible endpoint:
|
|
93
115
|
|
|
94
116
|
```python
|
|
95
117
|
from openai import AsyncOpenAI
|
|
@@ -100,31 +122,13 @@ client = AsyncOpenAI(
|
|
|
100
122
|
api_key=os.environ["HUD_API_KEY"]
|
|
101
123
|
)
|
|
102
124
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
messages=[{"role": "user", "content": ctx.prompt}],
|
|
108
|
-
tools=ctx.tools # Environment tools available to the model
|
|
109
|
-
)
|
|
110
|
-
await ctx.submit(response.choices[0].message.content)
|
|
111
|
-
```
|
|
112
|
-
|
|
113
|
-
**Variants** test configurations. **Groups** repeat for distribution. Results stream to [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/evals)
|
|
114
|
-
|
|
115
|
-
### Deploy & Train
|
|
116
|
-
|
|
117
|
-
Push to GitHub, connect on hud.ai, run at scale:
|
|
118
|
-
|
|
119
|
-
```bash
|
|
120
|
-
hud init # Scaffold environment
|
|
121
|
-
git push # Push to GitHub
|
|
122
|
-
# Connect on hud.ai → New → Environment
|
|
123
|
-
hud eval my-eval --model gpt-4o --group-size 100
|
|
124
|
-
# Or create and run tasks on the platform
|
|
125
|
+
response = await client.chat.completions.create(
|
|
126
|
+
model="claude-sonnet-4-5", # or gpt-4o, gemini-2.5-pro (https://hud.ai/models)
|
|
127
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
128
|
+
)
|
|
125
129
|
```
|
|
126
130
|
|
|
127
|
-
Every
|
|
131
|
+
Every call is traced at [hud.ai](https://hud.ai). → [Models](https://docs.hud.ai/quick-links/models)
|
|
128
132
|
|
|
129
133
|
## Links
|
|
130
134
|
|
|
@@ -336,12 +336,15 @@ class MCPAgent(ABC):
|
|
|
336
336
|
f"Available tools: {sorted(available_tool_names)}"
|
|
337
337
|
)
|
|
338
338
|
|
|
339
|
-
self.
|
|
340
|
-
|
|
341
|
-
|
|
339
|
+
self._categorized_tools = self.categorize_tools()
|
|
340
|
+
|
|
341
|
+
# Show tool discovery table (visible at INFO level)
|
|
342
|
+
self.console.format_tool_discovery(
|
|
343
|
+
tools=self._available_tools,
|
|
344
|
+
native_tools=self._categorized_tools.native + self._categorized_tools.hosted,
|
|
345
|
+
skipped=self._categorized_tools.skipped,
|
|
342
346
|
)
|
|
343
347
|
|
|
344
|
-
self._categorized_tools = self.categorize_tools()
|
|
345
348
|
for tool, reason in self._categorized_tools.skipped:
|
|
346
349
|
logger.debug("Skipping tool %s: %s", tool.name, reason)
|
|
347
350
|
|
|
@@ -574,17 +577,13 @@ class MCPAgent(ABC):
|
|
|
574
577
|
tool_messages = await self.format_tool_results(tool_calls, tool_results)
|
|
575
578
|
messages.extend(tool_messages)
|
|
576
579
|
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
for call, result in zip(tool_calls, tool_results, strict=False):
|
|
585
|
-
step_info += f"\n{call}\n{result}"
|
|
586
|
-
|
|
587
|
-
self.console.info_log(step_info)
|
|
580
|
+
if logger.isEnabledFor(logging.INFO):
|
|
581
|
+
self.console.format_step(
|
|
582
|
+
step=step_count,
|
|
583
|
+
max_steps=max_steps,
|
|
584
|
+
tool_calls=tool_calls,
|
|
585
|
+
tool_results=tool_results,
|
|
586
|
+
)
|
|
588
587
|
|
|
589
588
|
except Exception as e:
|
|
590
589
|
self.console.error_log(f"Step failed: {e}")
|
|
@@ -145,8 +145,12 @@ class ClaudeAgent(MCPAgent):
|
|
|
145
145
|
model_client = AsyncAnthropic(api_key=settings.anthropic_api_key)
|
|
146
146
|
else:
|
|
147
147
|
raise ValueError(
|
|
148
|
-
"No API key found
|
|
149
|
-
"
|
|
148
|
+
"No API key found for Claude.\n"
|
|
149
|
+
" • Set HUD_API_KEY to use HUD Gateway"
|
|
150
|
+
" (add your Anthropic key at"
|
|
151
|
+
" hud.ai/project/secrets for BYOK)\n"
|
|
152
|
+
" • Or set ANTHROPIC_API_KEY for direct"
|
|
153
|
+
" access"
|
|
150
154
|
)
|
|
151
155
|
|
|
152
156
|
self.anthropic_client: AsyncAnthropic | AsyncAnthropicBedrock = model_client
|
|
@@ -94,8 +94,12 @@ class GeminiAgent(MCPAgent):
|
|
|
94
94
|
raise ValueError(f"Gemini API key is invalid: {e}") from e
|
|
95
95
|
else:
|
|
96
96
|
raise ValueError(
|
|
97
|
-
"No API key found
|
|
98
|
-
"
|
|
97
|
+
"No API key found for Gemini.\n"
|
|
98
|
+
" • Set HUD_API_KEY to use HUD Gateway"
|
|
99
|
+
" (add your Gemini key at"
|
|
100
|
+
" hud.ai/project/secrets for BYOK)\n"
|
|
101
|
+
" • Or set GEMINI_API_KEY for direct"
|
|
102
|
+
" access"
|
|
99
103
|
)
|
|
100
104
|
|
|
101
105
|
self.gemini_client: genai.Client = model_client
|
|
@@ -119,8 +119,12 @@ class OpenAIAgent(MCPAgent):
|
|
|
119
119
|
raise ValueError(f"OpenAI API key is invalid: {exc}") from exc
|
|
120
120
|
else:
|
|
121
121
|
raise ValueError(
|
|
122
|
-
"No API key found
|
|
123
|
-
"
|
|
122
|
+
"No API key found for OpenAI.\n"
|
|
123
|
+
" • Set HUD_API_KEY to use HUD Gateway"
|
|
124
|
+
" (add your OpenAI key at"
|
|
125
|
+
" hud.ai/project/secrets for BYOK)\n"
|
|
126
|
+
" • Or set OPENAI_API_KEY for direct"
|
|
127
|
+
" access"
|
|
124
128
|
)
|
|
125
129
|
|
|
126
130
|
self.openai_client: AsyncOpenAI = model_client
|
|
@@ -485,10 +489,9 @@ class OpenAIAgent(MCPAgent):
|
|
|
485
489
|
type="computer_screenshot",
|
|
486
490
|
image_url=f"data:image/png;base64,{screenshot}",
|
|
487
491
|
),
|
|
488
|
-
acknowledged_safety_checks=(
|
|
489
|
-
acknowledged_checks if acknowledged_checks else None
|
|
490
|
-
),
|
|
491
492
|
)
|
|
493
|
+
if acknowledged_checks:
|
|
494
|
+
output_payload["acknowledged_safety_checks"] = acknowledged_checks
|
|
492
495
|
computer_outputs.append(output_payload)
|
|
493
496
|
self.pending_call_id = None
|
|
494
497
|
self.pending_safety_checks = []
|
|
@@ -11,7 +11,7 @@ from rich.panel import Panel
|
|
|
11
11
|
# Create the main Typer app
|
|
12
12
|
app = typer.Typer(
|
|
13
13
|
name="hud",
|
|
14
|
-
help="
|
|
14
|
+
help="HUD CLI - build, test, and deploy evaluation environments",
|
|
15
15
|
add_completion=False,
|
|
16
16
|
rich_markup_mode="rich",
|
|
17
17
|
pretty_exceptions_enable=False,
|
|
@@ -40,8 +40,9 @@ from .init import init_command # noqa: E402
|
|
|
40
40
|
from .link import link_command # noqa: E402
|
|
41
41
|
from .models import models_command # noqa: E402
|
|
42
42
|
from .push import push_command # noqa: E402
|
|
43
|
-
from .
|
|
44
|
-
from .
|
|
43
|
+
from .rl import rl_run_command, rl_status_command # noqa: E402
|
|
44
|
+
from .scenario import scenario_app # noqa: E402
|
|
45
|
+
from .sync import sync_app # noqa: E402
|
|
45
46
|
|
|
46
47
|
_EXTRA_ARGS = {"allow_extra_args": True, "ignore_unknown_options": True}
|
|
47
48
|
|
|
@@ -50,7 +51,7 @@ app.command(name="debug", context_settings=_EXTRA_ARGS)(debug_command)
|
|
|
50
51
|
app.command(name="dev", context_settings=_EXTRA_ARGS)(dev_command)
|
|
51
52
|
app.command(name="build", context_settings=_EXTRA_ARGS)(build_command)
|
|
52
53
|
app.command(name="deploy")(deploy_command)
|
|
53
|
-
app.command(name="link")(link_command)
|
|
54
|
+
app.command(name="link", hidden=True)(link_command)
|
|
54
55
|
app.command(name="eval")(eval_command)
|
|
55
56
|
app.command(name="push", hidden=True)(push_command)
|
|
56
57
|
app.command(name="init")(init_command)
|
|
@@ -108,11 +109,17 @@ def version() -> None:
|
|
|
108
109
|
console.print("HUD CLI version: [cyan]unknown[/cyan]")
|
|
109
110
|
|
|
110
111
|
|
|
111
|
-
#
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
app.add_typer(
|
|
112
|
+
# Scenario subcommand group
|
|
113
|
+
app.add_typer(scenario_app, name="scenario")
|
|
114
|
+
|
|
115
|
+
# Sync subcommand group
|
|
116
|
+
app.add_typer(sync_app, name="sync")
|
|
117
|
+
|
|
118
|
+
# RL subcommand group
|
|
119
|
+
rl_app = typer.Typer(help="🚀 RL training commands\n\nExample: hud rl run my-taskset -m <model-id>")
|
|
120
|
+
rl_app.command("run")(rl_run_command)
|
|
121
|
+
rl_app.command("status")(rl_status_command)
|
|
122
|
+
app.add_typer(rl_app, name="rl")
|
|
116
123
|
|
|
117
124
|
|
|
118
125
|
# ---------------------------------------------------------------------------
|
|
@@ -140,7 +147,7 @@ def main() -> None:
|
|
|
140
147
|
if len(sys.argv) == 1 or (len(sys.argv) == 2 and sys.argv[1] in ["--help", "-h"]):
|
|
141
148
|
console.print(
|
|
142
149
|
Panel.fit(
|
|
143
|
-
"[bold cyan]
|
|
150
|
+
"[bold cyan]HUD CLI[/bold cyan]\nBuild, test, and deploy environments",
|
|
144
151
|
border_style="cyan",
|
|
145
152
|
)
|
|
146
153
|
)
|
|
@@ -150,10 +157,8 @@ def main() -> None:
|
|
|
150
157
|
)
|
|
151
158
|
console.print(" 2. Start dev server: [cyan]hud dev[/cyan]")
|
|
152
159
|
console.print(" 3. Deploy to HUD platform: [cyan]hud deploy[/cyan]")
|
|
153
|
-
console.print(" 4.
|
|
154
|
-
console.print("
|
|
155
|
-
console.print(" [cyan]hud rft run tasks.jsonl[/cyan] Launch an RFT training job")
|
|
156
|
-
console.print(" [cyan]hud rft status <model-id>[/cyan] Check training status\n")
|
|
160
|
+
console.print(" 4. Sync tasks: [cyan]hud sync tasks my-taskset[/cyan]")
|
|
161
|
+
console.print(" 5. Run evaluations: [cyan]hud eval tasks.py claude[/cyan]\n")
|
|
157
162
|
|
|
158
163
|
app()
|
|
159
164
|
except typer.Exit as e:
|