PyPI - hud-python - Versions diffs - 0.5.40__tar.gz → 0.6.0__tar.gz - Mend

hud-python 0.5.40.tar.gz → 0.6.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (544)

{hud_python-0.5.40 → hud_python-0.6.0}/.gitignore +10 -2
hud_python-0.6.0/PKG-INFO +278 -0
hud_python-0.6.0/README.md +200 -0
hud_python-0.6.0/cookbooks/a2a-chat/README.md +37 -0
hud_python-0.6.0/cookbooks/a2a-chat/pyproject.toml +18 -0
hud_python-0.6.0/cookbooks/codex-coding/README.md +23 -0
hud_python-0.6.0/cookbooks/codex-coding/pyproject.toml +17 -0
hud_python-0.6.0/cookbooks/rl-training/README.md +113 -0
hud_python-0.6.0/cookbooks/rl-training/pyproject.toml +20 -0
hud_python-0.6.0/hud/__init__.py +64 -0
hud_python-0.6.0/hud/_legacy.py +300 -0
hud_python-0.6.0/hud/agents/__init__.py +133 -0
hud_python-0.6.0/hud/agents/base.py +22 -0
hud_python-0.6.0/hud/agents/browser_use/__init__.py +5 -0
hud_python-0.6.0/hud/agents/browser_use/agent.py +110 -0
hud_python-0.6.0/hud/agents/claude/__init__.py +22 -0
hud_python-0.6.0/hud/agents/claude/agent.py +369 -0
hud_python-0.6.0/hud/agents/claude/sdk/__init__.py +5 -0
hud_python-0.6.0/hud/agents/claude/sdk/agent.py +335 -0
hud_python-0.6.0/hud/agents/claude/sdk/computer_mcp.py +136 -0
hud_python-0.6.0/hud/agents/claude/tools/__init__.py +28 -0
hud_python-0.6.0/hud/agents/claude/tools/base.py +17 -0
hud_python-0.6.0/hud/agents/claude/tools/coding.py +141 -0
hud_python-0.6.0/hud/agents/claude/tools/computer.py +362 -0
hud_python-0.6.0/hud/agents/claude/tools/hosted.py +100 -0
hud_python-0.6.0/hud/agents/claude/tools/mcp_proxy.py +43 -0
hud_python-0.6.0/hud/agents/claude/tools/settings.py +36 -0
hud_python-0.6.0/hud/agents/claude/tools/tests/test_computer.py +149 -0
hud_python-0.6.0/hud/agents/gemini/__init__.py +6 -0
hud_python-0.6.0/hud/agents/gemini/agent.py +297 -0
hud_python-0.6.0/hud/agents/gemini/settings.py +21 -0
hud_python-0.6.0/hud/agents/gemini/tools/__init__.py +33 -0
hud_python-0.6.0/hud/agents/gemini/tools/base.py +9 -0
hud_python-0.6.0/hud/agents/gemini/tools/coding.py +143 -0
hud_python-0.6.0/hud/agents/gemini/tools/computer.py +200 -0
hud_python-0.6.0/hud/agents/gemini/tools/filesystem.py +152 -0
hud_python-0.6.0/hud/agents/gemini/tools/hosted.py +42 -0
hud_python-0.6.0/hud/agents/gemini/tools/mcp_proxy.py +34 -0
hud_python-0.6.0/hud/agents/gemini/tools/tests/test_computer.py +105 -0
hud_python-0.6.0/hud/agents/misc/__init__.py +7 -0
hud_python-0.6.0/hud/agents/misc/response_automation.py +103 -0
hud_python-0.6.0/hud/agents/openai/__init__.py +5 -0
hud_python-0.6.0/hud/agents/openai/agent.py +327 -0
hud_python-0.6.0/hud/agents/openai/tools/__init__.py +21 -0
hud_python-0.6.0/hud/agents/openai/tools/apply_patch.py +328 -0
hud_python-0.6.0/hud/agents/openai/tools/base.py +87 -0
hud_python-0.6.0/hud/agents/openai/tools/coding.py +111 -0
hud_python-0.6.0/hud/agents/openai/tools/computer.py +226 -0
hud_python-0.6.0/hud/agents/openai/tools/hosted.py +35 -0
hud_python-0.6.0/hud/agents/openai/tools/mcp_proxy.py +53 -0
{hud_python-0.5.40/hud/utils → hud_python-0.6.0/hud/agents/openai/tools}/strict_schema.py +5 -5
hud_python-0.6.0/hud/agents/openai/tools/tests/test_computer.py +110 -0
hud_python-0.6.0/hud/agents/openai/tools/tests/test_strict_schema.py +74 -0
hud_python-0.6.0/hud/agents/openai_compatible/__init__.py +5 -0
hud_python-0.6.0/hud/agents/openai_compatible/agent.py +238 -0
hud_python-0.6.0/hud/agents/openai_compatible/tools/__init__.py +14 -0
hud_python-0.6.0/hud/agents/openai_compatible/tools/base.py +170 -0
hud_python-0.6.0/hud/agents/openai_compatible/tools/filesystem.py +138 -0
hud_python-0.6.0/hud/agents/openai_compatible/tools/mcp_proxy.py +30 -0
hud_python-0.6.0/hud/agents/robot/__init__.py +35 -0
hud_python-0.6.0/hud/agents/robot/_types.py +12 -0
hud_python-0.6.0/hud/agents/robot/adapter.py +95 -0
hud_python-0.6.0/hud/agents/robot/agent.py +157 -0
hud_python-0.6.0/hud/agents/robot/model.py +138 -0
hud_python-0.6.0/hud/agents/tests/test_apply_patch.py +78 -0
hud_python-0.6.0/hud/agents/tests/test_base.py +125 -0
hud_python-0.6.0/hud/agents/tests/test_claude_agent.py +145 -0
hud_python-0.6.0/hud/agents/tests/test_claude_sdk_agent.py +148 -0
hud_python-0.6.0/hud/agents/tests/test_gemini_agent.py +148 -0
hud_python-0.6.0/hud/agents/tests/test_openai_agent.py +126 -0
hud_python-0.6.0/hud/agents/tests/test_openai_compatible_agent.py +83 -0
hud_python-0.6.0/hud/agents/tests/test_provider_native_tools.py +248 -0
hud_python-0.6.0/hud/agents/tests/test_tool_agent.py +144 -0
hud_python-0.6.0/hud/agents/tests/test_trace.py +134 -0
hud_python-0.6.0/hud/agents/tool_agent.py +307 -0
hud_python-0.6.0/hud/agents/tools/__init__.py +31 -0
hud_python-0.6.0/hud/agents/tools/base.py +93 -0
hud_python-0.6.0/hud/agents/tools/hosted.py +31 -0
hud_python-0.6.0/hud/agents/tools/mcp.py +45 -0
hud_python-0.6.0/hud/agents/tools/rfb.py +196 -0
hud_python-0.6.0/hud/agents/tools/ssh.py +66 -0
hud_python-0.6.0/hud/agents/types.py +459 -0
hud_python-0.6.0/hud/capabilities/__init__.py +37 -0
hud_python-0.6.0/hud/capabilities/base.py +222 -0
hud_python-0.6.0/hud/capabilities/cdp.py +148 -0
hud_python-0.6.0/hud/capabilities/filetracking.py +85 -0
hud_python-0.6.0/hud/capabilities/mcp.py +76 -0
hud_python-0.6.0/hud/capabilities/rfb.py +137 -0
hud_python-0.6.0/hud/capabilities/robot.py +148 -0
hud_python-0.6.0/hud/capabilities/ssh.py +53 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/__init__.py +40 -48
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/cancel.py +15 -26
hud_python-0.6.0/hud/cli/client.py +82 -0
hud_python-0.6.0/hud/cli/deploy.py +798 -0
hud_python-0.6.0/hud/cli/eval.py +939 -0
hud_python-0.6.0/hud/cli/init.py +76 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/login.py +59 -114
hud_python-0.6.0/hud/cli/models.py +252 -0
hud_python-0.6.0/hud/cli/serve.py +111 -0
hud_python-0.6.0/hud/cli/sync.py +499 -0
hud_python-0.6.0/hud/cli/task.py +210 -0
hud_python-0.6.0/hud/cli/templates.py +142 -0
hud_python-0.6.0/hud/cli/tests/test_cli_init.py +98 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/tests/test_deploy.py +174 -64
hud_python-0.6.0/hud/cli/tests/test_eval_config.py +239 -0
hud_python-0.6.0/hud/cli/tests/test_init.py +51 -0
hud_python-0.6.0/hud/cli/tests/test_sync_export.py +27 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/utils/api.py +2 -18
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/utils/build_display.py +53 -52
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/utils/build_logs.py +18 -31
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/utils/config.py +13 -0
hud_python-0.6.0/hud/cli/utils/display.py +100 -0
hud_python-0.6.0/hud/cli/utils/jobs.py +38 -0
hud_python-0.6.0/hud/cli/utils/registry.py +100 -0
hud_python-0.6.0/hud/cli/utils/source.py +567 -0
hud_python-0.6.0/hud/cli/utils/tests/test_build_display.py +49 -0
hud_python-0.6.0/hud/cli/utils/tests/test_context.py +74 -0
hud_python-0.6.0/hud/cli/utils/tests/test_registry.py +76 -0
hud_python-0.6.0/hud/cli/utils/tests/test_source.py +304 -0
hud_python-0.6.0/hud/cli/utils/tests/test_version_check.py +121 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/utils/version_check.py +1 -1
hud_python-0.6.0/hud/clients/__init__.py +13 -0
hud_python-0.6.0/hud/clients/client.py +396 -0
hud_python-0.6.0/hud/clients/tests/__init__.py +1 -0
hud_python-0.6.0/hud/clients/tests/test_connect.py +111 -0
hud_python-0.6.0/hud/conftest.py +30 -0
hud_python-0.6.0/hud/environment/__init__.py +58 -0
hud_python-0.6.0/hud/environment/env.py +336 -0
hud_python-0.6.0/hud/environment/file_tracker.py +582 -0
hud_python-0.6.0/hud/environment/file_tracking.py +75 -0
hud_python-0.6.0/hud/environment/legacy.py +364 -0
hud_python-0.6.0/hud/environment/robot/__init__.py +29 -0
hud_python-0.6.0/hud/environment/robot/bridge.py +176 -0
hud_python-0.6.0/hud/environment/robot/endpoint.py +210 -0
hud_python-0.6.0/hud/environment/robot/sim_runner.py +111 -0
hud_python-0.6.0/hud/environment/server.py +438 -0
hud_python-0.6.0/hud/environment/tests/conftest.py +28 -0
hud_python-0.6.0/hud/environment/tests/test_capability_backing.py +141 -0
hud_python-0.6.0/hud/environment/tests/test_file_tracker.py +186 -0
hud_python-0.6.0/hud/environment/tests/test_file_tracking.py +47 -0
hud_python-0.6.0/hud/environment/tests/test_legacy.py +273 -0
hud_python-0.6.0/hud/environment/tests/test_loader.py +31 -0
hud_python-0.6.0/hud/environment/tests/test_manifest.py +88 -0
hud_python-0.6.0/hud/environment/tests/test_server.py +81 -0
hud_python-0.6.0/hud/environment/tests/test_tunnel.py +126 -0
hud_python-0.6.0/hud/environment/utils.py +84 -0
hud_python-0.6.0/hud/environment/workspace.py +585 -0
hud_python-0.6.0/hud/eval/__init__.py +76 -0
hud_python-0.6.0/hud/eval/chat.py +157 -0
hud_python-0.6.0/hud/eval/file_tracking.py +112 -0
hud_python-0.6.0/hud/eval/job.py +135 -0
hud_python-0.6.0/hud/eval/run.py +393 -0
hud_python-0.6.0/hud/eval/runtime.py +995 -0
hud_python-0.6.0/hud/eval/sync.py +195 -0
hud_python-0.6.0/hud/eval/task.py +109 -0
hud_python-0.6.0/hud/eval/taskset.py +295 -0
hud_python-0.6.0/hud/eval/tests/test_chat.py +133 -0
hud_python-0.6.0/hud/eval/tests/test_docker_provider.py +742 -0
hud_python-0.6.0/hud/eval/tests/test_file_tracking_observer.py +128 -0
hud_python-0.6.0/hud/eval/tests/test_hosted.py +438 -0
hud_python-0.6.0/hud/eval/tests/test_job.py +63 -0
hud_python-0.6.0/hud/eval/tests/test_rollout.py +302 -0
hud_python-0.6.0/hud/eval/tests/test_sync.py +150 -0
hud_python-0.6.0/hud/eval/tests/test_task.py +271 -0
hud_python-0.6.0/hud/graders/__init__.py +58 -0
hud_python-0.6.0/hud/graders/base.py +49 -0
hud_python-0.6.0/hud/graders/bash.py +79 -0
hud_python-0.6.0/hud/graders/combine.py +172 -0
hud_python-0.6.0/hud/graders/judge.py +176 -0
hud_python-0.6.0/hud/graders/results.py +84 -0
hud_python-0.6.0/hud/graders/text.py +164 -0
hud_python-0.6.0/hud/patches/__init__.py +18 -0
hud_python-0.6.0/hud/patches/tests/__init__.py +3 -0
hud_python-0.6.0/hud/patches/tests/test_warnings.py +108 -0
hud_python-0.6.0/hud/patches/warnings.py +38 -0
hud_python-0.6.0/hud/server.py +32 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/settings.py +43 -23
hud_python-0.6.0/hud/telemetry/__init__.py +25 -0
hud_python-0.6.0/hud/telemetry/context.py +47 -0
hud_python-0.6.0/hud/telemetry/exporter.py +236 -0
hud_python-0.6.0/hud/telemetry/filetracking.py +76 -0
hud_python-0.6.0/hud/telemetry/instrument.py +255 -0
hud_python-0.6.0/hud/telemetry/span.py +93 -0
hud_python-0.6.0/hud/telemetry/tests/test_exporter.py +132 -0
hud_python-0.6.0/hud/telemetry/tests/test_filetracking.py +60 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/telemetry/tests/test_instrument.py +69 -43
hud_python-0.6.0/hud/train/__init__.py +47 -0
hud_python-0.6.0/hud/train/base.py +102 -0
hud_python-0.6.0/hud/train/client.py +213 -0
hud_python-0.6.0/hud/train/types.py +182 -0
hud_python-0.6.0/hud/types.py +412 -0
hud_python-0.6.0/hud/utils/__init__.py +13 -0
hud_python-0.6.0/hud/utils/exceptions.py +229 -0
hud_python-0.6.0/hud/utils/gateway.py +89 -0
{hud_python-0.5.40/hud/shared → hud_python-0.6.0/hud/utils}/hints.py +3 -20
{hud_python-0.5.40 → hud_python-0.6.0}/hud/utils/hud_console.py +16 -328
hud_python-0.6.0/hud/utils/modules.py +79 -0
hud_python-0.6.0/hud/utils/platform.py +62 -0
{hud_python-0.5.40/hud/shared → hud_python-0.6.0/hud/utils}/requests.py +2 -2
{hud_python-0.5.40 → hud_python-0.6.0}/hud/utils/serialization.py +7 -1
hud_python-0.6.0/hud/utils/tests/test_exceptions.py +102 -0
{hud_python-0.5.40/hud/shared → hud_python-0.6.0/hud/utils}/tests/test_hints.py +1 -1
hud_python-0.6.0/hud/utils/tests/test_hud_console.py +62 -0
hud_python-0.6.0/hud/utils/tests/test_platform.py +55 -0
{hud_python-0.5.40/hud/shared → hud_python-0.6.0/hud/utils}/tests/test_requests.py +5 -5
hud_python-0.6.0/hud/utils/time.py +13 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/version.py +1 -1
{hud_python-0.5.40 → hud_python-0.6.0}/pyproject.toml +53 -40
hud_python-0.5.40/PKG-INFO +0 -265
hud_python-0.5.40/README.md +0 -170
hud_python-0.5.40/examples/README.md +0 -62
hud_python-0.5.40/hud/__init__.py +0 -52
hud_python-0.5.40/hud/agents/__init__.py +0 -79
hud_python-0.5.40/hud/agents/base.py +0 -971
hud_python-0.5.40/hud/agents/claude.py +0 -753
hud_python-0.5.40/hud/agents/gateway.py +0 -42
hud_python-0.5.40/hud/agents/gemini.py +0 -593
hud_python-0.5.40/hud/agents/gemini_cua.py +0 -43
hud_python-0.5.40/hud/agents/grounded_openai.py +0 -280
hud_python-0.5.40/hud/agents/misc/__init__.py +0 -8
hud_python-0.5.40/hud/agents/misc/integration_test_agent.py +0 -92
hud_python-0.5.40/hud/agents/misc/response_agent.py +0 -123
hud_python-0.5.40/hud/agents/openai.py +0 -601
hud_python-0.5.40/hud/agents/openai_chat.py +0 -391
hud_python-0.5.40/hud/agents/operator.py +0 -144
hud_python-0.5.40/hud/agents/resolver.py +0 -64
hud_python-0.5.40/hud/agents/tests/conftest.py +0 -133
hud_python-0.5.40/hud/agents/tests/test_base.py +0 -552
hud_python-0.5.40/hud/agents/tests/test_base_runtime.py +0 -238
hud_python-0.5.40/hud/agents/tests/test_claude.py +0 -1159
hud_python-0.5.40/hud/agents/tests/test_gemini.py +0 -849
hud_python-0.5.40/hud/agents/tests/test_grounded_openai_agent.py +0 -170
hud_python-0.5.40/hud/agents/tests/test_integration_test_agent.py +0 -42
hud_python-0.5.40/hud/agents/tests/test_openai.py +0 -610
hud_python-0.5.40/hud/agents/tests/test_operator.py +0 -429
hud_python-0.5.40/hud/agents/tests/test_resolver.py +0 -284
hud_python-0.5.40/hud/agents/tests/test_run_eval.py +0 -271
hud_python-0.5.40/hud/agents/types.py +0 -158
hud_python-0.5.40/hud/cli/analyze.py +0 -518
hud_python-0.5.40/hud/cli/build.py +0 -1047
hud_python-0.5.40/hud/cli/convert/__init__.py +0 -317
hud_python-0.5.40/hud/cli/convert/base.py +0 -78
hud_python-0.5.40/hud/cli/convert/harbor.py +0 -565
hud_python-0.5.40/hud/cli/convert/tests/conftest.py +0 -258
hud_python-0.5.40/hud/cli/convert/tests/test_harbor.py +0 -751
hud_python-0.5.40/hud/cli/debug.py +0 -537
hud_python-0.5.40/hud/cli/deploy.py +0 -811
hud_python-0.5.40/hud/cli/dev.py +0 -1156
hud_python-0.5.40/hud/cli/eval.py +0 -944
hud_python-0.5.40/hud/cli/flows/dev.py +0 -176
hud_python-0.5.40/hud/cli/flows/init.py +0 -224
hud_python-0.5.40/hud/cli/flows/tasks.py +0 -476
hud_python-0.5.40/hud/cli/flows/templates.py +0 -151
hud_python-0.5.40/hud/cli/flows/tests/__init__.py +0 -1
hud_python-0.5.40/hud/cli/flows/tests/test_dev.py +0 -126
hud_python-0.5.40/hud/cli/init.py +0 -315
hud_python-0.5.40/hud/cli/link.py +0 -38
hud_python-0.5.40/hud/cli/models.py +0 -82
hud_python-0.5.40/hud/cli/push.py +0 -485
hud_python-0.5.40/hud/cli/rl.py +0 -372
hud_python-0.5.40/hud/cli/scenario.py +0 -187
hud_python-0.5.40/hud/cli/sync.py +0 -966
hud_python-0.5.40/hud/cli/tests/test_analysis_utils.py +0 -38
hud_python-0.5.40/hud/cli/tests/test_analyze.py +0 -299
hud_python-0.5.40/hud/cli/tests/test_analyze_metadata.py +0 -178
hud_python-0.5.40/hud/cli/tests/test_analyze_module.py +0 -167
hud_python-0.5.40/hud/cli/tests/test_build.py +0 -816
hud_python-0.5.40/hud/cli/tests/test_build_failure.py +0 -41
hud_python-0.5.40/hud/cli/tests/test_build_module.py +0 -50
hud_python-0.5.40/hud/cli/tests/test_cli_init.py +0 -192
hud_python-0.5.40/hud/cli/tests/test_cli_root.py +0 -83
hud_python-0.5.40/hud/cli/tests/test_convert.py +0 -361
hud_python-0.5.40/hud/cli/tests/test_debug.py +0 -463
hud_python-0.5.40/hud/cli/tests/test_debug_directory_mode.py +0 -32
hud_python-0.5.40/hud/cli/tests/test_dev.py +0 -326
hud_python-0.5.40/hud/cli/tests/test_eval.py +0 -251
hud_python-0.5.40/hud/cli/tests/test_init.py +0 -124
hud_python-0.5.40/hud/cli/tests/test_lockfile_utils.py +0 -72
hud_python-0.5.40/hud/cli/tests/test_mcp_server.py +0 -83
hud_python-0.5.40/hud/cli/tests/test_push.py +0 -369
hud_python-0.5.40/hud/cli/tests/test_push_happy.py +0 -74
hud_python-0.5.40/hud/cli/tests/test_push_wrapper.py +0 -23
hud_python-0.5.40/hud/cli/tests/test_rl.py +0 -154
hud_python-0.5.40/hud/cli/tests/test_scenario.py +0 -283
hud_python-0.5.40/hud/cli/tests/test_sync.py +0 -1432
hud_python-0.5.40/hud/cli/tests/test_utils.py +0 -388
hud_python-0.5.40/hud/cli/utils/analysis.py +0 -265
hud_python-0.5.40/hud/cli/utils/args.py +0 -80
hud_python-0.5.40/hud/cli/utils/collect.py +0 -292
hud_python-0.5.40/hud/cli/utils/docker.py +0 -422
hud_python-0.5.40/hud/cli/utils/env_check.py +0 -194
hud_python-0.5.40/hud/cli/utils/environment.py +0 -214
hud_python-0.5.40/hud/cli/utils/git.py +0 -136
hud_python-0.5.40/hud/cli/utils/interactive.py +0 -444
hud_python-0.5.40/hud/cli/utils/lockfile.py +0 -169
hud_python-0.5.40/hud/cli/utils/logging.py +0 -263
hud_python-0.5.40/hud/cli/utils/metadata.py +0 -233
hud_python-0.5.40/hud/cli/utils/name_check.py +0 -140
hud_python-0.5.40/hud/cli/utils/project_config.py +0 -106
hud_python-0.5.40/hud/cli/utils/server.py +0 -250
hud_python-0.5.40/hud/cli/utils/source_hash.py +0 -108
hud_python-0.5.40/hud/cli/utils/taskset.py +0 -83
hud_python-0.5.40/hud/cli/utils/tests/test_collect.py +0 -283
hud_python-0.5.40/hud/cli/utils/tests/test_docker.py +0 -93
hud_python-0.5.40/hud/cli/utils/tests/test_docker_hints.py +0 -71
hud_python-0.5.40/hud/cli/utils/tests/test_env_check.py +0 -74
hud_python-0.5.40/hud/cli/utils/tests/test_environment.py +0 -81
hud_python-0.5.40/hud/cli/utils/tests/test_git.py +0 -142
hud_python-0.5.40/hud/cli/utils/tests/test_interactive_module.py +0 -62
hud_python-0.5.40/hud/cli/utils/tests/test_logging_utils.py +0 -23
hud_python-0.5.40/hud/cli/utils/tests/test_metadata.py +0 -31
hud_python-0.5.40/hud/cli/utils/tests/test_source_hash.py +0 -36
hud_python-0.5.40/hud/cli/utils/validation.py +0 -312
hud_python-0.5.40/hud/cli/utils/viewer.py +0 -141
hud_python-0.5.40/hud/datasets/__init__.py +0 -36
hud_python-0.5.40/hud/datasets/loader.py +0 -283
hud_python-0.5.40/hud/datasets/runner.py +0 -263
hud_python-0.5.40/hud/datasets/tests/test_loader.py +0 -281
hud_python-0.5.40/hud/datasets/tests/test_utils.py +0 -316
hud_python-0.5.40/hud/datasets/utils.py +0 -305
hud_python-0.5.40/hud/environment/__init__.py +0 -53
hud_python-0.5.40/hud/environment/connection.py +0 -340
hud_python-0.5.40/hud/environment/connectors/__init__.py +0 -33
hud_python-0.5.40/hud/environment/connectors/base.py +0 -68
hud_python-0.5.40/hud/environment/connectors/local.py +0 -177
hud_python-0.5.40/hud/environment/connectors/mcp_config.py +0 -191
hud_python-0.5.40/hud/environment/connectors/openai.py +0 -101
hud_python-0.5.40/hud/environment/connectors/remote.py +0 -179
hud_python-0.5.40/hud/environment/environment.py +0 -1112
hud_python-0.5.40/hud/environment/integrations/__init__.py +0 -45
hud_python-0.5.40/hud/environment/integrations/adk.py +0 -67
hud_python-0.5.40/hud/environment/integrations/anthropic.py +0 -196
hud_python-0.5.40/hud/environment/integrations/gemini.py +0 -92
hud_python-0.5.40/hud/environment/integrations/langchain.py +0 -82
hud_python-0.5.40/hud/environment/integrations/llamaindex.py +0 -68
hud_python-0.5.40/hud/environment/integrations/openai.py +0 -219
hud_python-0.5.40/hud/environment/mock.py +0 -306
hud_python-0.5.40/hud/environment/router.py +0 -263
hud_python-0.5.40/hud/environment/scenarios.py +0 -1168
hud_python-0.5.40/hud/environment/tests/__init__.py +0 -1
hud_python-0.5.40/hud/environment/tests/test_connection.py +0 -377
hud_python-0.5.40/hud/environment/tests/test_connectors.py +0 -325
hud_python-0.5.40/hud/environment/tests/test_environment.py +0 -742
hud_python-0.5.40/hud/environment/tests/test_integrations.py +0 -257
hud_python-0.5.40/hud/environment/tests/test_local_connectors.py +0 -242
hud_python-0.5.40/hud/environment/tests/test_scenarios.py +0 -2051
hud_python-0.5.40/hud/environment/tests/test_session_id.py +0 -159
hud_python-0.5.40/hud/environment/tests/test_tools.py +0 -278
hud_python-0.5.40/hud/environment/types.py +0 -23
hud_python-0.5.40/hud/environment/utils/__init__.py +0 -33
hud_python-0.5.40/hud/environment/utils/formats.py +0 -214
hud_python-0.5.40/hud/environment/utils/schema.py +0 -55
hud_python-0.5.40/hud/environment/utils/tool_wrappers.py +0 -113
hud_python-0.5.40/hud/eval/__init__.py +0 -67
hud_python-0.5.40/hud/eval/context.py +0 -821
hud_python-0.5.40/hud/eval/display.py +0 -304
hud_python-0.5.40/hud/eval/instrument.py +0 -187
hud_python-0.5.40/hud/eval/manager.py +0 -459
hud_python-0.5.40/hud/eval/parallel.py +0 -268
hud_python-0.5.40/hud/eval/task.py +0 -468
hud_python-0.5.40/hud/eval/tests/__init__.py +0 -1
hud_python-0.5.40/hud/eval/tests/test_context.py +0 -434
hud_python-0.5.40/hud/eval/tests/test_eval.py +0 -245
hud_python-0.5.40/hud/eval/tests/test_manager.py +0 -238
hud_python-0.5.40/hud/eval/tests/test_parallel.py +0 -168
hud_python-0.5.40/hud/eval/tests/test_task.py +0 -347
hud_python-0.5.40/hud/eval/types.py +0 -66
hud_python-0.5.40/hud/eval/utils.py +0 -194
hud_python-0.5.40/hud/native/__init__.py +0 -36
hud_python-0.5.40/hud/native/chat.py +0 -74
hud_python-0.5.40/hud/native/graders.py +0 -581
hud_python-0.5.40/hud/native/permissions.py +0 -170
hud_python-0.5.40/hud/native/skills.py +0 -127
hud_python-0.5.40/hud/native/tests/__init__.py +0 -1
hud_python-0.5.40/hud/native/tests/test_graders.py +0 -233
hud_python-0.5.40/hud/patches/__init__.py +0 -19
hud_python-0.5.40/hud/patches/warnings.py +0 -54
hud_python-0.5.40/hud/server/__init__.py +0 -6
hud_python-0.5.40/hud/server/context.py +0 -114
hud_python-0.5.40/hud/server/helper/__init__.py +0 -5
hud_python-0.5.40/hud/server/low_level.py +0 -133
hud_python-0.5.40/hud/server/router.py +0 -122
hud_python-0.5.40/hud/server/server.py +0 -1011
hud_python-0.5.40/hud/server/tests/__init__.py +0 -3
hud_python-0.5.40/hud/server/tests/test_add_tool.py +0 -60
hud_python-0.5.40/hud/server/tests/test_context.py +0 -128
hud_python-0.5.40/hud/server/tests/test_mcp_server_handlers.py +0 -44
hud_python-0.5.40/hud/server/tests/test_mcp_server_integration.py +0 -405
hud_python-0.5.40/hud/server/tests/test_mcp_server_more.py +0 -249
hud_python-0.5.40/hud/server/tests/test_prefix_naming.py +0 -100
hud_python-0.5.40/hud/server/tests/test_run_wrapper.py +0 -53
hud_python-0.5.40/hud/server/tests/test_server_extra.py +0 -169
hud_python-0.5.40/hud/server/tests/test_sigterm_runner.py +0 -79
hud_python-0.5.40/hud/services/__init__.py +0 -9
hud_python-0.5.40/hud/services/chat.py +0 -366
hud_python-0.5.40/hud/services/chat_service.py +0 -274
hud_python-0.5.40/hud/services/reply_metadata.py +0 -50
hud_python-0.5.40/hud/services/tests/test_chat.py +0 -265
hud_python-0.5.40/hud/services/tests/test_chat_service.py +0 -152
hud_python-0.5.40/hud/shared/__init__.py +0 -5
hud_python-0.5.40/hud/shared/exceptions.py +0 -393
hud_python-0.5.40/hud/shared/tests/test_exceptions.py +0 -427
hud_python-0.5.40/hud/telemetry/__init__.py +0 -27
hud_python-0.5.40/hud/telemetry/exporter.py +0 -196
hud_python-0.5.40/hud/telemetry/instrument.py +0 -349
hud_python-0.5.40/hud/telemetry/tests/test_eval_telemetry.py +0 -356
hud_python-0.5.40/hud/telemetry/tests/test_exporter.py +0 -258
hud_python-0.5.40/hud/tools/__init__.py +0 -146
hud_python-0.5.40/hud/tools/agent.py +0 -223
hud_python-0.5.40/hud/tools/base.py +0 -541
hud_python-0.5.40/hud/tools/coding/__init__.py +0 -66
hud_python-0.5.40/hud/tools/coding/apply_patch.py +0 -670
hud_python-0.5.40/hud/tools/coding/bash.py +0 -231
hud_python-0.5.40/hud/tools/coding/edit.py +0 -293
hud_python-0.5.40/hud/tools/coding/gemini_edit.py +0 -340
hud_python-0.5.40/hud/tools/coding/gemini_shell.py +0 -228
hud_python-0.5.40/hud/tools/coding/gemini_write.py +0 -92
hud_python-0.5.40/hud/tools/coding/session.py +0 -231
hud_python-0.5.40/hud/tools/coding/shell.py +0 -179
hud_python-0.5.40/hud/tools/coding/tests/__init__.py +0 -1
hud_python-0.5.40/hud/tools/coding/tests/test_apply_patch.py +0 -718
hud_python-0.5.40/hud/tools/coding/tests/test_bash.py +0 -268
hud_python-0.5.40/hud/tools/coding/tests/test_bash_extended.py +0 -224
hud_python-0.5.40/hud/tools/coding/tests/test_bash_integration.py +0 -80
hud_python-0.5.40/hud/tools/coding/tests/test_edit.py +0 -244
hud_python-0.5.40/hud/tools/coding/tests/test_gemini_tools.py +0 -295
hud_python-0.5.40/hud/tools/coding/tests/test_shell.py +0 -724
hud_python-0.5.40/hud/tools/coding/utils.py +0 -241
hud_python-0.5.40/hud/tools/computer/__init__.py +0 -54
hud_python-0.5.40/hud/tools/computer/anthropic.py +0 -721
hud_python-0.5.40/hud/tools/computer/gemini.py +0 -389
hud_python-0.5.40/hud/tools/computer/glm.py +0 -516
hud_python-0.5.40/hud/tools/computer/hud.py +0 -491
hud_python-0.5.40/hud/tools/computer/openai.py +0 -336
hud_python-0.5.40/hud/tools/computer/qwen.py +0 -443
hud_python-0.5.40/hud/tools/computer/settings.py +0 -139
hud_python-0.5.40/hud/tools/computer/tests/__init__.py +0 -1
hud_python-0.5.40/hud/tools/computer/tests/test_compression.py +0 -164
hud_python-0.5.40/hud/tools/computer/tests/test_computer.py +0 -515
hud_python-0.5.40/hud/tools/computer/tests/test_computer_actions.py +0 -56
hud_python-0.5.40/hud/tools/computer/tests/test_glm_computer.py +0 -315
hud_python-0.5.40/hud/tools/elicitation.py +0 -91
hud_python-0.5.40/hud/tools/executors/__init__.py +0 -30
hud_python-0.5.40/hud/tools/executors/base.py +0 -622
hud_python-0.5.40/hud/tools/executors/pyautogui.py +0 -652
hud_python-0.5.40/hud/tools/executors/tests/__init__.py +0 -1
hud_python-0.5.40/hud/tools/executors/tests/test_base_executor.py +0 -365
hud_python-0.5.40/hud/tools/executors/tests/test_pyautogui_executor.py +0 -165
hud_python-0.5.40/hud/tools/executors/xdo.py +0 -554
hud_python-0.5.40/hud/tools/filesystem/__init__.py +0 -84
hud_python-0.5.40/hud/tools/filesystem/base.py +0 -719
hud_python-0.5.40/hud/tools/filesystem/gemini.py +0 -556
hud_python-0.5.40/hud/tools/filesystem/gemini_read_many.py +0 -207
hud_python-0.5.40/hud/tools/filesystem/glob.py +0 -128
hud_python-0.5.40/hud/tools/filesystem/grep.py +0 -135
hud_python-0.5.40/hud/tools/filesystem/list.py +0 -170
hud_python-0.5.40/hud/tools/filesystem/read.py +0 -143
hud_python-0.5.40/hud/tools/filesystem/tests/__init__.py +0 -1
hud_python-0.5.40/hud/tools/filesystem/tests/test_glob.py +0 -109
hud_python-0.5.40/hud/tools/filesystem/tests/test_grep.py +0 -160
hud_python-0.5.40/hud/tools/filesystem/tests/test_list.py +0 -115
hud_python-0.5.40/hud/tools/filesystem/tests/test_read.py +0 -170
hud_python-0.5.40/hud/tools/filesystem/tests/test_read_many.py +0 -121
hud_python-0.5.40/hud/tools/grounding/__init__.py +0 -13
hud_python-0.5.40/hud/tools/grounding/config.py +0 -54
hud_python-0.5.40/hud/tools/grounding/grounded_tool.py +0 -309
hud_python-0.5.40/hud/tools/grounding/grounder.py +0 -281
hud_python-0.5.40/hud/tools/grounding/tests/__init__.py +0 -1
hud_python-0.5.40/hud/tools/grounding/tests/test_grounded_tool.py +0 -178
hud_python-0.5.40/hud/tools/hosted/__init__.py +0 -26
hud_python-0.5.40/hud/tools/hosted/base.py +0 -47
hud_python-0.5.40/hud/tools/hosted/code_execution.py +0 -90
hud_python-0.5.40/hud/tools/hosted/google_search.py +0 -107
hud_python-0.5.40/hud/tools/hosted/tool_search.py +0 -82
hud_python-0.5.40/hud/tools/hosted/url_context.py +0 -32
hud_python-0.5.40/hud/tools/hosted/web_fetch.py +0 -81
hud_python-0.5.40/hud/tools/hosted/web_search.py +0 -73
hud_python-0.5.40/hud/tools/jupyter.py +0 -330
hud_python-0.5.40/hud/tools/memory/__init__.py +0 -50
hud_python-0.5.40/hud/tools/memory/base.py +0 -222
hud_python-0.5.40/hud/tools/memory/claude.py +0 -290
hud_python-0.5.40/hud/tools/memory/gemini.py +0 -199
hud_python-0.5.40/hud/tools/memory/session.py +0 -221
hud_python-0.5.40/hud/tools/memory/tests/__init__.py +0 -1
hud_python-0.5.40/hud/tools/memory/tests/test_claude.py +0 -329
hud_python-0.5.40/hud/tools/memory/tests/test_gemini.py +0 -85
hud_python-0.5.40/hud/tools/memory/tests/test_session.py +0 -249
hud_python-0.5.40/hud/tools/native_types.py +0 -102
hud_python-0.5.40/hud/tools/playwright.py +0 -427
hud_python-0.5.40/hud/tools/response.py +0 -65
hud_python-0.5.40/hud/tools/submit.py +0 -66
hud_python-0.5.40/hud/tools/tests/__init__.py +0 -3
hud_python-0.5.40/hud/tools/tests/test_agent_tool.py +0 -355
hud_python-0.5.40/hud/tools/tests/test_base.py +0 -270
hud_python-0.5.40/hud/tools/tests/test_elicitation.py +0 -118
hud_python-0.5.40/hud/tools/tests/test_init.py +0 -28
hud_python-0.5.40/hud/tools/tests/test_jupyter_tool.py +0 -181
hud_python-0.5.40/hud/tools/tests/test_native_tool_e2e.py +0 -862
hud_python-0.5.40/hud/tools/tests/test_native_types.py +0 -516
hud_python-0.5.40/hud/tools/tests/test_playwright_tool.py +0 -183
hud_python-0.5.40/hud/tools/tests/test_response.py +0 -60
hud_python-0.5.40/hud/tools/tests/test_submit.py +0 -85
hud_python-0.5.40/hud/tools/tests/test_tools.py +0 -148
hud_python-0.5.40/hud/tools/tests/test_tools_init.py +0 -49
hud_python-0.5.40/hud/tools/tests/test_types.py +0 -516
hud_python-0.5.40/hud/tools/tests/test_utils.py +0 -156
hud_python-0.5.40/hud/tools/types.py +0 -280
hud_python-0.5.40/hud/tools/utils.py +0 -50
hud_python-0.5.40/hud/types.py +0 -494
hud_python-0.5.40/hud/utils/__init__.py +0 -10
hud_python-0.5.40/hud/utils/env.py +0 -67
hud_python-0.5.40/hud/utils/mcp.py +0 -15
hud_python-0.5.40/hud/utils/pretty_errors.py +0 -68
hud_python-0.5.40/hud/utils/tests/test_init.py +0 -10
hud_python-0.5.40/hud/utils/tests/test_pretty_errors.py +0 -186
hud_python-0.5.40/hud/utils/tests/test_tool_shorthand.py +0 -154
hud_python-0.5.40/hud/utils/tests/test_version.py +0 -8
hud_python-0.5.40/hud/utils/tool_shorthand.py +0 -62
hud_python-0.5.40/hud/utils/types.py +0 -20
{hud_python-0.5.40 → hud_python-0.6.0}/LICENSE +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/__main__.py +0 -0
{hud_python-0.5.40/hud/cli/convert → hud_python-0.6.0/hud/agents/claude/tools}/tests/__init__.py +0 -0
{hud_python-0.5.40/hud/cli/flows → hud_python-0.6.0/hud/agents/gemini/tools/tests}/__init__.py +0 -0
{hud_python-0.5.40/hud/cli/utils → hud_python-0.6.0/hud/agents/openai/tools}/tests/__init__.py +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/agents/tests/__init__.py +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/__main__.py +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/tests/__init__.py +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/tests/test_cli_main.py +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/tests/test_eval_bedrock.py +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/tests/test_main_module.py +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/utils/__init__.py +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/utils/context.py +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/utils/tasks.py +0 -0
{hud_python-0.5.40/hud/datasets → hud_python-0.6.0/hud/cli/utils}/tests/__init__.py +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/utils/tests/test_config.py +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/cli/utils/tests/test_tasks.py +0 -0
{hud_python-0.5.40/hud/services → hud_python-0.6.0/hud/environment}/tests/__init__.py +0 -0
{hud_python-0.5.40/hud/shared → hud_python-0.6.0/hud/eval}/tests/__init__.py +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/patches/mcp_patches.py +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/py.typed +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/telemetry/tests/__init__.py +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/utils/tests/__init__.py +0 -0
{hud_python-0.5.40 → hud_python-0.6.0}/hud/utils/tests/test_serialization.py +0 -0

{hud_python-0.5.40 → hud_python-0.6.0}/.gitignore RENAMED Viewed

@@ -34,7 +34,6 @@ TODO.md
 /dev/
 .claude
-CLAUDE.md
 *.csv
 .rl_config_*.json
@@ -54,4 +53,13 @@ hud/rl/checkpoints_test/
 .ck/
 .hud_eval_config
-.hud_eval.toml
+.hud_eval.toml
+docs/internal
+environments/
+experiments/
+.memories/
+.codex/

hud_python-0.6.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,278 @@
+Metadata-Version: 2.4
+Name: hud-python
+Version: 0.6.0
+Summary: SDK for the HUD platform.
+Project-URL: Homepage, https://github.com/hud-evals/hud-python
+Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
+Project-URL: Documentation, https://docs.hud.ai
+Author-email: HUD <founders@hud.ai>
+License: MIT License
+        Copyright (c) 2025 Human Union Data, Inc
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+License-File: LICENSE
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: <3.13,>=3.11
+Requires-Dist: anthropic>=0.78.0
+Requires-Dist: asyncssh>=2.23.0
+Requires-Dist: asyncvnc>=1.3.0
+Requires-Dist: fastmcp==3.0.2
+Requires-Dist: google-genai
+Requires-Dist: httpx<1,>=0.23.0
+Requires-Dist: mcp<2.0,>=1.24.0
+Requires-Dist: openai>=2.26.0
+Requires-Dist: packaging>=21.0
+Requires-Dist: pillow>=11.0.0
+Requires-Dist: prompt-toolkit==3.0.51
+Requires-Dist: pydantic-settings<3,>=2.2
+Requires-Dist: pydantic<3,>=2.6
+Requires-Dist: questionary==2.1.0
+Requires-Dist: rich>=13.0.0
+Requires-Dist: typer>=0.9.0
+Requires-Dist: websockets>=15.0.1
+Provides-Extra: agent
+Provides-Extra: agents
+Provides-Extra: bedrock
+Requires-Dist: anthropic[bedrock]>=0.78.0; extra == 'bedrock'
+Provides-Extra: browseruse
+Requires-Dist: browser-use>=0.11.13; extra == 'browseruse'
+Provides-Extra: daytona
+Requires-Dist: daytona>=0.100; extra == 'daytona'
+Provides-Extra: dev
+Requires-Dist: dotenv>=0.9.9; extra == 'dev'
+Requires-Dist: pyright==1.1.407; extra == 'dev'
+Requires-Dist: pytest-asyncio; extra == 'dev'
+Requires-Dist: pytest-cov; extra == 'dev'
+Requires-Dist: pytest-mock; extra == 'dev'
+Requires-Dist: pytest>=8.1.1; extra == 'dev'
+Requires-Dist: ruff<0.15.0,>=0.11.8; extra == 'dev'
+Provides-Extra: modal
+Requires-Dist: modal>=1.0; extra == 'modal'
+Provides-Extra: robot
+Requires-Dist: numpy>=1.24; extra == 'robot'
+Requires-Dist: openpi-client>=0.1.2; extra == 'robot'
+Provides-Extra: train
+Requires-Dist: torch>=2; extra == 'train'
+Description-Content-Type: text/markdown
+<div align="left">
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo_dark.svg">
+    <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg">
+    <img src="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg" alt="HUD" width="150" style="margin-bottom: 24px;"/>
+  </picture>
+</div>
+HUD is a platform for building RL environments for AI agents, across coding, browser, computer-use, and robotics. Define an environment, write tasks, and run them as evals and training across any model, at any scale.
+To learn more, see the [documentation](https://docs.hud.ai) and [API reference](https://docs.hud.ai/reference/environment).
+[![PyPI](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
+[![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
+[![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
+[![Discord](https://img.shields.io/discord/1327447144772407390?label=Discord&logo=discord&style=flat-square)](https://discord.gg/wkjtmHYYjm)
+[![X Follow](https://img.shields.io/twitter/follow/hud_evals?style=social)](https://x.com/intent/user?screen_name=hud_evals)
+[![Scarf](https://static.scarf.sh/a.png?x-pxid=6530ff33-4945-452b-81f9-626872593933)](https://scarf.sh)
+[![Docs](https://img.shields.io/badge/docs-hud.ai-blue?style=flat-square)](https://docs.hud.ai)
+## Install
+```bash
+# Install the CLI (recommended)
+uv tool install hud-python --python 3.12
+# …or as a library
+pip install hud-python
+```
+Get your API key at [hud.ai/project/api-keys](https://hud.ai/project/api-keys) and set it:
+```bash
+hud set HUD_API_KEY=your-key-here
+# or: export HUD_API_KEY=your-key-here
+```
+Then scaffold your first environment:
+```bash
+hud init my-env
+```
+![Agent running on SheetBench](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/trace_sheet.gif)
+## The protocol
+HUD is **protocol-first**. An agent and an environment exchange just three things: a **manifest** (the environment's capabilities and tasks), **`tasks.start`** that returns the prompt, and **`tasks.grade`** that returns the reward. In between, the agent just *works*, driving the capabilities itself. HUD owns only that thin envelope, so any model or harness plugs into any environment.
+```mermaid
+sequenceDiagram
+    participant Agent
+    participant Env as Environment
+    participant Caps as Capabilities (ssh · mcp · cdp · rfb · robot)
+    Agent->>Env: manifest exchange
+    Env-->>Agent: capabilities + tasks
+    Agent->>Env: tasks.start
+    Env-->>Agent: prompt
+    rect rgb(238,238,238)
+    Note over Agent,Caps: the agent works, driving capabilities directly
+    Agent->>Caps: shell · browser · GUI · tools · robot
+    Caps-->>Agent: observations
+    end
+    Agent->>Env: tasks.grade
+    Env-->>Agent: reward
+```
+Because the protocol only exposes **capabilities** (never a fixed agent), an environment outlives any single harness: new harnesses and models keep running against the same environments, benchmarks, and tasks.
+## Package & run anywhere
+A built image is the **end product for your tasks**: one build packs every task from a single definition. The recommended path is **`hud deploy`**, which builds and registers your environment on HUD in one step; then sync a taskset and run remotely:
+```bash
+hud deploy
+hud sync tasks my-taskset
+hud eval my-taskset --remote
+```
+For local iteration, the same protocol works against a container on your laptop:
+```bash
+hud build .
+docker run -d --name run1 my-env
+docker exec run1 hud task start fix_bug
+docker exec run1 hud task grade fix_bug --answer "…"
+docker rm -f run1
+```
+→ [Package & deploy](https://docs.hud.ai/run/deploy)
+## Environments & templates
+A **template** is an async generator registered with `@env.template()`: `yield` a prompt, receive the agent's answer, `yield` a reward. Calling the template mints a runnable **Task**; one function spans a whole dataset of variants. The simplest template needs no capabilities — just a prompt and a grader:
+```python
+from hud import Environment
+env = Environment(name="letter-count")
+@env.template()
+async def count_letter(word: str = "strawberry", letter: str = "r"):
+    answer = yield f"How many '{letter}'s are in '{word}'? Reply with just the number."
+    yield 1.0 if answer and str(word.count(letter)) in answer else 0.0
+tasks = [count_letter(word=w) for w in ("strawberry", "raspberry", "blueberry")]
+```
+Run it immediately against any model:
+```bash
+hud eval tasks.py claude --group 3
+```
+Each graded evaluation is a **trace** (the SDK's live handle is a `Run`). With `HUD_API_KEY` set, every rollout is recorded on [hud.ai](https://hud.ai). Tasks that need a shell, browser, GUI, or robot declare **capabilities** (below); everything else — variants, grading, batching — stays identical.
+→ [Quickstart](https://docs.hud.ai/quickstart) · [Tasks & tasksets](https://docs.hud.ai/reference/tasks)
+## Capabilities & harnesses
+A **capability** is a connection the environment exposes; a **harness** attaches its own tools to it. The same environment serves a one-shot Q&A or a full computer-use rollout, depending on which capabilities the harness opens.
+| Protocol | What it exposes |
+|----------|-----------------|
+| **`ssh`** | Shell + files in a sandboxed workspace (`env.workspace(root)`) |
+| **`mcp`** | Tools over the Model Context Protocol |
+| **`cdp`** | Browser control over the Chrome DevTools Protocol |
+| **`rfb`** | Full computer-use over VNC: screen + keyboard/mouse |
+| **`robot`** *(beta)* | Schema-driven robot observation/action loop over WebSocket |
+**Ships natively:** Claude, OpenAI (Responses), OpenAI-compatible endpoints, and Gemini via `create_agent("claude-sonnet-4-5")` (or `gpt-…`, `gemini-…`). The harness wires capability-backed tools for the model you choose at run time.
+**Bring your own:** a harness attaches to a capability and defines a tool spec — wrap `browser-use` on `cdp`, a VLA policy on `robot`, or your own agent on `ssh` / `mcp`. No protocol work required.
+→ [Capabilities](https://docs.hud.ai/reference/capabilities) · [Models](https://docs.hud.ai/run/models) · [Robots](https://docs.hud.ai/reference/robots)
+## Deploy on the platform
+From the [platform UI](https://hud.ai) you can run batches, compare models on the same taskset, and inspect every trace.
+→ [Deploy](https://docs.hud.ai/run/deploy) · [Leaderboards](https://hud.ai/leaderboards)
+## Train on rewards
+Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and turn the rewards into GRPO advantages with `group_relative()`:
+```python
+from hud.agents import create_agent
+from hud.eval import Taskset, group_relative
+agent = create_agent("claude-sonnet-4-5")
+job = await Taskset(count_letter(word=w) for w in words).run(agent, group=16)
+for runs in job.results.values():
+    advantages = group_relative([r.reward for r in runs], normalize_std=True)
+    ...  # feed (run.trace_id, adv) into your optimizer
+```
+HUD is the environment-and-reward source for your own GRPO/PPO loop — the same environment trains any model, text or multimodal, unchanged.
+→ [Training](https://docs.hud.ai/run/training) · [Designing tasks for signal](https://docs.hud.ai/run/signal)
+## Links
+- [Documentation](https://docs.hud.ai)
+- [Quickstart](https://docs.hud.ai/quickstart)
+- [CLI reference](https://docs.hud.ai/reference/cli)
+- [Leaderboards](https://hud.ai/leaderboards)
+- [Environment templates](https://hud.ai/environments)
+- [Supported models](https://hud.ai/models)
+- [Discord](https://discord.gg/wkjtmHYYjm)
+## Enterprise
+Building agents at scale? We work with teams on custom environments, benchmarks, and training.
+[📅 Book a call](https://cal.com/jay-hud) · [📧 founders@hud.ai](mailto:founders@hud.ai)
+## Contributing
+We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md).
+Key areas: [Agents](hud/agents/) · [Environments](hud/environment/) · [Capabilities](hud/capabilities/) · [Eval](hud/eval/)
+<a href="https://github.com/hud-evals/hud-python/graphs/contributors">
+  <img src="https://contrib.rocks/image?repo=hud-evals/hud-python&max=50" />
+</a>
+## Citation
+```bibtex
+@software{hud2025agentevalplatform,
+  author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
+  title  = {HUD: An Evaluation and RL Environments Platform for Agents},
+  date   = {2025-04},
+  url    = {https://github.com/hud-evals/hud-python},
+  langid = {en}
+}
+```
+MIT License · [LICENSE](LICENSE)

hud_python-0.6.0/README.md ADDED Viewed

@@ -0,0 +1,200 @@
+<div align="left">
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo_dark.svg">
+    <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg">
+    <img src="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg" alt="HUD" width="150" style="margin-bottom: 24px;"/>
+  </picture>
+</div>
+HUD is a platform for building RL environments for AI agents, across coding, browser, computer-use, and robotics. Define an environment, write tasks, and run them as evals and training across any model, at any scale.
+To learn more, see the [documentation](https://docs.hud.ai) and [API reference](https://docs.hud.ai/reference/environment).
+[![PyPI](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
+[![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
+[![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
+[![Discord](https://img.shields.io/discord/1327447144772407390?label=Discord&logo=discord&style=flat-square)](https://discord.gg/wkjtmHYYjm)
+[![X Follow](https://img.shields.io/twitter/follow/hud_evals?style=social)](https://x.com/intent/user?screen_name=hud_evals)
+[![Scarf](https://static.scarf.sh/a.png?x-pxid=6530ff33-4945-452b-81f9-626872593933)](https://scarf.sh)
+[![Docs](https://img.shields.io/badge/docs-hud.ai-blue?style=flat-square)](https://docs.hud.ai)
+## Install
+```bash
+# Install the CLI (recommended)
+uv tool install hud-python --python 3.12
+# …or as a library
+pip install hud-python
+```
+Get your API key at [hud.ai/project/api-keys](https://hud.ai/project/api-keys) and set it:
+```bash
+hud set HUD_API_KEY=your-key-here
+# or: export HUD_API_KEY=your-key-here
+```
+Then scaffold your first environment:
+```bash
+hud init my-env
+```
+![Agent running on SheetBench](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/trace_sheet.gif)
+## The protocol
+HUD is **protocol-first**. An agent and an environment exchange just three things: a **manifest** (the environment's capabilities and tasks), **`tasks.start`** that returns the prompt, and **`tasks.grade`** that returns the reward. In between, the agent just *works*, driving the capabilities itself. HUD owns only that thin envelope, so any model or harness plugs into any environment.
+```mermaid
+sequenceDiagram
+    participant Agent
+    participant Env as Environment
+    participant Caps as Capabilities (ssh · mcp · cdp · rfb · robot)
+    Agent->>Env: manifest exchange
+    Env-->>Agent: capabilities + tasks
+    Agent->>Env: tasks.start
+    Env-->>Agent: prompt
+    rect rgb(238,238,238)
+    Note over Agent,Caps: the agent works, driving capabilities directly
+    Agent->>Caps: shell · browser · GUI · tools · robot
+    Caps-->>Agent: observations
+    end
+    Agent->>Env: tasks.grade
+    Env-->>Agent: reward
+```
+Because the protocol only exposes **capabilities** (never a fixed agent), an environment outlives any single harness: new harnesses and models keep running against the same environments, benchmarks, and tasks.
+## Package & run anywhere
+A built image is the **end product for your tasks**: one build packs every task from a single definition. The recommended path is **`hud deploy`**, which builds and registers your environment on HUD in one step; then sync a taskset and run remotely:
+```bash
+hud deploy
+hud sync tasks my-taskset
+hud eval my-taskset --remote
+```
+For local iteration, the same protocol works against a container on your laptop:
+```bash
+hud build .
+docker run -d --name run1 my-env
+docker exec run1 hud task start fix_bug
+docker exec run1 hud task grade fix_bug --answer "…"
+docker rm -f run1
+```
+→ [Package & deploy](https://docs.hud.ai/run/deploy)
+## Environments & templates
+A **template** is an async generator registered with `@env.template()`: `yield` a prompt, receive the agent's answer, `yield` a reward. Calling the template mints a runnable **Task**; one function spans a whole dataset of variants. The simplest template needs no capabilities — just a prompt and a grader:
+```python
+from hud import Environment
+env = Environment(name="letter-count")
+@env.template()
+async def count_letter(word: str = "strawberry", letter: str = "r"):
+    answer = yield f"How many '{letter}'s are in '{word}'? Reply with just the number."
+    yield 1.0 if answer and str(word.count(letter)) in answer else 0.0
+tasks = [count_letter(word=w) for w in ("strawberry", "raspberry", "blueberry")]
+```
+Run it immediately against any model:
+```bash
+hud eval tasks.py claude --group 3
+```
+Each graded evaluation is a **trace** (the SDK's live handle is a `Run`). With `HUD_API_KEY` set, every rollout is recorded on [hud.ai](https://hud.ai). Tasks that need a shell, browser, GUI, or robot declare **capabilities** (below); everything else — variants, grading, batching — stays identical.
+→ [Quickstart](https://docs.hud.ai/quickstart) · [Tasks & tasksets](https://docs.hud.ai/reference/tasks)
+## Capabilities & harnesses
+A **capability** is a connection the environment exposes; a **harness** attaches its own tools to it. The same environment serves a one-shot Q&A or a full computer-use rollout, depending on which capabilities the harness opens.
+| Protocol | What it exposes |
+|----------|-----------------|
+| **`ssh`** | Shell + files in a sandboxed workspace (`env.workspace(root)`) |
+| **`mcp`** | Tools over the Model Context Protocol |
+| **`cdp`** | Browser control over the Chrome DevTools Protocol |
+| **`rfb`** | Full computer-use over VNC: screen + keyboard/mouse |
+| **`robot`** *(beta)* | Schema-driven robot observation/action loop over WebSocket |
+**Ships natively:** Claude, OpenAI (Responses), OpenAI-compatible endpoints, and Gemini via `create_agent("claude-sonnet-4-5")` (or `gpt-…`, `gemini-…`). The harness wires capability-backed tools for the model you choose at run time.
+**Bring your own:** a harness attaches to a capability and defines a tool spec — wrap `browser-use` on `cdp`, a VLA policy on `robot`, or your own agent on `ssh` / `mcp`. No protocol work required.
+→ [Capabilities](https://docs.hud.ai/reference/capabilities) · [Models](https://docs.hud.ai/run/models) · [Robots](https://docs.hud.ai/reference/robots)
+## Deploy on the platform
+From the [platform UI](https://hud.ai) you can run batches, compare models on the same taskset, and inspect every trace.
+→ [Deploy](https://docs.hud.ai/run/deploy) · [Leaderboards](https://hud.ai/leaderboards)
+## Train on rewards
+Every rollout returns a `Run` carrying a `trace_id` and a `reward`, so the tasks you evaluate are already training data. Run a **group** per task and turn the rewards into GRPO advantages with `group_relative()`:
+```python
+from hud.agents import create_agent
+from hud.eval import Taskset, group_relative
+agent = create_agent("claude-sonnet-4-5")
+job = await Taskset(count_letter(word=w) for w in words).run(agent, group=16)
+for runs in job.results.values():
+    advantages = group_relative([r.reward for r in runs], normalize_std=True)
+    ...  # feed (run.trace_id, adv) into your optimizer
+```
+HUD is the environment-and-reward source for your own GRPO/PPO loop — the same environment trains any model, text or multimodal, unchanged.
+→ [Training](https://docs.hud.ai/run/training) · [Designing tasks for signal](https://docs.hud.ai/run/signal)
+## Links
+- [Documentation](https://docs.hud.ai)
+- [Quickstart](https://docs.hud.ai/quickstart)
+- [CLI reference](https://docs.hud.ai/reference/cli)
+- [Leaderboards](https://hud.ai/leaderboards)
+- [Environment templates](https://hud.ai/environments)
+- [Supported models](https://hud.ai/models)
+- [Discord](https://discord.gg/wkjtmHYYjm)
+## Enterprise
+Building agents at scale? We work with teams on custom environments, benchmarks, and training.
+[📅 Book a call](https://cal.com/jay-hud) · [📧 founders@hud.ai](mailto:founders@hud.ai)
+## Contributing
+We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md).
+Key areas: [Agents](hud/agents/) · [Environments](hud/environment/) · [Capabilities](hud/capabilities/) · [Eval](hud/eval/)
+<a href="https://github.com/hud-evals/hud-python/graphs/contributors">
+  <img src="https://contrib.rocks/image?repo=hud-evals/hud-python&max=50" />
+</a>
+## Citation
+```bibtex
+@software{hud2025agentevalplatform,
+  author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
+  title  = {HUD: An Evaluation and RL Environments Platform for Agents},
+  date   = {2025-04},
+  url    = {https://github.com/hud-evals/hud-python},
+  langid = {en}
+}
+```
+MIT License · [LICENSE](LICENSE)

hud_python-0.6.0/cookbooks/a2a-chat/README.md ADDED Viewed

@@ -0,0 +1,37 @@
+# A2A Chat
+Serve a HUD chat task over the [A2A protocol](https://github.com/google/a2a),
+and talk to it from Python clients.
+`hud.Chat` is protocol-agnostic — these scripts are the protocol layer, kept
+outside the SDK on purpose. Copy and adapt them.
+| File | What it does |
+|------|--------------|
+| `server.py` | A2A server: one `Chat` (conversation) per A2A context, agent card, citations artifact |
+| `client.py` | Minimal A2A client: send messages, print replies |
+| `llm_client.py` | LLM-fronted client: an OpenAI model decides when to call the A2A agent as a tool |
+| `chat_env.py` | Sample chat environment with `messages`-style tasks to serve |
+## Run
+From this directory (uv resolves the dependencies on first run):
+```bash
+# Terminal 1: serve the bundled chat task (spawns chat_env.py per turn)
+uv run server.py
+# Terminal 2: talk to it
+uv run client.py            # plain client
+uv run llm_client.py        # LLM-fronted client
+```
+Configuration is via env vars: `HUD_MODEL` picks the agent's model (gateway,
+needs `HUD_API_KEY`), `HUD_TASK`/`HUD_ENV` pick the task row, `HUD_SOURCE`
+spawns a different env source, and `HUD_ENV_URL` attaches each turn to an
+already-served control channel (e.g. `hud serve chat_env.py` →
+`HUD_ENV_URL=tcp://127.0.0.1:8765`) instead of spawning.
+The server publishes an agent card at `/.well-known/agent-card.json` and
+accepts A2A messages at the root endpoint. The configured task should accept a
+`messages` argument for multi-turn history (see `chat_env.py`).

hud_python-0.6.0/cookbooks/a2a-chat/pyproject.toml ADDED Viewed

@@ -0,0 +1,18 @@
+[project]
+name = "a2a-chat"
+version = "0.1.0"
+description = "Serve a HUD chat task over the A2A protocol (cookbook)"
+requires-python = ">=3.11,<3.13"
+dependencies = [
+    "hud-python",
+    # The scripts are written against the 0.3.x server API.
+    "a2a-sdk==0.3.26",
+]
+[tool.uv]
+package = false
+# Track the SDK from this repo. If you copied this folder out, delete this
+# block to use the released hud-python from PyPI.
+[tool.uv.sources]
+hud-python = { path = "../..", editable = true }

hud_python-0.6.0/cookbooks/codex-coding/README.md ADDED Viewed

@@ -0,0 +1,23 @@
+# Codex Coding Agent
+Build your own [Codex](https://github.com/openai/codex) with the HUD SDK: an
+environment exposes an `ssh` capability backed by a `Workspace`, and
+`OpenAIAgent` drives it with OpenAI's native `shell` and `apply_patch` tools —
+the same protocol the `codex` CLI uses.
+## Run
+From this directory (requires `HUD_API_KEY` for gateway inference):
+```bash
+uv run codex_agent.py
+# Custom task
+uv run codex_agent.py --task "Create a Python script that prints the Fibonacci sequence"
+# Custom working directory
+uv run codex_agent.py --work-dir ./codex_output
+```
+To run the same environment as a packaged, sandboxed box instead of on your
+machine, see `hud deploy` and `RemoteSandbox` in the deploy docs.

hud_python-0.6.0/cookbooks/codex-coding/pyproject.toml ADDED Viewed

@@ -0,0 +1,17 @@
+[project]
+name = "codex-coding"
+version = "0.1.0"
+description = "Build your own Codex with the HUD SDK (cookbook)"
+requires-python = ">=3.11,<3.13"
+dependencies = [
+    "hud-python",
+    "python-dotenv",
+]
+[tool.uv]
+package = false
+# Track the SDK from this repo. If you copied this folder out, delete this
+# block to use the released hud-python from PyPI.
+[tool.uv.sources]
+hud-python = { path = "../..", editable = true }

hud-python 0.5.40__tar.gz → 0.6.0__tar.gz

hud-python 0.5.40tar.gz → 0.6.0tar.gz