hud-python 0.4.61__tar.gz → 0.4.62__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hud_python-0.4.61 → hud_python-0.4.62}/PKG-INFO +43 -43
- {hud_python-0.4.61 → hud_python-0.4.62}/README.md +40 -40
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/blank/README.md +3 -3
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/browser/README.md +2 -2
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/browser/server/pyproject.toml +1 -1
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/deepresearch/README.md +3 -3
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/jupyter/README.md +2 -2
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/jupyter/server/pyproject.toml +2 -2
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/rubrics/README.md +3 -3
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/__init__.py +5 -5
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/flows/tasks.py +3 -3
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/rl/remote_runner.py +1 -1
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_convert.py +13 -13
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/clients/base.py +1 -1
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/clients/fastmcp.py +1 -1
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/samples/browser.py +1 -1
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/settings.py +4 -4
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/shared/exceptions.py +1 -1
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/shared/tests/test_exceptions.py +1 -1
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/types.py +1 -1
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/tests/test_version.py +1 -1
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/version.py +1 -1
- {hud_python-0.4.61 → hud_python-0.4.62}/pyproject.toml +3 -3
- {hud_python-0.4.61 → hud_python-0.4.62}/.gitignore +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/LICENSE +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/README.md +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/blank/environment/README.md +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/blank/environment/pyproject.toml +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/blank/server/README.md +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/blank/server/pyproject.toml +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/browser/browser-base/README.md +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/browser/environment/2048/README.md +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/browser/environment/2048/backend/pyproject.toml +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/browser/environment/README.md +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/browser/environment/pyproject.toml +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/browser/environment/todo/README.md +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/browser/environment/todo/backend/pyproject.toml +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/browser/pyproject.toml +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/deepresearch/environment/pyproject.toml +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/deepresearch/pyproject.toml +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/deepresearch/server/pyproject.toml +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/online_mind2web/README.md +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/online_mind2web/pyproject.toml +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/online_mind2web/src/hud_controller/providers/README.md +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/remote_browser/README.md +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/remote_browser/pyproject.toml +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/rubrics/environment/pyproject.toml +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/rubrics/pyproject.toml +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/rubrics/server/pyproject.toml +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/text_2048/README.md +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/environments/text_2048/pyproject.toml +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/examples/README.md +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/__main__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/base.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/claude.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/gemini.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/grounded_openai.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/langchain.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/lite_llm.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/misc/integration_test_agent.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/misc/response_agent.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/openai.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/openai_chat_generic.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/tests/test_base_runtime.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/tests/test_claude.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/tests/test_client.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/tests/test_gemini.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/tests/test_openai.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/agents/utils.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/__main__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/analyze.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/build.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/clone.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/debug.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/dev.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/eval.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/flows/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/flows/dev.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/get.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/init.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/list_func.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/pull.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/push.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/remove.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/rl/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/rl/celebrate.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/rl/config.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/rl/display.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/rl/gpu.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/rl/gpu_utils.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/rl/local_runner.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/rl/presets.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/rl/rl_api.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/rl/viewer.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/rl/vllm.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/rl/wait_utils.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_analyze.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_analyze_module.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_build.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_build_failure.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_build_module.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_cli_root.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_clone.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_cursor.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_debug.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_eval.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_list_func.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_mcp_server.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_pull.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_push.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_push_happy.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_push_wrapper.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_registry.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/config.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/cursor.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/docker.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/env_check.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/environment.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/interactive.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/local_runner.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/metadata.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/package_runner.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/remote_runner.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/runner.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/server.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/source_hash.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/tests/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/tests/test_config.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/tests/test_docker.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/tests/test_docker_hints.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/tests/test_env_check.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/tests/test_environment.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/tests/test_interactive_module.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/tests/test_local_runner.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/tests/test_logging_utils.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/tests/test_metadata.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/tests/test_package_runner.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/tests/test_registry_utils.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/tests/test_remote_runner.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/tests/test_runner_modules.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/tests/test_source_hash.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/tests/test_tasks.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/cli/utils/version_check.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/clients/README.md +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/clients/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/clients/mcp_use.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/clients/tests/test_client_integration.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/clients/tests/test_fastmcp.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/clients/tests/test_mcp_use_retry.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/clients/tests/test_protocol.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/clients/utils/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/clients/utils/mcp_use_retry.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/clients/utils/retry.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/clients/utils/retry_transport.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/datasets/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/datasets/parallel.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/datasets/runner.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/datasets/tests/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/datasets/tests/test_runner.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/datasets/tests/test_utils.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/datasets/utils.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/misc/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/misc/claude_plays_pokemon.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/native/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/native/comparator.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/native/tests/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/native/tests/test_comparator.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/native/tests/test_native_init.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/otel/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/otel/collector.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/otel/config.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/otel/context.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/otel/exporters.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/otel/instrumentation.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/otel/processors.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/otel/tests/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/otel/tests/test_instrumentation.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/otel/tests/test_processors.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/py.typed +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/rl/README.md +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/rl/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/rl/actor.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/rl/buffer.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/rl/chat_template.jinja +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/rl/config.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/rl/distributed.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/rl/learner.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/rl/tests/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/rl/tests/test_learner.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/rl/train.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/rl/types.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/rl/utils/start_vllm_server.sh +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/rl/utils.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/rl/vllm_adapter.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/samples/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/server/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/server/context.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/server/low_level.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/server/router.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/server/server.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/server/tests/test_add_tool.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/server/tests/test_context.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/server/tests/test_mcp_server_handlers.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/server/tests/test_mcp_server_integration.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/server/tests/test_mcp_server_more.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/server/tests/test_run_wrapper.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/server/tests/test_server_extra.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/server/tests/test_sigterm_runner.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/shared/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/shared/hints.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/shared/requests.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/shared/tests/test_hints.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/telemetry/async_context.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/telemetry/job.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/telemetry/replay.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/telemetry/tests/test_async_context.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/telemetry/tests/test_instrument.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/telemetry/tests/test_job.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/telemetry/tests/test_replay.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/telemetry/tests/test_trace.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/telemetry/trace.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/telemetry/utils.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/base.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/bash.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/computer/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/computer/anthropic.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/computer/gemini.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/computer/hud.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/computer/openai.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/computer/qwen.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/edit.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/executors/base.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/executors/tests/test_base_executor.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/grounding/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/grounding/config.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/grounding/grounded_tool.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/grounding/grounder.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/grounding/tests/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/jupyter.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/playwright.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/response.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/submit.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/tests/test_bash.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/tests/test_bash_extended.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/tests/test_computer.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/tests/test_computer_actions.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/tests/test_edit.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/tests/test_jupyter_tool.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/tests/test_submit.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/tests/test_tools_init.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/tests/test_types.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/types.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/tools/utils.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/agent_factories.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/async_utils.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/group_eval.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/hud_console.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/mcp.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/pretty_errors.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/progress.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/task_tracking.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/tasks.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/telemetry.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/tests/test_agent_factories.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/tests/test_async_utils.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/tests/test_mcp.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/tests/test_pretty_errors.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/tests/test_progress.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/tests/test_tasks.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/tests/test_telemetry.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/tests/test_tool_shorthand.py +0 -0
- {hud_python-0.4.61 → hud_python-0.4.62}/hud/utils/tool_shorthand.py +0 -0
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hud-python
|
|
3
|
-
Version: 0.4.61
|
|
3
|
+
Version: 0.4.62
|
|
4
4
|
Summary: SDK for the HUD platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hud-evals/hud-python
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
|
|
7
|
-
Project-URL: Documentation, https://docs.hud.so
|
|
8
|
-
Author-email: HUD
|
|
7
|
+
Project-URL: Documentation, https://docs.hud.ai
|
|
8
|
+
Author-email: HUD <founders@hud.ai>
|
|
9
9
|
License: MIT License
|
|
10
10
|
|
|
11
11
|
Copyright (c) 2025 Human Union Data, Inc
|
|
@@ -154,21 +154,21 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
|
|
|
154
154
|
[](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLnNvL21jcCJ9)
|
|
155
155
|
[](https://discord.gg/wkjtmHYYjm)
|
|
156
156
|
[](https://x.com/intent/user?screen_name=hud_evals)
|
|
157
|
-
[](https://shop.hud.
|
|
157
|
+
[](https://shop.hud.ai)
|
|
158
158
|
|
|
159
159
|
|
|
160
160
|
### Are you a startup building agents?
|
|
161
161
|
|
|
162
|
-
[📅 Hop on a call](https://cal.com/jay-ram-z6st6w/demo) or [📧 founders@hud.
|
|
162
|
+
[📅 Hop on a call](https://cal.com/jay-ram-z6st6w/demo) or [📧 founders@hud.ai](mailto:founders@hud.ai)
|
|
163
163
|
|
|
164
164
|
## Highlights
|
|
165
165
|
|
|
166
|
-
- 🚀 **[MCP environment skeleton](https://docs.hud.
|
|
167
|
-
- ⚡️ **[Live telemetry](https://hud.
|
|
168
|
-
- 🗂️ **[Public benchmarks](https://hud.
|
|
166
|
+
- 🚀 **[MCP environment skeleton](https://docs.hud.ai/core-concepts/mcp-protocol)** – any agent can call any environment.
|
|
167
|
+
- ⚡️ **[Live telemetry](https://hud.ai)** – inspect every tool call, observation, and reward in real time.
|
|
168
|
+
- 🗂️ **[Public benchmarks](https://hud.ai/leaderboards)** – OSWorld-Verified, SheetBench-50, and more.
|
|
169
169
|
- 🌐 **[Cloud browsers](environments/remote_browser/)** – AnchorBrowser, Steel, BrowserBase integrations for browser automation.
|
|
170
170
|
- 🛠️ **[Hot-reload dev loop](environments/README.md#phase-5-hot-reload-development-with-cursor-agent)** – `hud dev` for iterating on environments without rebuilds.
|
|
171
|
-
- 🎓 **[One-click RL](https://hud.
|
|
171
|
+
- 🎓 **[One-click RL](https://hud.ai/models)** – Run `hud rl` to get a trained model on any environment.
|
|
172
172
|
|
|
173
173
|
> We welcome contributors and feature requests – open an issue or hop on a call to discuss improvements!
|
|
174
174
|
|
|
@@ -183,10 +183,10 @@ uv tool install hud-python
|
|
|
183
183
|
# uv tool update-shell
|
|
184
184
|
```
|
|
185
185
|
|
|
186
|
-
> See [docs.hud.
|
|
187
|
-
> `claude mcp add --transport http docs-hud https://docs.hud.so/mcp`
|
|
186
|
+
> See [docs.hud.ai](https://docs.hud.ai), or add docs to any MCP client:
|
|
187
|
+
> `claude mcp add --transport http docs-hud https://docs.hud.ai/mcp`
|
|
188
188
|
|
|
189
|
-
Before starting, get your HUD_API_KEY at [hud.
|
|
189
|
+
Before starting, get your HUD_API_KEY at [hud.ai](https://hud.ai).
|
|
190
190
|
|
|
191
191
|
|
|
192
192
|
## Quickstart: Evals
|
|
@@ -204,17 +204,17 @@ import asyncio, hud, os
|
|
|
204
204
|
from hud.settings import settings
|
|
205
205
|
from hud.clients import MCPClient
|
|
206
206
|
from hud.agents import ClaudeAgent
|
|
207
|
-
from hud.datasets import Task # See docs: https://docs.hud.so/reference/tasks
|
|
207
|
+
from hud.datasets import Task # See docs: https://docs.hud.ai/reference/tasks
|
|
208
208
|
|
|
209
209
|
async def main() -> None:
|
|
210
|
-
with hud.trace("Quick Start 2048"): # All telemetry works for any MCP-based agent (see https://hud.
|
|
210
|
+
with hud.trace("Quick Start 2048"): # All telemetry works for any MCP-based agent (see https://hud.ai)
|
|
211
211
|
task = {
|
|
212
212
|
"prompt": "Reach 64 in 2048.",
|
|
213
213
|
"mcp_config": {
|
|
214
214
|
"hud": {
|
|
215
|
-
"url": "https://mcp.hud.
|
|
215
|
+
"url": "https://mcp.hud.ai/v3/mcp", # HUD's cloud MCP server (see https://docs.hud.ai/core-concepts/architecture)
|
|
216
216
|
"headers": {
|
|
217
|
-
"Authorization": f"Bearer {settings.api_key}", # Get your key at https://hud.
|
|
217
|
+
"Authorization": f"Bearer {settings.api_key}", # Get your key at https://hud.ai
|
|
218
218
|
"Mcp-Image": "hudpython/hud-text-2048:v1.2" # Docker image from https://hub.docker.com/u/hudpython
|
|
219
219
|
}
|
|
220
220
|
}
|
|
@@ -241,7 +241,7 @@ async def main() -> None:
|
|
|
241
241
|
asyncio.run(main())
|
|
242
242
|
```
|
|
243
243
|
|
|
244
|
-
The above example let's the agent play 2048 ([See replay](https://hud.
|
|
244
|
+
The above example let's the agent play 2048 ([See replay](https://hud.ai/trace/6feed7bd-5f67-4d66-b77f-eb1e3164604f))
|
|
245
245
|
|
|
246
246
|

|
|
247
247
|
|
|
@@ -254,7 +254,7 @@ hud get hud-evals/2048-basic # from HF
|
|
|
254
254
|
hud rl 2048-basic.json
|
|
255
255
|
```
|
|
256
256
|
|
|
257
|
-
> See [agent training docs](https://docs.hud.
|
|
257
|
+
> See [agent training docs](https://docs.hud.ai/train-agents/quickstart)
|
|
258
258
|
|
|
259
259
|
Or make your own environment and dataset:
|
|
260
260
|
|
|
@@ -265,7 +265,7 @@ hud dev --interactive
|
|
|
265
265
|
hud rl
|
|
266
266
|
```
|
|
267
267
|
|
|
268
|
-
> See [environment design docs](https://docs.hud.
|
|
268
|
+
> See [environment design docs](https://docs.hud.ai/build-environments)
|
|
269
269
|
|
|
270
270
|
## Benchmarking Agents
|
|
271
271
|
|
|
@@ -273,7 +273,7 @@ This is Claude Computer Use running on our proprietary financial analyst benchma
|
|
|
273
273
|
|
|
274
274
|

|
|
275
275
|
|
|
276
|
-
> [See this trace on _hud.
|
|
276
|
+
> [See this trace on _hud.ai_](https://hud.ai/trace/9e212e9e-3627-4f1f-9eb5-c6d03c59070a)
|
|
277
277
|
|
|
278
278
|
This example runs the full dataset (only takes ~20 minutes) using [run_evaluation.py](examples/run_evaluation.py):
|
|
279
279
|
|
|
@@ -291,7 +291,7 @@ from hud.agents import ClaudeAgent
|
|
|
291
291
|
results = await run_dataset(
|
|
292
292
|
name="My SheetBench-50 Evaluation",
|
|
293
293
|
dataset="hud-evals/SheetBench-50", # <-- HuggingFace dataset
|
|
294
|
-
agent_class=ClaudeAgent, # <-- Your custom agent can replace this (see https://docs.hud.
|
|
294
|
+
agent_class=ClaudeAgent, # <-- Your custom agent can replace this (see https://docs.hud.ai/evaluate-agents/create-agents)
|
|
295
295
|
agent_config={"model": "claude-sonnet-4-20250514"},
|
|
296
296
|
max_concurrent=50,
|
|
297
297
|
max_steps=30,
|
|
@@ -299,13 +299,13 @@ results = await run_dataset(
|
|
|
299
299
|
print(f"Average reward: {sum(r.reward for r in results) / len(results):.2f}")
|
|
300
300
|
```
|
|
301
301
|
|
|
302
|
-
> Running a dataset creates a job and streams results to the [hud.
|
|
302
|
+
> Running a dataset creates a job and streams results to the [hud.ai](https://hud.ai) platform for analysis and [leaderboard submission](https://docs.hud.ai/evaluate-agents/leaderboards).
|
|
303
303
|
|
|
304
304
|
## Building Environments (MCP)
|
|
305
305
|
|
|
306
306
|
This is how you can make any environment into an interactable one in 5 steps:
|
|
307
307
|
|
|
308
|
-
1. Define MCP server layer using [`MCPServer`](https://docs.hud.
|
|
308
|
+
1. Define MCP server layer using [`MCPServer`](https://docs.hud.ai/reference/environments)
|
|
309
309
|
|
|
310
310
|
```python
|
|
311
311
|
from hud.server import MCPServer
|
|
@@ -313,10 +313,10 @@ from hud.tools import HudComputerTool
|
|
|
313
313
|
|
|
314
314
|
mcp = MCPServer("My Environment")
|
|
315
315
|
|
|
316
|
-
# Add hud tools (see all tools: https://docs.hud.
|
|
316
|
+
# Add hud tools (see all tools: https://docs.hud.ai/reference/tools)
|
|
317
317
|
mcp.tool(HudComputerTool())
|
|
318
318
|
|
|
319
|
-
# Or custom tools (see https://docs.hud.
|
|
319
|
+
# Or custom tools (see https://docs.hud.ai/build-environments/adapting-software)
|
|
320
320
|
@mcp.tool("launch_app")
|
|
321
321
|
def launch_app(name: str = "Gmail"):
|
|
322
322
|
...
|
|
@@ -390,16 +390,16 @@ Tools
|
|
|
390
390
|
hud push # needs docker login, hud api key
|
|
391
391
|
```
|
|
392
392
|
|
|
393
|
-
5. Now you can use `mcp.hud.
|
|
393
|
+
5. Now you can use `mcp.hud.ai` to launch 100s of instances of this environment in parallel with any agent, and see everything live on [hud.ai](https://hud.ai):
|
|
394
394
|
|
|
395
395
|
```python
|
|
396
396
|
from hud.agents import ClaudeAgent
|
|
397
397
|
|
|
398
|
-
result = await ClaudeAgent().run({ # See all agents: https://docs.hud.so/reference/agents
|
|
398
|
+
result = await ClaudeAgent().run({ # See all agents: https://docs.hud.ai/reference/agents
|
|
399
399
|
"prompt": "Please explore this environment",
|
|
400
400
|
"mcp_config": {
|
|
401
401
|
"my-environment": {
|
|
402
|
-
"url": "https://mcp.hud.
|
|
402
|
+
"url": "https://mcp.hud.ai/v3/mcp",
|
|
403
403
|
"headers": {
|
|
404
404
|
"Authorization": f"Bearer {os.getenv('HUD_API_KEY')}",
|
|
405
405
|
"Mcp-Image": "my-name/my-environment:latest"
|
|
@@ -421,13 +421,13 @@ result = await ClaudeAgent().run({ # See all agents: https://docs.hud.so/refere
|
|
|
421
421
|
|
|
422
422
|
## Leaderboards & benchmarks
|
|
423
423
|
|
|
424
|
-
All leaderboards are publicly available on [hud.
|
|
424
|
+
All leaderboards are publicly available on [hud.ai/leaderboards](https://hud.ai/leaderboards) (see [docs](https://docs.hud.ai/evaluate-agents/leaderboards))
|
|
425
425
|
|
|
426
426
|

|
|
427
427
|
|
|
428
428
|
We highly suggest running 3-5 evaluations per dataset for the most consistent results across multiple jobs.
|
|
429
429
|
|
|
430
|
-
Using the [`run_dataset`](https://docs.hud.
|
|
430
|
+
Using the [`run_dataset`](https://docs.hud.ai/reference/tasks#run_dataset) function with a HuggingFace dataset automatically assigns your job to that leaderboard page, and allows you to create a scorecard out of it:
|
|
431
431
|
|
|
432
432
|
## Reinforcement Learning with GRPO
|
|
433
433
|
|
|
@@ -456,11 +456,11 @@ Supports multi‑turn RL for both:
|
|
|
456
456
|
- Language‑only models (e.g., `Qwen/Qwen2.5-7B-Instruct`)
|
|
457
457
|
- Vision‑Language models (e.g., `Qwen/Qwen2.5-VL-3B-Instruct`)
|
|
458
458
|
|
|
459
|
-
By default, `hud rl` provisions a persistent server and trainer in the cloud, streams telemetry to `hud.
|
|
459
|
+
By default, `hud rl` provisions a persistent server and trainer in the cloud, streams telemetry to `hud.ai`, and lets you monitor/manage models at `hud.ai/models`. Use `--local` to run entirely on your machines (typically 2+ GPUs: one for vLLM, the rest for training).
|
|
460
460
|
|
|
461
|
-
Any HUD MCP environment and evaluation works with our RL pipeline (including remote configurations). See the guided docs: `https://docs.hud.
|
|
461
|
+
Any HUD MCP environment and evaluation works with our RL pipeline (including remote configurations). See the guided docs: `https://docs.hud.ai/train-agents/quickstart`.
|
|
462
462
|
|
|
463
|
-
Pricing: Hosted vLLM and training GPU rates are listed in the [Training Quickstart → Pricing](https://docs.hud.
|
|
463
|
+
Pricing: Hosted vLLM and training GPU rates are listed in the [Training Quickstart → Pricing](https://docs.hud.ai/train-agents/quickstart#pricing). Manage billing at the [HUD billing dashboard](https://hud.ai/project/billing).
|
|
464
464
|
|
|
465
465
|
## Architecture
|
|
466
466
|
|
|
@@ -468,8 +468,8 @@ Pricing: Hosted vLLM and training GPU rates are listed in the [Training Quicksta
|
|
|
468
468
|
%%{init: {"theme": "neutral", "themeVariables": {"fontSize": "14px"}} }%%
|
|
469
469
|
graph LR
|
|
470
470
|
subgraph "Platform"
|
|
471
|
-
Dashboard["📊 hud.
|
|
472
|
-
API["🔌 mcp.hud.
|
|
471
|
+
Dashboard["📊 hud.ai"]
|
|
472
|
+
API["🔌 mcp.hud.ai"]
|
|
473
473
|
end
|
|
474
474
|
|
|
475
475
|
subgraph "hud"
|
|
@@ -507,14 +507,14 @@ graph LR
|
|
|
507
507
|
|
|
508
508
|
| Command | Purpose | Docs |
|
|
509
509
|
| ----------------------- | ------------------------------------------ | ---- |
|
|
510
|
-
| [`hud init`](https://docs.hud.
|
|
511
|
-
| [`hud dev`](https://docs.hud.
|
|
512
|
-
| [`hud build`](https://docs.hud.
|
|
513
|
-
| [`hud push`](https://docs.hud.
|
|
514
|
-
| [`hud pull <target>`](https://docs.hud.
|
|
515
|
-
| [`hud analyze <image>`](https://docs.hud.
|
|
516
|
-
| [`hud debug <image>`](https://docs.hud.
|
|
517
|
-
| [`hud run <image>`](https://docs.hud.
|
|
510
|
+
| [`hud init`](https://docs.hud.ai/reference/cli/init) | Create new environment with boilerplate. | [📖](https://docs.hud.ai/reference/cli/init) |
|
|
511
|
+
| [`hud dev`](https://docs.hud.ai/reference/cli/dev) | Hot-reload development with Docker. | [📖](https://docs.hud.ai/reference/cli/dev) |
|
|
512
|
+
| [`hud build`](https://docs.hud.ai/reference/cli/build) | Build image and generate lock file. | [📖](https://docs.hud.ai/reference/cli/build) |
|
|
513
|
+
| [`hud push`](https://docs.hud.ai/reference/cli/push) | Share environment to registry. | [📖](https://docs.hud.ai/reference/cli/push) |
|
|
514
|
+
| [`hud pull <target>`](https://docs.hud.ai/reference/cli/pull) | Get environment from registry. | [📖](https://docs.hud.ai/reference/cli/pull) |
|
|
515
|
+
| [`hud analyze <image>`](https://docs.hud.ai/reference/cli/analyze) | Discover tools, resources, and metadata. | [📖](https://docs.hud.ai/reference/cli/analyze) |
|
|
516
|
+
| [`hud debug <image>`](https://docs.hud.ai/reference/cli/debug) | Five-phase health check of an environment. | [📖](https://docs.hud.ai/reference/cli/debug) |
|
|
517
|
+
| [`hud run <image>`](https://docs.hud.ai/reference/cli/run) | Run MCP server locally or remotely. | [📖](https://docs.hud.ai/reference/cli/run) |
|
|
518
518
|
|
|
519
519
|
## Roadmap
|
|
520
520
|
|
|
@@ -13,21 +13,21 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
|
|
|
13
13
|
[](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
|
|
14
14
|
[](https://discord.gg/wkjtmHYYjm)
|
|
15
15
|
[](https://x.com/intent/user?screen_name=hud_evals)
|
|
16
|
-
[](https://shop.hud.
|
|
16
|
+
[](https://shop.hud.ai)
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
### Are you a startup building agents?
|
|
20
20
|
|
|
21
|
-
[📅 Hop on a call](https://cal.com/jay-ram-z6st6w/demo) or [📧 founders@hud.
|
|
21
|
+
[📅 Hop on a call](https://cal.com/jay-ram-z6st6w/demo) or [📧 founders@hud.ai](mailto:founders@hud.ai)
|
|
22
22
|
|
|
23
23
|
## Highlights
|
|
24
24
|
|
|
25
|
-
- 🚀 **[MCP environment skeleton](https://docs.hud.
|
|
26
|
-
- ⚡️ **[Live telemetry](https://hud.
|
|
27
|
-
- 🗂️ **[Public benchmarks](https://hud.
|
|
25
|
+
- 🚀 **[MCP environment skeleton](https://docs.hud.ai/core-concepts/mcp-protocol)** – any agent can call any environment.
|
|
26
|
+
- ⚡️ **[Live telemetry](https://hud.ai)** – inspect every tool call, observation, and reward in real time.
|
|
27
|
+
- 🗂️ **[Public benchmarks](https://hud.ai/leaderboards)** – OSWorld-Verified, SheetBench-50, and more.
|
|
28
28
|
- 🌐 **[Cloud browsers](environments/remote_browser/)** – AnchorBrowser, Steel, BrowserBase integrations for browser automation.
|
|
29
29
|
- 🛠️ **[Hot-reload dev loop](environments/README.md#phase-5-hot-reload-development-with-cursor-agent)** – `hud dev` for iterating on environments without rebuilds.
|
|
30
|
-
- 🎓 **[One-click RL](https://hud.
|
|
30
|
+
- 🎓 **[One-click RL](https://hud.ai/models)** – Run `hud rl` to get a trained model on any environment.
|
|
31
31
|
|
|
32
32
|
> We welcome contributors and feature requests – open an issue or hop on a call to discuss improvements!
|
|
33
33
|
|
|
@@ -42,10 +42,10 @@ uv tool install hud-python
|
|
|
42
42
|
# uv tool update-shell
|
|
43
43
|
```
|
|
44
44
|
|
|
45
|
-
> See [docs.hud.
|
|
46
|
-
> `claude mcp add --transport http docs-hud https://docs.hud.
|
|
45
|
+
> See [docs.hud.ai](https://docs.hud.ai), or add docs to any MCP client:
|
|
46
|
+
> `claude mcp add --transport http docs-hud https://docs.hud.ai/mcp`
|
|
47
47
|
|
|
48
|
-
Before starting, get your HUD_API_KEY at [hud.
|
|
48
|
+
Before starting, get your HUD_API_KEY at [hud.ai](https://hud.ai).
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
## Quickstart: Evals
|
|
@@ -63,17 +63,17 @@ import asyncio, hud, os
|
|
|
63
63
|
from hud.settings import settings
|
|
64
64
|
from hud.clients import MCPClient
|
|
65
65
|
from hud.agents import ClaudeAgent
|
|
66
|
-
from hud.datasets import Task # See docs: https://docs.hud.
|
|
66
|
+
from hud.datasets import Task # See docs: https://docs.hud.ai/reference/tasks
|
|
67
67
|
|
|
68
68
|
async def main() -> None:
|
|
69
|
-
with hud.trace("Quick Start 2048"): # All telemetry works for any MCP-based agent (see https://hud.
|
|
69
|
+
with hud.trace("Quick Start 2048"): # All telemetry works for any MCP-based agent (see https://hud.ai)
|
|
70
70
|
task = {
|
|
71
71
|
"prompt": "Reach 64 in 2048.",
|
|
72
72
|
"mcp_config": {
|
|
73
73
|
"hud": {
|
|
74
|
-
"url": "https://mcp.hud.
|
|
74
|
+
"url": "https://mcp.hud.ai/v3/mcp", # HUD's cloud MCP server (see https://docs.hud.ai/core-concepts/architecture)
|
|
75
75
|
"headers": {
|
|
76
|
-
"Authorization": f"Bearer {settings.api_key}", # Get your key at https://hud.
|
|
76
|
+
"Authorization": f"Bearer {settings.api_key}", # Get your key at https://hud.ai
|
|
77
77
|
"Mcp-Image": "hudpython/hud-text-2048:v1.2" # Docker image from https://hub.docker.com/u/hudpython
|
|
78
78
|
}
|
|
79
79
|
}
|
|
@@ -100,7 +100,7 @@ async def main() -> None:
|
|
|
100
100
|
asyncio.run(main())
|
|
101
101
|
```
|
|
102
102
|
|
|
103
|
-
The above example let's the agent play 2048 ([See replay](https://hud.
|
|
103
|
+
The above example lets the agent play 2048 ([See replay](https://hud.ai/trace/6feed7bd-5f67-4d66-b77f-eb1e3164604f))
|
|
104
104
|
|
|
105
105
|

|
|
106
106
|
|
|
@@ -113,7 +113,7 @@ hud get hud-evals/2048-basic # from HF
|
|
|
113
113
|
hud rl 2048-basic.json
|
|
114
114
|
```
|
|
115
115
|
|
|
116
|
-
> See [agent training docs](https://docs.hud.
|
|
116
|
+
> See [agent training docs](https://docs.hud.ai/train-agents/quickstart)
|
|
117
117
|
|
|
118
118
|
Or make your own environment and dataset:
|
|
119
119
|
|
|
@@ -124,7 +124,7 @@ hud dev --interactive
|
|
|
124
124
|
hud rl
|
|
125
125
|
```
|
|
126
126
|
|
|
127
|
-
> See [environment design docs](https://docs.hud.
|
|
127
|
+
> See [environment design docs](https://docs.hud.ai/build-environments)
|
|
128
128
|
|
|
129
129
|
## Benchmarking Agents
|
|
130
130
|
|
|
@@ -132,7 +132,7 @@ This is Claude Computer Use running on our proprietary financial analyst benchma
|
|
|
132
132
|
|
|
133
133
|

|
|
134
134
|
|
|
135
|
-
> [See this trace on _hud.
|
|
135
|
+
> [See this trace on _hud.ai_](https://hud.ai/trace/9e212e9e-3627-4f1f-9eb5-c6d03c59070a)
|
|
136
136
|
|
|
137
137
|
This example runs the full dataset (only takes ~20 minutes) using [run_evaluation.py](examples/run_evaluation.py):
|
|
138
138
|
|
|
@@ -150,7 +150,7 @@ from hud.agents import ClaudeAgent
|
|
|
150
150
|
results = await run_dataset(
|
|
151
151
|
name="My SheetBench-50 Evaluation",
|
|
152
152
|
dataset="hud-evals/SheetBench-50", # <-- HuggingFace dataset
|
|
153
|
-
agent_class=ClaudeAgent, # <-- Your custom agent can replace this (see https://docs.hud.
|
|
153
|
+
agent_class=ClaudeAgent, # <-- Your custom agent can replace this (see https://docs.hud.ai/evaluate-agents/create-agents)
|
|
154
154
|
agent_config={"model": "claude-sonnet-4-20250514"},
|
|
155
155
|
max_concurrent=50,
|
|
156
156
|
max_steps=30,
|
|
@@ -158,13 +158,13 @@ results = await run_dataset(
|
|
|
158
158
|
print(f"Average reward: {sum(r.reward for r in results) / len(results):.2f}")
|
|
159
159
|
```
|
|
160
160
|
|
|
161
|
-
> Running a dataset creates a job and streams results to the [hud.
|
|
161
|
+
> Running a dataset creates a job and streams results to the [hud.ai](https://hud.ai) platform for analysis and [leaderboard submission](https://docs.hud.ai/evaluate-agents/leaderboards).
|
|
162
162
|
|
|
163
163
|
## Building Environments (MCP)
|
|
164
164
|
|
|
165
165
|
This is how you can make any environment into an interactable one in 5 steps:
|
|
166
166
|
|
|
167
|
-
1. Define MCP server layer using [`MCPServer`](https://docs.hud.
|
|
167
|
+
1. Define MCP server layer using [`MCPServer`](https://docs.hud.ai/reference/environments)
|
|
168
168
|
|
|
169
169
|
```python
|
|
170
170
|
from hud.server import MCPServer
|
|
@@ -172,10 +172,10 @@ from hud.tools import HudComputerTool
|
|
|
172
172
|
|
|
173
173
|
mcp = MCPServer("My Environment")
|
|
174
174
|
|
|
175
|
-
# Add hud tools (see all tools: https://docs.hud.
|
|
175
|
+
# Add hud tools (see all tools: https://docs.hud.ai/reference/tools)
|
|
176
176
|
mcp.tool(HudComputerTool())
|
|
177
177
|
|
|
178
|
-
# Or custom tools (see https://docs.hud.
|
|
178
|
+
# Or custom tools (see https://docs.hud.ai/build-environments/adapting-software)
|
|
179
179
|
@mcp.tool("launch_app")
|
|
180
180
|
def launch_app(name: str = "Gmail"):
|
|
181
181
|
...
|
|
@@ -249,16 +249,16 @@ Tools
|
|
|
249
249
|
hud push # needs docker login, hud api key
|
|
250
250
|
```
|
|
251
251
|
|
|
252
|
-
5. Now you can use `mcp.hud.
|
|
252
|
+
5. Now you can use `mcp.hud.ai` to launch 100s of instances of this environment in parallel with any agent, and see everything live on [hud.ai](https://hud.ai):
|
|
253
253
|
|
|
254
254
|
```python
|
|
255
255
|
from hud.agents import ClaudeAgent
|
|
256
256
|
|
|
257
|
-
result = await ClaudeAgent().run({ # See all agents: https://docs.hud.so/reference/agents
|
|
257
|
+
result = await ClaudeAgent().run({ # See all agents: https://docs.hud.ai/reference/agents
|
|
258
258
|
"prompt": "Please explore this environment",
|
|
259
259
|
"mcp_config": {
|
|
260
260
|
"my-environment": {
|
|
261
|
-
"url": "https://mcp.hud.
|
|
261
|
+
"url": "https://mcp.hud.ai/v3/mcp",
|
|
262
262
|
"headers": {
|
|
263
263
|
"Authorization": f"Bearer {os.getenv('HUD_API_KEY')}",
|
|
264
264
|
"Mcp-Image": "my-name/my-environment:latest"
|
|
@@ -280,13 +280,13 @@ result = await ClaudeAgent().run({ # See all agents: https://docs.hud.so/refere
|
|
|
280
280
|
|
|
281
281
|
## Leaderboards & benchmarks
|
|
282
282
|
|
|
283
|
-
All leaderboards are publicly available on [hud.
|
|
283
|
+
All leaderboards are publicly available on [hud.ai/leaderboards](https://hud.ai/leaderboards) (see [docs](https://docs.hud.ai/evaluate-agents/leaderboards))
|
|
284
284
|
|
|
285
285
|

|
|
286
286
|
|
|
287
287
|
We highly suggest running 3-5 evaluations per dataset for the most consistent results across multiple jobs.
|
|
288
288
|
|
|
289
|
-
Using the [`run_dataset`](https://docs.hud.
|
|
289
|
+
Using the [`run_dataset`](https://docs.hud.ai/reference/tasks#run_dataset) function with a HuggingFace dataset automatically assigns your job to that leaderboard page, and allows you to create a scorecard out of it:
|
|
290
290
|
|
|
291
291
|
## Reinforcement Learning with GRPO
|
|
292
292
|
|
|
@@ -315,11 +315,11 @@ Supports multi‑turn RL for both:
|
|
|
315
315
|
- Language‑only models (e.g., `Qwen/Qwen2.5-7B-Instruct`)
|
|
316
316
|
- Vision‑Language models (e.g., `Qwen/Qwen2.5-VL-3B-Instruct`)
|
|
317
317
|
|
|
318
|
-
By default, `hud rl` provisions a persistent server and trainer in the cloud, streams telemetry to `hud.
|
|
318
|
+
By default, `hud rl` provisions a persistent server and trainer in the cloud, streams telemetry to `hud.ai`, and lets you monitor/manage models at `hud.ai/models`. Use `--local` to run entirely on your machines (typically 2+ GPUs: one for vLLM, the rest for training).
|
|
319
319
|
|
|
320
|
-
Any HUD MCP environment and evaluation works with our RL pipeline (including remote configurations). See the guided docs: `https://docs.hud.
|
|
320
|
+
Any HUD MCP environment and evaluation works with our RL pipeline (including remote configurations). See the guided docs: `https://docs.hud.ai/train-agents/quickstart`.
|
|
321
321
|
|
|
322
|
-
Pricing: Hosted vLLM and training GPU rates are listed in the [Training Quickstart → Pricing](https://docs.hud.
|
|
322
|
+
Pricing: Hosted vLLM and training GPU rates are listed in the [Training Quickstart → Pricing](https://docs.hud.ai/train-agents/quickstart#pricing). Manage billing at the [HUD billing dashboard](https://hud.ai/project/billing).
|
|
323
323
|
|
|
324
324
|
## Architecture
|
|
325
325
|
|
|
@@ -327,8 +327,8 @@ Pricing: Hosted vLLM and training GPU rates are listed in the [Training Quicksta
|
|
|
327
327
|
%%{init: {"theme": "neutral", "themeVariables": {"fontSize": "14px"}} }%%
|
|
328
328
|
graph LR
|
|
329
329
|
subgraph "Platform"
|
|
330
|
-
Dashboard["📊 hud.
|
|
331
|
-
API["🔌 mcp.hud.
|
|
330
|
+
Dashboard["📊 hud.ai"]
|
|
331
|
+
API["🔌 mcp.hud.ai"]
|
|
332
332
|
end
|
|
333
333
|
|
|
334
334
|
subgraph "hud"
|
|
@@ -366,14 +366,14 @@ graph LR
|
|
|
366
366
|
|
|
367
367
|
| Command | Purpose | Docs |
|
|
368
368
|
| ----------------------- | ------------------------------------------ | ---- |
|
|
369
|
-
| [`hud init`](https://docs.hud.
|
|
370
|
-
| [`hud dev`](https://docs.hud.
|
|
371
|
-
| [`hud build`](https://docs.hud.
|
|
372
|
-
| [`hud push`](https://docs.hud.
|
|
373
|
-
| [`hud pull <target>`](https://docs.hud.
|
|
374
|
-
| [`hud analyze <image>`](https://docs.hud.
|
|
375
|
-
| [`hud debug <image>`](https://docs.hud.
|
|
376
|
-
| [`hud run <image>`](https://docs.hud.
|
|
369
|
+
| [`hud init`](https://docs.hud.ai/reference/cli/init) | Create new environment with boilerplate. | [📖](https://docs.hud.ai/reference/cli/init) |
|
|
370
|
+
| [`hud dev`](https://docs.hud.ai/reference/cli/dev) | Hot-reload development with Docker. | [📖](https://docs.hud.ai/reference/cli/dev) |
|
|
371
|
+
| [`hud build`](https://docs.hud.ai/reference/cli/build) | Build image and generate lock file. | [📖](https://docs.hud.ai/reference/cli/build) |
|
|
372
|
+
| [`hud push`](https://docs.hud.ai/reference/cli/push) | Share environment to registry. | [📖](https://docs.hud.ai/reference/cli/push) |
|
|
373
|
+
| [`hud pull <target>`](https://docs.hud.ai/reference/cli/pull) | Get environment from registry. | [📖](https://docs.hud.ai/reference/cli/pull) |
|
|
374
|
+
| [`hud analyze <image>`](https://docs.hud.ai/reference/cli/analyze) | Discover tools, resources, and metadata. | [📖](https://docs.hud.ai/reference/cli/analyze) |
|
|
375
|
+
| [`hud debug <image>`](https://docs.hud.ai/reference/cli/debug) | Five-phase health check of an environment. | [📖](https://docs.hud.ai/reference/cli/debug) |
|
|
376
|
+
| [`hud run <image>`](https://docs.hud.ai/reference/cli/run) | Run MCP server locally or remotely. | [📖](https://docs.hud.ai/reference/cli/run) |
|
|
377
377
|
|
|
378
378
|
## Roadmap
|
|
379
379
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# Blank Environment
|
|
2
2
|
|
|
3
3
|
Minimal starter template for building HUD environments.
|
|
4
|
-
See [docs](https://docs.hud.
|
|
4
|
+
See [docs](https://docs.hud.ai/build-environments) for the complete environment design workflow.
|
|
5
5
|
|
|
6
6
|
## Architecture
|
|
7
7
|
|
|
@@ -120,9 +120,9 @@ save_tasks(tasks, repo_id="your-org/your-dataset")
|
|
|
120
120
|
hud eval "your-org/your-dataset" claude
|
|
121
121
|
|
|
122
122
|
# View results at:
|
|
123
|
-
# hud.
|
|
123
|
+
# hud.ai/leaderboards/your-org/your-dataset
|
|
124
124
|
```
|
|
125
125
|
|
|
126
126
|
**Note**: Only public HuggingFace datasets appear as leaderboards!
|
|
127
127
|
|
|
128
|
-
📚 Learn more: [Creating Benchmarks](https://docs.hud.
|
|
128
|
+
📚 Learn more: [Creating Benchmarks](https://docs.hud.ai/evaluate-agents/create-benchmarks) | [Leaderboards](https://docs.hud.ai/evaluate-agents/leaderboards)
|
|
@@ -74,12 +74,12 @@ save_tasks(tasks, repo_id="your-org/your-dataset")
|
|
|
74
74
|
hud eval "your-org/your-dataset" --agent claude
|
|
75
75
|
|
|
76
76
|
# View results at:
|
|
77
|
-
# hud.
|
|
77
|
+
# hud.ai/leaderboards/your-org/your-dataset
|
|
78
78
|
```
|
|
79
79
|
|
|
80
80
|
**Note**: Only public HuggingFace datasets appear as leaderboards!
|
|
81
81
|
|
|
82
|
-
📚 Learn more: [Creating Benchmarks](https://docs.hud.
|
|
82
|
+
📚 Learn more: [Creating Benchmarks](https://docs.hud.ai/evaluate-agents/create-benchmarks) | [Leaderboards](https://docs.hud.ai/evaluate-agents/leaderboards)
|
|
83
83
|
|
|
84
84
|
## Architecture Overview
|
|
85
85
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# Deep Research Environment
|
|
2
2
|
|
|
3
3
|
Web research environment powered by Exa API for searching and fetching content.
|
|
4
|
-
See [docs](https://docs.hud.
|
|
4
|
+
See [docs](https://docs.hud.ai/build-environments) for the complete environment design workflow.
|
|
5
5
|
|
|
6
6
|
## Architecture
|
|
7
7
|
|
|
@@ -141,12 +141,12 @@ save_tasks(tasks, repo_id="your-org/your-dataset")
|
|
|
141
141
|
hud eval "your-org/your-dataset" --agent claude
|
|
142
142
|
|
|
143
143
|
# View results at:
|
|
144
|
-
# hud.
|
|
144
|
+
# hud.ai/leaderboards/your-org/your-dataset
|
|
145
145
|
```
|
|
146
146
|
|
|
147
147
|
**Note**: Only public HuggingFace datasets appear as leaderboards!
|
|
148
148
|
|
|
149
|
-
📚 Learn more: [Creating Benchmarks](https://docs.hud.
|
|
149
|
+
📚 Learn more: [Creating Benchmarks](https://docs.hud.ai/evaluate-agents/create-benchmarks) | [Leaderboards](https://docs.hud.ai/evaluate-agents/leaderboards)
|
|
150
150
|
|
|
151
151
|
## Example Research Workflow
|
|
152
152
|
|
|
@@ -60,8 +60,8 @@ Here we introduce the main parts of the environments
|
|
|
60
60
|
* [Genteki/SpreadSheetBench](https://huggingface.co/datasets/Genteki/SpreadSheetBench) (Size: 912)
|
|
61
61
|
|
|
62
62
|
### Example Traces (May require permission)
|
|
63
|
-
* [Single Test Task](https://www.hud.
|
|
64
|
-
* [Genteki/SpreadSheetBench-Tiny Test](https://www.hud.
|
|
63
|
+
* [Single Test Task](https://www.hud.ai/trace/d31de170-e70a-4abb-8f95-70512515dade)
|
|
64
|
+
* [Genteki/SpreadSheetBench-Tiny Test](https://www.hud.ai/jobs/2c426368-e352-4c79-af4a-aefb136e3f58)
|
|
65
65
|
|
|
66
66
|
### Github
|
|
67
67
|
|
|
@@ -3,9 +3,9 @@ name = "sheet-mcp-server"
|
|
|
3
3
|
version = "0.1.0"
|
|
4
4
|
description = "MCP server for XLSX spreadsheet manipulation"
|
|
5
5
|
authors = [{name = "HUD Team"}]
|
|
6
|
-
requires-python = ">=3.11"
|
|
6
|
+
requires-python = ">=3.11,<3.14"
|
|
7
7
|
dependencies = [
|
|
8
|
-
"hud-python==0.4.
|
|
8
|
+
"hud-python==0.4.62",
|
|
9
9
|
"pandas>=2.0.0",
|
|
10
10
|
"openpyxl>=3.1.0",
|
|
11
11
|
"xlsxwriter>=3.1.0",
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
SEC filing research environment powered by the SEC EDGAR database for accessing company filings and financial data, with rubric-based evaluation for structured grading provided by [The LLM Data Company](https://llmdata.com).
|
|
4
4
|
|
|
5
|
-
See [docs](https://docs.hud.
|
|
5
|
+
See [docs](https://docs.hud.ai/build-environments) for the complete environment design workflow.
|
|
6
6
|
|
|
7
7
|
## Architecture
|
|
8
8
|
|
|
@@ -177,12 +177,12 @@ save_tasks(tasks, repo_id="your-org/your-dataset")
|
|
|
177
177
|
hud eval "your-org/your-dataset" --agent claude
|
|
178
178
|
|
|
179
179
|
# View results at:
|
|
180
|
-
# hud.
|
|
180
|
+
# hud.ai/leaderboards/your-org/your-dataset
|
|
181
181
|
```
|
|
182
182
|
|
|
183
183
|
**Note**: Only public HuggingFace datasets appear as leaderboards!
|
|
184
184
|
|
|
185
|
-
📚 Learn more: [Creating Benchmarks](https://docs.hud.
|
|
185
|
+
📚 Learn more: [Creating Benchmarks](https://docs.hud.ai/evaluate-agents/create-benchmarks) | [Leaderboards](https://docs.hud.ai/evaluate-agents/leaderboards)
|
|
186
186
|
|
|
187
187
|
## Example Research Workflow
|
|
188
188
|
|