hud-python 0.4.66__tar.gz → 0.4.68__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hud_python-0.4.66 → hud_python-0.4.68}/.gitignore +2 -1
- {hud_python-0.4.66 → hud_python-0.4.68}/PKG-INFO +11 -35
- {hud_python-0.4.66 → hud_python-0.4.68}/README.md +5 -5
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/README.md +1 -1
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/pyproject.toml +1 -1
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/online_mind2web/pyproject.toml +1 -1
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/__init__.py +5 -3
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/base.py +105 -98
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/claude.py +76 -71
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/gemini.py +42 -43
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/grounded_openai.py +66 -67
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/misc/integration_test_agent.py +12 -4
- hud_python-0.4.68/hud/agents/misc/response_agent.py +101 -0
- hud_python-0.4.68/hud/agents/openai.py +362 -0
- hud_python-0.4.66/hud/agents/openai_chat_generic.py → hud_python-0.4.68/hud/agents/openai_chat.py +47 -32
- hud_python-0.4.68/hud/agents/operator.py +211 -0
- hud_python-0.4.68/hud/agents/tests/conftest.py +124 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/tests/test_base.py +60 -64
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/tests/test_base_runtime.py +48 -35
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/tests/test_claude.py +22 -34
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/tests/test_gemini.py +46 -63
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/tests/test_grounded_openai_agent.py +10 -48
- hud_python-0.4.68/hud/agents/tests/test_openai.py +1083 -0
- hud_python-0.4.68/hud/agents/tests/test_operator.py +308 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/__init__.py +118 -244
- hud_python-0.4.68/hud/cli/eval.py +741 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_cli_init.py +6 -1
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_eval.py +17 -156
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_tasks.py +5 -5
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/README.md +12 -11
- hud_python-0.4.68/hud/datasets/__init__.py +33 -0
- hud_python-0.4.68/hud/datasets/runner.py +295 -0
- hud_python-0.4.68/hud/datasets/tests/test_utils.py +319 -0
- hud_python-0.4.68/hud/datasets/utils.py +411 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/actor.py +3 -3
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/server.py +17 -30
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/settings.py +6 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/tests/test_job.py +0 -8
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/computer/hud.py +17 -1
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/computer/openai.py +14 -7
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_computer.py +0 -8
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_computer_actions.py +22 -1
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/types.py +58 -4
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/__init__.py +2 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/hud_console.py +12 -1
- hud_python-0.4.68/hud/utils/strict_schema.py +162 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tasks.py +59 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tests/test_init.py +1 -2
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tests/test_tasks.py +170 -1
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tests/test_version.py +1 -1
- hud_python-0.4.68/hud/utils/types.py +20 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/version.py +1 -1
- {hud_python-0.4.66 → hud_python-0.4.68}/pyproject.toml +4 -14
- hud_python-0.4.66/hud/agents/langchain.py +0 -264
- hud_python-0.4.66/hud/agents/lite_llm.py +0 -72
- hud_python-0.4.66/hud/agents/misc/response_agent.py +0 -100
- hud_python-0.4.66/hud/agents/openai.py +0 -356
- hud_python-0.4.66/hud/agents/tests/test_openai.py +0 -213
- hud_python-0.4.66/hud/cli/eval.py +0 -913
- hud_python-0.4.66/hud/cli/eval_config.py +0 -179
- hud_python-0.4.66/hud/datasets/__init__.py +0 -33
- hud_python-0.4.66/hud/datasets/parallel.py +0 -675
- hud_python-0.4.66/hud/datasets/runner.py +0 -135
- hud_python-0.4.66/hud/datasets/tests/test_utils.py +0 -228
- hud_python-0.4.66/hud/datasets/utils.py +0 -118
- hud_python-0.4.66/hud/utils/agent_factories.py +0 -84
- hud_python-0.4.66/hud/utils/async_utils.py +0 -65
- hud_python-0.4.66/hud/utils/group_eval.py +0 -243
- hud_python-0.4.66/hud/utils/progress.py +0 -149
- hud_python-0.4.66/hud/utils/task_tracking.py +0 -223
- hud_python-0.4.66/hud/utils/tests/test_agent_factories.py +0 -61
- hud_python-0.4.66/hud/utils/tests/test_async_utils.py +0 -173
- hud_python-0.4.66/hud/utils/tests/test_progress.py +0 -261
- {hud_python-0.4.66 → hud_python-0.4.68}/LICENSE +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/blank/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/blank/environment/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/blank/environment/pyproject.toml +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/blank/server/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/blank/server/pyproject.toml +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/browser-base/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/environment/2048/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/environment/2048/backend/pyproject.toml +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/environment/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/environment/pyproject.toml +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/environment/todo/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/environment/todo/backend/pyproject.toml +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/server/pyproject.toml +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/deepresearch/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/deepresearch/environment/pyproject.toml +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/deepresearch/pyproject.toml +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/deepresearch/server/pyproject.toml +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/jupyter/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/jupyter/server/pyproject.toml +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/online_mind2web/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/online_mind2web/src/hud_controller/providers/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/remote_browser/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/remote_browser/pyproject.toml +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/rubrics/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/rubrics/environment/pyproject.toml +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/rubrics/pyproject.toml +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/rubrics/server/pyproject.toml +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/text_2048/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/environments/text_2048/pyproject.toml +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/examples/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/__main__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/tests/test_client.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/utils.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/__main__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/analyze.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/build.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/clone.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/debug.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/dev.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/flows/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/flows/dev.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/flows/tasks.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/get.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/init.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/list_func.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/pull.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/push.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/remove.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rft.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rft_status.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/celebrate.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/config.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/display.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/gpu.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/gpu_utils.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/local_runner.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/presets.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/remote_runner.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/rl_api.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/viewer.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/vllm.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/rl/wait_utils.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_analyze.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_analyze_module.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_build.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_build_failure.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_build_module.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_cli_root.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_clone.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_convert.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_cursor.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_debug.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_list_func.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_mcp_server.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_pull.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_push.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_push_happy.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_push_wrapper.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_registry.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/config.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/cursor.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/docker.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/env_check.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/environment.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/interactive.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/local_runner.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/metadata.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/package_runner.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/remote_runner.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/runner.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/server.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/source_hash.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_config.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_docker.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_docker_hints.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_env_check.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_environment.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_interactive_module.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_local_runner.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_logging_utils.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_metadata.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_package_runner.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_registry_utils.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_remote_runner.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_runner_modules.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/tests/test_source_hash.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/cli/utils/version_check.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/base.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/fastmcp.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/mcp_use.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/tests/test_client_integration.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/tests/test_fastmcp.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/tests/test_mcp_use_retry.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/tests/test_protocol.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/utils/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/utils/mcp_use_retry.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/utils/retry.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/clients/utils/retry_transport.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/datasets/tests/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/datasets/tests/test_runner.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/misc/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/misc/claude_plays_pokemon.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/native/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/native/comparator.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/native/tests/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/native/tests/test_comparator.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/native/tests/test_native_init.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/collector.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/config.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/context.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/exporters.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/instrumentation.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/processors.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/tests/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/tests/test_instrumentation.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/otel/tests/test_processors.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/py.typed +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/README.md +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/buffer.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/chat_template.jinja +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/config.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/distributed.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/learner.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/tests/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/tests/test_learner.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/train.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/types.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/utils/start_vllm_server.sh +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/utils.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/rl/vllm_adapter.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/samples/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/samples/browser.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/context.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/low_level.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/router.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/test_add_tool.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/test_context.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/test_mcp_server_handlers.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/test_mcp_server_integration.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/test_mcp_server_more.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/test_run_wrapper.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/test_server_extra.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/server/tests/test_sigterm_runner.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/shared/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/shared/exceptions.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/shared/hints.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/shared/requests.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/shared/tests/test_exceptions.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/shared/tests/test_hints.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/async_context.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/job.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/replay.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/tests/test_async_context.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/tests/test_instrument.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/tests/test_replay.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/tests/test_trace.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/trace.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/telemetry/utils.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/base.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/bash.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/computer/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/computer/anthropic.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/computer/gemini.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/computer/qwen.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/edit.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/executors/base.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/executors/tests/test_base_executor.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/grounding/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/grounding/config.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/grounding/grounded_tool.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/grounding/grounder.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/grounding/tests/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/jupyter.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/playwright.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/response.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/submit.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_bash.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_bash_extended.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_edit.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_jupyter_tool.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_submit.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_tools_init.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_types.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/types.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/tools/utils.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/mcp.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/pretty_errors.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/telemetry.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tests/test_mcp.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tests/test_pretty_errors.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tests/test_telemetry.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tests/test_tool_shorthand.py +0 -0
- {hud_python-0.4.66 → hud_python-0.4.68}/hud/utils/tool_shorthand.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hud-python
|
|
3
|
-
Version: 0.4.66
|
|
3
|
+
Version: 0.4.68
|
|
4
4
|
Summary: SDK for the HUD platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hud-evals/hud-python
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
|
|
@@ -35,7 +35,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
35
35
|
Classifier: Programming Language :: Python :: 3.12
|
|
36
36
|
Classifier: Programming Language :: Python :: 3.13
|
|
37
37
|
Requires-Python: <3.13,>=3.11
|
|
38
|
-
Requires-Dist: anthropic
|
|
38
|
+
Requires-Dist: anthropic>=0.75
|
|
39
39
|
Requires-Dist: blessed>=1.20.0
|
|
40
40
|
Requires-Dist: datasets>=2.14.0
|
|
41
41
|
Requires-Dist: google-genai
|
|
@@ -45,7 +45,7 @@ Requires-Dist: hud-mcp-python-sdk>=3.13.2
|
|
|
45
45
|
Requires-Dist: hud-mcp-use-python-sdk==2.3.20
|
|
46
46
|
Requires-Dist: langchain==0.3.27
|
|
47
47
|
Requires-Dist: numpy>=1.24.0
|
|
48
|
-
Requires-Dist: openai
|
|
48
|
+
Requires-Dist: openai>=2.8.1
|
|
49
49
|
Requires-Dist: opentelemetry-api>=1.34.1
|
|
50
50
|
Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
|
|
51
51
|
Requires-Dist: opentelemetry-instrumentation-mcp==0.47.0
|
|
@@ -64,74 +64,50 @@ Requires-Dist: typer>=0.9.0
|
|
|
64
64
|
Requires-Dist: watchfiles>=0.21.0
|
|
65
65
|
Requires-Dist: wrapt>=1.14.0
|
|
66
66
|
Provides-Extra: agent
|
|
67
|
-
Requires-Dist: aiodocker>=0.24.0; extra == 'agent'
|
|
68
67
|
Requires-Dist: dotenv>=0.9.9; extra == 'agent'
|
|
69
|
-
Requires-Dist: inspect-ai>=0.3.80; extra == 'agent'
|
|
70
68
|
Requires-Dist: ipykernel; extra == 'agent'
|
|
71
69
|
Requires-Dist: ipython<9; extra == 'agent'
|
|
72
70
|
Requires-Dist: jupyter-client; extra == 'agent'
|
|
73
71
|
Requires-Dist: jupyter-core; extra == 'agent'
|
|
74
|
-
Requires-Dist: langchain; extra == 'agent'
|
|
75
|
-
Requires-Dist: langchain-anthropic; extra == 'agent'
|
|
76
|
-
Requires-Dist: langchain-openai; extra == 'agent'
|
|
77
|
-
Requires-Dist: litellm>=1.55.0; extra == 'agent'
|
|
78
72
|
Requires-Dist: pillow>=11.1.0; extra == 'agent'
|
|
79
73
|
Requires-Dist: playwright; extra == 'agent'
|
|
80
74
|
Requires-Dist: pyautogui>=0.9.54; extra == 'agent'
|
|
81
|
-
Requires-Dist: pyright==1.1.
|
|
75
|
+
Requires-Dist: pyright==1.1.407; extra == 'agent'
|
|
82
76
|
Requires-Dist: pytest-asyncio; extra == 'agent'
|
|
83
77
|
Requires-Dist: pytest-cov; extra == 'agent'
|
|
84
78
|
Requires-Dist: pytest-mock; extra == 'agent'
|
|
85
79
|
Requires-Dist: pytest<9,>=8.1.1; extra == 'agent'
|
|
86
80
|
Requires-Dist: ruff>=0.11.8; extra == 'agent'
|
|
87
|
-
Requires-Dist: setuptools; extra == 'agent'
|
|
88
|
-
Requires-Dist: textdistance<5,>=4.5.0; extra == 'agent'
|
|
89
81
|
Provides-Extra: agents
|
|
90
|
-
Requires-Dist: aiodocker>=0.24.0; extra == 'agents'
|
|
91
82
|
Requires-Dist: dotenv>=0.9.9; extra == 'agents'
|
|
92
|
-
Requires-Dist: inspect-ai>=0.3.80; extra == 'agents'
|
|
93
83
|
Requires-Dist: ipykernel; extra == 'agents'
|
|
94
84
|
Requires-Dist: ipython<9; extra == 'agents'
|
|
95
85
|
Requires-Dist: jupyter-client; extra == 'agents'
|
|
96
86
|
Requires-Dist: jupyter-core; extra == 'agents'
|
|
97
|
-
Requires-Dist: langchain; extra == 'agents'
|
|
98
|
-
Requires-Dist: langchain-anthropic; extra == 'agents'
|
|
99
|
-
Requires-Dist: langchain-openai; extra == 'agents'
|
|
100
|
-
Requires-Dist: litellm>=1.55.0; extra == 'agents'
|
|
101
87
|
Requires-Dist: pillow>=11.1.0; extra == 'agents'
|
|
102
88
|
Requires-Dist: playwright; extra == 'agents'
|
|
103
89
|
Requires-Dist: pyautogui>=0.9.54; extra == 'agents'
|
|
104
|
-
Requires-Dist: pyright==1.1.
|
|
90
|
+
Requires-Dist: pyright==1.1.407; extra == 'agents'
|
|
105
91
|
Requires-Dist: pytest-asyncio; extra == 'agents'
|
|
106
92
|
Requires-Dist: pytest-cov; extra == 'agents'
|
|
107
93
|
Requires-Dist: pytest-mock; extra == 'agents'
|
|
108
94
|
Requires-Dist: pytest<9,>=8.1.1; extra == 'agents'
|
|
109
95
|
Requires-Dist: ruff>=0.11.8; extra == 'agents'
|
|
110
|
-
Requires-Dist: setuptools; extra == 'agents'
|
|
111
|
-
Requires-Dist: textdistance<5,>=4.5.0; extra == 'agents'
|
|
112
96
|
Provides-Extra: dev
|
|
113
|
-
Requires-Dist: aiodocker>=0.24.0; extra == 'dev'
|
|
114
97
|
Requires-Dist: dotenv>=0.9.9; extra == 'dev'
|
|
115
|
-
Requires-Dist: inspect-ai>=0.3.80; extra == 'dev'
|
|
116
98
|
Requires-Dist: ipykernel; extra == 'dev'
|
|
117
99
|
Requires-Dist: ipython<9; extra == 'dev'
|
|
118
100
|
Requires-Dist: jupyter-client; extra == 'dev'
|
|
119
101
|
Requires-Dist: jupyter-core; extra == 'dev'
|
|
120
|
-
Requires-Dist: langchain; extra == 'dev'
|
|
121
|
-
Requires-Dist: langchain-anthropic; extra == 'dev'
|
|
122
|
-
Requires-Dist: langchain-openai; extra == 'dev'
|
|
123
|
-
Requires-Dist: litellm>=1.55.0; extra == 'dev'
|
|
124
102
|
Requires-Dist: pillow>=11.1.0; extra == 'dev'
|
|
125
103
|
Requires-Dist: playwright; extra == 'dev'
|
|
126
104
|
Requires-Dist: pyautogui>=0.9.54; extra == 'dev'
|
|
127
|
-
Requires-Dist: pyright==1.1.
|
|
105
|
+
Requires-Dist: pyright==1.1.407; extra == 'dev'
|
|
128
106
|
Requires-Dist: pytest-asyncio; extra == 'dev'
|
|
129
107
|
Requires-Dist: pytest-cov; extra == 'dev'
|
|
130
108
|
Requires-Dist: pytest-mock; extra == 'dev'
|
|
131
109
|
Requires-Dist: pytest<9,>=8.1.1; extra == 'dev'
|
|
132
110
|
Requires-Dist: ruff>=0.11.8; extra == 'dev'
|
|
133
|
-
Requires-Dist: setuptools; extra == 'dev'
|
|
134
|
-
Requires-Dist: textdistance<5,>=4.5.0; extra == 'dev'
|
|
135
111
|
Provides-Extra: rl
|
|
136
112
|
Requires-Dist: bitsandbytes>=0.41.0; (sys_platform == 'linux') and extra == 'rl'
|
|
137
113
|
Requires-Dist: liger-kernel>=0.5.0; (sys_platform == 'linux') and extra == 'rl'
|
|
@@ -151,15 +127,15 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
|
|
|
151
127
|
|
|
152
128
|
[](https://pypi.org/project/hud-python/)
|
|
153
129
|
[](LICENSE)
|
|
154
|
-
[](https://cursor.com/en/install-mcp?name=docs-hud-python&config=
|
|
130
|
+
[](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
|
|
155
131
|
[](https://discord.gg/wkjtmHYYjm)
|
|
156
132
|
[](https://x.com/intent/user?screen_name=hud_evals)
|
|
157
133
|
[](https://shop.hud.ai)
|
|
158
134
|
|
|
159
135
|
|
|
160
|
-
### Are you
|
|
136
|
+
### Are you an enterprise building agents?
|
|
161
137
|
|
|
162
|
-
[📅 Hop on a call](https://cal.com/jay-
|
|
138
|
+
[📅 Hop on a call](https://cal.com/jay-hud) or [📧 founders@hud.ai](mailto:founders@hud.ai)
|
|
163
139
|
|
|
164
140
|
## Highlights
|
|
165
141
|
|
|
@@ -179,7 +155,7 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
|
|
|
179
155
|
pip install hud-python
|
|
180
156
|
|
|
181
157
|
# CLI - RL pipeline, environment design
|
|
182
|
-
uv tool install hud-python
|
|
158
|
+
uv tool install hud-python@latest
|
|
183
159
|
# uv tool update-shell
|
|
184
160
|
```
|
|
185
161
|
|
|
@@ -439,7 +415,7 @@ Train with the new interactive `hud rl` flow:
|
|
|
439
415
|
|
|
440
416
|
```bash
|
|
441
417
|
# Install CLI
|
|
442
|
-
uv tool install hud-python
|
|
418
|
+
uv tool install hud-python@latest
|
|
443
419
|
|
|
444
420
|
# Option A: Run directly from a HuggingFace dataset
|
|
445
421
|
hud rl hud-evals/2048-basic
|
|
@@ -10,15 +10,15 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
|
|
|
10
10
|
|
|
11
11
|
[](https://pypi.org/project/hud-python/)
|
|
12
12
|
[](LICENSE)
|
|
13
|
-
[](https://cursor.com/en/install-mcp?name=docs-hud-python&config=
|
|
13
|
+
[](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
|
|
14
14
|
[](https://discord.gg/wkjtmHYYjm)
|
|
15
15
|
[](https://x.com/intent/user?screen_name=hud_evals)
|
|
16
16
|
[](https://shop.hud.ai)
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
### Are you
|
|
19
|
+
### Are you an enterprise building agents?
|
|
20
20
|
|
|
21
|
-
[📅 Hop on a call](https://cal.com/jay-
|
|
21
|
+
[📅 Hop on a call](https://cal.com/jay-hud) or [📧 founders@hud.ai](mailto:founders@hud.ai)
|
|
22
22
|
|
|
23
23
|
## Highlights
|
|
24
24
|
|
|
@@ -38,7 +38,7 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
|
|
|
38
38
|
pip install hud-python
|
|
39
39
|
|
|
40
40
|
# CLI - RL pipeline, environment design
|
|
41
|
-
uv tool install hud-python
|
|
41
|
+
uv tool install hud-python@latest
|
|
42
42
|
# uv tool update-shell
|
|
43
43
|
```
|
|
44
44
|
|
|
@@ -298,7 +298,7 @@ Train with the new interactive `hud rl` flow:
|
|
|
298
298
|
|
|
299
299
|
```bash
|
|
300
300
|
# Install CLI
|
|
301
|
-
uv tool install hud-python
|
|
301
|
+
uv tool install hud-python@latest
|
|
302
302
|
|
|
303
303
|
# Option A: Run directly from a HuggingFace dataset
|
|
304
304
|
hud rl hud-evals/2048-basic
|
|
@@ -60,7 +60,7 @@ The HUD SDK includes a powerful CLI for debugging and analyzing MCP environments
|
|
|
60
60
|
|
|
61
61
|
```bash
|
|
62
62
|
# Install HUD CLI globally with uv (recommended)
|
|
63
|
-
uv tool install hud-python
|
|
63
|
+
uv tool install hud-python@latest
|
|
64
64
|
|
|
65
65
|
# Or use without installing
|
|
66
66
|
uvx --from hud-python hud --help
|
|
@@ -3,7 +3,7 @@ name = "hud-browser-controller"
|
|
|
3
3
|
version = "0.1.0"
|
|
4
4
|
description = "HUD Browser Controller - MCP interface for browser environments"
|
|
5
5
|
requires-python = ">=3.11,<3.14"
|
|
6
|
-
dependencies = [ "pydantic>=2.6,<3", "pydantic-settings>=2.2,<3", "hud-python
|
|
6
|
+
dependencies = [ "pydantic>=2.6,<3", "pydantic-settings>=2.2,<3", "hud-python>=0.4.68", "playwright", "pyautogui", "httpx", "typer", "fastapi>=0.104.1", "uvicorn[standard]>=0.24.0", "python-multipart>=0.0.6",]
|
|
7
7
|
|
|
8
8
|
[build-system]
|
|
9
9
|
requires = [ "hatchling",]
|
|
@@ -3,7 +3,7 @@ name = "hud-om2w"
|
|
|
3
3
|
version = "0.1.0"
|
|
4
4
|
description = "HUD Remote Browser Controller with MCP tools for cloud browser providers"
|
|
5
5
|
requires-python = ">=3.11,<3.13"
|
|
6
|
-
dependencies = [ "hud-python
|
|
6
|
+
dependencies = [ "hud-python>=0.4.68", "anthropic>=0.74.0", "pyautogui", "playwright", "httpx", "typer", "google-api-python-client", "google-auth",]
|
|
7
7
|
|
|
8
8
|
[build-system]
|
|
9
9
|
requires = [ "hatchling",]
|
|
@@ -3,13 +3,15 @@ from __future__ import annotations
|
|
|
3
3
|
from .base import MCPAgent
|
|
4
4
|
from .claude import ClaudeAgent
|
|
5
5
|
from .gemini import GeminiAgent
|
|
6
|
-
from .openai import
|
|
7
|
-
from .
|
|
6
|
+
from .openai import OpenAIAgent
|
|
7
|
+
from .openai_chat import OpenAIChatAgent
|
|
8
|
+
from .operator import OperatorAgent
|
|
8
9
|
|
|
9
10
|
__all__ = [
|
|
10
11
|
"ClaudeAgent",
|
|
11
12
|
"GeminiAgent",
|
|
12
|
-
"GenericOpenAIChatAgent",
|
|
13
13
|
"MCPAgent",
|
|
14
|
+
"OpenAIAgent",
|
|
15
|
+
"OpenAIChatAgent",
|
|
14
16
|
"OperatorAgent",
|
|
15
17
|
]
|
|
@@ -10,22 +10,32 @@ from abc import ABC, abstractmethod
|
|
|
10
10
|
from typing import TYPE_CHECKING, Any, ClassVar, Literal
|
|
11
11
|
|
|
12
12
|
import mcp.types as types
|
|
13
|
+
from pydantic import BaseModel, ConfigDict
|
|
13
14
|
|
|
14
15
|
from hud.agents.utils import log_agent_metadata_to_status, log_task_config_to_current_trace
|
|
15
|
-
from hud.
|
|
16
|
+
from hud.clients.base import AgentMCPClient
|
|
17
|
+
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
|
|
16
18
|
from hud.utils.hud_console import HUDConsole
|
|
17
19
|
from hud.utils.mcp import MCPConfigPatch, patch_mcp_config, setup_hud_telemetry
|
|
18
20
|
|
|
19
21
|
if TYPE_CHECKING:
|
|
20
|
-
from hud.clients.base import AgentMCPClient
|
|
21
22
|
from hud.datasets import Task
|
|
22
23
|
|
|
23
|
-
from .misc import ResponseAgent
|
|
24
|
-
|
|
25
24
|
|
|
26
25
|
logger = logging.getLogger(__name__)
|
|
27
26
|
|
|
28
27
|
|
|
28
|
+
class BaseCreateParams(BaseModel):
|
|
29
|
+
"""Runtime parameters for agent creation."""
|
|
30
|
+
|
|
31
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
32
|
+
|
|
33
|
+
mcp_client: AgentMCPClient | None = None
|
|
34
|
+
auto_trace: bool = True
|
|
35
|
+
auto_respond: bool = False
|
|
36
|
+
verbose: bool = False
|
|
37
|
+
|
|
38
|
+
|
|
29
39
|
class MCPAgent(ABC):
|
|
30
40
|
"""
|
|
31
41
|
Base class for MCP-enabled agents.
|
|
@@ -45,80 +55,67 @@ class MCPAgent(ABC):
|
|
|
45
55
|
`format_blocks`, and `format_tool_results`.
|
|
46
56
|
"""
|
|
47
57
|
|
|
48
|
-
metadata: dict[str, Any] | None = None
|
|
58
|
+
metadata: ClassVar[dict[str, Any] | None] = None
|
|
49
59
|
required_tools: ClassVar[list[str]] = [] # Tools that must be available
|
|
60
|
+
config_cls: ClassVar[type[BaseAgentConfig]] = BaseAgentConfig
|
|
50
61
|
|
|
51
|
-
def __init__(
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
# Filtering
|
|
55
|
-
allowed_tools: list[str] | None = None,
|
|
56
|
-
disallowed_tools: list[str] | None = None,
|
|
57
|
-
response_tool_name: str | None = None,
|
|
58
|
-
# Messages
|
|
59
|
-
system_prompt: str | None = None,
|
|
60
|
-
append_setup_output: bool = True,
|
|
61
|
-
initial_screenshot: bool = True,
|
|
62
|
-
# Misc
|
|
63
|
-
model_name: str = "mcp-agent",
|
|
64
|
-
checkpoint_name: str | None = None,
|
|
65
|
-
response_agent: ResponseAgent | None = None,
|
|
66
|
-
auto_trace: bool = True,
|
|
67
|
-
verbose: bool = False,
|
|
68
|
-
) -> None:
|
|
69
|
-
"""
|
|
70
|
-
Initialize the base MCP agent.
|
|
62
|
+
def __init__(self, params: BaseCreateParams | None = None, **kwargs: Any) -> None:
|
|
63
|
+
if params is None:
|
|
64
|
+
import warnings
|
|
71
65
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
"""
|
|
66
|
+
warnings.warn(
|
|
67
|
+
f"Passing kwargs to {self.__class__.__name__}() is deprecated. "
|
|
68
|
+
f"Use {self.__class__.__name__}.create(...) instead.",
|
|
69
|
+
DeprecationWarning,
|
|
70
|
+
stacklevel=2,
|
|
71
|
+
)
|
|
72
|
+
CreateParams = type(
|
|
73
|
+
f"{self.config_cls.__name__}CreateParams",
|
|
74
|
+
(BaseCreateParams, self.config_cls),
|
|
75
|
+
{"__module__": self.config_cls.__module__},
|
|
76
|
+
)
|
|
77
|
+
params = CreateParams(**kwargs)
|
|
78
|
+
|
|
79
|
+
config_kwargs = {
|
|
80
|
+
k: getattr(params, k) for k in self.config_cls.model_fields if hasattr(params, k)
|
|
81
|
+
}
|
|
82
|
+
self.config = self.config_cls(**config_kwargs)
|
|
90
83
|
|
|
91
|
-
self.mcp_client = mcp_client
|
|
92
|
-
self.
|
|
84
|
+
self.mcp_client = params.mcp_client
|
|
85
|
+
self.model_name: str = getattr(params, "model_name", "MCPAgent")
|
|
86
|
+
self.checkpoint_name: str = getattr(params, "checkpoint_name", "unknown")
|
|
87
|
+
self.auto_respond = params.auto_respond
|
|
93
88
|
|
|
94
|
-
self.model_name = model_name
|
|
95
|
-
self.checkpoint_name = checkpoint_name
|
|
96
89
|
self.console = HUDConsole(logger=logger)
|
|
97
90
|
|
|
98
|
-
|
|
99
|
-
if verbose:
|
|
91
|
+
if params.verbose:
|
|
100
92
|
self.console.set_verbose(True)
|
|
101
93
|
|
|
102
|
-
|
|
103
|
-
self.
|
|
104
|
-
self.
|
|
105
|
-
self.
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
self.system_prompt = system_prompt
|
|
109
|
-
self.append_setup_output = append_setup_output
|
|
110
|
-
self.initial_screenshot = initial_screenshot
|
|
94
|
+
self.allowed_tools = self.config.allowed_tools
|
|
95
|
+
self.disallowed_tools = self.config.disallowed_tools
|
|
96
|
+
self.system_prompt = self.config.system_prompt
|
|
97
|
+
self.append_setup_output = self.config.append_setup_output
|
|
98
|
+
self.initial_screenshot = self.config.initial_screenshot
|
|
99
|
+
self.response_tool_name = self.config.response_tool_name
|
|
111
100
|
|
|
112
|
-
|
|
113
|
-
self._tool_map: dict[str, types.Tool] = {}
|
|
114
|
-
self.response_tool_name = response_tool_name
|
|
101
|
+
self._available_tools: list[types.Tool] | None = None
|
|
102
|
+
self._tool_map: dict[str, types.Tool] = {}
|
|
115
103
|
|
|
116
104
|
# Trace
|
|
117
|
-
self._auto_trace = auto_trace
|
|
118
|
-
self._auto_trace_cm: Any | None = None
|
|
105
|
+
self._auto_trace = params.auto_trace
|
|
106
|
+
self._auto_trace_cm: Any | None = None
|
|
119
107
|
|
|
120
|
-
|
|
121
|
-
|
|
108
|
+
@classmethod
|
|
109
|
+
def create(cls, **kwargs: Any) -> MCPAgent:
|
|
110
|
+
"""
|
|
111
|
+
Factory method to create an agent with typed parameters.
|
|
112
|
+
"""
|
|
113
|
+
CreateParams = type(
|
|
114
|
+
f"{cls.config_cls.__name__}CreateParams",
|
|
115
|
+
(BaseCreateParams, cls.config_cls),
|
|
116
|
+
{"__module__": cls.config_cls.__module__},
|
|
117
|
+
)
|
|
118
|
+
return cls(params=CreateParams(**kwargs))
|
|
122
119
|
|
|
123
120
|
async def initialize(self, task: str | Task | None = None) -> None:
|
|
124
121
|
"""Initialize the agent with task-specific configuration."""
|
|
@@ -129,7 +126,6 @@ class MCPAgent(ABC):
|
|
|
129
126
|
from hud.clients import MCPClient
|
|
130
127
|
|
|
131
128
|
self.mcp_client = MCPClient(mcp_config=task.mcp_config)
|
|
132
|
-
self._auto_created_client = True
|
|
133
129
|
self.console.debug("Auto-created MCPClient from task.mcp_config")
|
|
134
130
|
|
|
135
131
|
# Ensure we have a client
|
|
@@ -148,41 +144,41 @@ class MCPAgent(ABC):
|
|
|
148
144
|
try:
|
|
149
145
|
await self.mcp_client.initialize()
|
|
150
146
|
except Exception as e:
|
|
147
|
+
self.console.error_log(f"Failed to initialize MCP client: {e}")
|
|
151
148
|
self._handle_connection_error(e)
|
|
152
149
|
|
|
153
150
|
# If task is provided, apply agent_config and add lifecycle tools
|
|
154
151
|
if isinstance(task, Task) and task.agent_config:
|
|
155
|
-
|
|
152
|
+
agent_cfg = task.agent_config
|
|
153
|
+
if agent_cfg.system_prompt:
|
|
156
154
|
if self.system_prompt is None:
|
|
157
|
-
self.system_prompt =
|
|
155
|
+
self.system_prompt = agent_cfg.system_prompt
|
|
158
156
|
else:
|
|
159
|
-
self.system_prompt += "\n\n" +
|
|
160
|
-
if "append_setup_output" in
|
|
161
|
-
self.append_setup_output =
|
|
162
|
-
if "initial_screenshot" in
|
|
163
|
-
self.initial_screenshot =
|
|
164
|
-
if
|
|
157
|
+
self.system_prompt += "\n\n" + agent_cfg.system_prompt
|
|
158
|
+
if "append_setup_output" in agent_cfg.model_fields_set:
|
|
159
|
+
self.append_setup_output = agent_cfg.append_setup_output
|
|
160
|
+
if "initial_screenshot" in agent_cfg.model_fields_set:
|
|
161
|
+
self.initial_screenshot = agent_cfg.initial_screenshot
|
|
162
|
+
if agent_cfg.allowed_tools is not None:
|
|
165
163
|
# If allowed_tools has already been set, we take the intersection of the two
|
|
166
164
|
# If the list had been empty, we were allowing all tools, so we overwrite this
|
|
167
165
|
if isinstance(self.allowed_tools, list) and len(self.allowed_tools) > 0:
|
|
168
166
|
# If task allows "*", keep CLI's allowed_tools unchanged
|
|
169
|
-
if "*" not in
|
|
167
|
+
if "*" not in agent_cfg.allowed_tools:
|
|
170
168
|
self.allowed_tools = [
|
|
171
|
-
tool
|
|
172
|
-
for tool in self.allowed_tools
|
|
173
|
-
if tool in task.agent_config["allowed_tools"]
|
|
169
|
+
tool for tool in self.allowed_tools if tool in agent_cfg.allowed_tools
|
|
174
170
|
]
|
|
175
171
|
# else: task allows all tools, so CLI's allowed_tools takes precedence
|
|
176
172
|
else: # If allowed_tools is None, we overwrite it
|
|
177
|
-
self.allowed_tools =
|
|
178
|
-
if
|
|
173
|
+
self.allowed_tools = agent_cfg.allowed_tools
|
|
174
|
+
if agent_cfg.disallowed_tools is not None:
|
|
179
175
|
# If disallowed_tools has already been set, we take the union of the two
|
|
180
176
|
if isinstance(self.disallowed_tools, list):
|
|
181
|
-
self.disallowed_tools.extend(
|
|
177
|
+
self.disallowed_tools.extend(agent_cfg.disallowed_tools)
|
|
182
178
|
else: # If disallowed_tools is None, we overwrite it
|
|
183
|
-
self.disallowed_tools =
|
|
184
|
-
if
|
|
185
|
-
self.response_tool_name =
|
|
179
|
+
self.disallowed_tools = agent_cfg.disallowed_tools
|
|
180
|
+
if agent_cfg.response_tool_name is not None:
|
|
181
|
+
self.response_tool_name = agent_cfg.response_tool_name
|
|
186
182
|
|
|
187
183
|
all_tools = await self.mcp_client.list_tools()
|
|
188
184
|
self._available_tools = []
|
|
@@ -201,6 +197,15 @@ class MCPAgent(ABC):
|
|
|
201
197
|
continue
|
|
202
198
|
self._available_tools.append(tool)
|
|
203
199
|
|
|
200
|
+
# Validate required tools are present
|
|
201
|
+
available_tool_names = {t.name for t in self._available_tools}
|
|
202
|
+
missing_tools = [tool for tool in self.required_tools if tool not in available_tool_names]
|
|
203
|
+
if missing_tools:
|
|
204
|
+
raise ValueError(
|
|
205
|
+
f"Required tools are missing: {missing_tools}. "
|
|
206
|
+
f"Available tools: {sorted(available_tool_names)}"
|
|
207
|
+
)
|
|
208
|
+
|
|
204
209
|
self.console.info(
|
|
205
210
|
f"Agent initialized with {len(self.get_available_tools())} tools: {', '.join([t.name for t in self.get_available_tools()])}" # noqa: E501
|
|
206
211
|
)
|
|
@@ -290,6 +295,10 @@ class MCPAgent(ABC):
|
|
|
290
295
|
self.console.progress_log(f"Setting up tool phase: {task.setup_tool}")
|
|
291
296
|
results = await self.call_tools(task.setup_tool)
|
|
292
297
|
if any(result.isError for result in results):
|
|
298
|
+
for result in results:
|
|
299
|
+
if result.isError:
|
|
300
|
+
self.console.error_log(f"Error in setup tool: {result}")
|
|
301
|
+
|
|
293
302
|
return Trace(
|
|
294
303
|
reward=0.0,
|
|
295
304
|
done=True,
|
|
@@ -389,6 +398,8 @@ class MCPAgent(ABC):
|
|
|
389
398
|
final_response = None
|
|
390
399
|
error = None
|
|
391
400
|
|
|
401
|
+
messages: list[Any] = []
|
|
402
|
+
|
|
392
403
|
try:
|
|
393
404
|
# Start with system messages
|
|
394
405
|
messages = await self.get_system_messages()
|
|
@@ -413,15 +424,16 @@ class MCPAgent(ABC):
|
|
|
413
424
|
|
|
414
425
|
# Check if we should stop
|
|
415
426
|
if response.done or not response.tool_calls:
|
|
416
|
-
#
|
|
417
|
-
decision = "STOP"
|
|
418
|
-
if self.
|
|
427
|
+
# Use auto_respond to decide whether to stop
|
|
428
|
+
decision: Literal["STOP", "CONTINUE"] = "STOP"
|
|
429
|
+
if self.auto_respond and response.content:
|
|
419
430
|
try:
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
)
|
|
431
|
+
from hud.agents.misc import ResponseAgent
|
|
432
|
+
|
|
433
|
+
response_agent = ResponseAgent()
|
|
434
|
+
decision = await response_agent.determine_response(response.content)
|
|
423
435
|
except Exception as e:
|
|
424
|
-
self.console.warning_log(f"
|
|
436
|
+
self.console.warning_log(f"Auto-respond failed: {e}")
|
|
425
437
|
if decision == "STOP":
|
|
426
438
|
# Try to submit response through lifecycle tool
|
|
427
439
|
await self._maybe_submit_response(response, messages)
|
|
@@ -436,11 +448,7 @@ class MCPAgent(ABC):
|
|
|
436
448
|
|
|
437
449
|
# 2. Execute tools
|
|
438
450
|
tool_calls = response.tool_calls
|
|
439
|
-
for tool_call in tool_calls:
|
|
440
|
-
self.console.info_log(f"{tool_call}")
|
|
441
451
|
tool_results = await self.call_tools(tool_calls)
|
|
442
|
-
for tool_result in tool_results:
|
|
443
|
-
self.console.info_log(f"{tool_result}")
|
|
444
452
|
|
|
445
453
|
# 3. Format tool results and add to messages
|
|
446
454
|
tool_messages = await self.format_tool_results(tool_calls, tool_results)
|
|
@@ -699,8 +707,8 @@ class MCPAgent(ABC):
|
|
|
699
707
|
finally:
|
|
700
708
|
self._auto_trace_cm = None
|
|
701
709
|
|
|
702
|
-
#
|
|
703
|
-
if self.
|
|
710
|
+
# Always clean up the client
|
|
711
|
+
if self.mcp_client:
|
|
704
712
|
try:
|
|
705
713
|
await self.mcp_client.shutdown()
|
|
706
714
|
self.console.debug("Closed auto-created MCPClient")
|
|
@@ -708,7 +716,6 @@ class MCPAgent(ABC):
|
|
|
708
716
|
self.console.warning_log(f"Failed to close auto-created client: {e}")
|
|
709
717
|
finally:
|
|
710
718
|
self.mcp_client = None
|
|
711
|
-
self._auto_created_client = False
|
|
712
719
|
|
|
713
720
|
def _is_connection_error(self, e: Exception) -> bool:
|
|
714
721
|
"""Check if an exception is a connection error."""
|