hud-python 0.4.53__tar.gz → 0.4.55__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- {hud_python-0.4.53 → hud_python-0.4.55}/PKG-INFO +8 -7
- {hud_python-0.4.53 → hud_python-0.4.55}/README.md +6 -6
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/README.md +2 -2
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/blank/server/pyproject.toml +1 -1
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/environment/todo/README.md +2 -2
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/server/pyproject.toml +1 -1
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/deepresearch/server/pyproject.toml +1 -1
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/base.py +8 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/claude.py +4 -3
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/openai.py +2 -1
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/openai_chat_generic.py +3 -2
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/tests/test_claude.py +2 -2
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/tests/test_openai.py +1 -1
- hud_python-0.4.55/hud/agents/utils.py +50 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/__init__.py +65 -9
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/build.py +185 -25
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/dev.py +130 -40
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/eval.py +123 -24
- hud_python-0.4.55/hud/cli/flows/dev.py +155 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/flows/tasks.py +29 -9
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_eval.py +6 -6
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/docker.py +6 -3
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/base.py +2 -2
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/context.py +42 -1
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/server.py +29 -3
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/settings.py +6 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/async_context.py +16 -2
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/trace.py +6 -1
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/types.py +10 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/group_eval.py +14 -2
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_agent_factories.py +2 -1
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_version.py +1 -1
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/version.py +1 -1
- {hud_python-0.4.53 → hud_python-0.4.55}/pyproject.toml +2 -1
- {hud_python-0.4.53 → hud_python-0.4.55}/.gitignore +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/LICENSE +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/blank/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/blank/environment/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/blank/environment/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/blank/server/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/environment/2048/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/environment/2048/backend/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/environment/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/environment/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/environment/todo/backend/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/browser/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/deepresearch/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/deepresearch/environment/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/deepresearch/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/remote_browser/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/remote_browser/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/text_2048/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/environments/text_2048/pyproject.toml +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/examples/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/__main__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/grounded_openai.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/langchain.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/lite_llm.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/misc/integration_test_agent.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/misc/response_agent.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/tests/test_base_runtime.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/tests/test_client.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/agents/tests/test_grounded_openai_agent.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/__main__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/analyze.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/clone.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/debug.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/flows/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/get.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/init.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/list_func.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/pull.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/push.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/remove.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/celebrate.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/config.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/display.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/gpu.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/gpu_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/local_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/presets.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/remote_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/rl_api.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/viewer.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/vllm.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/rl/wait_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_analyze.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_analyze_module.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_build.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_build_failure.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_build_module.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_cli_more_wrappers.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_cli_root.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_clone.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_cursor.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_debug.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_list_func.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_mcp_server.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_pull.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_push.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_push_happy.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_push_wrapper.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_registry.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/config.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/cursor.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/env_check.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/environment.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/interactive.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/local_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/metadata.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/package_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/remote_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/server.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/source_hash.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tasks.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_config.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_docker.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_docker_hints.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_env_check.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_environment.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_interactive_module.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_local_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_logging_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_metadata.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_package_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_registry_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_remote_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_runner_modules.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_source_hash.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/tests/test_tasks.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/cli/utils/version_check.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/fastmcp.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/mcp_use.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/tests/test_client_integration.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/tests/test_fastmcp.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/tests/test_mcp_use_retry.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/tests/test_protocol.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/utils/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/utils/mcp_use_retry.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/utils/retry.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/clients/utils/retry_transport.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/datasets/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/datasets/parallel.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/datasets/runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/datasets/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/datasets/tests/test_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/datasets/tests/test_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/datasets/utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/misc/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/misc/claude_plays_pokemon.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/native/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/native/comparator.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/native/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/native/tests/test_comparator.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/native/tests/test_native_init.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/collector.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/config.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/exporters.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/instrumentation.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/processors.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/tests/test_instrumentation.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/otel/tests/test_processors.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/py.typed +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/README.md +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/actor.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/buffer.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/chat_template.jinja +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/config.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/distributed.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/learner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/tests/test_learner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/train.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/types.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/utils/start_vllm_server.sh +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/rl/vllm_adapter.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/samples/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/samples/browser.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/context.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/low_level.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/router.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/test_add_tool.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/test_context.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/test_mcp_server_handlers.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/test_mcp_server_integration.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/test_mcp_server_more.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/test_run_wrapper.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/test_server_extra.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/server/tests/test_sigterm_runner.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/shared/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/shared/exceptions.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/shared/hints.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/shared/requests.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/shared/tests/test_exceptions.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/shared/tests/test_hints.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/job.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/replay.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/tests/test_async_context.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/tests/test_instrument.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/tests/test_job.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/tests/test_replay.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/telemetry/tests/test_trace.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/base.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/bash.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/computer/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/computer/anthropic.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/computer/hud.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/computer/openai.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/computer/qwen.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/edit.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/executors/base.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/executors/tests/test_base_executor.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/grounding/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/grounding/config.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/grounding/grounded_tool.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/grounding/grounder.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/grounding/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/grounding/tests/test_grounded_tool.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/playwright.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/response.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/submit.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_bash.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_bash_extended.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_computer.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_computer_actions.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_edit.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_submit.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_tools_init.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_types.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/types.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/tools/utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/agent_factories.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/async_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/hud_console.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/mcp.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/pretty_errors.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/progress.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/task_tracking.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tasks.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/telemetry.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_async_utils.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_mcp.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_pretty_errors.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_progress.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_tasks.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_telemetry.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tests/test_tool_shorthand.py +0 -0
- {hud_python-0.4.53 → hud_python-0.4.55}/hud/utils/tool_shorthand.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hud-python
|
|
3
|
-
Version: 0.4.53
|
|
3
|
+
Version: 0.4.55
|
|
4
4
|
Summary: SDK for the HUD platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hud-evals/hud-python
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
|
|
@@ -42,6 +42,7 @@ Requires-Dist: httpx<1,>=0.23.0
|
|
|
42
42
|
Requires-Dist: hud-fastmcp-python-sdk>=0.1.2
|
|
43
43
|
Requires-Dist: hud-mcp-python-sdk>=3.13.2
|
|
44
44
|
Requires-Dist: hud-mcp-use-python-sdk==2.3.20
|
|
45
|
+
Requires-Dist: langchain==0.3.27
|
|
45
46
|
Requires-Dist: numpy>=1.24.0
|
|
46
47
|
Requires-Dist: openai
|
|
47
48
|
Requires-Dist: opentelemetry-api>=1.34.1
|
|
@@ -247,8 +248,8 @@ The above example lets the agent play 2048 ([See replay](https://hud.so/trace/6
|
|
|
247
248
|
RL using GRPO a Qwen2.5-VL model on any hud dataset:
|
|
248
249
|
|
|
249
250
|
```bash
|
|
250
|
-
hud get hud-evals/basic
|
|
251
|
-
hud rl basic
|
|
251
|
+
hud get hud-evals/2048-basic # from HF
|
|
252
|
+
hud rl 2048-basic.json
|
|
252
253
|
```
|
|
253
254
|
|
|
254
255
|
> See [agent training docs](https://docs.hud.so/train-agents/quickstart)
|
|
@@ -439,14 +440,14 @@ Train with the new interactive `hud rl` flow:
|
|
|
439
440
|
uv tool install hud-python
|
|
440
441
|
|
|
441
442
|
# Option A: Run directly from a HuggingFace dataset
|
|
442
|
-
hud rl hud-evals/basic
|
|
443
|
+
hud rl hud-evals/2048-basic
|
|
443
444
|
|
|
444
445
|
# Option B: Download first, modify, then train
|
|
445
|
-
hud get hud-evals/basic
|
|
446
|
-
hud rl basic
|
|
446
|
+
hud get hud-evals/2048-basic
|
|
447
|
+
hud rl 2048-basic.json
|
|
447
448
|
|
|
448
449
|
# Optional: baseline evaluation
|
|
449
|
-
hud eval basic
|
|
450
|
+
hud eval 2048-basic.json
|
|
450
451
|
```
|
|
451
452
|
|
|
452
453
|
Supports multi‑turn RL for both:
|
|
@@ -109,8 +109,8 @@ The above example lets the agent play 2048 ([See replay](https://hud.so/trace/6
|
|
|
109
109
|
RL using GRPO a Qwen2.5-VL model on any hud dataset:
|
|
110
110
|
|
|
111
111
|
```bash
|
|
112
|
-
hud get hud-evals/basic
|
|
113
|
-
hud rl basic
|
|
112
|
+
hud get hud-evals/2048-basic # from HF
|
|
113
|
+
hud rl 2048-basic.json
|
|
114
114
|
```
|
|
115
115
|
|
|
116
116
|
> See [agent training docs](https://docs.hud.so/train-agents/quickstart)
|
|
@@ -301,14 +301,14 @@ Train with the new interactive `hud rl` flow:
|
|
|
301
301
|
uv tool install hud-python
|
|
302
302
|
|
|
303
303
|
# Option A: Run directly from a HuggingFace dataset
|
|
304
|
-
hud rl hud-evals/basic
|
|
304
|
+
hud rl hud-evals/2048-basic
|
|
305
305
|
|
|
306
306
|
# Option B: Download first, modify, then train
|
|
307
|
-
hud get hud-evals/basic
|
|
308
|
-
hud rl basic
|
|
307
|
+
hud get hud-evals/2048-basic
|
|
308
|
+
hud rl 2048-basic.json
|
|
309
309
|
|
|
310
310
|
# Optional: baseline evaluation
|
|
311
|
-
hud eval basic
|
|
311
|
+
hud eval 2048-basic.json
|
|
312
312
|
```
|
|
313
313
|
|
|
314
314
|
Supports multi‑turn RL for both:
|
|
@@ -804,9 +804,9 @@ class TodoCompleted:
|
|
|
804
804
|
@problem("todo_basic", description="Complete two todo items", difficulty="easy")
|
|
805
805
|
class TodoBasic:
|
|
806
806
|
def get_setup(self):
|
|
807
|
-
return {"
|
|
807
|
+
return {"name": "todo_seed", "arguments": {"num_items": 5}}
|
|
808
808
|
def get_evaluation(self):
|
|
809
|
-
return {"
|
|
809
|
+
return {"name": "todo_completed", "arguments": {"expected_count": 2}}
|
|
810
810
|
```
|
|
811
811
|
|
|
812
812
|
Decorators keep registration *next to the implementation* and avoid manual bookkeeping. The server simply exposes the combined metadata through an MCP **resource**. Follow `environments/browser/src/hud_controller/problems/registry.py` as a template and expose the JSON with `@mcp.resource("problems://registry")`.
|
|
@@ -47,8 +47,8 @@ await setup({"name": "todo_basic_usage"})
|
|
|
47
47
|
await evaluate({"name": "todo_basic_usage"})
|
|
48
48
|
|
|
49
49
|
# Direct function calls
|
|
50
|
-
await setup({"
|
|
51
|
-
await evaluate({"
|
|
50
|
+
await setup({"name": "todo_reset", "arguments": {}})
|
|
51
|
+
await evaluate({"name": "todo_completion_rate", "arguments": {"min_rate": 0.5}})
|
|
52
52
|
|
|
53
53
|
# MCP resource discovery
|
|
54
54
|
todo_evaluators = await client.read_resource("evaluators://todo")
|
|
@@ -11,6 +11,7 @@ from typing import TYPE_CHECKING, Any, ClassVar, Literal
|
|
|
11
11
|
|
|
12
12
|
import mcp.types as types
|
|
13
13
|
|
|
14
|
+
from hud.agents.utils import log_agent_metadata_to_status, log_task_config_to_current_trace
|
|
14
15
|
from hud.types import AgentResponse, MCPToolCall, MCPToolResult, Trace
|
|
15
16
|
from hud.utils.hud_console import HUDConsole
|
|
16
17
|
from hud.utils.mcp import MCPConfigPatch, patch_mcp_config, setup_hud_telemetry
|
|
@@ -62,6 +63,7 @@ class MCPAgent(ABC):
|
|
|
62
63
|
initial_screenshot: bool = True,
|
|
63
64
|
# Misc
|
|
64
65
|
model_name: str = "mcp-agent",
|
|
66
|
+
checkpoint_name: str | None = None,
|
|
65
67
|
response_agent: ResponseAgent | None = None,
|
|
66
68
|
auto_trace: bool = True,
|
|
67
69
|
verbose: bool = False,
|
|
@@ -92,6 +94,7 @@ class MCPAgent(ABC):
|
|
|
92
94
|
self._auto_created_client = False # Track if we created the client
|
|
93
95
|
|
|
94
96
|
self.model_name = model_name
|
|
97
|
+
self.checkpoint_name = checkpoint_name
|
|
95
98
|
self.console = HUDConsole(logger=logger)
|
|
96
99
|
|
|
97
100
|
# Set verbose mode if requested
|
|
@@ -198,6 +201,8 @@ class MCPAgent(ABC):
|
|
|
198
201
|
f"Agent initialized with {len(self.get_available_tools())} tools: {', '.join([t.name for t in self.get_available_tools()])}" # noqa: E501
|
|
199
202
|
)
|
|
200
203
|
|
|
204
|
+
await log_agent_metadata_to_status(self.model_name, self.checkpoint_name)
|
|
205
|
+
|
|
201
206
|
async def run(self, prompt_or_task: str | Task | dict[str, Any], max_steps: int = 10) -> Trace:
|
|
202
207
|
"""
|
|
203
208
|
Run the agent with the given prompt or task.
|
|
@@ -223,6 +228,9 @@ class MCPAgent(ABC):
|
|
|
223
228
|
|
|
224
229
|
# Handle Task objects with full lifecycle
|
|
225
230
|
if isinstance(prompt_or_task, Task):
|
|
231
|
+
# Log a compact summary of task config to the current trace (async)
|
|
232
|
+
await log_task_config_to_current_trace(prompt_or_task)
|
|
233
|
+
|
|
226
234
|
return await self.run_task(prompt_or_task, max_steps)
|
|
227
235
|
|
|
228
236
|
# Handle simple string prompts
|
|
@@ -89,7 +89,8 @@ class ClaudeAgent(MCPAgent):
|
|
|
89
89
|
self.use_computer_beta = use_computer_beta
|
|
90
90
|
self.hud_console = HUDConsole(logger=logger)
|
|
91
91
|
|
|
92
|
-
self.model_name =
|
|
92
|
+
self.model_name = "Claude"
|
|
93
|
+
self.checkpoint_name = self.model
|
|
93
94
|
|
|
94
95
|
# Track mapping from Claude tool names to MCP tool names
|
|
95
96
|
self._claude_to_mcp_tool_map: dict[str, str] = {}
|
|
@@ -98,14 +99,14 @@ class ClaudeAgent(MCPAgent):
|
|
|
98
99
|
# Append Claude-specific instructions to the base system prompt
|
|
99
100
|
claude_instructions = """
|
|
100
101
|
You are Claude, an AI assistant created by Anthropic. You are helpful, harmless, and honest.
|
|
101
|
-
|
|
102
|
+
|
|
102
103
|
When working on tasks:
|
|
103
104
|
1. Be thorough and systematic in your approach
|
|
104
105
|
2. Complete tasks autonomously without asking for confirmation
|
|
105
106
|
3. Use available tools efficiently to accomplish your goals
|
|
106
107
|
4. Verify your actions and ensure task completion
|
|
107
108
|
5. Be precise and accurate in all operations
|
|
108
|
-
|
|
109
|
+
|
|
109
110
|
Remember: You are expected to complete tasks autonomously. The user trusts you to accomplish what they asked.
|
|
110
111
|
""".strip() # noqa: E501
|
|
111
112
|
|
|
@@ -70,6 +70,7 @@ class OperatorAgent(MCPAgent):
|
|
|
70
70
|
|
|
71
71
|
self.openai_client = model_client
|
|
72
72
|
self.model = model
|
|
73
|
+
self.checkpoint_name = self.model
|
|
73
74
|
self.environment = environment
|
|
74
75
|
|
|
75
76
|
# State tracking for OpenAI's stateful API
|
|
@@ -84,7 +85,7 @@ class OperatorAgent(MCPAgent):
|
|
|
84
85
|
except Exception as e:
|
|
85
86
|
raise ValueError(f"OpenAI API key is invalid: {e}") from e
|
|
86
87
|
|
|
87
|
-
self.model_name = "
|
|
88
|
+
self.model_name = "Operator"
|
|
88
89
|
|
|
89
90
|
# Append OpenAI-specific instructions to the base system prompt
|
|
90
91
|
openai_instructions = """
|
|
@@ -62,7 +62,8 @@ class GenericOpenAIChatAgent(MCPAgent):
|
|
|
62
62
|
else:
|
|
63
63
|
raise ValueError("Either openai_client or (api_key and base_url) must be provided")
|
|
64
64
|
|
|
65
|
-
self.model_name =
|
|
65
|
+
self.model_name = "GenericOpenAI"
|
|
66
|
+
self.checkpoint_name = model_name
|
|
66
67
|
self.completion_kwargs: dict[str, Any] = completion_kwargs or {}
|
|
67
68
|
self.mcp_schemas = []
|
|
68
69
|
self.hud_console = HUDConsole(logger=logger)
|
|
@@ -194,7 +195,7 @@ class GenericOpenAIChatAgent(MCPAgent):
|
|
|
194
195
|
raise ValueError("openai_client is required for GenericOpenAIChatAgent")
|
|
195
196
|
# default transport = OpenAI SDK
|
|
196
197
|
return await self.oai.chat.completions.create(
|
|
197
|
-
model=self.
|
|
198
|
+
model=self.checkpoint_name,
|
|
198
199
|
messages=messages,
|
|
199
200
|
tools=tools, # type: ignore ready ChatCompletionToolParam-shaped
|
|
200
201
|
**extra,
|
|
@@ -89,7 +89,7 @@ class TestClaudeAgent:
|
|
|
89
89
|
validate_api_key=False, # Skip validation in tests
|
|
90
90
|
)
|
|
91
91
|
|
|
92
|
-
assert agent.model_name == "
|
|
92
|
+
assert agent.model_name == "Claude"
|
|
93
93
|
assert agent.max_tokens == 1000
|
|
94
94
|
assert agent.anthropic_client == mock_model_client
|
|
95
95
|
|
|
@@ -103,7 +103,7 @@ class TestClaudeAgent:
|
|
|
103
103
|
validate_api_key=False, # Skip validation in tests
|
|
104
104
|
)
|
|
105
105
|
|
|
106
|
-
assert agent.model_name == "
|
|
106
|
+
assert agent.model_name == "Claude"
|
|
107
107
|
assert agent.anthropic_client is not None
|
|
108
108
|
|
|
109
109
|
@pytest.mark.asyncio
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
from hud.otel.context import (
|
|
7
|
+
_update_task_status_async,
|
|
8
|
+
get_current_task_run_id,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from hud.datasets import Task
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
async def log_task_config_to_current_trace(task: Task) -> None:
|
|
16
|
+
with contextlib.suppress(Exception):
|
|
17
|
+
task_run_id = get_current_task_run_id()
|
|
18
|
+
if not task_run_id:
|
|
19
|
+
return
|
|
20
|
+
|
|
21
|
+
raw_config = task.model_dump()
|
|
22
|
+
|
|
23
|
+
await _update_task_status_async(
|
|
24
|
+
task_run_id,
|
|
25
|
+
"running",
|
|
26
|
+
task_id=task.id,
|
|
27
|
+
extra_metadata={"task_config": raw_config},
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
async def log_agent_metadata_to_status(
|
|
32
|
+
model_name: str | None = None, checkpoint_name: str | None = None
|
|
33
|
+
) -> None:
|
|
34
|
+
"""Attach agent metadata (model/checkpoint) to current trace status metadata."""
|
|
35
|
+
with contextlib.suppress(Exception):
|
|
36
|
+
task_run_id = get_current_task_run_id()
|
|
37
|
+
if not task_run_id or (not model_name and not checkpoint_name):
|
|
38
|
+
return
|
|
39
|
+
|
|
40
|
+
agent_meta = {}
|
|
41
|
+
if model_name is not None:
|
|
42
|
+
agent_meta["model_name"] = model_name
|
|
43
|
+
if checkpoint_name is not None:
|
|
44
|
+
agent_meta["checkpoint_name"] = checkpoint_name
|
|
45
|
+
|
|
46
|
+
await _update_task_status_async(
|
|
47
|
+
task_run_id,
|
|
48
|
+
"running",
|
|
49
|
+
extra_metadata={"agent": agent_meta},
|
|
50
|
+
)
|
|
@@ -12,6 +12,8 @@ from rich.console import Console
|
|
|
12
12
|
from rich.panel import Panel
|
|
13
13
|
from rich.table import Table
|
|
14
14
|
|
|
15
|
+
from hud.types import AgentType
|
|
16
|
+
|
|
15
17
|
from . import list_func as list_module
|
|
16
18
|
from .analyze import (
|
|
17
19
|
analyze_environment,
|
|
@@ -380,6 +382,11 @@ def dev(
|
|
|
380
382
|
"--watch",
|
|
381
383
|
help="Additional directories to watch for changes (default: current directory)",
|
|
382
384
|
),
|
|
385
|
+
new: bool = typer.Option(
|
|
386
|
+
False,
|
|
387
|
+
"--new",
|
|
388
|
+
help="Show Cursor installation link for new server setup",
|
|
389
|
+
),
|
|
383
390
|
) -> None:
|
|
384
391
|
"""🔥 Development mode - run MCP server with hot-reload.
|
|
385
392
|
|
|
@@ -420,6 +427,7 @@ def dev(
|
|
|
420
427
|
watch,
|
|
421
428
|
docker=docker,
|
|
422
429
|
docker_args=docker_args,
|
|
430
|
+
new=new,
|
|
423
431
|
)
|
|
424
432
|
|
|
425
433
|
|
|
@@ -847,7 +855,7 @@ def eval(
|
|
|
847
855
|
hud_console = HUDConsole()
|
|
848
856
|
|
|
849
857
|
if integration_test:
|
|
850
|
-
agent =
|
|
858
|
+
agent = AgentType.INTEGRATION_TEST
|
|
851
859
|
|
|
852
860
|
# If no source provided, reuse RL helper to find a tasks file interactively
|
|
853
861
|
if source is None:
|
|
@@ -894,17 +902,17 @@ def eval(
|
|
|
894
902
|
# Add standard agent choices
|
|
895
903
|
choices.extend(
|
|
896
904
|
[
|
|
897
|
-
{"name": "Claude 4 Sonnet", "value":
|
|
898
|
-
{"name": "OpenAI Computer Use", "value":
|
|
899
|
-
{"name": "vLLM (Local Server)", "value":
|
|
900
|
-
{"name": "LiteLLM (Multi-provider)", "value":
|
|
905
|
+
{"name": "Claude 4 Sonnet", "value": AgentType.CLAUDE},
|
|
906
|
+
{"name": "OpenAI Computer Use", "value": AgentType.OPENAI},
|
|
907
|
+
{"name": "vLLM (Local Server)", "value": AgentType.VLLM},
|
|
908
|
+
{"name": "LiteLLM (Multi-provider)", "value": AgentType.LITELLM},
|
|
901
909
|
]
|
|
902
910
|
)
|
|
903
911
|
|
|
904
912
|
agent = hud_console.select("Select an agent to use:", choices=choices, default=0)
|
|
905
913
|
|
|
906
914
|
# Handle HUD model selection
|
|
907
|
-
if agent and agent not in [
|
|
915
|
+
if agent and agent not in [e.value for e in AgentType]:
|
|
908
916
|
# Find remote model name
|
|
909
917
|
model = agent
|
|
910
918
|
if not vllm_base_url:
|
|
@@ -921,20 +929,23 @@ def eval(
|
|
|
921
929
|
hud_console.error(f"Model {model} not found")
|
|
922
930
|
raise typer.Exit(1)
|
|
923
931
|
model = base_model
|
|
924
|
-
agent =
|
|
932
|
+
agent = AgentType.VLLM # Use vLLM backend for HUD models
|
|
925
933
|
hud_console.info(f"Using HUD model: {model} (trained on {base_model})")
|
|
926
934
|
|
|
927
935
|
# Validate agent choice
|
|
928
|
-
valid_agents = [
|
|
936
|
+
valid_agents = [e.value for e in AgentType]
|
|
929
937
|
if agent not in valid_agents:
|
|
930
938
|
hud_console.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
|
|
931
939
|
raise typer.Exit(1)
|
|
932
940
|
|
|
941
|
+
# Type narrowing: agent is now guaranteed to be an AgentType value after validation
|
|
942
|
+
agent = AgentType(agent)
|
|
943
|
+
|
|
933
944
|
# Run the command
|
|
934
945
|
eval_command(
|
|
935
946
|
source=source,
|
|
936
947
|
full=full,
|
|
937
|
-
agent=agent,
|
|
948
|
+
agent=agent,
|
|
938
949
|
model=model,
|
|
939
950
|
allowed_tools=allowed_tools,
|
|
940
951
|
max_concurrent=max_concurrent,
|
|
@@ -1074,6 +1085,51 @@ def rl(
|
|
|
1074
1085
|
)
|
|
1075
1086
|
|
|
1076
1087
|
|
|
1088
|
+
@app.command()
|
|
1089
|
+
def convert(
|
|
1090
|
+
tasks_file: str = typer.Argument(
|
|
1091
|
+
..., help="Path to tasks file (JSON/JSONL) to convert to remote MCP configuration"
|
|
1092
|
+
),
|
|
1093
|
+
) -> None:
|
|
1094
|
+
"""Convert local MCP task configs to remote (mcp.hud.so) format.
|
|
1095
|
+
|
|
1096
|
+
This mirrors the implicit conversion flow used by 'hud rl' and writes a new
|
|
1097
|
+
remote_<name>.json next to the source file when needed.
|
|
1098
|
+
"""
|
|
1099
|
+
from pathlib import Path
|
|
1100
|
+
|
|
1101
|
+
from hud.utils.hud_console import HUDConsole
|
|
1102
|
+
|
|
1103
|
+
hud_console = HUDConsole()
|
|
1104
|
+
|
|
1105
|
+
try:
|
|
1106
|
+
from .flows.tasks import convert_tasks_to_remote
|
|
1107
|
+
|
|
1108
|
+
result_path = convert_tasks_to_remote(tasks_file)
|
|
1109
|
+
|
|
1110
|
+
# If nothing changed, inform the user
|
|
1111
|
+
try:
|
|
1112
|
+
if Path(result_path).resolve() == Path(tasks_file).resolve():
|
|
1113
|
+
hud_console.success(
|
|
1114
|
+
"Tasks already reference remote MCP URLs. No conversion needed."
|
|
1115
|
+
)
|
|
1116
|
+
hud_console.hint("You can run them directly with: hud eval <tasks_file> --full")
|
|
1117
|
+
return
|
|
1118
|
+
except Exception as e:
|
|
1119
|
+
# Best effort; continue with success message
|
|
1120
|
+
hud_console.debug(f"Path comparison failed, continuing: {e}")
|
|
1121
|
+
|
|
1122
|
+
hud_console.success(f"Converted tasks written to: {result_path}")
|
|
1123
|
+
hud_console.hint(
|
|
1124
|
+
"You can now run remote flows: hud rl <converted_file> or hud eval <converted_file>"
|
|
1125
|
+
)
|
|
1126
|
+
except typer.Exit:
|
|
1127
|
+
raise
|
|
1128
|
+
except Exception as e:
|
|
1129
|
+
hud_console.error(f"Failed to convert tasks: {e}")
|
|
1130
|
+
raise typer.Exit(1) from e
|
|
1131
|
+
|
|
1132
|
+
|
|
1077
1133
|
@app.command()
|
|
1078
1134
|
def set(
|
|
1079
1135
|
assignments: list[str] = typer.Argument( # type: ignore[arg-type] # noqa: B008
|