hud-python 0.4.20__tar.gz → 0.4.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- {hud_python-0.4.20 → hud_python-0.4.22}/PKG-INFO +2 -4
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/__init__.py +7 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/agents/base.py +42 -10
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/agents/claude.py +24 -14
- hud_python-0.4.22/hud/agents/grounded_openai.py +280 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/agents/tests/test_client.py +11 -27
- hud_python-0.4.22/hud/agents/tests/test_grounded_openai_agent.py +155 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/__init__.py +50 -20
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/build.py +3 -44
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/eval.py +25 -6
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/init.py +4 -4
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/push.py +3 -1
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/tests/test_push.py +6 -6
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/utils/interactive.py +1 -1
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/clients/__init__.py +3 -2
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/clients/base.py +20 -9
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/clients/mcp_use.py +44 -22
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/datasets/task.py +6 -2
- hud_python-0.4.22/hud/native/__init__.py +6 -0
- hud_python-0.4.22/hud/native/comparator.py +546 -0
- hud_python-0.4.22/hud/native/tests/__init__.py +1 -0
- hud_python-0.4.22/hud/native/tests/test_comparator.py +539 -0
- hud_python-0.4.22/hud/native/tests/test_native_init.py +79 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/otel/instrumentation.py +0 -2
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/server/server.py +9 -2
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/settings.py +6 -0
- hud_python-0.4.22/hud/shared/exceptions.py +364 -0
- hud_python-0.4.22/hud/shared/hints.py +177 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/shared/requests.py +15 -3
- hud_python-0.4.22/hud/shared/tests/test_exceptions.py +420 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/__init__.py +2 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/executors/tests/test_base_executor.py +1 -1
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/executors/xdo.py +1 -1
- hud_python-0.4.22/hud/tools/grounding/__init__.py +13 -0
- hud_python-0.4.22/hud/tools/grounding/config.py +54 -0
- hud_python-0.4.22/hud/tools/grounding/grounded_tool.py +314 -0
- hud_python-0.4.22/hud/tools/grounding/grounder.py +301 -0
- hud_python-0.4.22/hud/tools/grounding/tests/__init__.py +1 -0
- hud_python-0.4.22/hud/tools/grounding/tests/test_grounded_tool.py +196 -0
- hud_python-0.4.22/hud/tools/submit.py +66 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/tests/test_playwright_tool.py +1 -1
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/tests/test_tools_init.py +1 -1
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/tests/test_utils.py +2 -2
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/types.py +33 -5
- hud_python-0.4.22/hud/utils/agent_factories.py +86 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/utils/design.py +57 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/utils/mcp.py +6 -0
- hud_python-0.4.22/hud/utils/pretty_errors.py +68 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/utils/tests/test_version.py +1 -1
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/version.py +1 -1
- {hud_python-0.4.20 → hud_python-0.4.22}/pyproject.toml +2 -3
- hud_python-0.4.20/hud/shared/exceptions.py +0 -191
- hud_python-0.4.20/hud/shared/tests/test_exceptions.py +0 -179
- {hud_python-0.4.20 → hud_python-0.4.22}/.gitignore +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/LICENSE +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/README.md +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/environments/README.md +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/environments/browser/README.md +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/environments/browser/apps/2048/README.md +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/environments/browser/apps/2048/backend/pyproject.toml +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/environments/browser/apps/README.md +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/environments/browser/apps/todo/README.md +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/environments/browser/apps/todo/backend/pyproject.toml +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/environments/browser/pyproject.toml +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/environments/remote_browser/README.md +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/environments/remote_browser/pyproject.toml +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/environments/text_2048/README.md +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/environments/text_2048/pyproject.toml +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/examples/README.md +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/__main__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/agents/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/agents/langchain.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/agents/misc/response_agent.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/agents/openai.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/agents/openai_chat_generic.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/agents/tests/test_claude.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/agents/tests/test_openai.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/__main__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/analyze.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/clone.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/debug.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/dev.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/hf.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/list_func.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/pull.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/remove.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/rl/README.md +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/rl/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/rl/init.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/rl/pod.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/rl/ssh.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/rl/train.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/rl/utils.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/tests/test_analyze.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/tests/test_build.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/tests/test_clone.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/tests/test_cursor.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/tests/test_debug.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/tests/test_list_func.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/tests/test_mcp_server.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/tests/test_pull.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/tests/test_registry.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/utils/cursor.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/utils/docker.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/utils/environment.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/utils/metadata.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/utils/remote_runner.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/utils/runner.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/cli/utils/server.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/clients/README.md +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/clients/fastmcp.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/clients/tests/test_client_integration.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/clients/tests/test_fastmcp.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/clients/tests/test_protocol.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/clients/utils/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/clients/utils/retry_transport.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/datasets/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/datasets/execution/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/datasets/execution/parallel.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/datasets/execution/runner.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/datasets/utils.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/misc/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/misc/claude_plays_pokemon.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/otel/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/otel/collector.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/otel/config.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/otel/context.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/otel/exporters.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/otel/processors.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/otel/tests/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/otel/tests/test_processors.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/py.typed +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/server/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/server/context.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/server/low_level.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/shared/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/telemetry/job.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/telemetry/replay.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/telemetry/tests/test_replay.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/telemetry/tests/test_trace.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/telemetry/trace.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/base.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/bash.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/computer/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/computer/anthropic.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/computer/hud.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/computer/openai.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/edit.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/executors/base.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/playwright.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/response.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/tests/test_bash.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/tests/test_bash_extended.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/tests/test_computer.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/tests/test_computer_actions.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/tests/test_edit.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/types.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/tools/utils.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/utils/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/utils/async_utils.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/utils/progress.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/utils/telemetry.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/utils/tests/test_async_utils.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/utils/tests/test_mcp.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/utils/tests/test_progress.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/hud/utils/tests/test_telemetry.py +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/rl/README.md +0 -0
- {hud_python-0.4.20 → hud_python-0.4.22}/rl/pyproject.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hud-python
|
|
3
|
-
Version: 0.4.20
|
|
3
|
+
Version: 0.4.22
|
|
4
4
|
Summary: SDK for the HUD platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hud-evals/hud-python
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
|
|
@@ -38,6 +38,7 @@ Requires-Python: <3.14,>=3.11
|
|
|
38
38
|
Requires-Dist: httpx<1,>=0.23.0
|
|
39
39
|
Requires-Dist: hud-fastmcp-python-sdk>=0.1.2
|
|
40
40
|
Requires-Dist: hud-mcp-python-sdk>=3.13.2
|
|
41
|
+
Requires-Dist: hud-mcp-use-python-sdk>=2.3.16
|
|
41
42
|
Requires-Dist: opentelemetry-api>=1.34.1
|
|
42
43
|
Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
|
|
43
44
|
Requires-Dist: opentelemetry-instrumentation-mcp>=0.44.1
|
|
@@ -56,7 +57,6 @@ Provides-Extra: agent
|
|
|
56
57
|
Requires-Dist: anthropic; extra == 'agent'
|
|
57
58
|
Requires-Dist: datasets>=2.14.0; extra == 'agent'
|
|
58
59
|
Requires-Dist: dotenv>=0.9.9; extra == 'agent'
|
|
59
|
-
Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'agent'
|
|
60
60
|
Requires-Dist: ipykernel; extra == 'agent'
|
|
61
61
|
Requires-Dist: ipython<9; extra == 'agent'
|
|
62
62
|
Requires-Dist: jupyter-client; extra == 'agent'
|
|
@@ -70,7 +70,6 @@ Provides-Extra: agents
|
|
|
70
70
|
Requires-Dist: anthropic; extra == 'agents'
|
|
71
71
|
Requires-Dist: datasets>=2.14.0; extra == 'agents'
|
|
72
72
|
Requires-Dist: dotenv>=0.9.9; extra == 'agents'
|
|
73
|
-
Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'agents'
|
|
74
73
|
Requires-Dist: ipykernel; extra == 'agents'
|
|
75
74
|
Requires-Dist: ipython<9; extra == 'agents'
|
|
76
75
|
Requires-Dist: jupyter-client; extra == 'agents'
|
|
@@ -85,7 +84,6 @@ Requires-Dist: aiodocker>=0.24.0; extra == 'dev'
|
|
|
85
84
|
Requires-Dist: anthropic; extra == 'dev'
|
|
86
85
|
Requires-Dist: datasets>=2.14.0; extra == 'dev'
|
|
87
86
|
Requires-Dist: dotenv>=0.9.9; extra == 'dev'
|
|
88
|
-
Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'dev'
|
|
89
87
|
Requires-Dist: inspect-ai>=0.3.80; extra == 'dev'
|
|
90
88
|
Requires-Dist: ipykernel; extra == 'dev'
|
|
91
89
|
Requires-Dist: ipython<9; extra == 'dev'
|
|
@@ -94,6 +94,8 @@ class MCPAgent(ABC):
|
|
|
94
94
|
self.model_name = model_name
|
|
95
95
|
self.design = HUDDesign(logger=logger)
|
|
96
96
|
|
|
97
|
+
self.metadata = {}
|
|
98
|
+
|
|
97
99
|
# Set verbose mode if requested
|
|
98
100
|
if verbose:
|
|
99
101
|
self.design.set_verbose(True)
|
|
@@ -111,10 +113,12 @@ class MCPAgent(ABC):
|
|
|
111
113
|
# Initialize these here so methods can be called before initialize()
|
|
112
114
|
self._available_tools: list[types.Tool] = []
|
|
113
115
|
self._tool_map: dict[str, types.Tool] = {} # Simplified: just name to tool
|
|
114
|
-
self.
|
|
116
|
+
self.response_tool_name = None
|
|
117
|
+
self.initialization_complete = False
|
|
118
|
+
|
|
119
|
+
# Trace
|
|
115
120
|
self._auto_trace = auto_trace
|
|
116
121
|
self._auto_trace_cm: Any | None = None # Store auto-created trace context manager
|
|
117
|
-
self.initialization_complete = False
|
|
118
122
|
|
|
119
123
|
# Response agent to automatically interact with the model
|
|
120
124
|
self.response_agent = response_agent
|
|
@@ -530,6 +534,9 @@ class MCPAgent(ABC):
|
|
|
530
534
|
self._available_tools = []
|
|
531
535
|
self._tool_map = {}
|
|
532
536
|
|
|
537
|
+
# Track response tools by server
|
|
538
|
+
response_tools_by_server: dict[str, str] = {} # server_name -> tool_name
|
|
539
|
+
|
|
533
540
|
for tool in all_tools:
|
|
534
541
|
# Check if tool should be included
|
|
535
542
|
if self.allowed_tools and tool.name not in self.allowed_tools:
|
|
@@ -541,10 +548,36 @@ class MCPAgent(ABC):
|
|
|
541
548
|
# Simplified mapping - just tool name to tool
|
|
542
549
|
self._tool_map[tool.name] = tool
|
|
543
550
|
|
|
544
|
-
#
|
|
545
|
-
if tool.name
|
|
546
|
-
|
|
547
|
-
|
|
551
|
+
# Track response tools
|
|
552
|
+
if "response" in tool.name or tool.name == "response":
|
|
553
|
+
# Extract server name from tool name (e.g., "grader_response" -> "grader")
|
|
554
|
+
if "_" in tool.name:
|
|
555
|
+
server_name = tool.name.split("_", 1)[0]
|
|
556
|
+
response_tools_by_server[server_name] = tool.name
|
|
557
|
+
else:
|
|
558
|
+
response_tools_by_server["_default"] = tool.name
|
|
559
|
+
|
|
560
|
+
# Find the response tool to use (prioritize last server in config)
|
|
561
|
+
if response_tools_by_server and hasattr(self.mcp_client, "mcp_config"):
|
|
562
|
+
# Get server names in order from mcp_config
|
|
563
|
+
server_names = list(self.mcp_client.mcp_config.keys())
|
|
564
|
+
|
|
565
|
+
# Try to find response tool from last server first
|
|
566
|
+
response_tool_name = None
|
|
567
|
+
for server_name in reversed(server_names):
|
|
568
|
+
if server_name in response_tools_by_server:
|
|
569
|
+
response_tool_name = response_tools_by_server[server_name]
|
|
570
|
+
break
|
|
571
|
+
|
|
572
|
+
# Fallback to any response tool
|
|
573
|
+
if not response_tool_name and response_tools_by_server:
|
|
574
|
+
response_tool_name = next(iter(response_tools_by_server.values()))
|
|
575
|
+
|
|
576
|
+
# Add to lifecycle tools if found
|
|
577
|
+
if response_tool_name and response_tool_name not in self.lifecycle_tools:
|
|
578
|
+
self.design.debug(f"Auto-detected '{response_tool_name}' tool as a lifecycle tool")
|
|
579
|
+
self.response_tool_name = response_tool_name
|
|
580
|
+
self.lifecycle_tools.append(response_tool_name)
|
|
548
581
|
|
|
549
582
|
# Check if all required tools are available
|
|
550
583
|
if self.required_tools:
|
|
@@ -565,13 +598,12 @@ class MCPAgent(ABC):
|
|
|
565
598
|
response: The agent's response
|
|
566
599
|
messages: The current message history (will be modified in-place)
|
|
567
600
|
"""
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
self.design.debug("Calling response lifecycle tool")
|
|
601
|
+
if self.response_tool_name:
|
|
602
|
+
self.design.debug(f"Calling response lifecycle tool: {self.response_tool_name}")
|
|
571
603
|
try:
|
|
572
604
|
# Call the response tool with the agent's response
|
|
573
605
|
response_tool_call = MCPToolCall(
|
|
574
|
-
name=
|
|
606
|
+
name=self.response_tool_name, arguments={"response": response.content}
|
|
575
607
|
)
|
|
576
608
|
response_results = await self.call_tools(response_tool_call)
|
|
577
609
|
|
|
@@ -306,19 +306,20 @@ class ClaudeAgent(MCPAgent):
|
|
|
306
306
|
"""Convert MCP tools to Claude tool format."""
|
|
307
307
|
claude_tools = []
|
|
308
308
|
self._claude_to_mcp_tool_map = {} # Reset mapping
|
|
309
|
-
|
|
309
|
+
|
|
310
310
|
# Find computer tool by priority
|
|
311
311
|
computer_tool_priority = ["anthropic_computer", "computer_anthropic", "computer"]
|
|
312
312
|
selected_computer_tool = None
|
|
313
|
-
|
|
313
|
+
|
|
314
314
|
for priority_name in computer_tool_priority:
|
|
315
315
|
for tool in self._available_tools:
|
|
316
|
-
|
|
316
|
+
# Check both exact match and suffix match (for prefixed tools)
|
|
317
|
+
if tool.name == priority_name or tool.name.endswith(f"_{priority_name}"):
|
|
317
318
|
selected_computer_tool = tool
|
|
318
319
|
break
|
|
319
320
|
if selected_computer_tool:
|
|
320
321
|
break
|
|
321
|
-
|
|
322
|
+
|
|
322
323
|
# Add the selected computer tool if found
|
|
323
324
|
if selected_computer_tool:
|
|
324
325
|
claude_tool = {
|
|
@@ -330,14 +331,18 @@ class ClaudeAgent(MCPAgent):
|
|
|
330
331
|
# Map Claude's "computer" back to the actual MCP tool name
|
|
331
332
|
self._claude_to_mcp_tool_map["computer"] = selected_computer_tool.name
|
|
332
333
|
claude_tools.append(claude_tool)
|
|
333
|
-
logger.debug(
|
|
334
|
-
|
|
334
|
+
logger.debug("Using %s as computer tool for Claude", selected_computer_tool.name)
|
|
335
|
+
|
|
335
336
|
# Add other non-computer tools
|
|
336
337
|
for tool in self._available_tools:
|
|
337
338
|
# Skip computer tools (already handled) and lifecycle tools
|
|
338
|
-
|
|
339
|
+
is_computer_tool = any(
|
|
340
|
+
tool.name == priority_name or tool.name.endswith(f"_{priority_name}")
|
|
341
|
+
for priority_name in computer_tool_priority
|
|
342
|
+
)
|
|
343
|
+
if is_computer_tool or tool.name in self.lifecycle_tools:
|
|
339
344
|
continue
|
|
340
|
-
|
|
345
|
+
|
|
341
346
|
claude_tool = {
|
|
342
347
|
"name": tool.name,
|
|
343
348
|
"description": tool.description or f"Execute {tool.name}",
|
|
@@ -359,16 +364,21 @@ class ClaudeAgent(MCPAgent):
|
|
|
359
364
|
messages_cached = copy.deepcopy(messages)
|
|
360
365
|
|
|
361
366
|
# Mark last user message with cache control
|
|
362
|
-
if
|
|
367
|
+
if (
|
|
368
|
+
messages_cached
|
|
369
|
+
and isinstance(messages_cached[-1], dict)
|
|
370
|
+
and messages_cached[-1].get("role") == "user"
|
|
371
|
+
):
|
|
363
372
|
last_content = messages_cached[-1]["content"]
|
|
364
373
|
# Content is formatted to be list of ContentBlock in format_blocks and format_message
|
|
365
374
|
if isinstance(last_content, list):
|
|
366
375
|
for block in last_content:
|
|
367
|
-
# Only add cache control to block types that support it
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
376
|
+
# Only add cache control to dict-like block types that support it
|
|
377
|
+
if isinstance(block, dict):
|
|
378
|
+
block_type = block.get("type")
|
|
379
|
+
if block_type in ["text", "image", "tool_use", "tool_result"]:
|
|
380
|
+
cache_control: BetaCacheControlEphemeralParam = {"type": "ephemeral"}
|
|
381
|
+
block["cache_control"] = cache_control # type: ignore[reportGeneralTypeIssues]
|
|
372
382
|
|
|
373
383
|
return messages_cached
|
|
374
384
|
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
"""Grounded OpenAI agent that separates visual grounding from reasoning."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from hud import instrument
|
|
9
|
+
from hud.tools.grounding import GroundedComputerTool, Grounder, GrounderConfig
|
|
10
|
+
from hud.types import AgentResponse, MCPToolCall, MCPToolResult
|
|
11
|
+
|
|
12
|
+
from .openai_chat_generic import GenericOpenAIChatAgent
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class GroundedOpenAIChatAgent(GenericOpenAIChatAgent):
|
|
16
|
+
"""OpenAI agent that uses a separate grounding model for element detection.
|
|
17
|
+
|
|
18
|
+
This agent:
|
|
19
|
+
- Exposes only a synthetic "computer" tool to the planning model
|
|
20
|
+
- Intercepts tool calls to ground element descriptions to coordinates
|
|
21
|
+
- Converts grounded results to real computer tool calls
|
|
22
|
+
- Maintains screenshot state for grounding operations
|
|
23
|
+
|
|
24
|
+
The architecture separates concerns:
|
|
25
|
+
- Planning model (GPT-4o etc) focuses on high-level reasoning
|
|
26
|
+
- Grounding model (Qwen2-VL etc) handles visual element detection
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
def __init__(
|
|
30
|
+
self,
|
|
31
|
+
*,
|
|
32
|
+
grounder_config: GrounderConfig,
|
|
33
|
+
model_name: str = "gpt-4o-mini",
|
|
34
|
+
allowed_tools: list[str] | None = None,
|
|
35
|
+
append_setup_output: bool = False,
|
|
36
|
+
system_prompt: str | None = None,
|
|
37
|
+
**kwargs: Any,
|
|
38
|
+
) -> None:
|
|
39
|
+
"""Initialize the grounded OpenAI agent.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
grounder_config: Configuration for the grounding model
|
|
43
|
+
openai_client: OpenAI client for the planning model
|
|
44
|
+
model: Name of the OpenAI model to use for planning (e.g., "gpt-4o", "gpt-4o-mini")
|
|
45
|
+
real_computer_tool_name: Name of the actual computer tool to execute
|
|
46
|
+
**kwargs: Additional arguments passed to GenericOpenAIChatAgent
|
|
47
|
+
"""
|
|
48
|
+
# Set defaults for grounded agent
|
|
49
|
+
if allowed_tools is None:
|
|
50
|
+
allowed_tools = ["computer"]
|
|
51
|
+
|
|
52
|
+
if system_prompt is None:
|
|
53
|
+
system_prompt = (
|
|
54
|
+
"You are a helpful AI assistant that can control the computer "
|
|
55
|
+
"through visual interaction.\n\n"
|
|
56
|
+
"IMPORTANT: Always explain your reasoning and observations before taking actions:\n"
|
|
57
|
+
"1. First, describe what you see on the screen\n"
|
|
58
|
+
"2. Explain what you plan to do and why\n"
|
|
59
|
+
"3. Then use the computer tool with natural language descriptions\n\n"
|
|
60
|
+
"For example:\n"
|
|
61
|
+
"- 'I can see a login form with username and password fields. "
|
|
62
|
+
"I need to click on the username field first.'\n"
|
|
63
|
+
"- 'There's a blue submit button at the bottom. "
|
|
64
|
+
"I'll click on it to submit the form.'\n"
|
|
65
|
+
"- 'I notice a red close button in the top right corner. "
|
|
66
|
+
"I'll click it to close this dialog.'\n\n"
|
|
67
|
+
"Use descriptive element descriptions like:\n"
|
|
68
|
+
"- Colors: 'red button', 'blue link', 'green checkmark'\n"
|
|
69
|
+
"- Position: 'top right corner', 'bottom of the page', 'left sidebar'\n"
|
|
70
|
+
"- Text content: 'Submit button', 'Login link', 'Cancel option'\n"
|
|
71
|
+
"- Element type: 'text field', 'dropdown menu', 'checkbox'"
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
super().__init__(
|
|
75
|
+
model_name=model_name,
|
|
76
|
+
allowed_tools=allowed_tools,
|
|
77
|
+
append_setup_output=append_setup_output,
|
|
78
|
+
system_prompt=system_prompt,
|
|
79
|
+
**kwargs,
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
self.grounder = Grounder(grounder_config)
|
|
83
|
+
self.grounded_tool = None
|
|
84
|
+
|
|
85
|
+
async def initialize(self, task: Any = None) -> None:
|
|
86
|
+
"""Initialize the agent and create the grounded tool with mcp_client."""
|
|
87
|
+
# Call parent initialization first
|
|
88
|
+
await super().initialize(task)
|
|
89
|
+
|
|
90
|
+
if self.mcp_client is None:
|
|
91
|
+
raise ValueError("mcp_client must be initialized before creating grounded tool")
|
|
92
|
+
self.grounded_tool = GroundedComputerTool(
|
|
93
|
+
grounder=self.grounder, mcp_client=self.mcp_client, computer_tool_name="computer"
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
def get_tool_schemas(self) -> list[Any]:
|
|
97
|
+
"""Override to expose only the synthetic grounded tool.
|
|
98
|
+
|
|
99
|
+
The planning model only sees the synthetic "computer" tool,
|
|
100
|
+
which is provided by the grounded tool itself.
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
List containing only the grounded computer tool schema
|
|
104
|
+
"""
|
|
105
|
+
if self.grounded_tool is None:
|
|
106
|
+
return []
|
|
107
|
+
return [self.grounded_tool.get_openai_tool_schema()]
|
|
108
|
+
|
|
109
|
+
@instrument(
|
|
110
|
+
span_type="agent",
|
|
111
|
+
record_args=False,
|
|
112
|
+
record_result=True,
|
|
113
|
+
)
|
|
114
|
+
async def get_response(self, messages: Any) -> AgentResponse:
|
|
115
|
+
"""Get response from the planning model and handle grounded tool calls.
|
|
116
|
+
|
|
117
|
+
This method:
|
|
118
|
+
1. Calls the planning model with the grounded tool schema
|
|
119
|
+
2. Executes any tool calls directly through the grounded tool
|
|
120
|
+
3. Returns the response
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
messages: Conversation messages
|
|
124
|
+
|
|
125
|
+
Returns:
|
|
126
|
+
AgentResponse with either content or tool calls for MCP execution
|
|
127
|
+
"""
|
|
128
|
+
tool_schemas = self.get_tool_schemas()
|
|
129
|
+
|
|
130
|
+
# Take initial screenshot and add to messages if this is the first turn
|
|
131
|
+
has_image = any(
|
|
132
|
+
isinstance(m.get("content"), list)
|
|
133
|
+
and any(
|
|
134
|
+
block.get("type") == "image_url"
|
|
135
|
+
for block in m["content"]
|
|
136
|
+
if isinstance(block, dict)
|
|
137
|
+
)
|
|
138
|
+
for m in messages
|
|
139
|
+
if isinstance(m.get("content"), list)
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
if not has_image:
|
|
143
|
+
if self.mcp_client is None:
|
|
144
|
+
raise ValueError("mcp_client is not initialized")
|
|
145
|
+
screenshot_result = await self.mcp_client.call_tool(
|
|
146
|
+
MCPToolCall(name="computer", arguments={"action": "screenshot"})
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
for block in screenshot_result.content:
|
|
150
|
+
# Check for ImageContent type from MCP
|
|
151
|
+
if hasattr(block, "data") and hasattr(block, "mimeType"):
|
|
152
|
+
mime_type = getattr(block, "mimeType", "image/png")
|
|
153
|
+
data = getattr(block, "data", "")
|
|
154
|
+
messages.append(
|
|
155
|
+
{
|
|
156
|
+
"role": "user",
|
|
157
|
+
"content": [
|
|
158
|
+
{
|
|
159
|
+
"type": "image_url",
|
|
160
|
+
"image_url": {"url": f"data:{mime_type};base64,{data}"},
|
|
161
|
+
}
|
|
162
|
+
],
|
|
163
|
+
}
|
|
164
|
+
)
|
|
165
|
+
break
|
|
166
|
+
|
|
167
|
+
protected_keys = {"model", "messages", "tools", "parallel_tool_calls"}
|
|
168
|
+
extra = {k: v for k, v in (self.completion_kwargs or {}).items() if k not in protected_keys}
|
|
169
|
+
|
|
170
|
+
response = await self.oai.chat.completions.create(
|
|
171
|
+
model=self.model_name,
|
|
172
|
+
messages=messages,
|
|
173
|
+
tools=tool_schemas,
|
|
174
|
+
parallel_tool_calls=False,
|
|
175
|
+
**extra,
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
choice = response.choices[0]
|
|
179
|
+
msg = choice.message
|
|
180
|
+
|
|
181
|
+
assistant_msg: dict[str, Any] = {"role": "assistant"}
|
|
182
|
+
if msg.content:
|
|
183
|
+
assistant_msg["content"] = msg.content
|
|
184
|
+
if msg.tool_calls:
|
|
185
|
+
assistant_msg["tool_calls"] = msg.tool_calls
|
|
186
|
+
|
|
187
|
+
messages.append(assistant_msg)
|
|
188
|
+
|
|
189
|
+
self.conversation_history = messages.copy()
|
|
190
|
+
|
|
191
|
+
if not msg.tool_calls:
|
|
192
|
+
return AgentResponse(
|
|
193
|
+
content=msg.content or "",
|
|
194
|
+
tool_calls=[],
|
|
195
|
+
done=choice.finish_reason in ("stop", "length"),
|
|
196
|
+
raw=response,
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
tc = msg.tool_calls[0]
|
|
200
|
+
|
|
201
|
+
if tc.function.name != "computer":
|
|
202
|
+
return AgentResponse(
|
|
203
|
+
content=f"Error: Model called unexpected tool '{tc.function.name}'",
|
|
204
|
+
tool_calls=[],
|
|
205
|
+
done=True,
|
|
206
|
+
raw=response,
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
# Parse the arguments
|
|
210
|
+
try:
|
|
211
|
+
args = json.loads(tc.function.arguments or "{}")
|
|
212
|
+
except json.JSONDecodeError:
|
|
213
|
+
return AgentResponse(
|
|
214
|
+
content="Error: Invalid tool arguments", tool_calls=[], done=True, raw=response
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
tool_call = MCPToolCall(name="computer", arguments=args, id=tc.id)
|
|
218
|
+
|
|
219
|
+
return AgentResponse(
|
|
220
|
+
content=msg.content or "", tool_calls=[tool_call], done=False, raw=response
|
|
221
|
+
)
|
|
222
|
+
|
|
223
|
+
async def call_tools(
    self, tool_call: MCPToolCall | list[MCPToolCall] | None = None
) -> list[MCPToolResult]:
    """Execute tool calls, routing ``computer`` calls through the grounded tool.

    Calls named ``"computer"`` are not sent to MCP. They are executed by
    ``self.grounded_tool`` with the call's arguments plus, when one can be
    found in ``self.conversation_history``, the most recent screenshot as
    ``screenshot_b64``. Every other call is delegated to the parent
    implementation.

    Args:
        tool_call: A single call, a list of calls, or ``None``.

    Returns:
        The results in the order the calls were given. ``None`` yields an
        empty list. Any exception raised while handling a ``computer`` call
        (including an uninitialized grounded tool) is reported as a result
        with ``isError=True`` whose text is the exception message, rather
        than propagated.
    """
    if tool_call is None:
        return []

    if isinstance(tool_call, MCPToolCall):
        tool_call = [tool_call]

    results: list[MCPToolResult] = []
    for tc in tool_call:
        if tc.name != "computer":
            # Non-computer tools keep the parent's behaviour.
            results.extend(await super().call_tools(tc))
            continue

        try:
            # Raised inside the try on purpose: a missing grounded tool is
            # reported as an error result, not as an exception.
            if self.grounded_tool is None:
                raise ValueError("Grounded tool is not initialized")

            # Copy so the caller's argument mapping is never mutated.
            grounded_args = dict(tc.arguments) if tc.arguments else {}
            screenshot_b64 = self._latest_screenshot_b64()
            if screenshot_b64:
                grounded_args["screenshot_b64"] = screenshot_b64

            content_blocks = await self.grounded_tool(**grounded_args)
            results.append(MCPToolResult(content=content_blocks, isError=False))
        except Exception as e:
            # Best-effort boundary: surface the failure to the agent loop as
            # a tool error instead of aborting the remaining calls.
            from mcp.types import TextContent

            error_content = TextContent(text=str(e), type="text")
            results.append(MCPToolResult(content=[error_content], isError=True))

    return results

def _latest_screenshot_b64(self) -> str | None:
    """Return the base64 payload of the most recent screenshot, if any.

    Scans ``self.conversation_history`` from newest to oldest and looks only
    at user messages whose content is a list of blocks. The first
    ``image_url`` block holding a ``data:`` URL with a non-empty payload
    after the first comma wins. Blocks with a missing, non-string,
    non-``data:`` or payload-less URL are skipped instead of raising or
    ending the scan of that message.
    """
    for message in reversed(self.conversation_history):
        if not isinstance(message, dict) or message.get("role") != "user":
            continue
        content = message.get("content")
        if not isinstance(content, list):
            continue
        for block in content:
            if not isinstance(block, dict) or block.get("type") != "image_url":
                continue
            image_url = block.get("image_url")
            if not isinstance(image_url, dict):
                continue
            url = image_url.get("url")
            if not isinstance(url, str) or not url.startswith("data:"):
                continue
            # "data:<mime>;base64,<payload>" -> keep everything after the first comma.
            _, separator, payload = url.partition(",")
            if separator and payload:
                return payload
    return None
|
|
@@ -33,29 +33,6 @@ class TestMCPClient:
|
|
|
33
33
|
with patch("mcp_use.client.MCPClient.from_dict", return_value=mock_instance):
|
|
34
34
|
yield mock_instance
|
|
35
35
|
|
|
36
|
-
@pytest.mark.asyncio
|
|
37
|
-
async def test_init_with_config(self, mock_telemetry):
|
|
38
|
-
"""Test client initialization with config dictionary."""
|
|
39
|
-
mcp_config = {
|
|
40
|
-
"test_server": {
|
|
41
|
-
"command": "python",
|
|
42
|
-
"args": ["-m", "test_server"],
|
|
43
|
-
"env": {"TEST": "true"},
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
with patch("mcp_use.client.MCPClient.from_dict") as mock_from_dict:
|
|
48
|
-
mock_instance = MagicMock()
|
|
49
|
-
mock_instance.create_all_sessions = AsyncMock(return_value={})
|
|
50
|
-
mock_from_dict.return_value = mock_instance
|
|
51
|
-
client = MCPClient(mcp_config=mcp_config, verbose=True)
|
|
52
|
-
# Initialize to trigger connection
|
|
53
|
-
await client.initialize()
|
|
54
|
-
|
|
55
|
-
assert client.verbose is True
|
|
56
|
-
# Verify MCPUseClient.from_dict was called with proper config
|
|
57
|
-
mock_from_dict.assert_called_once_with({"mcpServers": mcp_config})
|
|
58
|
-
|
|
59
36
|
@pytest.mark.asyncio
|
|
60
37
|
async def test_connect_single_server(self, mock_telemetry, mock_mcp_use_client):
|
|
61
38
|
"""Test connecting to a single server."""
|
|
@@ -146,10 +123,10 @@ class TestMCPClient:
|
|
|
146
123
|
# Verify sessions were created
|
|
147
124
|
mock_mcp_use_client.create_all_sessions.assert_called_once()
|
|
148
125
|
|
|
149
|
-
# Check tools from both servers
|
|
126
|
+
# Check tools from both servers - should be prefixed with server names
|
|
150
127
|
tools = await client.list_tools()
|
|
151
128
|
names = {t.name for t in tools}
|
|
152
|
-
assert names == {"
|
|
129
|
+
assert names == {"server1_tool1", "server2_tool2"}
|
|
153
130
|
|
|
154
131
|
@pytest.mark.asyncio
|
|
155
132
|
async def test_call_tool(self, mock_telemetry, mock_mcp_use_client):
|
|
@@ -220,8 +197,15 @@ class TestMCPClient:
|
|
|
220
197
|
|
|
221
198
|
await client.initialize()
|
|
222
199
|
|
|
223
|
-
|
|
224
|
-
|
|
200
|
+
# Calling a non-existent tool should return an error result
|
|
201
|
+
result = await client.call_tool(name="nonexistent", arguments={})
|
|
202
|
+
assert result.isError is True
|
|
203
|
+
# Check that the error message is in the text content
|
|
204
|
+
text_content = ""
|
|
205
|
+
for content in result.content:
|
|
206
|
+
if isinstance(content, types.TextContent):
|
|
207
|
+
text_content += content.text
|
|
208
|
+
assert "Tool 'nonexistent' not found" in text_content
|
|
225
209
|
|
|
226
210
|
@pytest.mark.asyncio
|
|
227
211
|
async def test_get_telemetry_data(self, mock_telemetry, mock_mcp_use_client):
|