hud-python 0.4.11__tar.gz → 0.4.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- {hud_python-0.4.11 → hud_python-0.4.13}/PKG-INFO +16 -13
- {hud_python-0.4.11 → hud_python-0.4.13}/README.md +2 -6
- {hud_python-0.4.11 → hud_python-0.4.13}/environments/README.md +15 -15
- hud_python-0.4.13/environments/browser/README.md +213 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/environments/remote_browser/README.md +3 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/environments/remote_browser/pyproject.toml +11 -16
- hud_python-0.4.13/hud/__main__.py +8 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/agents/base.py +7 -8
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/agents/langchain.py +2 -2
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/agents/tests/test_openai.py +3 -1
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/__init__.py +114 -52
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/build.py +121 -71
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/debug.py +2 -2
- hud_python-0.4.11/hud/cli/mcp_server.py → hud_python-0.4.13/hud/cli/dev.py +101 -38
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/eval.py +175 -90
- hud_python-0.4.13/hud/cli/init.py +658 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/list_func.py +72 -71
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/pull.py +1 -2
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/push.py +35 -23
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/remove.py +35 -41
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/tests/test_analyze.py +2 -1
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/tests/test_analyze_metadata.py +42 -49
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/tests/test_build.py +28 -52
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/tests/test_cursor.py +1 -1
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/tests/test_debug.py +1 -1
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/tests/test_list_func.py +75 -64
- hud_python-0.4.13/hud/cli/tests/test_main_module.py +30 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/tests/test_mcp_server.py +3 -3
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/tests/test_pull.py +30 -61
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/tests/test_push.py +70 -89
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/tests/test_registry.py +36 -38
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/tests/test_utils.py +1 -1
- hud_python-0.4.13/hud/cli/utils/__init__.py +1 -0
- hud_python-0.4.11/hud/cli/docker_utils.py → hud_python-0.4.13/hud/cli/utils/docker.py +36 -0
- hud_python-0.4.11/hud/cli/env_utils.py → hud_python-0.4.13/hud/cli/utils/environment.py +7 -7
- {hud_python-0.4.11/hud/cli → hud_python-0.4.13/hud/cli/utils}/interactive.py +91 -19
- hud_python-0.4.11/hud/cli/analyze_metadata.py → hud_python-0.4.13/hud/cli/utils/metadata.py +12 -8
- {hud_python-0.4.11/hud/cli → hud_python-0.4.13/hud/cli/utils}/registry.py +28 -30
- {hud_python-0.4.11/hud/cli → hud_python-0.4.13/hud/cli/utils}/remote_runner.py +1 -1
- hud_python-0.4.13/hud/cli/utils/runner.py +134 -0
- hud_python-0.4.13/hud/cli/utils/server.py +250 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/clients/base.py +1 -1
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/clients/fastmcp.py +5 -13
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/clients/mcp_use.py +6 -10
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/server/server.py +35 -5
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/shared/exceptions.py +11 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/shared/tests/test_exceptions.py +22 -0
- hud_python-0.4.13/hud/telemetry/tests/test_replay.py +40 -0
- hud_python-0.4.13/hud/telemetry/tests/test_trace.py +63 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/base.py +20 -3
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/computer/hud.py +15 -6
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/executors/tests/test_base_executor.py +27 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/response.py +12 -8
- hud_python-0.4.13/hud/tools/tests/test_response.py +60 -0
- hud_python-0.4.13/hud/tools/tests/test_tools_init.py +49 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/utils/design.py +19 -8
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/utils/mcp.py +17 -5
- hud_python-0.4.13/hud/utils/tests/__init__.py +0 -0
- hud_python-0.4.13/hud/utils/tests/test_mcp.py +112 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/utils/tests/test_version.py +1 -1
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/version.py +1 -1
- {hud_python-0.4.11 → hud_python-0.4.13}/pyproject.toml +10 -11
- hud_python-0.4.11/environments/browser/README.md +0 -447
- hud_python-0.4.11/environments/browser/src/hud_controller/README.md +0 -117
- hud_python-0.4.11/hud/cli/init.py +0 -280
- hud_python-0.4.11/hud/cli/runner.py +0 -160
- {hud_python-0.4.11 → hud_python-0.4.13}/.gitignore +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/LICENSE +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/environments/browser/apps/2048/README.md +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/environments/browser/apps/2048/backend/pyproject.toml +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/environments/browser/apps/README.md +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/environments/browser/apps/todo/README.md +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/environments/browser/apps/todo/backend/pyproject.toml +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/environments/browser/pyproject.toml +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/environments/text_2048/README.md +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/environments/text_2048/pyproject.toml +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/examples/README.md +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/agents/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/agents/claude.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/agents/misc/response_agent.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/agents/openai.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/agents/openai_chat_generic.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/agents/tests/test_claude.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/agents/tests/test_client.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/__main__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/analyze.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/clone.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/cli/tests/test_clone.py +0 -0
- {hud_python-0.4.11/hud/cli → hud_python-0.4.13/hud/cli/utils}/cursor.py +0 -0
- hud_python-0.4.11/hud/cli/utils.py → hud_python-0.4.13/hud/cli/utils/logging.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/clients/README.md +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/clients/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/clients/tests/test_client_integration.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/clients/tests/test_fastmcp.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/clients/tests/test_protocol.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/clients/utils/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/clients/utils/retry_transport.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/datasets.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/misc/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/misc/claude_plays_pokemon.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/otel/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/otel/collector.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/otel/config.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/otel/context.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/otel/exporters.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/otel/instrumentation.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/otel/processors.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/otel/tests/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/otel/tests/test_processors.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/py.typed +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/server/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/server/context.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/server/low_level.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/settings.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/shared/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/shared/requests.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/telemetry/job.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/telemetry/replay.py +0 -0
- {hud_python-0.4.11/hud/utils → hud_python-0.4.13/hud/telemetry}/tests/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/telemetry/trace.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/bash.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/computer/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/computer/anthropic.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/computer/openai.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/edit.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/executors/base.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/playwright.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/tests/test_bash.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/tests/test_bash_extended.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/tests/test_computer.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/tests/test_computer_actions.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/tests/test_edit.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/types.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/tools/utils.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/types.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/utils/__init__.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/utils/async_utils.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/utils/progress.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/utils/telemetry.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/utils/tests/test_async_utils.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/utils/tests/test_progress.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/hud/utils/tests/test_telemetry.py +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/rl/README.md +0 -0
- {hud_python-0.4.11 → hud_python-0.4.13}/rl/pyproject.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hud-python
|
|
3
|
-
Version: 0.4.11
|
|
3
|
+
Version: 0.4.13
|
|
4
4
|
Summary: SDK for the HUD platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hud-evals/hud-python
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
|
|
@@ -35,10 +35,9 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
35
35
|
Classifier: Programming Language :: Python :: 3.12
|
|
36
36
|
Classifier: Programming Language :: Python :: 3.13
|
|
37
37
|
Requires-Python: <3.14,>=3.11
|
|
38
|
-
Requires-Dist: fastmcp>=2.11.2
|
|
39
38
|
Requires-Dist: httpx<1,>=0.23.0
|
|
40
|
-
Requires-Dist: hud-
|
|
41
|
-
Requires-Dist: mcp>=
|
|
39
|
+
Requires-Dist: hud-fastmcp-python-sdk>=0.1.2
|
|
40
|
+
Requires-Dist: hud-mcp-python-sdk>=3.13.2
|
|
42
41
|
Requires-Dist: opentelemetry-api>=1.34.1
|
|
43
42
|
Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
|
|
44
43
|
Requires-Dist: opentelemetry-instrumentation-mcp>=0.44.1
|
|
@@ -56,7 +55,11 @@ Provides-Extra: agent
|
|
|
56
55
|
Requires-Dist: anthropic; extra == 'agent'
|
|
57
56
|
Requires-Dist: datasets>=2.14.0; extra == 'agent'
|
|
58
57
|
Requires-Dist: dotenv>=0.9.9; extra == 'agent'
|
|
59
|
-
Requires-Dist: hud-mcp-use-python-sdk>=
|
|
58
|
+
Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'agent'
|
|
59
|
+
Requires-Dist: ipykernel; extra == 'agent'
|
|
60
|
+
Requires-Dist: ipython<9; extra == 'agent'
|
|
61
|
+
Requires-Dist: jupyter-client; extra == 'agent'
|
|
62
|
+
Requires-Dist: jupyter-core; extra == 'agent'
|
|
60
63
|
Requires-Dist: langchain; extra == 'agent'
|
|
61
64
|
Requires-Dist: langchain-anthropic; extra == 'agent'
|
|
62
65
|
Requires-Dist: langchain-openai; extra == 'agent'
|
|
@@ -66,7 +69,11 @@ Provides-Extra: agents
|
|
|
66
69
|
Requires-Dist: anthropic; extra == 'agents'
|
|
67
70
|
Requires-Dist: datasets>=2.14.0; extra == 'agents'
|
|
68
71
|
Requires-Dist: dotenv>=0.9.9; extra == 'agents'
|
|
69
|
-
Requires-Dist: hud-mcp-use-python-sdk>=
|
|
72
|
+
Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'agents'
|
|
73
|
+
Requires-Dist: ipykernel; extra == 'agents'
|
|
74
|
+
Requires-Dist: ipython<9; extra == 'agents'
|
|
75
|
+
Requires-Dist: jupyter-client; extra == 'agents'
|
|
76
|
+
Requires-Dist: jupyter-core; extra == 'agents'
|
|
70
77
|
Requires-Dist: langchain; extra == 'agents'
|
|
71
78
|
Requires-Dist: langchain-anthropic; extra == 'agents'
|
|
72
79
|
Requires-Dist: langchain-openai; extra == 'agents'
|
|
@@ -77,7 +84,7 @@ Requires-Dist: aiodocker>=0.24.0; extra == 'dev'
|
|
|
77
84
|
Requires-Dist: anthropic; extra == 'dev'
|
|
78
85
|
Requires-Dist: datasets>=2.14.0; extra == 'dev'
|
|
79
86
|
Requires-Dist: dotenv>=0.9.9; extra == 'dev'
|
|
80
|
-
Requires-Dist: hud-mcp-use-python-sdk>=
|
|
87
|
+
Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'dev'
|
|
81
88
|
Requires-Dist: inspect-ai>=0.3.80; extra == 'dev'
|
|
82
89
|
Requires-Dist: ipykernel; extra == 'dev'
|
|
83
90
|
Requires-Dist: ipython<9; extra == 'dev'
|
|
@@ -233,7 +240,7 @@ Any hud MCP environment and evaluation works with our RL pipeline. Even our remo
|
|
|
233
240
|
|
|
234
241
|
This is Claude Computer Use running on our proprietary financial analyst benchmark [SheetBench-50](https://huggingface.co/datasets/hud-evals/SheetBench-50):
|
|
235
242
|
|
|
236
|
-

|
|
237
244
|
|
|
238
245
|
> [See this trace on _app.hud.so_](https://app.hud.so/trace/9e212e9e-3627-4f1f-9eb5-c6d03c59070a)
|
|
239
246
|
|
|
@@ -385,7 +392,7 @@ result = await ClaudeAgent().run({ # See all agents: https://docs.hud.so/refere
|
|
|
385
392
|
|
|
386
393
|
All leaderboards are publicly available on [app.hud.so/leaderboards](https://app.hud.so/leaderboards) (see [docs](https://docs.hud.so/evaluate-agents/leaderboards))
|
|
387
394
|
|
|
388
|
-

|
|
389
396
|
|
|
390
397
|
We highly suggest running 3-5 evaluations per dataset for the most consistent results across multiple jobs.
|
|
391
398
|
|
|
@@ -430,10 +437,6 @@ graph LR
|
|
|
430
437
|
Trace --> Dashboard
|
|
431
438
|
AnyMCP -->|"MCP"| API
|
|
432
439
|
|
|
433
|
-
style Dashboard fill:#e0e7ff,stroke:#6366f1,stroke-width:2px
|
|
434
|
-
style SDK fill:#fef3c7,stroke:#f59e0b,stroke-width:2px
|
|
435
|
-
style RemoteEnv fill:#d1fae5,stroke:#10b981,stroke-width:2px
|
|
436
|
-
style AnyMCP fill:#fce7f3,stroke:#ec4899,stroke-width:2px,stroke-dasharray: 5 5
|
|
437
440
|
```
|
|
438
441
|
|
|
439
442
|
## CLI reference
|
|
@@ -130,7 +130,7 @@ Any hud MCP environment and evaluation works with our RL pipeline. Even our remo
|
|
|
130
130
|
|
|
131
131
|
This is Claude Computer Use running on our proprietary financial analyst benchmark [SheetBench-50](https://huggingface.co/datasets/hud-evals/SheetBench-50):
|
|
132
132
|
|
|
133
|
-

|
|
134
134
|
|
|
135
135
|
> [See this trace on _app.hud.so_](https://app.hud.so/trace/9e212e9e-3627-4f1f-9eb5-c6d03c59070a)
|
|
136
136
|
|
|
@@ -282,7 +282,7 @@ result = await ClaudeAgent().run({ # See all agents: https://docs.hud.so/refere
|
|
|
282
282
|
|
|
283
283
|
All leaderboards are publicly available on [app.hud.so/leaderboards](https://app.hud.so/leaderboards) (see [docs](https://docs.hud.so/evaluate-agents/leaderboards))
|
|
284
284
|
|
|
285
|
-

|
|
286
286
|
|
|
287
287
|
We highly suggest running 3-5 evaluations per dataset for the most consistent results across multiple jobs.
|
|
288
288
|
|
|
@@ -327,10 +327,6 @@ graph LR
|
|
|
327
327
|
Trace --> Dashboard
|
|
328
328
|
AnyMCP -->|"MCP"| API
|
|
329
329
|
|
|
330
|
-
style Dashboard fill:#e0e7ff,stroke:#6366f1,stroke-width:2px
|
|
331
|
-
style SDK fill:#fef3c7,stroke:#f59e0b,stroke-width:2px
|
|
332
|
-
style RemoteEnv fill:#d1fae5,stroke:#10b981,stroke-width:2px
|
|
333
|
-
style AnyMCP fill:#fce7f3,stroke:#ec4899,stroke-width:2px,stroke-dasharray: 5 5
|
|
334
330
|
```
|
|
335
331
|
|
|
336
332
|
## CLI reference
|
|
@@ -351,7 +351,7 @@ from . import basic, advanced # This registers all @setup.tool() decorated func
|
|
|
351
351
|
|
|
352
352
|
# In setup/basic.py
|
|
353
353
|
from . import setup
|
|
354
|
-
from
|
|
354
|
+
from mcp.types import TextContent
|
|
355
355
|
|
|
356
356
|
@setup.tool()
|
|
357
357
|
async def reset(**kwargs):
|
|
@@ -361,14 +361,14 @@ async def reset(**kwargs):
|
|
|
361
361
|
**kwargs: Additional parameters
|
|
362
362
|
|
|
363
363
|
Returns:
|
|
364
|
-
|
|
364
|
+
TextContent
|
|
365
365
|
"""
|
|
366
366
|
# Access environment from the hub
|
|
367
367
|
env = setup.env
|
|
368
368
|
await env.reset_state()
|
|
369
|
-
return
|
|
370
|
-
|
|
371
|
-
|
|
369
|
+
return TextContent(
|
|
370
|
+
text="Environment reset to initial state",
|
|
371
|
+
type="text"
|
|
372
372
|
)
|
|
373
373
|
|
|
374
374
|
@setup.tool()
|
|
@@ -379,14 +379,14 @@ async def seed_data(num_items: int = 5):
|
|
|
379
379
|
num_items: Number of items to create
|
|
380
380
|
|
|
381
381
|
Returns:
|
|
382
|
-
|
|
382
|
+
TextContent
|
|
383
383
|
"""
|
|
384
384
|
# Access environment from the hub
|
|
385
385
|
env = setup.env
|
|
386
386
|
items = await env.create_items(num_items)
|
|
387
|
-
return
|
|
388
|
-
|
|
389
|
-
|
|
387
|
+
return TextContent(
|
|
388
|
+
text=f"Created {len(items)} items",
|
|
389
|
+
type="text"
|
|
390
390
|
)
|
|
391
391
|
|
|
392
392
|
# In evaluate/__init__.py
|
|
@@ -735,7 +735,7 @@ See the `browser` environment for a complete production example of this pattern.
|
|
|
735
735
|
|
|
736
736
|
### 4. Cursor rules – paste this once
|
|
737
737
|
|
|
738
|
-
Inside `.cursor/rules/
|
|
738
|
+
Inside `.cursor/rules/mcp_environment_iteration.mdc` add (or verify) the following so the agent always knows the expected iteration loop:
|
|
739
739
|
|
|
740
740
|
```mdc
|
|
741
741
|
---
|
|
@@ -743,7 +743,7 @@ description: Improve an MCP environment
|
|
|
743
743
|
alwaysApply: false
|
|
744
744
|
---
|
|
745
745
|
Setup
|
|
746
|
-
1. Make sure the user has
|
|
746
|
+
1. Make sure the user has set up the mcp config for the environment by seeing if you have access to the tools by the given name (e.g. my-environment-dev), and make sure the title is in dev mode. If not, ask the user to make a dev version!
|
|
747
747
|
2. Make sure you can find the source folder for this environment. Explore its contents and README.
|
|
748
748
|
3. Clarify the objectives and ask follow up questions on the initial query to determine precise implementation details.
|
|
749
749
|
|
|
@@ -760,7 +760,7 @@ Iteration
|
|
|
760
760
|
Context: In the my-environment folder, I have a browser app environment. I've built a tool to interact with it called my-environment-dev.
|
|
761
761
|
Interaction: There are multiple tools to setup and evaluate the environment. There are also interaction tools for you to be able to move around it, and a screenshot tool to see the state. Use all of the available tools.
|
|
762
762
|
Objective: Please test if all setup, evaluation functions are working. This means you should come up with new problem definitions to test all functionality on. Be creative in how you pick edge cases to test on.
|
|
763
|
-
Rules: @
|
|
763
|
+
Rules: @mcp_environment_iteration.mdc
|
|
764
764
|
```
|
|
765
765
|
|
|
766
766
|
---
|
|
@@ -827,13 +827,13 @@ Before making changes:
|
|
|
827
827
|
```python
|
|
828
828
|
# In setup/my_new_setup.py
|
|
829
829
|
from . import setup
|
|
830
|
-
from hud.tools import BaseSetup,
|
|
830
|
+
from hud.tools import BaseSetup, TextContent
|
|
831
831
|
|
|
832
832
|
@setup("my_new_setup", description="Clear description of what this does")
|
|
833
833
|
class MyNewSetup(BaseSetup):
|
|
834
|
-
async def __call__(self, context, param1: str, param2: int = 10) ->
|
|
834
|
+
async def __call__(self, context, param1: str, param2: int = 10) -> TextContent:
|
|
835
835
|
# Implementation
|
|
836
|
-
return
|
|
836
|
+
return TextContent(...)
|
|
837
837
|
```
|
|
838
838
|
|
|
839
839
|
**Adding New Evaluators**
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
# Browser Environment
|
|
2
|
+
|
|
3
|
+
A browser automation environment for HUD that provides GUI access and web app interaction capabilities. This environment supports hot-reloading during development while maintaining persistent state.
|
|
4
|
+
|
|
5
|
+
## Architecture Overview
|
|
6
|
+
|
|
7
|
+
The browser environment uses a two-process architecture:
|
|
8
|
+
|
|
9
|
+
1. **Context Server** (`context.py`): Long-running process that maintains persistent state
|
|
10
|
+
2. **MCP Server** (`server.py`): Hot-reloadable process that handles tool requests
|
|
11
|
+
|
|
12
|
+
### Key Components
|
|
13
|
+
|
|
14
|
+
- **BrowserContext**: Stores persistent state (running apps, ports, playwright instance)
|
|
15
|
+
- **ServiceManager**: Manages X11, VNC, and app processes
|
|
16
|
+
- **BaseHub Tools**: Setup and evaluate tools organized by app (2048, todo)
|
|
17
|
+
- **Multiprocessing Proxy**: Enables state sharing between processes
|
|
18
|
+
|
|
19
|
+
## Context Management and Common Pitfalls
|
|
20
|
+
|
|
21
|
+
### Understanding the Proxy System
|
|
22
|
+
|
|
23
|
+
The browser environment uses Python's `multiprocessing.Manager` to share state between the context server and MCP server. This introduces important constraints:
|
|
24
|
+
|
|
25
|
+
#### ❌ Common Pitfall: Unpicklable Objects
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
# BAD: This will fail with "cannot pickle 'coroutine' object"
|
|
29
|
+
@setup.tool("my_tool")
|
|
30
|
+
async def my_tool():
|
|
31
|
+
env = setup.env
|
|
32
|
+
result = await env.call_app_api("app", "/api/endpoint") # Returns coroutine
|
|
33
|
+
# The coroutine can't be serialized through the proxy!
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
#### ✅ Solution: Direct HTTP Calls
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
# GOOD: Make HTTP calls directly
|
|
40
|
+
@setup.tool("my_tool")
|
|
41
|
+
async def my_tool():
|
|
42
|
+
import httpx
|
|
43
|
+
|
|
44
|
+
# Get the backend port from persistent context
|
|
45
|
+
persistent_ctx = setup.env
|
|
46
|
+
backend_port = persistent_ctx.get_app_backend_port("app")
|
|
47
|
+
|
|
48
|
+
# Make API call directly
|
|
49
|
+
url = f"http://localhost:{backend_port}/api/endpoint"
|
|
50
|
+
async with httpx.AsyncClient() as client:
|
|
51
|
+
response = await client.get(url)
|
|
52
|
+
response.raise_for_status()
|
|
53
|
+
result = response.json()
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### State Synchronization Issues
|
|
57
|
+
|
|
58
|
+
#### ❌ Common Pitfall: Direct List/Dict Manipulation
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
# BAD: Regular Python lists don't sync through proxy
|
|
62
|
+
class ServiceManager:
|
|
63
|
+
def __init__(self):
|
|
64
|
+
self._launched_apps = [] # Won't sync!
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
#### ✅ Solution: Store State in Persistent Context
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
# GOOD: Use the persistent context for shared state
|
|
71
|
+
class BrowserContext:
|
|
72
|
+
def __init__(self):
|
|
73
|
+
self._running_apps: List[str] = []
|
|
74
|
+
self._app_ports: Dict[str, Dict[str, int]] = {}
|
|
75
|
+
|
|
76
|
+
def add_running_app(self, app_name: str) -> None:
|
|
77
|
+
"""Add app to running list."""
|
|
78
|
+
if app_name not in self._running_apps:
|
|
79
|
+
self._running_apps.append(app_name)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Accessing Shared Resources
|
|
83
|
+
|
|
84
|
+
#### ❌ Common Pitfall: Direct Attribute Access
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
# BAD: Direct attribute access on proxy objects
|
|
88
|
+
playwright_tool = env.playwright # May not work with proxy
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
#### ✅ Solution: Use Getter Methods
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
# GOOD: Use proxy-friendly getter methods
|
|
95
|
+
playwright_tool = persistent_ctx.get_playwright_tool()
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Best Practices
|
|
99
|
+
|
|
100
|
+
### 1. Tool Implementation Pattern
|
|
101
|
+
|
|
102
|
+
All setup and evaluate tools should follow this pattern:
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
@setup.tool("tool_name")
|
|
106
|
+
async def tool_name(param1: type, param2: type):
|
|
107
|
+
"""Tool description."""
|
|
108
|
+
try:
|
|
109
|
+
# Get persistent context
|
|
110
|
+
persistent_ctx = setup.env # or evaluate.env
|
|
111
|
+
|
|
112
|
+
# Get app ports
|
|
113
|
+
backend_port = persistent_ctx.get_app_backend_port("app_name")
|
|
114
|
+
|
|
115
|
+
# Make HTTP request
|
|
116
|
+
url = f"http://localhost:{backend_port}/api/endpoint"
|
|
117
|
+
async with httpx.AsyncClient() as client:
|
|
118
|
+
response = await client.method(url, json=data)
|
|
119
|
+
response.raise_for_status()
|
|
120
|
+
result = response.json()
|
|
121
|
+
|
|
122
|
+
# Return result
|
|
123
|
+
return TextContent(
|
|
124
|
+
text=f"Success message",
|
|
125
|
+
type="text"
|
|
126
|
+
)
|
|
127
|
+
except Exception as e:
|
|
128
|
+
logger.error(f"tool_name failed: {e}")
|
|
129
|
+
return TextContent(
|
|
130
|
+
text=f"Failed: {str(e)}",
|
|
131
|
+
type="text"
|
|
132
|
+
)
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### 2. App Launch Pattern
|
|
136
|
+
|
|
137
|
+
When launching apps, ensure ports are stored in the persistent context:
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
# In launch_app tool
|
|
141
|
+
app_info = await service_manager.launch_app(app_name)
|
|
142
|
+
|
|
143
|
+
# Store ports in persistent context for later access
|
|
144
|
+
try:
|
|
145
|
+
backend_port = service_manager.get_app_port(app_name)
|
|
146
|
+
frontend_port = service_manager.get_app_frontend_port(app_name)
|
|
147
|
+
persistent_ctx.set_app_ports(app_name, frontend_port, backend_port)
|
|
148
|
+
except Exception as e:
|
|
149
|
+
logger.error(f"Failed to store ports: {e}")
|
|
150
|
+
|
|
151
|
+
# Track app in persistent context
|
|
152
|
+
persistent_ctx.add_running_app(app_name)
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### 3. Import Organization
|
|
156
|
+
|
|
157
|
+
Keep imports at module level:
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
# At top of file
|
|
161
|
+
import logging
|
|
162
|
+
import httpx
|
|
163
|
+
from mcp.types import TextContent
|
|
164
|
+
from . import setup
|
|
165
|
+
|
|
166
|
+
# Not inside functions
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## Troubleshooting
|
|
170
|
+
|
|
171
|
+
### "Cannot pickle 'coroutine' object"
|
|
172
|
+
|
|
173
|
+
**Cause**: Trying to return an async function result through the proxy.
|
|
174
|
+
|
|
175
|
+
**Fix**: Don't use async methods on proxied objects. Make direct HTTP calls instead.
|
|
176
|
+
|
|
177
|
+
### "App not launched" errors
|
|
178
|
+
|
|
179
|
+
**Cause**: State synchronization issue between ServiceManager and persistent context.
|
|
180
|
+
|
|
181
|
+
**Fix**: Ensure `launch_app` stores app info in the persistent context, and setup/evaluate tools check the persistent context's app list.
|
|
182
|
+
|
|
183
|
+
### "Object has no attribute" on proxy objects
|
|
184
|
+
|
|
185
|
+
**Cause**: Direct attribute access on multiprocessing proxy objects.
|
|
186
|
+
|
|
187
|
+
**Fix**: Use getter/setter methods instead of direct attribute access.
|
|
188
|
+
|
|
189
|
+
## Development Workflow
|
|
190
|
+
|
|
191
|
+
1. **Start the environment**: `hud dev`
|
|
192
|
+
2. **Make changes**: Edit tools in `src/hud_controller/`
|
|
193
|
+
3. **Test immediately**: The MCP server hot-reloads automatically
|
|
194
|
+
4. **Check logs**: Look for serialization or proxy errors
|
|
195
|
+
|
|
196
|
+
## Adding New Apps
|
|
197
|
+
|
|
198
|
+
1. Create app directory in `apps/`
|
|
199
|
+
2. Add setup tools in `src/hud_controller/setup/app_name.py`
|
|
200
|
+
3. Add evaluate tools in `src/hud_controller/evaluate/app_name.py`
|
|
201
|
+
4. Follow the HTTP pattern - no `call_app_api` usage
|
|
202
|
+
5. Store app ports in persistent context when launching
|
|
203
|
+
|
|
204
|
+
## Key Files
|
|
205
|
+
|
|
206
|
+
- `context.py`: Persistent state management
|
|
207
|
+
- `server.py`: MCP server and tool definitions
|
|
208
|
+
- `services.py`: Process management for X11, VNC, apps
|
|
209
|
+
- `setup/`: Setup tools organized by app
|
|
210
|
+
- `evaluate/`: Evaluation tools organized by app
|
|
211
|
+
|
|
212
|
+
Remember: When in doubt, make direct HTTP calls and store state in the persistent context!
|
|
213
|
+
|
|
@@ -52,10 +52,13 @@ hud dev . --build
|
|
|
52
52
|
# - Provide HTTP endpoint for Cursor
|
|
53
53
|
# - Auto-restart on file changes
|
|
54
54
|
# - Pass through environment variables
|
|
55
|
+
# - **Keep browser sessions alive across restarts**
|
|
55
56
|
```
|
|
56
57
|
|
|
57
58
|
Add the URL from output to Cursor or click the deeplink.
|
|
58
59
|
|
|
60
|
+
**Note**: With hot-reload enabled, your browser session persists across code changes. This means you can modify your code and the server will restart automatically without losing your browser state, tabs, or navigation history.
|
|
61
|
+
|
|
59
62
|
#### Option 2: Manual Docker Run
|
|
60
63
|
|
|
61
64
|
For direct control over the development environment:
|
|
@@ -3,25 +3,20 @@ name = "hud-remote-browser"
|
|
|
3
3
|
version = "0.1.0"
|
|
4
4
|
description = "HUD Remote Browser Controller with MCP tools for cloud browser providers"
|
|
5
5
|
requires-python = ">=3.11,<3.13"
|
|
6
|
-
dependencies = [
|
|
7
|
-
"hud-python @ git+https://github.com/hud-evals/hud-python.git@l/text-2048",
|
|
8
|
-
"pyautogui",
|
|
9
|
-
"playwright",
|
|
10
|
-
"httpx",
|
|
11
|
-
"typer",
|
|
12
|
-
"google-api-python-client",
|
|
13
|
-
"google-auth",
|
|
14
|
-
]
|
|
15
|
-
|
|
16
|
-
[project.scripts]
|
|
17
|
-
hud-remote-browser = "hud_controller.__main__:main"
|
|
6
|
+
dependencies = [ "hud-python>=0.4.12", "pyautogui", "playwright", "httpx", "typer", "google-api-python-client", "google-auth",]
|
|
18
7
|
|
|
19
8
|
[build-system]
|
|
20
|
-
requires = ["hatchling"]
|
|
9
|
+
requires = [ "hatchling",]
|
|
21
10
|
build-backend = "hatchling.build"
|
|
22
11
|
|
|
23
|
-
[
|
|
24
|
-
|
|
12
|
+
[project.scripts]
|
|
13
|
+
hud-remote-browser = "hud_controller.__main__:main"
|
|
14
|
+
|
|
15
|
+
[tool.hud]
|
|
16
|
+
image = "hud-remote-browser:dev"
|
|
25
17
|
|
|
26
18
|
[tool.hatch.metadata]
|
|
27
|
-
allow-direct-references = true
|
|
19
|
+
allow-direct-references = true
|
|
20
|
+
|
|
21
|
+
[tool.hatch.build.targets.wheel]
|
|
22
|
+
packages = [ "src/hud_controller",]
|
|
@@ -306,7 +306,7 @@ class MCPAgent(ABC):
|
|
|
306
306
|
if decision == "STOP":
|
|
307
307
|
# Try to submit response through lifecycle tool
|
|
308
308
|
await self._maybe_submit_response(response, messages)
|
|
309
|
-
|
|
309
|
+
|
|
310
310
|
logger.info("Stopping execution")
|
|
311
311
|
final_response = response
|
|
312
312
|
break
|
|
@@ -487,7 +487,7 @@ class MCPAgent(ABC):
|
|
|
487
487
|
self._available_tools.append(tool)
|
|
488
488
|
# Simplified mapping - just tool name to tool
|
|
489
489
|
self._tool_map[tool.name] = tool
|
|
490
|
-
|
|
490
|
+
|
|
491
491
|
# Auto-detect response tool as a lifecycle tool
|
|
492
492
|
if tool.name == "response" and "response" not in self.lifecycle_tools:
|
|
493
493
|
logger.debug("Auto-detected 'response' tool as a lifecycle tool")
|
|
@@ -495,7 +495,7 @@ class MCPAgent(ABC):
|
|
|
495
495
|
|
|
496
496
|
async def _maybe_submit_response(self, response: AgentResponse, messages: list[Any]) -> None:
|
|
497
497
|
"""Submit response through lifecycle tool if available.
|
|
498
|
-
|
|
498
|
+
|
|
499
499
|
Args:
|
|
500
500
|
response: The agent's response
|
|
501
501
|
messages: The current message history (will be modified in-place)
|
|
@@ -506,17 +506,16 @@ class MCPAgent(ABC):
|
|
|
506
506
|
try:
|
|
507
507
|
# Call the response tool with the agent's response
|
|
508
508
|
response_tool_call = MCPToolCall(
|
|
509
|
-
name="response",
|
|
510
|
-
arguments={"response": response.content, "messages": messages}
|
|
509
|
+
name="response", arguments={"response": response.content, "messages": messages}
|
|
511
510
|
)
|
|
512
511
|
response_results = await self.call_tools(response_tool_call)
|
|
513
|
-
|
|
512
|
+
|
|
514
513
|
# Format and add the response tool results to messages
|
|
515
514
|
response_messages = await self.format_tool_results(
|
|
516
515
|
[response_tool_call], response_results
|
|
517
516
|
)
|
|
518
517
|
messages.extend(response_messages)
|
|
519
|
-
|
|
518
|
+
|
|
520
519
|
# Mark the task as done
|
|
521
520
|
logger.info("Response lifecycle tool executed, marking task as done")
|
|
522
521
|
except Exception as e:
|
|
@@ -579,7 +578,7 @@ class MCPAgent(ABC):
|
|
|
579
578
|
logger.warning("Failed to close auto-created trace: %s", e)
|
|
580
579
|
finally:
|
|
581
580
|
self._auto_trace_cm = None
|
|
582
|
-
|
|
581
|
+
|
|
583
582
|
# Clean up auto-created client
|
|
584
583
|
if self._auto_created_client and self.mcp_client:
|
|
585
584
|
try:
|
|
@@ -15,10 +15,10 @@ import hud
|
|
|
15
15
|
if TYPE_CHECKING:
|
|
16
16
|
from langchain.schema.language_model import BaseLanguageModel
|
|
17
17
|
from langchain_core.tools import BaseTool
|
|
18
|
-
from mcp_use.adapters.langchain_adapter import LangChainAdapter
|
|
18
|
+
from mcp_use.adapters.langchain_adapter import LangChainAdapter # type: ignore[attr-defined]
|
|
19
19
|
|
|
20
20
|
try:
|
|
21
|
-
from mcp_use.adapters.langchain_adapter import LangChainAdapter
|
|
21
|
+
from mcp_use.adapters.langchain_adapter import LangChainAdapter # type: ignore[attr-defined]
|
|
22
22
|
except ImportError:
|
|
23
23
|
LangChainAdapter = None # type: ignore[misc, assignment]
|
|
24
24
|
|
|
@@ -17,7 +17,9 @@ class TestOperatorAgent:
|
|
|
17
17
|
@pytest.fixture
|
|
18
18
|
def mock_mcp_client(self):
|
|
19
19
|
"""Create a mock MCP client."""
|
|
20
|
-
mcp_client =
|
|
20
|
+
mcp_client = AsyncMock()
|
|
21
|
+
# Set up the mcp_config attribute as a regular dict, not a coroutine
|
|
22
|
+
mcp_client.mcp_config = {"test_server": {"url": "http://test"}}
|
|
21
23
|
return mcp_client
|
|
22
24
|
|
|
23
25
|
@pytest.fixture
|