hud-python 0.4.14__tar.gz → 0.4.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic.
- {hud_python-0.4.14 → hud_python-0.4.16}/PKG-INFO +4 -3
- {hud_python-0.4.14 → hud_python-0.4.16}/README.md +1 -1
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/agents/base.py +118 -33
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/agents/claude.py +1 -1
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/agents/openai.py +5 -16
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/agents/tests/test_openai.py +24 -79
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/__init__.py +137 -15
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/analyze.py +2 -4
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/build.py +6 -2
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/dev.py +67 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/eval.py +90 -35
- hud_python-0.4.16/hud/cli/hf.py +406 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/init.py +38 -19
- hud_python-0.4.16/hud/cli/rl/README.md +243 -0
- hud_python-0.4.16/hud/cli/rl/__init__.py +82 -0
- hud_python-0.4.16/hud/cli/rl/init.py +370 -0
- hud_python-0.4.16/hud/cli/rl/pod.py +491 -0
- hud_python-0.4.16/hud/cli/rl/ssh.py +288 -0
- hud_python-0.4.16/hud/cli/rl/train.py +421 -0
- hud_python-0.4.16/hud/cli/rl/utils.py +165 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/tests/test_mcp_server.py +1 -4
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/clients/base.py +2 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/clients/fastmcp.py +7 -2
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/clients/mcp_use.py +3 -1
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/clients/utils/retry_transport.py +34 -8
- hud_python-0.4.16/hud/datasets/__init__.py +32 -0
- hud_python-0.4.16/hud/datasets/execution/__init__.py +13 -0
- hud_python-0.4.16/hud/datasets/execution/parallel.py +592 -0
- hud_python-0.4.16/hud/datasets/execution/runner.py +123 -0
- hud_python-0.4.16/hud/datasets/task.py +107 -0
- hud_python-0.4.16/hud/datasets/utils.py +118 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/otel/instrumentation.py +2 -1
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/server/server.py +58 -21
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/settings.py +12 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/types.py +31 -10
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/utils/design.py +168 -2
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/utils/tests/test_version.py +1 -1
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/version.py +1 -1
- {hud_python-0.4.14 → hud_python-0.4.16}/pyproject.toml +10 -4
- hud_python-0.4.14/hud/datasets.py +0 -327
- {hud_python-0.4.14 → hud_python-0.4.16}/.gitignore +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/LICENSE +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/environments/README.md +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/environments/browser/README.md +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/environments/browser/apps/2048/README.md +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/environments/browser/apps/2048/backend/pyproject.toml +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/environments/browser/apps/README.md +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/environments/browser/apps/todo/README.md +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/environments/browser/apps/todo/backend/pyproject.toml +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/environments/browser/pyproject.toml +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/environments/remote_browser/README.md +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/environments/remote_browser/pyproject.toml +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/environments/remote_browser/src/hud_controller/providers/README.md +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/environments/text_2048/README.md +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/environments/text_2048/pyproject.toml +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/examples/README.md +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/__main__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/agents/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/agents/langchain.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/agents/misc/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/agents/misc/response_agent.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/agents/openai_chat_generic.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/agents/tests/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/agents/tests/test_base.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/agents/tests/test_claude.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/agents/tests/test_client.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/__main__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/clone.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/debug.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/list_func.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/pull.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/push.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/remove.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/tests/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/tests/test_analyze.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/tests/test_analyze_metadata.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/tests/test_build.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/tests/test_cli_init.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/tests/test_cli_main.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/tests/test_clone.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/tests/test_cursor.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/tests/test_debug.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/tests/test_list_func.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/tests/test_main_module.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/tests/test_pull.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/tests/test_push.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/tests/test_registry.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/tests/test_utils.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/utils/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/utils/cursor.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/utils/docker.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/utils/environment.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/utils/interactive.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/utils/logging.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/utils/metadata.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/utils/registry.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/utils/remote_runner.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/utils/runner.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/cli/utils/server.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/clients/README.md +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/clients/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/clients/tests/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/clients/tests/test_client_integration.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/clients/tests/test_fastmcp.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/clients/tests/test_protocol.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/clients/utils/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/misc/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/misc/claude_plays_pokemon.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/otel/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/otel/collector.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/otel/config.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/otel/context.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/otel/exporters.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/otel/processors.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/otel/tests/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/otel/tests/test_processors.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/py.typed +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/server/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/server/context.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/server/helper/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/server/low_level.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/server/tests/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/shared/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/shared/exceptions.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/shared/requests.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/shared/tests/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/shared/tests/test_exceptions.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/shared/tests/test_requests.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/telemetry/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/telemetry/instrument.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/telemetry/job.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/telemetry/replay.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/telemetry/tests/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/telemetry/tests/test_replay.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/telemetry/tests/test_trace.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/telemetry/trace.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/base.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/bash.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/computer/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/computer/anthropic.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/computer/hud.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/computer/openai.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/computer/settings.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/edit.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/executors/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/executors/base.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/executors/pyautogui.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/executors/tests/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/executors/tests/test_base_executor.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/executors/tests/test_pyautogui_executor.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/executors/xdo.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/playwright.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/response.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/tests/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/tests/test_base.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/tests/test_bash.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/tests/test_bash_extended.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/tests/test_computer.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/tests/test_computer_actions.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/tests/test_edit.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/tests/test_init.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/tests/test_playwright_tool.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/tests/test_response.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/tests/test_tools.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/tests/test_tools_init.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/tests/test_utils.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/types.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/tools/utils.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/utils/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/utils/async_utils.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/utils/mcp.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/utils/progress.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/utils/telemetry.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/utils/tests/__init__.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/utils/tests/test_async_utils.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/utils/tests/test_init.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/utils/tests/test_mcp.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/utils/tests/test_progress.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/hud/utils/tests/test_telemetry.py +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/rl/README.md +0 -0
- {hud_python-0.4.14 → hud_python-0.4.16}/rl/pyproject.toml +0 -0
{hud_python-0.4.14 → hud_python-0.4.16}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.4.14
+Version: 0.4.16
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -43,9 +43,10 @@ Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
 Requires-Dist: opentelemetry-instrumentation-mcp>=0.44.1
 Requires-Dist: opentelemetry-sdk>=1.34.1
 Requires-Dist: pathspec>=0.12.1
+Requires-Dist: prompt-toolkit==3.0.51
 Requires-Dist: pydantic-settings<3,>=2
 Requires-Dist: pydantic<3,>=2
-Requires-Dist: questionary
+Requires-Dist: questionary==2.1.0
 Requires-Dist: rich>=13.0.0
 Requires-Dist: toml>=0.10.2
 Requires-Dist: typer>=0.9.0
@@ -160,7 +161,7 @@ pip install -e "hud-python[dev]"
 ```
 
 > See [docs.hud.so](https://docs.hud.so), or add docs to any MCP client:
-> `claude mcp add docs-hud https://docs.hud.so/mcp`
+> `claude mcp add --transport http docs-hud https://docs.hud.so/mcp`
 
 ## Quickstart
 
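Besides the version bump, the metadata hunks above pin two interactive-CLI dependencies exactly (prompt-toolkit 3.0.51 and questionary 2.1.0). A minimal sketch for confirming those pins in an installed environment; it assumes hud-python 0.4.16 is already installed and uses only the standard-library importlib.metadata:

```python
# Sketch: print the installed versions of the distributions pinned in the hunk above.
# Expected output (per the Requires-Dist lines): 0.4.16, 3.0.51, 2.1.0.
from importlib.metadata import version

for dist in ("hud-python", "prompt-toolkit", "questionary"):
    print(dist, version(dist))
```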

{hud_python-0.4.14 → hud_python-0.4.16}/README.md

@@ -50,7 +50,7 @@ pip install -e "hud-python[dev]"
 ```
 
 > See [docs.hud.so](https://docs.hud.so), or add docs to any MCP client:
-> `claude mcp add docs-hud https://docs.hud.so/mcp`
+> `claude mcp add --transport http docs-hud https://docs.hud.so/mcp`
 
 ## Quickstart
 
{hud_python-0.4.14 → hud_python-0.4.16}/hud/agents/base.py

@@ -6,11 +6,12 @@ import asyncio
 import json
 import logging
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, Literal
+from typing import TYPE_CHECKING, Any, ClassVar, Literal
 
 import mcp.types as types
 
 from hud.types import AgentResponse, MCPToolCall, MCPToolResult, Trace
+from hud.utils.design import HUDDesign
 from hud.utils.mcp import MCPConfigPatch, patch_mcp_config, setup_hud_telemetry
 
 if TYPE_CHECKING:
@@ -35,6 +36,7 @@ class MCPAgent(ABC):
     """
 
     metadata: dict[str, Any]
+    required_tools: ClassVar[list[str]] = []  # Tools that must be available
 
     def __init__(
         self,
@@ -51,6 +53,7 @@ class MCPAgent(ABC):
         model_name: str = "mcp-agent",
         response_agent: ResponseAgent | None = None,
         auto_trace: bool = True,
+        verbose: bool = False,
     ) -> None:
         """
         Initialize the base MCP agent.
@@ -63,12 +66,18 @@ class MCPAgent(ABC):
             initial_screenshot: Whether to capture screenshot before first prompt
             system_prompt: System prompt to use
             append_setup_output: Whether to append setup tool output to initial messages
+            verbose: If True, sets logging level to INFO. If False, only WARNING and above.
         """
 
         self.mcp_client = mcp_client
         self._auto_created_client = False  # Track if we created the client
 
         self.model_name = model_name
+        self.design = HUDDesign(logger=logger)
+
+        # Set verbose mode if requested
+        if verbose:
+            self.design.set_verbose(True)
 
         # Filtering
         self.allowed_tools = allowed_tools
@@ -101,7 +110,7 @@ class MCPAgent(ABC):
 
             self.mcp_client = MCPClient(mcp_config=task.mcp_config)
             self._auto_created_client = True
-
+            self.design.info_log("Auto-created MCPClient from task.mcp_config")
 
         # Ensure we have a client
         if self.mcp_client is None:
@@ -112,7 +121,10 @@ class MCPAgent(ABC):
         await self._setup_config(self.mcp_client.mcp_config)
 
         # Initialize client if needed
-
+        try:
+            await self.mcp_client.initialize()
+        except Exception as e:
+            self._handle_connection_error(e)
 
         # If task is provided, add lifecycle tools
         if isinstance(task, Task):
@@ -134,9 +146,9 @@ class MCPAgent(ABC):
         # Re-apply filtering with updated lifecycle tools
         await self._filter_tools()
 
-
-
-
+        num_tools = len(self._available_tools)
+        self.design.success_log(
+            f"Agent initialized with {num_tools} available tools (after filtering)"
         )
 
     async def run(self, prompt_or_task: str | Task | dict[str, Any], max_steps: int = 10) -> Trace:
@@ -173,6 +185,16 @@ class MCPAgent(ABC):
 
             else:
                 raise TypeError(f"prompt_or_task must be str or Task, got {type(prompt_or_task)}")
+        except Exception as e:
+            if self._is_connection_error(e):
+                # Return error trace for connection failures
+                return Trace(
+                    reward=0.0,
+                    done=True,
+                    content=self._get_connection_error_message(e),
+                    isError=True,
+                )
+            raise
         finally:
             # Cleanup auto-created resources
             await self._cleanup()
@@ -200,7 +222,7 @@ class MCPAgent(ABC):
 
         # Execute the setup tool and append the initial observation to the context
         if task.setup_tool is not None:
-
+            self.design.progress_log(f"Setting up tool phase: {task.setup_tool}")
            results = await self.call_tools(task.setup_tool)
            if any(result.isError for result in results):
                raise RuntimeError(f"{results}")
@@ -214,7 +236,7 @@ class MCPAgent(ABC):
             prompt_result = await self._run_context(start_context, max_steps=max_steps)
 
         except Exception as e:
-
+            self.design.error_log(f"Task execution failed: {e}")
             # Create an error result but don't return yet - we still want to evaluate
             prompt_result = Trace(reward=0.0, done=True, content=str(e), isError=True)
             prompt_result.populate_from_context()
@@ -222,7 +244,7 @@ class MCPAgent(ABC):
         # Always evaluate if we have a prompt result and evaluate tool
         if prompt_result is not None and task.evaluate_tool is not None:
             try:
-
+                self.design.progress_log(f"Evaluating tool phase: {task.evaluate_tool}")
                 results = await self.call_tools(task.evaluate_tool)
 
                 if any(result.isError for result in results):
@@ -245,7 +267,7 @@ class MCPAgent(ABC):
                 prompt_result.content = eval_content
 
             except Exception as e:
-
+                self.design.error_log(f"Evaluation phase failed: {e}")
                 # Continue with the prompt result even if evaluation failed
 
         return (
@@ -276,21 +298,21 @@ class MCPAgent(ABC):
 
             # Add initial context
             messages.extend(await self.format_message(context))
-
+            self.design.debug(f"Messages: {messages}")
 
             step_count = 0
             while max_steps == -1 or step_count < max_steps:
                 step_count += 1
                 if max_steps == -1:
-
+                    self.design.debug(f"Step {step_count} (unlimited)")
                 else:
-
+                    self.design.debug(f"Step {step_count}/{max_steps}")
 
                 try:
                     # 1. Get model response
                     response = await self.get_response(messages)
 
-
+                    self.design.debug(f"Agent:\n{response}")
 
                     # Check if we should stop
                     if response.done or not response.tool_calls:
@@ -302,16 +324,16 @@ class MCPAgent(ABC):
                                 response.content
                             )
                         except Exception as e:
-
+                            self.design.warning_log(f"ResponseAgent failed: {e}")
                         if decision == "STOP":
                             # Try to submit response through lifecycle tool
                             await self._maybe_submit_response(response, messages)
 
-
+                            self.design.debug("Stopping execution")
                             final_response = response
                             break
                         else:
-
+                            self.design.debug("Continuing execution")
                             messages.extend(await self.format_message(decision))
                             continue
 
@@ -323,19 +345,31 @@ class MCPAgent(ABC):
                     tool_messages = await self.format_tool_results(tool_calls, tool_results)
                     messages.extend(tool_messages)
 
+                    # Compact step completion display
+                    step_info = f"\n[bold]Step {step_count}"
+                    if max_steps != -1:
+                        step_info += f"/{max_steps}"
+                    step_info += "[/bold]"
+
+                    # Show tool calls and results in compact format
+                    for call, result in zip(tool_calls, tool_results, strict=False):
+                        step_info += f"\n{call}\n{result}"
+
+                    self.design.info_log(step_info)
+
                 except Exception as e:
-
+                    self.design.error_log(f"Step failed: {e}")
                     error = str(e)
                     break
 
         except KeyboardInterrupt:
-
+            self.design.warning_log("Agent execution interrupted by user")
            error = "Interrupted by user"
        except asyncio.CancelledError:
-
+            self.design.warning_log("Agent execution cancelled")
            error = "Cancelled"
        except Exception as e:
-
+            self.design.error_log(f"Unexpected error: {e}")
            error = str(e)
 
        # Build result
@@ -376,17 +410,17 @@ class MCPAgent(ABC):
         results: list[MCPToolResult] = []
         for tc in tool_call:
             try:
-
+                self.design.debug(f"Calling tool: {tc}")
                 results.append(await self.mcp_client.call_tool(tc))
             except TimeoutError as e:
-
+                self.design.error_log(f"Tool execution timed out: {e}")
                 try:
                     await self.mcp_client.shutdown()
                 except Exception as close_err:
-
+                    self.design.debug(f"Failed to close MCP client cleanly: {close_err}")
                 raise
             except Exception as e:
-
+                self.design.error_log(f"Tool execution failed: {e}")
                 results.append(_format_error_result(str(e)))
         return results
 
@@ -490,9 +524,21 @@ class MCPAgent(ABC):
 
             # Auto-detect response tool as a lifecycle tool
             if tool.name == "response" and "response" not in self.lifecycle_tools:
-
+                self.design.debug("Auto-detected 'response' tool as a lifecycle tool")
                 self.lifecycle_tools.append("response")
 
+        # Check if all required tools are available
+        if self.required_tools:
+            available_tool_names = {tool.name for tool in self._available_tools}
+            missing_tools = [
+                tool for tool in self.required_tools if tool not in available_tool_names
+            ]
+            if missing_tools:
+                raise ValueError(
+                    f"Required tools not available: {missing_tools}. "
+                    f"Available tools: {list(available_tool_names)}"
+                )
+
     async def _maybe_submit_response(self, response: AgentResponse, messages: list[Any]) -> None:
         """Submit response through lifecycle tool if available.
 
@@ -502,7 +548,7 @@ class MCPAgent(ABC):
         """
         # Check if we have a response lifecycle tool
         if "response" in self.lifecycle_tools and "response" in self._tool_map:
-
+            self.design.debug("Calling response lifecycle tool")
             try:
                 # Call the response tool with the agent's response
                 response_tool_call = MCPToolCall(
@@ -517,9 +563,9 @@ class MCPAgent(ABC):
                 messages.extend(response_messages)
 
                 # Mark the task as done
-
+                self.design.debug("Response lifecycle tool executed, marking task as done")
             except Exception as e:
-
+                self.design.error_log(f"Response lifecycle tool failed: {e}")
 
     async def _setup_config(self, mcp_config: dict[str, dict[str, Any]]) -> None:
         """Inject metadata into the metadata of the initialize request."""
@@ -573,9 +619,9 @@ class MCPAgent(ABC):
         if self._auto_trace_cm:
             try:
                 self._auto_trace_cm.__exit__(None, None, None)
-
+                self.design.debug("Closed auto-created trace")
             except Exception as e:
-
+                self.design.warning_log(f"Failed to close auto-created trace: {e}")
             finally:
                 self._auto_trace_cm = None
 
@@ -583,13 +629,52 @@ class MCPAgent(ABC):
         if self._auto_created_client and self.mcp_client:
             try:
                 await self.mcp_client.shutdown()
-
+                self.design.debug("Closed auto-created MCPClient")
             except Exception as e:
-
+                self.design.warning_log(f"Failed to close auto-created client: {e}")
             finally:
                 self.mcp_client = None
                 self._auto_created_client = False
 
+    def _is_connection_error(self, e: Exception) -> bool:
+        """Check if an exception is a connection error."""
+        error_msg = str(e).lower()
+        return any(
+            pattern in error_msg
+            for pattern in [
+                "connection",
+                "connect",
+                "refused",
+                "failed",
+                "could not connect",
+                "mcp server",
+            ]
+        )
+
+    def _get_connection_error_message(self, e: Exception) -> str:
+        """Extract a helpful connection error message."""
+        import re
+
+        url_match = re.search(r"https?://[^\s]+", str(e))
+        url = url_match.group(0) if url_match else "the MCP server"
+        return f"Connection failed: Could not connect to {url}. Is your MCP client/server running?"
+
+    def _handle_connection_error(self, e: Exception) -> None:
+        """Handle connection errors with helpful messages."""
+        if self._is_connection_error(e):
+            msg = self._get_connection_error_message(e)
+            # Always show connection errors, not just when logging is enabled
+            self.design.error(f"❌ {msg}")
+            self.design.info("💡 Make sure the MCP server is started before running the agent.")
+
+            # For localhost, provide specific instructions
+            error_str = str(e).lower()
+            if "localhost" in error_str or "127.0.0.1" in error_str:
+                self.design.info("   Run 'hud dev' in another terminal to start the MCP server")
+
+            raise RuntimeError(msg) from e
+        raise
+
 
 
 def _format_error_result(error_message: str) -> MCPToolResult:
     return MCPToolResult(content=text_to_blocks(error_message), isError=True)
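Taken together, the base.py changes introduce three user-visible behaviours: a `verbose` flag that routes agent output through `HUDDesign`, a `required_tools` class variable validated after tool filtering, and connection-error detection that turns client failures into an error `Trace`. The standalone sketch below mirrors the two pure-logic pieces (tool validation and error classification) outside the package; the function names are illustrative and not part of hud-python.

```python
# Standalone sketch of the checks added to hud/agents/base.py above; not library code.
_CONNECTION_PATTERNS = ("connection", "connect", "refused", "failed", "could not connect", "mcp server")


def looks_like_connection_error(exc: Exception) -> bool:
    """Substring heuristic equivalent to MCPAgent._is_connection_error in the diff."""
    msg = str(exc).lower()
    return any(pattern in msg for pattern in _CONNECTION_PATTERNS)


def check_required_tools(required: list[str], available: set[str]) -> None:
    """Raise ValueError for missing tools, as _filter_tools now does."""
    missing = [tool for tool in required if tool not in available]
    if missing:
        raise ValueError(f"Required tools not available: {missing}. Available tools: {list(available)}")


if __name__ == "__main__":
    print(looks_like_connection_error(RuntimeError("Connection refused: http://localhost:8765")))  # True
    check_required_tools(["openai_computer"], {"openai_computer", "response"})  # passes silently
```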

{hud_python-0.4.14 → hud_python-0.4.16}/hud/agents/claude.py

@@ -50,7 +50,7 @@ class ClaudeAgent(MCPAgent):
     def __init__(
         self,
         model_client: AsyncAnthropic | None = None,
-        model: str = "claude-
+        model: str = "claude-sonnet-4-20250514",
         max_tokens: int = 4096,
         use_computer_beta: bool = True,
         **kwargs: Any,
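With the new default, constructing the agent without an explicit model now targets Claude Sonnet 4. A minimal usage sketch, assuming hud-python 0.4.16 and the anthropic client are installed and ANTHROPIC_API_KEY is set; the import path follows the file listing above and the keyword arguments follow the signature in the hunk:

```python
# Sketch only: exercises the ClaudeAgent signature shown in the hunk above.
from hud.agents.claude import ClaudeAgent

agent = ClaudeAgent()  # now defaults to model="claude-sonnet-4-20250514"
# Explicit form; verbose is assumed to be forwarded to MCPAgent via **kwargs (per the base.py hunk).
agent = ClaudeAgent(model="claude-sonnet-4-20250514", max_tokens=4096, verbose=True)
```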

{hud_python-0.4.14 → hud_python-0.4.16}/hud/agents/openai.py

@@ -38,6 +38,7 @@ class OperatorAgent(MCPAgent):
         "display_width": computer_settings.OPENAI_COMPUTER_WIDTH,
         "display_height": computer_settings.OPENAI_COMPUTER_HEIGHT,
     }
+    required_tools: ClassVar[list[str]] = ["openai_computer"]
 
     def __init__(
         self,
@@ -143,20 +144,8 @@ class OperatorAgent(MCPAgent):
         """Get response from OpenAI including any tool calls."""
         # OpenAI's API is stateful, so we handle messages differently
 
-        #
-        computer_tool_name =
-        for tool in self._available_tools:
-            if tool.name in ["openai_computer", "computer"]:
-                computer_tool_name = tool.name
-                break
-
-        if not computer_tool_name:
-            # No computer tools available, just return a text response
-            return AgentResponse(
-                content="No computer use tools available",
-                tool_calls=[],
-                done=True,
-            )
+        # Get the computer tool (guaranteed to exist due to required_tools)
+        computer_tool_name = "openai_computer"
 
         # Define the computer use tool
         computer_tool: ToolParam = { # type: ignore[reportAssignmentType]
@@ -209,7 +198,7 @@ class OperatorAgent(MCPAgent):
                 break
 
         if not latest_screenshot:
-
+            self.design.warning_log("No screenshot provided for response to action")
             return AgentResponse(
                 content="No screenshot available for next action",
                 tool_calls=[],
@@ -332,7 +321,7 @@ class OperatorAgent(MCPAgent):
             for content in result.content:
                 if isinstance(content, types.TextContent):
                     # Don't add error text as input_text, just track it
-
+                    self.design.error_log(f"Tool error: {content.text}")
                 elif isinstance(content, types.ImageContent):
                     # Even error results might have images
                     latest_screenshot = content.data

{hud_python-0.4.14 → hud_python-0.4.16}/hud/agents/tests/test_openai.py

@@ -20,6 +20,15 @@ class TestOperatorAgent:
         mcp_client = AsyncMock()
         # Set up the mcp_config attribute as a regular dict, not a coroutine
         mcp_client.mcp_config = {"test_server": {"url": "http://test"}}
+        # Mock list_tools to return the required openai_computer tool
+        mcp_client.list_tools = AsyncMock(
+            return_value=[
+                types.Tool(
+                    name="openai_computer", description="OpenAI computer use tool", inputSchema={}
+                )
+            ]
+        )
+        mcp_client.initialize = AsyncMock()
         return mcp_client
 
     @pytest.fixture
@@ -129,91 +138,27 @@ class TestOperatorAgent:
             types.Tool(name="computer_openai", description="Computer tool", inputSchema={})
         ]
 
-        #
-
-
-
-
-
-
-
-
-
-
-
-        assert response.done is True
-
-    @pytest.mark.asyncio
-    async def test_get_model_response_text_only(self, mock_mcp_client, mock_openai):
-        """Test getting text-only response when no computer tools available."""
-        agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
+        # Mock OpenAI API response for a successful computer use response
+        mock_response = MagicMock()
+        mock_response.id = "response_123"
+        mock_response.state = "completed"
+        # Mock the output message structure
+        mock_output_text = MagicMock()
+        mock_output_text.type = "output_text"
+        mock_output_text.text = "I can see the screen content."
+        mock_output_message = MagicMock()
+        mock_output_message.type = "message"
+        mock_output_message.content = [mock_output_text]
+        mock_response.output = [mock_output_message]
 
-
-        agent._available_tools = []
+        mock_openai.responses.create = AsyncMock(return_value=mock_response)
 
-        messages = [{"prompt": "
+        messages = [{"prompt": "What's on the screen?", "screenshot": None}]
         response = await agent.get_response(messages)
 
-        assert response.content == "
-        assert response.tool_calls == []
+        assert response.content == "I can see the screen content."
         assert response.done is True
 
-    @pytest.mark.asyncio
-    async def test_run_with_tools(self, mock_mcp_client, mock_openai):
-        """Test running agent with tool usage."""
-        agent = OperatorAgent(mcp_client=mock_mcp_client, model_client=mock_openai)
-
-        # Mock tool availability
-        agent._available_tools = [
-            types.Tool(name="search", description="Search tool", inputSchema={"type": "object"})
-        ]
-        # Base agent doesn't require server mapping for tool execution
-
-        # Mock initial response with tool use
-        initial_choice = MagicMock()
-        initial_choice.message = MagicMock(
-            content=None,
-            tool_calls=[
-                MagicMock(
-                    id="call_search",
-                    function=MagicMock(name="search", arguments='{"query": "OpenAI news"}'),
-                )
-            ],
-        )
-
-        initial_response = MagicMock()
-        initial_response.choices = [initial_choice]
-        initial_response.usage = MagicMock(prompt_tokens=10, completion_tokens=15, total_tokens=25)
-
-        # Mock follow-up response
-        final_choice = MagicMock()
-        final_choice.message = MagicMock(
-            content="Here are the latest OpenAI news...", tool_calls=None
-        )
-
-        final_response = MagicMock()
-        final_response.choices = [final_choice]
-        final_response.usage = MagicMock(prompt_tokens=20, completion_tokens=10, total_tokens=30)
-
-        mock_openai.chat.completions.create = AsyncMock(
-            side_effect=[initial_response, final_response]
-        )
-
-        # Mock tool execution
-        mock_mcp_client.call_tool = AsyncMock(
-            return_value=MCPToolResult(
-                content=[types.TextContent(type="text", text="Search results...")], isError=False
-            )
-        )
-
-        # Use a string prompt instead of a task
-        result = await agent.run("Search for OpenAI news")
-
-        # Since OpenAI integration currently returns "No computer use tools available"
-        # when the tool isn't a computer tool, we expect this
-        assert result.content == "No computer use tools available"
-        assert result.done is True
-
     @pytest.mark.asyncio
     async def test_handle_empty_response(self, mock_mcp_client, mock_openai):
         """Test handling empty response from API."""