hud-python 0.4.45 → 0.5.1 (py3-none-any.whl)
This diff compares two publicly released versions of this package as they appear in their respective public registries. It is provided for informational purposes only.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +11 -5
- hud/agents/base.py +220 -500
- hud/agents/claude.py +200 -240
- hud/agents/gemini.py +275 -0
- hud/agents/gemini_cua.py +335 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +41 -36
- hud/agents/openai.py +291 -292
- hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
- hud/agents/operator.py +211 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +379 -210
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +376 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/cli/__init__.py +461 -545
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +664 -110
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +882 -734
- hud/cli/eval.py +782 -668
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/push.py +29 -11
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +108 -6
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +69 -0
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +40 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +327 -0
- hud/datasets/runner.py +192 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +50 -0
- hud/environment/connection.py +206 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +109 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +694 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +112 -0
- hud/environment/scenarios.py +493 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +218 -0
- hud/environment/tests/test_environment.py +161 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +201 -0
- hud/environment/tests/test_scenarios.py +280 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +674 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +185 -0
- hud/eval/manager.py +466 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +340 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +145 -0
- hud/eval/types.py +63 -0
- hud/eval/utils.py +183 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +151 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +158 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +16 -2
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +4 -0
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +167 -57
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +61 -3
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.1.dist-info/METADATA +264 -0
- hud_python-0.5.1.dist-info/RECORD +299 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
hud/agents/tests/test_base_runtime.py (new file)

@@ -0,0 +1,233 @@
+"""Runtime tests for MCPAgent base class."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import mcp.types as types
+import pytest
+
+from hud.agents.base import BaseCreateParams, MCPAgent, find_content, find_reward, text_to_blocks
+from hud.environment.router import ToolRouter
+from hud.eval.context import EvalContext
+from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
+
+
+class DummyConfig(BaseAgentConfig):
+    model_name: str = "DummyAgent"
+    model: str = "dummy-model"
+
+
+class DummyCreateParams(BaseCreateParams, DummyConfig):
+    pass
+
+
+class MockEvalContext(EvalContext):
+    """Mock EvalContext for testing."""
+
+    def __init__(
+        self,
+        prompt: str = "Test prompt",
+        tools: list[types.Tool] | None = None,
+    ) -> None:
+        # Core attributes
+        self.prompt = prompt
+        self._tools = tools or []
+        self._submitted: str | None = None
+        self.reward: float | None = None
+        self._call_tool_handler: Any = None
+
+        # Environment attributes
+        self._router = ToolRouter()
+        self._agent_include: list[str] | None = None
+        self._agent_exclude: list[str] | None = None
+
+        # EvalContext attributes
+        self._task = None
+        self.trace_id = "test-trace-id"
+        self.eval_name = "test-eval"
+        self.job_id: str | None = None
+        self.group_id: str | None = None
+        self.index = 0
+        self.variants: dict[str, Any] = {}
+        self.answer: str | None = None
+        self.system_prompt: str | None = None
+        self.error: BaseException | None = None
+        self.metadata: dict[str, Any] = {}
+        self.results: list[Any] = []
+        self._is_summary = False
+
+    def as_tools(self) -> list[types.Tool]:
+        return self._tools
+
+    @property
+    def has_scenario(self) -> bool:
+        return False
+
+    def set_call_tool_handler(self, handler: Any) -> None:
+        self._call_tool_handler = handler
+
+    async def list_tools(self) -> list[types.Tool]:
+        return self._tools
+
+    async def call_tool(self, call: Any, /, **kwargs: Any) -> MCPToolResult:
+        if self._call_tool_handler:
+            # Parse the call
+            if isinstance(call, tuple):
+                tc = MCPToolCall(name=call[0], arguments=call[1] if len(call) > 1 else {})
+            elif hasattr(call, "name"):
+                tc = call
+            else:
+                tc = MCPToolCall(name=str(call), arguments=kwargs)
+            return self._call_tool_handler(tc)
+        return MCPToolResult(
+            content=[types.TextContent(type="text", text="ok")],
+            isError=False,
+        )
+
+    async def submit(self, answer: str) -> None:
+        self._submitted = answer
+
+
+class DummyAgent(MCPAgent):
+    config_cls = DummyConfig
+
+    def __init__(self, **kwargs: Any) -> None:
+        params = DummyCreateParams(**kwargs)
+        super().__init__(params)
+
+    async def get_system_messages(self) -> list[types.ContentBlock]:
+        return [types.TextContent(type="text", text="sys")]
+
+    async def get_response(self, messages: list[Any]) -> AgentResponse:
+        return AgentResponse(content="ok", tool_calls=[], done=True)
+
+    async def format_blocks(self, blocks: list[Any]) -> list[Any]:
+        return blocks
+
+    async def format_tool_results(
+        self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
+    ) -> list[Any]:
+        return [types.TextContent(text="tools", type="text")]
+
+
+def test_find_reward_and_content_extractors() -> None:
+    """Test reward and content extraction from tool results."""
+    # Structured content
+    r = MCPToolResult(
+        content=text_to_blocks("{}"), isError=False, structuredContent={"reward": 0.7}
+    )
+    assert find_reward(r) == 0.7
+
+    # Text JSON
+    r2 = MCPToolResult(content=text_to_blocks('{"score": 0.5, "content": "hi"}'), isError=False)
+    assert find_reward(r2) == 0.5
+    assert find_content(r2) == "hi"
+
+
+def test_get_available_tools_before_run_raises() -> None:
+    """Test that get_available_tools raises before initialization."""
+    agent = DummyAgent()
+    with pytest.raises(RuntimeError):
+        agent.get_available_tools()
+
+
+@pytest.mark.asyncio
+async def test_format_message_invalid_type_raises() -> None:
+    """Test that format_message raises for invalid types."""
+    agent = DummyAgent()
+    with pytest.raises(ValueError):
+        await agent.format_message({"oops": 1})  # type: ignore
+
+
+def test_text_to_blocks_shapes() -> None:
+    """Test text_to_blocks returns correct structure."""
+    blocks = text_to_blocks("x")
+    assert isinstance(blocks, list) and blocks and isinstance(blocks[0], types.TextContent)
+
+
+@pytest.mark.asyncio
+async def test_run_with_eval_context() -> None:
+    """Test basic run() with EvalContext."""
+    ctx = MockEvalContext(prompt="hello")
+    agent = DummyAgent()
+    result = await agent.run(ctx, max_steps=1)
+    assert result.done is True
+    assert result.isError is False
+
+
+@pytest.mark.asyncio
+async def test_run_requires_eval_context() -> None:
+    """Test run() raises TypeError for non-EvalContext."""
+    agent = DummyAgent()
+    with pytest.raises(TypeError, match="must be EvalContext"):
+        await agent.run("hello")  # type: ignore
+
+
+@pytest.mark.asyncio
+async def test_run_requires_prompt() -> None:
+    """Test run() raises ValueError when prompt is empty."""
+    ctx = MockEvalContext(prompt="")
+    agent = DummyAgent()
+    with pytest.raises(ValueError, match="prompt is not set"):
+        await agent.run(ctx)
+
+
+@pytest.mark.asyncio
+async def test_call_tools_error_paths() -> None:
+    """Test call_tools handles errors correctly."""
+    call_count = [0]
+    ok_result = MCPToolResult(content=text_to_blocks("ok"), isError=False)
+
+    def handler(tool_call: MCPToolCall) -> MCPToolResult:
+        call_count[0] += 1
+        if call_count[0] == 1:
+            return ok_result
+        raise RuntimeError("boom")
+
+    ctx = MockEvalContext(prompt="test")
+    ctx.set_call_tool_handler(handler)
+    agent = DummyAgent()
+
+    # Initialize the agent with context
+    agent.ctx = ctx
+    await agent._initialize_from_ctx(ctx)
+
+    results = await agent.call_tools(
+        [MCPToolCall(name="a", arguments={}), MCPToolCall(name="b", arguments={})]
+    )
+    assert results[0].isError is False
+    assert results[1].isError is True
+
+
+@pytest.mark.asyncio
+async def test_call_tools_timeout_raises() -> None:
+    """Test call_tools raises TimeoutError."""
+
+    def handler(tool_call: MCPToolCall) -> MCPToolResult:
+        raise TimeoutError("timeout")
+
+    ctx = MockEvalContext(prompt="test")
+    ctx.set_call_tool_handler(handler)
+    agent = DummyAgent()
+
+    agent.ctx = ctx
+    await agent._initialize_from_ctx(ctx)
+
+    with pytest.raises(TimeoutError):
+        await agent.call_tools(MCPToolCall(name="x", arguments={}))
+
+
+@pytest.mark.asyncio
+async def test_get_available_tools_after_run() -> None:
+    """Test get_available_tools works after initialization."""
+    tools = [types.Tool(name="test_tool", description="Test", inputSchema={})]
+    ctx = MockEvalContext(prompt="hello", tools=tools)
+    agent = DummyAgent()
+
+    # Run initializes the agent
+    await agent.run(ctx, max_steps=1)
+
+    # After cleanup, we can't access tools (ctx is cleared)
+    # But during run, tools were available
+    assert agent._initialized is True