hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +70 -5
- hud/agents/base.py +238 -500
- hud/agents/claude.py +236 -247
- hud/agents/gateway.py +42 -0
- hud/agents/gemini.py +264 -0
- hud/agents/gemini_cua.py +324 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +48 -36
- hud/agents/openai.py +282 -296
- hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
- hud/agents/operator.py +199 -0
- hud/agents/resolver.py +70 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +381 -214
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +377 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_resolver.py +192 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/agents/types.py +148 -0
- hud/cli/__init__.py +493 -546
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +699 -113
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +889 -732
- hud/cli/eval.py +793 -667
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/pull.py +1 -1
- hud/cli/push.py +38 -13
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +110 -8
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push.py +1 -1
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +70 -1
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +45 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +326 -0
- hud/datasets/runner.py +198 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +52 -0
- hud/environment/connection.py +258 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +137 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +835 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +263 -0
- hud/environment/scenarios.py +620 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +205 -0
- hud/environment/tests/test_environment.py +593 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +242 -0
- hud/environment/tests/test_scenarios.py +1086 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +727 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +187 -0
- hud/eval/manager.py +533 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +372 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +291 -0
- hud/eval/types.py +65 -0
- hud/eval/utils.py +194 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +308 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +165 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +18 -2
- hud/tools/agent.py +223 -0
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +36 -3
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_agent_tool.py +355 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +194 -56
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +89 -18
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.13.dist-info/METADATA +264 -0
- hud_python-0.5.13.dist-info/RECORD +305 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,593 @@
|
|
|
1
|
+
"""Tests for Environment class - context manager, resources, prompts, prompt feature."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TestEnvironmentPrompt:
    """Checks on the ``Environment.prompt`` attribute."""

    def test_prompt_defaults_to_none(self) -> None:
        """A freshly constructed Environment reports ``prompt`` as ``None``."""
        from hud.environment import Environment

        assert Environment("test").prompt is None

    def test_prompt_can_be_set(self) -> None:
        """A value assigned to ``prompt`` reads back unchanged."""
        from hud.environment import Environment

        environment = Environment("test")
        environment.prompt = "Navigate to google.com"

        assert environment.prompt == "Navigate to google.com"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class TestEnvironmentContextManager:
    """Checks on using Environment as an async context manager."""

    @pytest.mark.asyncio
    async def test_context_manager_sets_in_context_flag(self) -> None:
        """``_in_context`` is True inside the ``async with`` body and False outside it."""
        from hud.environment import Environment

        environment = Environment("test")

        # Before entering.
        assert environment._in_context is False

        async with environment:
            assert environment._in_context is True

        # After exiting.
        assert environment._in_context is False

    @pytest.mark.asyncio
    async def test_context_manager_no_connections(self) -> None:
        """Entering and exiting works when no connections were added."""
        from hud.environment import Environment

        environment = Environment("test")

        # No assertions: the test passes as long as enter/exit do not raise.
        async with environment:
            pass
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class TestEnvironmentResources:
    """Checks on the resource operations exposed by Environment."""

    @pytest.mark.asyncio
    async def test_list_resources_empty(self) -> None:
        """``list_resources`` yields an empty list on an Environment with nothing added."""
        from hud.environment import Environment

        environment = Environment("test")

        async with environment:
            listed = await environment.list_resources()

        assert listed == []

    @pytest.mark.asyncio
    async def test_read_resource_not_found(self) -> None:
        """``read_resource`` raises ValueError matching "Resource not found" for an unknown URI."""
        from hud.environment import Environment

        environment = Environment("test")

        async with environment:
            with pytest.raises(ValueError, match="Resource not found"):
                await environment.read_resource("file://nonexistent.txt")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class TestEnvironmentPrompts:
    """Checks on Environment's MCP prompt operations (distinct from the task ``prompt``)."""

    @pytest.mark.asyncio
    async def test_list_prompts_empty(self) -> None:
        """``list_prompts`` yields an empty list on an Environment with nothing added."""
        from hud.environment import Environment

        environment = Environment("test")

        async with environment:
            listed = await environment.list_prompts()

        assert listed == []

    @pytest.mark.asyncio
    async def test_get_prompt_not_found(self) -> None:
        """``get_prompt`` raises ValueError matching "Prompt not found" for an unknown name."""
        from hud.environment import Environment

        environment = Environment("test")

        async with environment:
            with pytest.raises(ValueError, match="Prompt not found"):
                await environment.get_prompt("nonexistent")
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class TestEnvironmentSetupEvaluate:
    """Checks on the ``setup_tool`` and ``evaluate_tool`` methods."""

    def test_setup_tool_with_name_and_kwargs(self) -> None:
        """``setup_tool`` records a single ``(name, kwargs)`` entry in ``_setup_calls``."""
        from hud.environment import Environment

        environment = Environment("test")
        environment.setup_tool("navigate", url="https://example.com")

        assert len(environment._setup_calls) == 1
        assert environment._setup_calls[0] == ("navigate", {"url": "https://example.com"})

    def test_setup_tool_returns_self(self) -> None:
        """``setup_tool`` hands back the same Environment instance, enabling chaining."""
        from hud.environment import Environment

        environment = Environment("test")
        returned = environment.setup_tool("navigate", url="https://example.com")

        assert returned is environment

    def test_evaluate_tool_with_name_and_kwargs(self) -> None:
        """``evaluate_tool`` records a single ``(name, kwargs)`` entry in ``_evaluate_calls``."""
        from hud.environment import Environment

        environment = Environment("test")
        environment.evaluate_tool("check_text", contains="success")

        assert len(environment._evaluate_calls) == 1
        assert environment._evaluate_calls[0] == ("check_text", {"contains": "success"})

    def test_evaluate_tool_returns_self(self) -> None:
        """``evaluate_tool`` hands back the same Environment instance, enabling chaining."""
        from hud.environment import Environment

        environment = Environment("test")
        returned = environment.evaluate_tool("check_text", contains="success")

        assert returned is environment

    def test_chaining_multiple_setup_calls(self) -> None:
        """Two chained ``setup_tool`` calls leave two entries in ``_setup_calls``."""
        from hud.environment import Environment

        after_navigate = Environment("test").setup_tool("navigate", url="https://example.com")
        after_wait = after_navigate.setup_tool("wait", seconds=2)

        assert len(after_wait._setup_calls) == 2
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class TestEnvironmentMCPProtocol:
    """Checks on the MCP handler overrides ``_env_list_tools`` and ``_env_call_tool``.

    The tests call the handlers directly after ``_build_routing`` and verify
    that both locally registered tools and connector-provided tools are
    listed and dispatched.
    """

    @pytest.mark.asyncio
    async def test_env_list_tools_includes_local_tools(self) -> None:
        """A tool registered with ``@env.tool()`` is listed once routing is built."""
        from hud.environment import Environment

        environment = Environment("test")

        @environment.tool()
        def my_tool(x: int) -> int:
            """A test tool."""
            return x * 2

        # Routing is built by hand here (stands in for what __aenter__ does).
        await environment._build_routing()

        # Invoke the handler directly, the way MCP would.
        listed = await environment._env_list_tools()

        assert len(listed) == 1
        assert listed[0].name == "my_tool"

    @pytest.mark.asyncio
    async def test_env_list_tools_includes_connector_tools(self) -> None:
        """Tools cached on a connector show up in ``_env_list_tools``."""
        import mcp.types as mcp_types

        from hud.environment import Environment

        environment = Environment("test")

        # Tool definitions the stand-in connector will expose.
        remote_tools = [
            mcp_types.Tool(
                name="remote_tool",
                description="A remote tool",
                inputSchema={"type": "object"},
            )
        ]

        class MockConnector:
            is_connected = True
            _tools_cache = remote_tools

            @property
            def cached_tools(self) -> list[mcp_types.Tool]:
                return self._tools_cache

            @property
            def cached_prompts(self) -> list[mcp_types.Prompt]:
                return []

            @property
            def cached_resources(self) -> list[mcp_types.Resource]:
                return []

            async def connect(self) -> None:
                pass

            async def disconnect(self) -> None:
                pass

            async def list_tools(self) -> list[mcp_types.Tool]:
                return self._tools_cache

        # Register the stand-in connector directly on the environment.
        environment._connections["mock"] = MockConnector()  # type: ignore

        await environment._build_routing()

        # Invoke the handler directly, the way MCP would.
        listed = await environment._env_list_tools()

        # The connector's tool must be among the listed names.
        listed_names = [tool.name for tool in listed]
        assert "remote_tool" in listed_names

    @pytest.mark.asyncio
    async def test_env_call_tool_routes_to_local(self) -> None:
        """``_env_call_tool`` invokes a locally registered tool with the given arguments."""
        from hud.environment import Environment

        environment = Environment("test")
        called_with: list[int] = []

        @environment.tool()
        def my_tool(x: int) -> str:
            """A test tool."""
            called_with.append(x)
            return f"result: {x}"

        await environment._build_routing()

        # Invoke the handler directly, the way MCP would.
        outcome = await environment._env_call_tool("my_tool", {"x": 42})

        assert called_with == [42]
        assert len(outcome) == 1

    @pytest.mark.asyncio
    async def test_env_call_tool_routes_to_connector(self) -> None:
        """``_env_call_tool`` forwards a connector tool call to that connector's ``call_tool``."""
        from unittest.mock import AsyncMock

        import mcp.types as mcp_types

        from hud.environment import Environment
        from hud.types import MCPToolResult

        environment = Environment("test")

        # Tool definitions the stand-in connector will expose.
        remote_tools = [
            mcp_types.Tool(
                name="remote_tool",
                description="A remote tool",
                inputSchema={"type": "object"},
            )
        ]

        class MockConnector:
            is_connected = True
            _tools_cache = remote_tools
            call_tool = AsyncMock(
                return_value=MCPToolResult(
                    content=[mcp_types.TextContent(type="text", text="remote result")],
                    isError=False,
                )
            )

            @property
            def cached_tools(self) -> list[mcp_types.Tool]:
                return self._tools_cache

            @property
            def cached_prompts(self) -> list[mcp_types.Prompt]:
                return []

            @property
            def cached_resources(self) -> list[mcp_types.Resource]:
                return []

            async def connect(self) -> None:
                pass

            async def disconnect(self) -> None:
                pass

            async def list_tools(self) -> list[mcp_types.Tool]:
                return self._tools_cache

        connector = MockConnector()
        environment._connections["mock"] = connector  # type: ignore

        await environment._build_routing()

        # Invoke the handler directly, the way MCP would.
        outcome = await environment._env_call_tool("remote_tool", {"arg": "value"})

        # The connector must have received exactly this one call.
        connector.call_tool.assert_called_once_with("remote_tool", {"arg": "value"})
        assert len(outcome) == 1

    def test_setup_handlers_registers_custom_handlers(self) -> None:
        """``_env_list_tools`` and ``_env_call_tool`` exist on the instance and are callable.

        Only attribute presence and callability are checked here.
        """
        from hud.environment import Environment

        environment = Environment("test")

        handler_names = ("_env_list_tools", "_env_call_tool")
        assert all(hasattr(environment, name) for name in handler_names)
        assert all(callable(getattr(environment, name)) for name in handler_names)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
class TestEnvironmentToolFiltering:
    """Checks on agent-level include/exclude tool filtering with wildcards (v4 backwards compat).

    Each test registers a few local tools, optionally sets ``_agent_include``
    and/or ``_agent_exclude``, builds routing, and inspects the names
    returned by ``as_tools``.
    """

    @pytest.mark.asyncio
    async def test_as_tools_no_filter(self) -> None:
        """With no filter set, ``as_tools`` returns every registered tool."""
        from hud.environment import Environment

        environment = Environment("test")

        @environment.tool()
        def tool_a() -> str:
            """Tool A."""
            return "a"

        @environment.tool()
        def tool_b() -> str:
            """Tool B."""
            return "b"

        await environment._build_routing()

        visible = [tool.name for tool in environment.as_tools()]

        assert "tool_a" in visible
        assert "tool_b" in visible

    @pytest.mark.asyncio
    async def test_as_tools_exact_include(self) -> None:
        """An exact-name include list keeps only the named tool."""
        from hud.environment import Environment

        environment = Environment("test")

        @environment.tool()
        def tool_a() -> str:
            """Tool A."""
            return "a"

        @environment.tool()
        def tool_b() -> str:
            """Tool B."""
            return "b"

        environment._agent_include = ["tool_a"]
        await environment._build_routing()

        visible = [tool.name for tool in environment.as_tools()]

        assert "tool_a" in visible
        assert "tool_b" not in visible

    @pytest.mark.asyncio
    async def test_as_tools_exact_exclude(self) -> None:
        """An exact-name exclude list drops only the named tool."""
        from hud.environment import Environment

        environment = Environment("test")

        @environment.tool()
        def tool_a() -> str:
            """Tool A."""
            return "a"

        @environment.tool()
        def tool_b() -> str:
            """Tool B."""
            return "b"

        environment._agent_exclude = ["tool_a"]
        await environment._build_routing()

        visible = [tool.name for tool in environment.as_tools()]

        assert "tool_a" not in visible
        assert "tool_b" in visible

    @pytest.mark.asyncio
    async def test_as_tools_wildcard_exclude_prefix(self) -> None:
        """A prefix wildcard such as 'setup_*' excludes every tool starting with that prefix."""
        from hud.environment import Environment

        environment = Environment("test")

        @environment.tool()
        def setup_database() -> str:
            """Setup tool."""
            return "setup"

        @environment.tool()
        def setup_user() -> str:
            """Another setup tool."""
            return "setup"

        @environment.tool()
        def run_query() -> str:
            """Regular tool."""
            return "query"

        environment._agent_exclude = ["setup_*"]
        await environment._build_routing()

        visible = [tool.name for tool in environment.as_tools()]

        assert "setup_database" not in visible
        assert "setup_user" not in visible
        assert "run_query" in visible

    @pytest.mark.asyncio
    async def test_as_tools_wildcard_exclude_contains(self) -> None:
        """A contains wildcard such as '*setup*' excludes any tool with 'setup' in its name."""
        from hud.environment import Environment

        environment = Environment("test")

        @environment.tool()
        def hud_setup() -> str:
            """Contains setup."""
            return "setup"

        @environment.tool()
        def setup_env() -> str:
            """Starts with setup."""
            return "setup"

        @environment.tool()
        def my_setup_tool() -> str:
            """Contains setup in middle."""
            return "setup"

        @environment.tool()
        def run_query() -> str:
            """No setup in name."""
            return "query"

        environment._agent_exclude = ["*setup*"]
        await environment._build_routing()

        visible = [tool.name for tool in environment.as_tools()]

        assert "hud_setup" not in visible
        assert "setup_env" not in visible
        assert "my_setup_tool" not in visible
        assert "run_query" in visible

    @pytest.mark.asyncio
    async def test_as_tools_multiple_wildcard_patterns(self) -> None:
        """Several exclude patterns (wildcard and exact) are all applied."""
        from hud.environment import Environment

        environment = Environment("test")

        @environment.tool()
        def setup_db() -> str:
            """Setup tool."""
            return "setup"

        @environment.tool()
        def evaluate_result() -> str:
            """Evaluate tool."""
            return "evaluate"

        @environment.tool()
        def checkout_branch() -> str:
            """Checkout tool."""
            return "checkout"

        @environment.tool()
        def run_query() -> str:
            """Regular tool."""
            return "query"

        environment._agent_exclude = ["*setup*", "*evaluate*", "checkout_branch"]
        await environment._build_routing()

        visible = [tool.name for tool in environment.as_tools()]

        assert "setup_db" not in visible
        assert "evaluate_result" not in visible
        assert "checkout_branch" not in visible
        assert "run_query" in visible

    @pytest.mark.asyncio
    async def test_as_tools_wildcard_include_all(self) -> None:
        """An include list of ['*'] keeps every registered tool."""
        from hud.environment import Environment

        environment = Environment("test")

        @environment.tool()
        def tool_a() -> str:
            """Tool A."""
            return "a"

        @environment.tool()
        def tool_b() -> str:
            """Tool B."""
            return "b"

        environment._agent_include = ["*"]
        await environment._build_routing()

        visible = [tool.name for tool in environment.as_tools()]

        assert "tool_a" in visible
        assert "tool_b" in visible

    @pytest.mark.asyncio
    async def test_as_tools_include_and_exclude_combined(self) -> None:
        """When both filters are set, a tool must match include and not match exclude."""
        from hud.environment import Environment

        environment = Environment("test")

        @environment.tool()
        def browser_navigate() -> str:
            """Browser tool."""
            return "nav"

        @environment.tool()
        def browser_setup() -> str:
            """Browser setup - should be excluded."""
            return "setup"

        @environment.tool()
        def file_read() -> str:
            """File tool - not included."""
            return "read"

        environment._agent_include = ["browser_*"]
        environment._agent_exclude = ["*setup*"]
        await environment._build_routing()

        visible = [tool.name for tool in environment.as_tools()]

        assert "browser_navigate" in visible
        assert "browser_setup" not in visible  # Excluded by *setup*
        assert "file_read" not in visible  # Not included by browser_*
|