hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +70 -5
- hud/agents/base.py +238 -500
- hud/agents/claude.py +236 -247
- hud/agents/gateway.py +42 -0
- hud/agents/gemini.py +264 -0
- hud/agents/gemini_cua.py +324 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +48 -36
- hud/agents/openai.py +282 -296
- hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
- hud/agents/operator.py +199 -0
- hud/agents/resolver.py +70 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +381 -214
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +377 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_resolver.py +192 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/agents/types.py +148 -0
- hud/cli/__init__.py +493 -546
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +699 -113
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +889 -732
- hud/cli/eval.py +793 -667
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/pull.py +1 -1
- hud/cli/push.py +38 -13
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +110 -8
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push.py +1 -1
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +70 -1
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +45 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +326 -0
- hud/datasets/runner.py +198 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +52 -0
- hud/environment/connection.py +258 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +137 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +835 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +263 -0
- hud/environment/scenarios.py +620 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +205 -0
- hud/environment/tests/test_environment.py +593 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +242 -0
- hud/environment/tests/test_scenarios.py +1086 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +727 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +187 -0
- hud/eval/manager.py +533 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +372 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +291 -0
- hud/eval/types.py +65 -0
- hud/eval/utils.py +194 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +308 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +165 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +18 -2
- hud/tools/agent.py +223 -0
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +36 -3
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_agent_tool.py +355 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +194 -56
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +89 -18
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.13.dist-info/METADATA +264 -0
- hud_python-0.5.13.dist-info/RECORD +305 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
"""Tests for AgentTool - scenario-to-agent composition."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import inspect
|
|
6
|
+
from unittest.mock import AsyncMock, MagicMock, patch
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
|
|
10
|
+
from hud.environment import Environment
|
|
11
|
+
from hud.eval.task import Task
|
|
12
|
+
from hud.tools.agent import AgentTool, _is_eval_only
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TestIsEvalOnly:
    """Checks for the ``_is_eval_only`` helper on individual parameters."""

    @staticmethod
    def _param_x(func) -> inspect.Parameter:
        """Return the parameter named ``x`` from ``func``'s signature."""
        return inspect.signature(func).parameters["x"]

    def test_required_param_not_eval_only(self) -> None:
        """A parameter without any default is not eval-only."""

        def required(x: str) -> None: ...

        assert not _is_eval_only(self._param_x(required))

    def test_optional_with_value_not_eval_only(self) -> None:
        """A parameter whose default is a real value is not eval-only."""

        def with_default(x: str = "default") -> None: ...

        assert not _is_eval_only(self._param_x(with_default))

    def test_optional_none_without_union_not_eval_only(self) -> None:
        """A ``None`` default alone is not enough: the annotation lacks ``None``."""

        def untyped_none(x: str = None) -> None: ...  # type: ignore[assignment]  # noqa: RUF013

        assert not _is_eval_only(self._param_x(untyped_none))

    def test_optional_none_with_union_is_eval_only(self) -> None:
        """The ``X | None = None`` shape is reported as eval-only."""

        def optional_str(x: str | None = None) -> None: ...

        assert _is_eval_only(self._param_x(optional_str))

    def test_optional_int_none_is_eval_only(self) -> None:
        """The same shape with ``int`` (``int | None = None``) is eval-only too."""

        def optional_int(x: int | None = None) -> None: ...

        assert _is_eval_only(self._param_x(optional_int))

    def test_string_annotation_with_none_union(self) -> None:
        """A literal string annotation ``'str | None'`` is recognised as eval-only."""
        # Build the Parameter by hand so the annotation is an explicit string.
        stringified = inspect.Parameter(
            "x",
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
            default=None,
            annotation="str | None",
        )
        assert _is_eval_only(stringified)

    def test_string_annotation_without_none(self) -> None:
        """A literal string annotation with no ``None`` in it is not eval-only."""
        stringified = inspect.Parameter(
            "x",
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
            default=None,
            annotation="str",
        )
        assert not _is_eval_only(stringified)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class TestAgentToolInit:
    """Constructor behaviour of ``AgentTool``."""

    def test_requires_model_or_agent(self) -> None:
        """Passing neither ``model`` nor ``agent`` raises ``ValueError``."""
        bare_task = Task(args={})

        with pytest.raises(ValueError, match="Must provide either"):
            AgentTool(bare_task)

    def test_cannot_provide_both_model_and_agent(self) -> None:
        """Passing ``model`` and ``agent`` together raises ``ValueError``."""
        bare_task = Task(args={})
        fake_agent = MagicMock()

        with pytest.raises(ValueError, match="Cannot provide both"):
            AgentTool(bare_task, model="claude", agent=fake_agent)  # type: ignore[arg-type]

    def test_accepts_model_string(self) -> None:
        """A model string is stored on ``_model`` and ``_agent_cls`` stays ``None``."""
        agent_tool = AgentTool(Task(scenario="test", args={}), model="claude")

        assert agent_tool._model == "claude"
        assert agent_tool._agent_cls is None

    def test_accepts_agent_class(self) -> None:
        """An agent class is stored on ``_agent_cls`` and ``_model`` stays ``None``."""
        fake_agent_cls = MagicMock()
        agent_tool = AgentTool(
            Task(scenario="test", args={}),
            agent=fake_agent_cls,  # type: ignore[arg-type]
        )

        assert agent_tool._model is None
        assert agent_tool._agent_cls is fake_agent_cls

    def test_name_defaults_to_scenario(self) -> None:
        """Without an explicit ``name`` the tool is named after the scenario."""
        agent_tool = AgentTool(Task(scenario="investigate", args={}), model="claude")

        assert agent_tool.name == "investigate"

    def test_name_can_be_overridden(self) -> None:
        """An explicit ``name`` wins over the scenario name."""
        agent_tool = AgentTool(
            Task(scenario="investigate", args={}),
            model="claude",
            name="custom_name",
        )

        assert agent_tool.name == "custom_name"
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class TestAgentToolParamFiltering:
    """Which scenario parameters end up in ``AgentTool._visible_params``."""

    def test_filters_eval_only_params(self) -> None:
        """A ``X | None = None`` parameter is left out of ``_visible_params``."""
        environment = Environment("test")

        # ``expected_cause`` follows the ``X | None = None`` shape; the other two do not.
        @environment.scenario()
        async def investigate(
            issue_id: str,
            include_traces: bool = True,
            expected_cause: str | None = None,  # Eval only
        ):
            yield {"task": f"Investigate {issue_id}"}

        agent_tool = AgentTool(environment("investigate"), model="claude")
        visible = agent_tool._visible_params

        # Only the two caller-facing parameters should remain.
        for shown in ("issue_id", "include_traces"):
            assert shown in visible
        assert "expected_cause" not in visible

    def test_all_required_params_visible(self) -> None:
        """Every parameter without a default stays visible."""
        environment = Environment("test")

        @environment.scenario()
        async def search(query: str, limit: int):
            yield {"task": f"Search: {query}"}

        agent_tool = AgentTool(environment("search"), model="claude")
        visible = agent_tool._visible_params

        for shown in ("query", "limit"):
            assert shown in visible

    def test_optional_with_default_visible(self) -> None:
        """Parameters whose default is a real (non-None) value stay visible."""
        environment = Environment("test")

        @environment.scenario()
        async def fetch(url: str, request_timeout: int = 30, retries: int = 3):
            yield {"task": f"Fetch {url}"}

        agent_tool = AgentTool(environment("fetch"), model="claude")
        visible = agent_tool._visible_params

        for shown in ("url", "request_timeout", "retries"):
            assert shown in visible
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class TestAgentToolSchema:
    """Shape of the JSON schema exposed as ``AgentTool._param_schema``."""

    def test_builds_json_schema(self) -> None:
        """The schema is an object listing visible params, with defaults optional."""
        environment = Environment("test")

        @environment.scenario()
        async def investigate(issue_id: str, verbose: bool = False):
            yield {"task": f"Investigate {issue_id}"}

        agent_tool = AgentTool(environment("investigate"), model="claude")
        param_schema = agent_tool._param_schema

        assert param_schema is not None
        assert param_schema["type"] == "object"

        properties = param_schema["properties"]
        assert "issue_id" in properties
        assert "verbose" in properties

        required = param_schema["required"]
        assert "issue_id" in required
        assert "verbose" not in required  # Has default

    def test_schema_excludes_eval_only(self) -> None:
        """A ``X | None = None`` parameter does not appear in the schema properties."""
        environment = Environment("test")

        @environment.scenario()
        async def check(
            item_id: str,
            expected_status: str | None = None,  # Eval only
        ):
            yield {"task": f"Check {item_id}"}

        agent_tool = AgentTool(environment("check"), model="claude")
        param_schema = agent_tool._param_schema

        assert param_schema is not None
        properties = param_schema["properties"]
        assert "item_id" in properties
        assert "expected_status" not in properties
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
class TestAgentToolMCP:
    """Behaviour of the ``AgentTool.mcp`` property."""

    def test_mcp_property_returns_tool(self) -> None:
        """``mcp`` yields an instance of FastMCP's ``FunctionTool``."""
        from fastmcp.tools import FunctionTool

        environment = Environment("test")

        @environment.scenario()
        async def greet(name: str):
            yield {"task": f"Greet {name}"}

        agent_tool = AgentTool(environment("greet"), model="claude")

        assert isinstance(agent_tool.mcp, FunctionTool)

    def test_mcp_has_filtered_parameters(self) -> None:
        """The MCP tool's ``parameters`` schema omits the eval-only parameter."""
        environment = Environment("test")

        @environment.scenario()
        async def analyze(
            data: str,
            expected_result: str | None = None,  # Eval only
        ):
            yield {"task": f"Analyze {data}"}

        agent_tool = AgentTool(environment("analyze"), model="claude")

        # The schema is read from the ``parameters`` attribute of the MCP tool.
        properties = agent_tool.mcp.parameters["properties"]

        assert "data" in properties
        assert "expected_result" not in properties
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
class TestAgentToolCall:
    """Behaviour of ``AgentTool.__call__`` with the eval runtime mocked out."""

    @staticmethod
    def _wire_fakes(run_eval_mock: MagicMock, create_agent_mock: MagicMock) -> None:
        """Attach a fake async eval context and a fake agent to the two patches."""
        # The context object returns itself from ``__aenter__``.
        fake_ctx = AsyncMock()
        fake_ctx.__aenter__ = AsyncMock(return_value=fake_ctx)
        fake_ctx.__aexit__ = AsyncMock(return_value=None)
        run_eval_mock.return_value = fake_ctx

        # The agent's ``run`` is awaitable and resolves to an object with ``content``.
        fake_agent = MagicMock()
        fake_agent.run = AsyncMock(return_value=MagicMock(content="result"))
        create_agent_mock.return_value = fake_agent

    @pytest.mark.asyncio
    async def test_filters_kwargs_to_visible_only(self) -> None:
        """An eval-only kwarg passed by the caller is not forwarded in the task args."""
        # Import modules first so patches work
        import hud.agents
        import hud.eval.manager  # noqa: F401

        environment = Environment("test")

        @environment.scenario()
        async def process(item: str, expected: str | None = None):
            yield {"task": f"Process {item}"}

        agent_tool = AgentTool(environment("process"), model="claude")

        with (
            patch("hud.eval.manager.run_eval") as run_eval_mock,
            patch("hud.agents.create_agent") as create_agent_mock,
        ):
            self._wire_fakes(run_eval_mock, create_agent_mock)

            # Supply one visible argument and one eval-only argument.
            await agent_tool(item="test", expected="should_be_filtered")

            # The first positional argument given to run_eval is the task.
            submitted_task = run_eval_mock.call_args.args[0]
            assert "item" in submitted_task.args
            assert "expected" not in submitted_task.args  # Filtered out

    @pytest.mark.asyncio
    async def test_merges_template_args(self) -> None:
        """Call-time kwargs are combined with args pre-filled on the template task."""
        # Import modules first so patches work
        import hud.agents
        import hud.eval.manager  # noqa: F401

        environment = Environment("test")

        @environment.scenario()
        async def search(query: str, limit: int = 10):
            yield {"task": f"Search {query}"}

        # The template task already carries ``limit``.
        template = environment("search", limit=5)
        agent_tool = AgentTool(template, model="claude")

        with (
            patch("hud.eval.manager.run_eval") as run_eval_mock,
            patch("hud.agents.create_agent") as create_agent_mock,
        ):
            self._wire_fakes(run_eval_mock, create_agent_mock)

            # Only ``query`` is supplied at call time.
            await agent_tool(query="test query")

            submitted_task = run_eval_mock.call_args.args[0]
            assert submitted_task.args["query"] == "test query"
            assert submitted_task.args["limit"] == 5  # From template
|