hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +70 -5
- hud/agents/base.py +238 -500
- hud/agents/claude.py +236 -247
- hud/agents/gateway.py +42 -0
- hud/agents/gemini.py +264 -0
- hud/agents/gemini_cua.py +324 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +48 -36
- hud/agents/openai.py +282 -296
- hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
- hud/agents/operator.py +199 -0
- hud/agents/resolver.py +70 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +381 -214
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +377 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_resolver.py +192 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/agents/types.py +148 -0
- hud/cli/__init__.py +493 -546
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +699 -113
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +889 -732
- hud/cli/eval.py +793 -667
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/pull.py +1 -1
- hud/cli/push.py +38 -13
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +110 -8
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push.py +1 -1
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +70 -1
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +45 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +326 -0
- hud/datasets/runner.py +198 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +52 -0
- hud/environment/connection.py +258 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +137 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +835 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +263 -0
- hud/environment/scenarios.py +620 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +205 -0
- hud/environment/tests/test_environment.py +593 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +242 -0
- hud/environment/tests/test_scenarios.py +1086 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +727 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +187 -0
- hud/eval/manager.py +533 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +372 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +291 -0
- hud/eval/types.py +65 -0
- hud/eval/utils.py +194 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +308 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +165 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +18 -2
- hud/tools/agent.py +223 -0
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +36 -3
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_agent_tool.py +355 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +194 -56
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +89 -18
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.13.dist-info/METADATA +264 -0
- hud_python-0.5.13.dist-info/RECORD +305 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
hud/environment/tests/test_tools.py
ADDED
@@ -0,0 +1,208 @@
+"""Tests for @env.tool() decorator and tool operations."""
+
+from __future__ import annotations
+
+import pytest
+
+from hud.environment import Environment
+
+
+class TestToolDecorator:
+    """Tests for @env.tool() decorator."""
+
+    def test_tool_registers_function(self) -> None:
+        """@env.tool registers the function in tool manager."""
+        env = Environment("test-env")
+
+        @env.tool()
+        def add(a: int, b: int) -> int:
+            """Add two numbers."""
+            return a + b
+
+        # Check tool was registered
+        tool_names = list(env._tool_manager._tools.keys())
+        assert "add" in tool_names
+
+    def test_tool_with_custom_name(self) -> None:
+        """@env.tool(name=...) uses custom name."""
+        env = Environment("test-env")
+
+        @env.tool(name="custom_add")
+        def add(a: int, b: int) -> int:
+            return a + b
+
+        tool_names = list(env._tool_manager._tools.keys())
+        assert "custom_add" in tool_names
+        assert "add" not in tool_names
+
+    def test_tool_preserves_docstring(self) -> None:
+        """@env.tool preserves function docstring as description."""
+        env = Environment("test-env")
+
+        @env.tool()
+        def greet(name: str) -> str:
+            """Greet someone by name."""
+            return f"Hello, {name}!"
+
+        tool = env._tool_manager._tools.get("greet")
+        assert tool is not None
+        assert "Greet someone by name" in (tool.description or "")
+
+    def test_tool_async_function(self) -> None:
+        """@env.tool works with async functions."""
+        env = Environment("test-env")
+
+        @env.tool()
+        async def fetch_data(url: str) -> str:
+            """Fetch data from URL."""
+            return f"Data from {url}"
+
+        tool_names = list(env._tool_manager._tools.keys())
+        assert "fetch_data" in tool_names
+
+    def test_tool_returns_function(self) -> None:
+        """@env.tool returns the original function."""
+        env = Environment("test-env")
+
+        @env.tool()
+        def add(a: int, b: int) -> int:
+            return a + b
+
+        # Should be able to call it directly
+        assert add(2, 3) == 5
+
+
+class TestListTools:
+    """Tests for list_tools and as_tools."""
+
+    @pytest.mark.asyncio
+    async def test_as_tools_returns_registered_tools(self) -> None:
+        """as_tools returns list of registered MCP tools."""
+        env = Environment("test-env")
+
+        @env.tool()
+        def tool1() -> str:
+            return "1"
+
+        @env.tool()
+        def tool2() -> str:
+            return "2"
+
+        async with env:
+            tools = env.as_tools()
+            tool_names = [t.name for t in tools]
+            assert "tool1" in tool_names
+            assert "tool2" in tool_names
+
+    @pytest.mark.asyncio
+    async def test_as_tools_empty_when_no_tools(self) -> None:
+        """as_tools returns empty list when no tools registered."""
+        env = Environment("test-env")
+        async with env:
+            tools = env.as_tools()
+            # May have built-in _hud_submit tool
+            user_tools = [t for t in tools if not t.name.startswith("_")]
+            assert len(user_tools) == 0
+
+
+class TestCallTool:
+    """Tests for call_tool method."""
+
+    @pytest.mark.asyncio
+    async def test_call_tool_executes_function(self) -> None:
+        """call_tool executes registered tool function."""
+        env = Environment("test-env")
+        executed = []
+
+        @env.tool()
+        def greet(name: str) -> str:
+            executed.append(name)
+            return f"Hello, {name}!"
+
+        async with env:
+            result = await env.call_tool("greet", name="Alice")
+
+        assert executed == ["Alice"]
+        assert result is not None
+
+    @pytest.mark.asyncio
+    async def test_call_tool_async_function(self) -> None:
+        """call_tool works with async tool functions."""
+        env = Environment("test-env")
+
+        @env.tool()
+        async def async_greet(name: str) -> str:
+            return f"Hello, {name}!"
+
+        async with env:
+            result = await env.call_tool("async_greet", name="Bob")
+
+        assert result is not None
+
+    @pytest.mark.asyncio
+    async def test_call_tool_not_found(self) -> None:
+        """call_tool raises for unknown tool."""
+        env = Environment("test-env")
+
+        async with env:
+            with pytest.raises(ValueError, match="Tool not found"):
+                await env.call_tool("nonexistent")
+
+
+class TestMockMode:
+    """Tests for mock mode."""
+
+    def test_mock_mode_default_false(self) -> None:
+        """Mock mode is False by default."""
+        env = Environment("test-env")
+        assert env._mock_mode is False
+        assert env.is_mock is False
+
+    def test_mock_enables_mock_mode(self) -> None:
+        """mock() enables mock mode."""
+        env = Environment("test-env")
+        env.mock()
+        assert env._mock_mode is True
+        assert env.is_mock is True
+
+    def test_unmock_disables_mock_mode(self) -> None:
+        """unmock() disables mock mode."""
+        env = Environment("test-env")
+        env.mock()
+        env.unmock()
+        assert env._mock_mode is False
+
+    def test_mock_returns_self_for_chaining(self) -> None:
+        """mock() returns self for chaining."""
+        env = Environment("test-env")
+        result = env.mock()
+        assert result is env
+
+    def test_mock_tool_sets_custom_output(self) -> None:
+        """mock_tool() sets custom output for a tool."""
+        env = Environment("test-env")
+        env.mock_tool("navigate", "Custom result")
+        assert env._mock_outputs["navigate"] == "Custom result"
+
+    @pytest.mark.asyncio
+    async def test_mock_mode_returns_mock_response(self) -> None:
+        """Mock mode returns mock response instead of executing tool."""
+        env = Environment("test-env")
+        call_count = 0
+
+        @env.tool()
+        def real_tool() -> str:
+            nonlocal call_count
+            call_count += 1
+            return "real result"
+
+        env.mock()
+        env.mock_tool("real_tool", "mocked result")
+
+        async with env:
+            result = await env.call_tool("real_tool")
+
+        # Tool should not be called in mock mode
+        assert call_count == 0
+        # Should get the mock result
+        assert result is not None
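These tests pin down the new decorator-based tool API on Environment. A minimal usage sketch, assuming only what the tests above exercise (Environment as an async context manager, @env.tool() registration, and call_tool with keyword arguments); the environment name and the add tool are hypothetical:

import asyncio

from hud.environment import Environment

env = Environment("demo-env")

@env.tool()
def add(a: int, b: int) -> int:
    """Add two numbers."""
    return a + b

async def main() -> None:
    async with env:
        # call_tool takes the tool name plus keyword arguments, mirroring
        # test_call_tool_executes_function above; the tests only assert the
        # returned MCP-style result is not None
        result = await env.call_tool("add", a=2, b=3)
        print(result)

asyncio.run(main())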
hud/environment/types.py
ADDED
@@ -0,0 +1,23 @@
+"""Environment types for configuration and tracing."""
+
+from __future__ import annotations
+
+from pydantic import BaseModel, Field
+
+__all__ = ["EnvConfig"]
+
+
+class EnvConfig(BaseModel):
+    """Environment configuration for Tasks.
+
+    Specifies which hub to connect to and optional tool filtering.
+
+    Attributes:
+        name: Hub name to connect via connect_hub() (e.g., "browser", "sheets")
+        include: Optional whitelist of tool names to include
+        exclude: Optional blacklist of tool names to exclude
+    """
+
+    name: str = Field(description="Hub name to connect to")
+    include: list[str] | None = Field(default=None, description="Whitelist of tool names")
+    exclude: list[str] | None = Field(default=None, description="Blacklist of tool names")
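A short sketch of constructing the new EnvConfig model. The field names and the "browser" hub value come from the docstring above; the include entries are hypothetical, and model_dump is the standard Pydantic v2 serializer:

from hud.environment.types import EnvConfig

config = EnvConfig(
    name="browser",                      # hub name, as in the docstring example
    include=["navigate", "screenshot"],  # hypothetical whitelist entries
)
print(config.model_dump())
# {'name': 'browser', 'include': ['navigate', 'screenshot'], 'exclude': None}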
hud/environment/utils/__init__.py
ADDED
@@ -0,0 +1,35 @@
+"""Environment utilities."""
+
+from hud.environment.utils.formats import (
+    ToolFormat,
+    format_result,
+    parse_tool_call,
+    parse_tool_calls,
+    result_to_string,
+)
+from hud.environment.utils.schema import (
+    ensure_strict_schema,
+    json_type_to_python,
+    schema_to_pydantic,
+)
+from hud.environment.utils.tool_wrappers import (
+    create_async_tool_fn,
+    create_sync_tool_fn,
+    create_tool_fns,
+    stringify_result,
+)
+
+__all__ = [
+    "ToolFormat",
+    "create_async_tool_fn",
+    "create_sync_tool_fn",
+    "create_tool_fns",
+    "ensure_strict_schema",
+    "format_result",
+    "json_type_to_python",
+    "parse_tool_call",
+    "parse_tool_calls",
+    "result_to_string",
+    "schema_to_pydantic",
+    "stringify_result",
+]
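Since this __init__ re-exports the format and schema helpers, callers can import them from hud.environment.utils directly rather than from the submodules, e.g.:

from hud.environment.utils import ToolFormat, ensure_strict_schema, parse_tool_call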
hud/environment/utils/formats.py
ADDED
@@ -0,0 +1,215 @@
+"""Tool format parsing and conversion for OpenAI, Claude, Gemini, and MCP."""
+
+from __future__ import annotations
+
+import json
+from enum import Enum, auto
+from typing import Any
+
+from hud.types import MCPToolCall, MCPToolResult
+
+__all__ = [
+    "ToolFormat",
+    "format_result",
+    "parse_tool_call",
+    "parse_tool_calls",
+    "result_to_string",
+]
+
+
+class ToolFormat(Enum):
+    """Detected tool call format."""
+
+    OPENAI = auto()  # function.arguments as JSON string
+    CLAUDE = auto()  # type="tool_use", input as dict
+    GEMINI = auto()  # functionCall with args
+    MCP = auto()  # name + arguments
+
+
+# -----------------------------------------------------------------------------
+# Parsing
+# -----------------------------------------------------------------------------
+
+
+def _to_dict(obj: Any) -> dict[str, Any]:
+    """Convert object to dict for uniform processing."""
+    if isinstance(obj, dict):
+        return obj
+    if hasattr(obj, "model_dump"):
+        return obj.model_dump()
+    if hasattr(obj, "__dict__"):
+        return vars(obj)
+    raise ValueError(f"Cannot convert {type(obj).__name__} to dict")
+
+
+def _parse_json_args(args: Any) -> dict[str, Any]:
+    """Parse arguments, handling JSON strings."""
+    if not args:
+        return {}
+    if isinstance(args, str):
+        try:
+            return json.loads(args)
+        except json.JSONDecodeError:
+            return {}
+    return args
+
+
+def parse_tool_call(call: Any, **kwargs: Any) -> tuple[MCPToolCall, ToolFormat]:
+    """Parse any tool call format into (MCPToolCall, ToolFormat).
+
+    Supports:
+    - String (tool name only, or with kwargs)
+    - Tuple: (name,), (name, args), (name, args, id)
+    - MCPToolCall
+    - OpenAI: {function: {name, arguments}, id}
+    - Claude: {type: "tool_use", name, input, id}
+    - Gemini: {functionCall: {name, args}} or {name, args}
+    - Generic: {name, arguments}
+
+    Args:
+        call: Tool call in any supported format.
+        **kwargs: Additional arguments (merged when call is a string).
+
+    Returns:
+        Tuple of (MCPToolCall, ToolFormat) for the parsed call.
+
+    Raises:
+        ValueError: If format is unrecognized.
+    """
+    # Primitives
+    if isinstance(call, str):
+        return MCPToolCall(name=call, arguments=kwargs or {}), ToolFormat.MCP
+
+    if isinstance(call, tuple):
+        tc = MCPToolCall(name=call[0], arguments=call[1] if len(call) > 1 else {})
+        if len(call) > 2:
+            tc.id = call[2]
+        return tc, ToolFormat.MCP
+
+    if isinstance(call, MCPToolCall):
+        return call, ToolFormat.MCP
+
+    # Convert to dict
+    d = _to_dict(call)
+
+    # OpenAI: {function: {name, arguments}, id}
+    if "function" in d:
+        f = _to_dict(d["function"]) if not isinstance(d["function"], dict) else d["function"]
+        tc = MCPToolCall(name=f["name"], arguments=_parse_json_args(f.get("arguments")))
+        if d.get("id"):
+            tc.id = d["id"]
+        return tc, ToolFormat.OPENAI
+
+    # Claude: {type: "tool_use", name, input, id}
+    if d.get("type") == "tool_use":
+        tc = MCPToolCall(name=d["name"], arguments=d.get("input") or {})
+        if d.get("id"):
+            tc.id = d["id"]
+        return tc, ToolFormat.CLAUDE
+
+    # Gemini: {functionCall: {name, args}} or {name, args}
+    if "functionCall" in d:
+        fc = d["functionCall"]
+        return MCPToolCall(name=fc["name"], arguments=fc.get("args") or {}), ToolFormat.GEMINI
+
+    if "args" in d and "name" in d and "arguments" not in d:
+        return MCPToolCall(name=d["name"], arguments=d.get("args") or {}), ToolFormat.GEMINI
+
+    # Generic: {name, arguments/input}
+    if "name" in d:
+        tc = MCPToolCall(name=d["name"], arguments=d.get("arguments") or d.get("input") or {})
+        if d.get("id"):
+            tc.id = d["id"]
+        return tc, ToolFormat.MCP
+
+    raise ValueError(f"Unrecognized tool call format: {list(d.keys())}")
+
+
+def _is_tool_block(item: Any) -> bool:
+    """Check if item is a tool call (not text/other content)."""
+    t = item.get("type") if isinstance(item, dict) else getattr(item, "type", None)
+    return t is None or t in ("tool_use", "function")
+
+
+def parse_tool_calls(calls: Any) -> list[tuple[MCPToolCall, ToolFormat]]:
+    """Parse multiple tool calls, filtering non-tool content (e.g. Claude TextBlock).
+
+    Args:
+        calls: Single call or list of calls in any format.
+
+    Returns:
+        List of (MCPToolCall, ToolFormat) tuples.
+    """
+    if calls is None:
+        return []
+    if not isinstance(calls, list):
+        try:
+            return [parse_tool_call(calls)]
+        except ValueError:
+            return []
+
+    results = []
+    for item in calls:
+        if not _is_tool_block(item):
+            continue
+        try:
+            results.append(parse_tool_call(item))
+        except ValueError:
+            continue
+    return results
+
+
+# -----------------------------------------------------------------------------
+# Result Formatting
+# -----------------------------------------------------------------------------
+
+
+def result_to_string(result: MCPToolResult) -> str:
+    """Convert MCPToolResult content to string.
+
+    Args:
+        result: MCP tool result with content blocks.
+
+    Returns:
+        String representation of the result content.
+    """
+    if not result.content:
+        return ""
+    parts = []
+    for block in result.content:
+        if (text := getattr(block, "text", None)) is not None:
+            parts.append(str(text))
+        elif (data := getattr(block, "data", None)) is not None:
+            parts.append(f"[binary: {len(data)} bytes]")
+    return "\n".join(parts)
+
+
+def format_result(result: MCPToolResult, tc: MCPToolCall, fmt: ToolFormat) -> Any:
+    """Format MCPToolResult based on the input format.
+
+    Args:
+        result: MCP tool result.
+        tc: Original tool call (for id/name).
+        fmt: Target format.
+
+    Returns:
+        OpenAI: {"role": "tool", "tool_call_id": ..., "content": ...}
+        Claude: {"type": "tool_result", "tool_use_id": ..., "content": ..., "is_error"?: bool}
+        Gemini: {"functionResponse": {"name": ..., "response": {"result": ...}}}
+        MCP: MCPToolResult unchanged
+    """
+    content = result_to_string(result)
+
+    if fmt == ToolFormat.OPENAI:
+        return {"role": "tool", "tool_call_id": tc.id, "content": content}
+
+    if fmt == ToolFormat.CLAUDE:
+        r: dict[str, Any] = {"type": "tool_result", "tool_use_id": tc.id, "content": content}
+        if result.isError:
+            r["is_error"] = True
+        return r
+
+    if fmt == ToolFormat.GEMINI:
+        return {"functionResponse": {"name": tc.name, "response": {"result": content}}}
+
+    return result  # MCP format - return as-is
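A round-trip sketch of the helpers above on an OpenAI-style call dict. The input and output shapes follow the parse_tool_call and format_result docstrings; constructing an empty MCPToolResult this way is an assumption for illustration:

from hud.environment.utils.formats import ToolFormat, format_result, parse_tool_call
from hud.types import MCPToolResult

openai_call = {
    "id": "call_123",
    "function": {"name": "navigate", "arguments": '{"url": "https://example.com"}'},
}

# The JSON-string arguments are decoded and the call id is preserved
tc, fmt = parse_tool_call(openai_call)
assert fmt is ToolFormat.OPENAI
assert tc.name == "navigate"
assert tc.arguments == {"url": "https://example.com"}

# After executing the tool, convert the MCP result back into the caller's
# format; an empty-content result is assumed constructible here
result = MCPToolResult(content=[], isError=False)
print(format_result(result, tc, fmt))
# {'role': 'tool', 'tool_call_id': 'call_123', 'content': ''}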
hud/environment/utils/schema.py
ADDED
@@ -0,0 +1,171 @@
+"""Schema utilities for tool definitions."""
+
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+__all__ = [
+    "ensure_strict_schema",
+    "json_type_to_python",
+    "schema_to_pydantic",
+    "validate_openai_schema",
+]
+
+logger = logging.getLogger(__name__)
+
+
+def ensure_strict_schema(schema: dict[str, Any]) -> dict[str, Any]:
+    """Ensure a JSON schema is compatible with OpenAI's strict mode.
+
+    OpenAI strict mode requires:
+    - additionalProperties: false on all objects
+    - All properties must be in required
+
+    Args:
+        schema: Original JSON schema.
+
+    Returns:
+        Modified schema for strict mode.
+    """
+    schema = dict(schema)
+
+    if schema.get("type") == "object":
+        schema["additionalProperties"] = False
+
+        if "properties" in schema:
+            # All properties must be required
+            schema["required"] = list(schema["properties"].keys())
+
+            # Recursively process nested objects
+            for prop_schema in schema["properties"].values():
+                if isinstance(prop_schema, dict):
+                    _ensure_strict_recursive(prop_schema)
+
+    return schema
+
+
+def _ensure_strict_recursive(schema: dict[str, Any]) -> None:
+    """Recursively apply strict mode to nested schemas."""
+    if schema.get("type") == "object":
+        schema["additionalProperties"] = False
+        if "properties" in schema:
+            schema["required"] = list(schema["properties"].keys())
+            for prop_schema in schema["properties"].values():
+                if isinstance(prop_schema, dict):
+                    _ensure_strict_recursive(prop_schema)
+
+    elif schema.get("type") == "array" and "items" in schema:
+        if isinstance(schema["items"], dict):
+            _ensure_strict_recursive(schema["items"])
+
+
+def schema_to_pydantic(name: str, schema: dict[str, Any]) -> type:
+    """Convert JSON schema to a Pydantic model.
+
+    Args:
+        name: Model name (used for class name).
+        schema: JSON schema with properties.
+
+    Returns:
+        Dynamically created Pydantic model class.
+    """
+    from pydantic import Field, create_model
+
+    properties = schema.get("properties", {})
+    required = set(schema.get("required", []))
+
+    fields = {}
+    for prop_name, prop_schema in properties.items():
+        prop_type = json_type_to_python(prop_schema.get("type", "string"))
+        default = ... if prop_name in required else None
+        description = prop_schema.get("description", "")
+        fields[prop_name] = (prop_type, Field(default=default, description=description))
+
+    return create_model(f"{name}Input", **fields)
+
+
+def json_type_to_python(json_type: str) -> type:
+    """Map JSON schema type to Python type.
+
+    Args:
+        json_type: JSON schema type string.
+
+    Returns:
+        Corresponding Python type.
+    """
+    mapping = {
+        "string": str,
+        "integer": int,
+        "number": float,
+        "boolean": bool,
+        "array": list,
+        "object": dict,
+    }
+    return mapping.get(json_type, str)
+
+
+def validate_openai_schema(
+    schema: dict[str, Any],
+    tool_name: str = "unknown",
+    path: str = "",
+) -> list[str]:
+    """Validate a JSON schema for OpenAI API compatibility.
+
+    OpenAI's API has specific requirements for tool schemas:
+    - Arrays must have 'items' (not 'prefixItems' which tuples generate)
+    - Certain schema features like 'prefixItems' are not supported
+
+    Args:
+        schema: JSON schema to validate.
+        tool_name: Name of the tool (for error messages).
+        path: Current path in schema (for error context).
+
+    Returns:
+        List of validation error messages. Empty if valid.
+    """
+    errors: list[str] = []
+
+    if not isinstance(schema, dict):
+        return errors
+
+    # Check for prefixItems (generated by tuple types)
+    if "prefixItems" in schema:
+        errors.append(
+            f"Tool '{tool_name}' has 'prefixItems' at {path or 'root'} "
+            "(likely from tuple type). Use list[Model] instead of tuple."
+        )
+
+    # Check arrays have 'items'
+    if schema.get("type") == "array" and "items" not in schema and "prefixItems" not in schema:
+        errors.append(
+            f"Tool '{tool_name}' has array at {path or 'root'} without 'items'. "
+            "OpenAI requires 'items' for array schemas."
+        )
+
+    # Recursively check nested schemas
+    # Check properties
+    if "properties" in schema:
+        for prop_name, prop_schema in schema["properties"].items():
+            prop_path = f"{path}.{prop_name}" if path else prop_name
+            errors.extend(validate_openai_schema(prop_schema, tool_name, prop_path))
+
+    # Check items
+    if "items" in schema and isinstance(schema["items"], dict):
+        items_path = f"{path}[items]" if path else "[items]"
+        errors.extend(validate_openai_schema(schema["items"], tool_name, items_path))
+
+    # Check anyOf/oneOf/allOf
+    for key in ("anyOf", "oneOf", "allOf"):
+        if key in schema:
+            for i, sub_schema in enumerate(schema[key]):
+                sub_path = f"{path}.{key}[{i}]" if path else f"{key}[{i}]"
+                errors.extend(validate_openai_schema(sub_schema, tool_name, sub_path))
+
+    # Check $defs (definitions)
+    if "$defs" in schema:
+        for def_name, def_schema in schema["$defs"].items():
+            def_path = f"$defs.{def_name}"
+            errors.extend(validate_openai_schema(def_schema, tool_name, def_path))
+
+    return errors
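Both schema helpers are pure functions over plain dicts, so their behavior can be sketched directly; the navigate schema below is hypothetical:

from hud.environment.utils.schema import ensure_strict_schema, validate_openai_schema

schema = {
    "type": "object",
    "properties": {
        "url": {"type": "string", "description": "Page to open"},
        "tags": {"type": "array"},  # missing 'items': OpenAI rejects this
    },
}

strict = ensure_strict_schema(schema)
assert strict["additionalProperties"] is False
assert strict["required"] == ["url", "tags"]  # strict mode requires every property

for error in validate_openai_schema(schema, tool_name="navigate"):
    print(error)
# Tool 'navigate' has array at tags without 'items'. OpenAI requires 'items' for array schemas.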