hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +11 -5
- hud/agents/base.py +220 -500
- hud/agents/claude.py +200 -240
- hud/agents/gemini.py +275 -0
- hud/agents/gemini_cua.py +335 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +41 -36
- hud/agents/openai.py +291 -292
- hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
- hud/agents/operator.py +211 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +379 -210
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +376 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/cli/__init__.py +461 -545
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +664 -110
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +882 -734
- hud/cli/eval.py +782 -668
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/push.py +29 -11
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +108 -6
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +69 -0
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +40 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +327 -0
- hud/datasets/runner.py +192 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +50 -0
- hud/environment/connection.py +206 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +109 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +694 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +112 -0
- hud/environment/scenarios.py +493 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +218 -0
- hud/environment/tests/test_environment.py +161 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +201 -0
- hud/environment/tests/test_scenarios.py +280 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +674 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +185 -0
- hud/eval/manager.py +466 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +340 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +145 -0
- hud/eval/types.py +63 -0
- hud/eval/utils.py +183 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +151 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +158 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +16 -2
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +4 -0
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +167 -57
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +61 -3
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.1.dist-info/METADATA +264 -0
- hud_python-0.5.1.dist-info/RECORD +299 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
hud/environment/integrations/openai.py
ADDED
@@ -0,0 +1,238 @@
+"""OpenAI integrations - format conversion and Agents SDK."""
+
+from __future__ import annotations
+
+import json
+import logging
+from typing import TYPE_CHECKING, Any, cast
+
+from hud.environment.utils.schema import ensure_strict_schema, validate_openai_schema
+
+if TYPE_CHECKING:
+    import mcp.types as mcp_types
+    from openai.types.chat import ChatCompletionToolUnionParam
+
+__all__ = ["OpenAIMixin"]
+
+logger = logging.getLogger(__name__)
+
+
+class OpenAIMixin:
+    """Mixin providing OpenAI format conversion and Agents SDK integration.
+
+    Format methods (no deps):
+        as_openai_chat_tools() - Chat Completions format
+        as_openai_responses_tools() - Responses API format
+
+    Integration methods (requires openai-agents):
+        as_openai_agent_tools() - Agents SDK FunctionTool objects
+
+    Note: The OpenAI Agents SDK also supports:
+    - HostedMCPTool - MCP tools hosted by OpenAI
+    - MCPServerStdio/Sse/StreamableHttp - Direct MCP server connections
+
+    For MCP server integration, use as_mcp_server() from the mcp integration.
+
+    Requires: as_tools() -> list[mcp_types.Tool], call_tool(name, args)
+    """
+
+    def as_tools(self) -> list[mcp_types.Tool]:
+        raise NotImplementedError
+
+    async def call_tool(self, name: str, arguments: dict[str, Any]) -> Any:
+        raise NotImplementedError
+
+    # =========================================================================
+    # Format Conversion (no external deps)
+    # =========================================================================
+
+    def as_openai_chat_tools(
+        self, *, strict: bool = False, validate: bool = True
+    ) -> list[ChatCompletionToolUnionParam]:
+        """Convert to OpenAI Chat Completions tool format.
+
+        Args:
+            strict: Enable strict mode for structured outputs
+            validate: Validate schemas and skip incompatible tools with warnings
+
+        Returns:
+            List of tool definitions for OpenAI Chat Completions API.
+
+        Example:
+            ```python
+            from openai import OpenAI
+
+            client = OpenAI()
+            async with env:
+                response = client.chat.completions.create(
+                    model="gpt-4o",
+                    messages=[{"role": "user", "content": "Navigate to google.com"}],
+                    tools=env.as_openai_chat_tools(),
+                )
+                # Execute tool calls and get results in OpenAI format
+                results = await env.call_tools(response.choices[0].message.tool_calls)
+                # results are {"role": "tool", "tool_call_id": ..., "content": ...}
+            ```
+        """
+        tools: list[ChatCompletionToolUnionParam] = []
+        for t in self.as_tools():
+            schema = dict(t.inputSchema) if t.inputSchema else {"type": "object", "properties": {}}
+
+            # Validate schema for OpenAI compatibility
+            if validate:
+                errors = validate_openai_schema(schema, t.name)
+                if errors:
+                    for error in errors:
+                        logger.warning("Skipping tool: %s", error)
+                    continue
+
+            if strict:
+                schema = ensure_strict_schema(schema)
+
+            tools.append(
+                cast(
+                    "ChatCompletionToolUnionParam",
+                    {
+                        "type": "function",
+                        "function": {
+                            "name": t.name,
+                            "description": t.description or "",
+                            "parameters": schema,
+                            **({"strict": True} if strict else {}),
+                        },
+                    },
+                )
+            )
+        return tools
+
+    def as_openai_responses_tools(self, *, validate: bool = True) -> list[dict[str, Any]]:
+        """Convert to OpenAI Responses API tool format.
+
+        Note: Like Chat Completions, you must execute tools yourself.
+        OpenAI only auto-executes their built-in tools (code_interpreter, etc).
+
+        Args:
+            validate: Validate schemas and skip incompatible tools with warnings
+
+        Returns:
+            List of tool definitions for OpenAI Responses API.
+
+        Example:
+            ```python
+            from openai import OpenAI
+
+            client = OpenAI()
+            async with env:
+                response = client.responses.create(
+                    model="gpt-4o",
+                    input="Navigate to google.com",
+                    tools=env.as_openai_responses_tools(),
+                )
+                # Check for function calls in the response
+                for item in response.output:
+                    if item.type == "function_call":
+                        result = await env.call_tool(item.name, **item.arguments)
+            ```
+        """
+        tools = []
+        for t in self.as_tools():
+            schema = dict(t.inputSchema) if t.inputSchema else {"type": "object", "properties": {}}
+
+            # Validate schema for OpenAI compatibility
+            if validate:
+                errors = validate_openai_schema(schema, t.name)
+                if errors:
+                    for error in errors:
+                        logger.warning("Skipping tool: %s", error)
+                    continue
+
+            tools.append(
+                {
+                    "type": "function",
+                    "name": t.name,
+                    "description": t.description or "",
+                    "parameters": schema,
+                }
+            )
+        return tools
+
+    # =========================================================================
+    # Agents SDK Integration (requires openai-agents)
+    # =========================================================================
+
+    def as_openai_agent_tools(self, *, validate: bool = True) -> list[Any]:
+        """Convert to OpenAI Agents SDK FunctionTool objects.
+
+        This creates FunctionTool objects that automatically execute against
+        this environment. The Agents SDK Runner handles the tool loop.
+
+        Note: The Agents SDK also supports other tool types:
+        - HostedMCPTool: MCP tools hosted by OpenAI
+        - MCPServerStdio/Sse/StreamableHttp: Direct MCP server connections
+
+        For direct MCP integration, consider using as_mcp_server().
+
+        Requires: pip install openai-agents
+
+        Args:
+            validate: Validate schemas and skip incompatible tools with warnings
+
+        Returns:
+            List of FunctionTool objects for OpenAI Agents SDK.
+
+        Example:
+            ```python
+            from agents import Agent, Runner
+
+            async with env:
+                agent = Agent(
+                    name="browser-agent",
+                    instructions="You browse the web.",
+                    tools=env.as_openai_agent_tools(),
+                )
+                result = await Runner.run(agent, "Go to google.com")
+                print(result.final_output)
+            ```
+        """
+        try:
+            from agents import FunctionTool
+        except ImportError as e:
+            raise ImportError(
+                "OpenAI Agents SDK not installed. Install with: pip install openai-agents"
+            ) from e
+
+        tools = []
+        for t in self.as_tools():
+            schema = dict(t.inputSchema) if t.inputSchema else {"type": "object", "properties": {}}
+
+            # Validate schema for OpenAI compatibility
+            if validate:
+                errors = validate_openai_schema(schema, t.name)
+                if errors:
+                    for error in errors:
+                        logger.warning("Skipping tool: %s", error)
+                    continue
+
+            tool = _create_function_tool(self, t, FunctionTool)
+            tools.append(tool)
+        return tools
+
+
+def _create_function_tool(env: OpenAIMixin, tool: mcp_types.Tool, FunctionTool: type) -> Any:
+    """Create a FunctionTool that calls back to the environment."""
+    schema = tool.inputSchema or {"type": "object", "properties": {}}
+
+    async def async_wrapper(ctx: Any, args_json: str) -> str:
+        """Async wrapper for the tool that matches FunctionTool signature."""
+        kwargs = json.loads(args_json) if args_json else {}
+        result = await env.call_tool(tool.name, **kwargs)
+        if isinstance(result, str):
+            return result
+        return json.dumps(result) if result else ""
+
+    return FunctionTool(
+        name=tool.name,
+        description=tool.description or "",
+        params_json_schema=schema,
+        on_invoke_tool=async_wrapper,
+    )
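The mixin above assumes only the two methods it declares, as_tools() and call_tool(). A minimal sketch of a host class satisfying that contract; the StaticToolbox class and its echo tool are hypothetical, and the sketch assumes this simple schema passes validate_openai_schema without warnings:

```python
import mcp.types as mcp_types

from hud.environment.integrations.openai import OpenAIMixin


class StaticToolbox(OpenAIMixin):
    """Hypothetical host: serves one hard-coded MCP tool."""

    def as_tools(self) -> list[mcp_types.Tool]:
        return [
            mcp_types.Tool(
                name="echo",
                description="Echo the given text back.",
                inputSchema={
                    "type": "object",
                    "properties": {"text": {"type": "string"}},
                    "required": ["text"],
                },
            )
        ]

    async def call_tool(self, name: str, arguments: dict) -> str:
        if name == "echo":
            return arguments["text"]
        raise ValueError(f"unknown tool: {name}")


toolbox = StaticToolbox()
print(toolbox.as_openai_chat_tools(strict=True))  # Chat Completions payload
print(toolbox.as_openai_responses_tools())  # Responses API payload
```

Note that as_openai_chat_tools() and as_openai_responses_tools() only convert formats; executing the returned tool calls is still the caller's job, as the docstrings above point out.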
hud/environment/mock.py
ADDED
@@ -0,0 +1,306 @@
+"""Mock functionality for Environment."""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Any
+
+import mcp.types as mcp_types
+
+from hud.types import MCPToolResult
+
+if TYPE_CHECKING:
+    from hud.environment.environment import Environment
+
+__all__ = ["MockMixin", "generate_mock_value"]
+
+logger = logging.getLogger(__name__)
+
+
+def generate_mock_value(schema: dict[str, Any], depth: int = 0) -> Any:
+    """Generate a reasonable mock value from a JSON schema.
+
+    Args:
+        schema: JSON schema dict with 'type', 'properties', etc.
+        depth: Current recursion depth (to prevent infinite loops).
+
+    Returns:
+        A mock value that matches the schema.
+    """
+    if depth > 10:  # Prevent infinite recursion
+        return None
+
+    # Handle $ref - we don't resolve refs, just return placeholder
+    if "$ref" in schema:
+        return {}
+
+    # Handle anyOf/oneOf/allOf - pick first option
+    if "anyOf" in schema:
+        return generate_mock_value(schema["anyOf"][0], depth + 1)
+    if "oneOf" in schema:
+        return generate_mock_value(schema["oneOf"][0], depth + 1)
+    if "allOf" in schema:
+        # Merge all schemas
+        merged: dict[str, Any] = {}
+        for sub_schema in schema["allOf"]:
+            result = generate_mock_value(sub_schema, depth + 1)
+            if isinstance(result, dict):
+                merged.update(result)
+        return merged
+
+    # Check for const or enum first
+    if "const" in schema:
+        return schema["const"]
+    if "enum" in schema:
+        return schema["enum"][0] if schema["enum"] else None
+
+    # Check for default value
+    if "default" in schema:
+        return schema["default"]
+
+    # Handle by type
+    schema_type = schema.get("type")
+
+    if schema_type == "string":
+        # Check for format hints
+        fmt = schema.get("format", "")
+        if fmt == "uri" or fmt == "url":
+            return "https://example.com"
+        if fmt == "email":
+            return "user@example.com"
+        if fmt == "date":
+            return "2024-01-01"
+        if fmt == "date-time":
+            return "2024-01-01T00:00:00Z"
+        if fmt == "uuid":
+            return "00000000-0000-0000-0000-000000000000"
+        # Use title/description hint if available
+        title = schema.get("title", "").lower()
+        if "url" in title or "link" in title:
+            return "https://example.com"
+        if "name" in title:
+            return "mock_name"
+        if "id" in title:
+            return "mock_id"
+        return "mock_string"
+
+    if schema_type == "number" or schema_type == "integer":
+        # Check for bounds
+        minimum = schema.get("minimum", 0)
+        maximum = schema.get("maximum", 100)
+        if schema_type == "integer":
+            return int((minimum + maximum) / 2) if maximum != float("inf") else minimum
+        return float((minimum + maximum) / 2) if maximum != float("inf") else float(minimum)
+
+    if schema_type == "boolean":
+        return True
+
+    if schema_type == "null":
+        return None
+
+    if schema_type == "array":
+        items_schema = schema.get("items", {})
+        if items_schema:
+            # Generate one item
+            return [generate_mock_value(items_schema, depth + 1)]
+        return []
+
+    if schema_type == "object" or "properties" in schema:
+        result: dict[str, Any] = {}
+        properties = schema.get("properties", {})
+        required = set(schema.get("required", []))
+
+        for prop_name, prop_schema in properties.items():
+            # Only include required properties or first few optional ones
+            if prop_name in required or len(result) < 3:
+                result[prop_name] = generate_mock_value(prop_schema, depth + 1)
+
+        return result
+
+    # Handle list of types
+    if isinstance(schema_type, list):
+        # Pick first non-null type
+        for t in schema_type:
+            if t != "null":
+                return generate_mock_value({"type": t}, depth + 1)
+        return None
+
+    # Fallback for unknown schema
+    return None
+
+
+def generate_mock_tool_result(tool: mcp_types.Tool) -> MCPToolResult:
+    """Generate a mock result for a tool based on its output schema.
+
+    Args:
+        tool: MCP Tool with inputSchema and optionally outputSchema.
+
+    Returns:
+        MCPToolResult with mock content.
+    """
+    # Check if tool has an output schema
+    output_schema = getattr(tool, "outputSchema", None)
+
+    if output_schema:
+        mock_value = generate_mock_value(output_schema)
+        content_text = str(mock_value) if mock_value is not None else "mock_result"
+    else:
+        # Generate a sensible default based on tool name
+        tool_name = tool.name
+        if "screenshot" in tool_name.lower() or "image" in tool_name.lower():
+            content_text = "[mock image data]"
+        elif "get" in tool_name.lower() or "list" in tool_name.lower():
+            content_text = "[]"
+        elif "check" in tool_name.lower() or "verify" in tool_name.lower():
+            content_text = "true"
+        elif "count" in tool_name.lower():
+            content_text = "0"
+        else:
+            content_text = "mock_success"
+
+    return MCPToolResult(
+        content=[mcp_types.TextContent(type="text", text=content_text)],
+        isError=False,
+    )
+
+
+class MockMixin:
+    """Mixin that adds mock functionality to Environment.
+
+    When mock mode is enabled:
+    - All tool calls return mock values instead of executing
+    - Specific tools can have custom mock outputs via mock_tool()
+    - Tools are automatically mocked with reasonable defaults based on their schemas
+
+    Usage:
+        env = Environment("test").connect_hub("browser")
+        env.mock()  # Enable mock mode
+
+        # Set specific mock outputs
+        env.mock_tool("navigate", "Navigation successful")
+        env.mock_tool("screenshot", {"image": "base64data..."})
+
+        async with env:
+            result = await env.call_tool("navigate", url="https://example.com")
+            # Returns: MCPToolResult with "Navigation successful"
+    """
+
+    _mock_mode: bool
+    _mock_outputs: dict[str, Any]
+    _mock_tool_schemas: dict[str, mcp_types.Tool]
+
+    def _init_mock(self) -> None:
+        """Initialize mock state. Called from Environment.__init__."""
+        self._mock_mode = False
+        self._mock_outputs = {}
+        self._mock_tool_schemas = {}
+
+    def mock(self) -> Environment:
+        """Enable mock mode - all tool calls will return mock values.
+
+        Returns:
+            self for chaining.
+
+        Example:
+            env = Environment("test").connect_hub("browser").mock()
+        """
+        self._mock_mode = True
+        logger.info("Mock mode enabled for environment %s", getattr(self, "name", "unknown"))
+        return self  # type: ignore[return-value]
+
+    def unmock(self) -> Environment:
+        """Disable mock mode - tool calls will execute normally.
+
+        Returns:
+            self for chaining.
+        """
+        self._mock_mode = False
+        logger.info("Mock mode disabled for environment %s", getattr(self, "name", "unknown"))
+        return self  # type: ignore[return-value]
+
+    @property
+    def is_mock(self) -> bool:
+        """Check if mock mode is enabled."""
+        return self._mock_mode
+
+    def mock_tool(self, name: str, output: Any) -> Environment:
+        """Set a specific mock output for a tool.
+
+        Args:
+            name: Tool name (with prefix if applicable).
+            output: The value to return when this tool is called.
+                Can be a string, dict, or any JSON-serializable value.
+
+        Returns:
+            self for chaining.
+
+        Example:
+            env.mock_tool("navigate", "Success")
+            env.mock_tool("screenshot", {"type": "image", "data": "..."})
+            env.mock_tool("get_elements", [{"id": "1", "text": "Button"}])
+        """
+        self._mock_outputs[name] = output
+        logger.debug("Mock output set for tool %s", name)
+        return self  # type: ignore[return-value]
+
+    def _get_mock_result(self, name: str, arguments: dict[str, Any]) -> MCPToolResult:
+        """Get mock result for a tool call.
+
+        Priority:
+        1. Custom mock output set via mock_tool()
+        2. Auto-generated mock based on tool's output schema
+        3. Default mock value
+
+        Args:
+            name: Tool name.
+            arguments: Tool arguments (for potential future use).
+
+        Returns:
+            MCPToolResult with mock content.
+        """
+        # Check for custom mock output
+        if name in self._mock_outputs:
+            output = self._mock_outputs[name]
+            # Convert to string if not already
+            if isinstance(output, str):
+                content_text = output
+            else:
+                import json
+
+                try:
+                    content_text = json.dumps(output)
+                except (TypeError, ValueError):
+                    content_text = str(output)
+
+            return MCPToolResult(
+                content=[mcp_types.TextContent(type="text", text=content_text)],
+                isError=False,
+            )
+
+        # Try to find tool schema for auto-generation
+        if name in self._mock_tool_schemas:
+            return generate_mock_tool_result(self._mock_tool_schemas[name])
+
+        # Check router for tool schema
+        router = getattr(self, "_router", None)
+        if router:
+            for tool in router.tools:
+                if tool.name == name:
+                    self._mock_tool_schemas[name] = tool
+                    return generate_mock_tool_result(tool)
+
+        # Default fallback
+        return MCPToolResult(
+            content=[mcp_types.TextContent(type="text", text="mock_success")],
+            isError=False,
+        )
+
+    def _populate_mock_schemas(self) -> None:
+        """Populate mock tool schemas from router after connection.
+
+        Called after _build_routing to cache tool schemas for mock generation.
+        """
+        router = getattr(self, "_router", None)
+        if router:
+            for tool in router.tools:
+                self._mock_tool_schemas[tool.name] = tool
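To make the generator's rules concrete, a small sketch of generate_mock_value on a hypothetical schema; the values follow directly from the branches above (format hint, midpoint of numeric bounds, single array item, title hint):

```python
from hud.environment.mock import generate_mock_value

schema = {
    "type": "object",
    "properties": {
        "url": {"type": "string", "format": "uri"},
        "retries": {"type": "integer", "minimum": 0, "maximum": 10},
        "tags": {"type": "array", "items": {"type": "string", "title": "Tag name"}},
    },
    "required": ["url"],
}

# "uri" format -> example URL, integer -> midpoint of its bounds,
# array -> one generated item, title containing "name" -> "mock_name".
print(generate_mock_value(schema))
# {'url': 'https://example.com', 'retries': 5, 'tags': ['mock_name']}
```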
hud/environment/router.py
ADDED
@@ -0,0 +1,112 @@
+"""Tool routing for Environment."""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    import mcp.types as mcp_types
+
+    from hud.environment.connection import Connector
+
+__all__ = ["LOCAL_CONNECTION", "ConflictResolution", "ToolRouter"]
+
+logger = logging.getLogger(__name__)
+
+LOCAL_CONNECTION = "__local__"
+
+
+class ConflictResolution(str, Enum):
+    """Strategy for resolving tool name conflicts."""
+
+    PREFIX = "prefix"  # Add connection name as prefix
+    FIRST_WINS = "first_wins"  # First connection wins
+    LAST_WINS = "last_wins"  # Last connection wins
+    ERROR = "error"  # Raise error on conflict
+
+
+@dataclass
+class ToolRouter:
+    """Routes tool calls to local or remote handlers with conflict resolution."""
+
+    conflict_resolution: ConflictResolution = ConflictResolution.PREFIX
+    _tools: list[mcp_types.Tool] = field(default_factory=list)
+    _routing: dict[str, str] = field(default_factory=dict)  # name -> connection
+    _local_names: set[str] = field(default_factory=set)
+
+    @property
+    def tools(self) -> list[mcp_types.Tool]:
+        return self._tools
+
+    def is_local(self, name: str) -> bool:
+        return name in self._local_names
+
+    def get_connection(self, name: str) -> str | None:
+        """Get connection name for tool, None if local or not found."""
+        conn = self._routing.get(name)
+        return None if conn == LOCAL_CONNECTION else conn
+
+    def clear(self) -> None:
+        self._tools.clear()
+        self._routing.clear()
+        self._local_names.clear()
+
+    def build(
+        self,
+        local_tools: list[mcp_types.Tool],
+        connections: dict[str, Connector],
+        connection_order: list[str],
+    ) -> None:
+        """Build routing from local tools and connection caches.
+
+        Local tools always have priority over remote tools.
+        Tools starting with '_' are internal and hidden from listing
+        (but still callable directly).
+        """
+        self.clear()
+        seen: dict[str, str] = {}
+
+        # Local tools first (always priority)
+        for tool in local_tools:
+            # Always add to routing (so tool is callable)
+            seen[tool.name] = LOCAL_CONNECTION
+            self._routing[tool.name] = LOCAL_CONNECTION
+            self._local_names.add(tool.name)
+            # Only add to visible list if not internal (underscore prefix)
+            if not tool.name.startswith("_"):
+                self._tools.append(tool)
+
+        # Remote connections in order
+        for conn_name in connection_order:
+            if conn_name not in connections:
+                continue
+            for tool in connections[conn_name].cached_tools:
+                name = tool.name
+                if name in seen:
+                    existing = seen[name]
+                    if existing == LOCAL_CONNECTION:
+                        continue  # Local always wins
+                    if not self._handle_conflict(name, existing, conn_name):
+                        continue
+                    self._tools = [t for t in self._tools if t.name != name]
+
+                # Always add to routing (so tool is callable)
+                seen[name] = conn_name
+                self._routing[name] = conn_name
+                # Only add to visible list if not internal (underscore prefix)
+                if not name.startswith("_"):
+                    self._tools.append(tool)
+
+        logger.debug("Router: %d tools (%d local)", len(self._tools), len(self._local_names))
+
+    def _handle_conflict(self, name: str, existing: str, new: str) -> bool:
+        """Handle remote-to-remote conflict. Returns True to replace existing."""
+        if self.conflict_resolution == ConflictResolution.ERROR:
+            raise ValueError(f"Tool conflict: '{name}' in '{existing}' and '{new}'")
+        if self.conflict_resolution == ConflictResolution.FIRST_WINS:
+            return False
+        # LAST_WINS returns True, PREFIX (shouldn't conflict) returns False
+        return self.conflict_resolution == ConflictResolution.LAST_WINS
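A short sketch of the routing rules in practice. The FakeConnector stand-in is hypothetical; build() only reads a connector's cached_tools attribute, so a real Connector is not needed for illustration:

```python
from dataclasses import dataclass

import mcp.types as mcp_types

from hud.environment.router import ConflictResolution, ToolRouter


@dataclass
class FakeConnector:
    """Stand-in exposing the one attribute build() reads."""

    cached_tools: list


def tool(name: str) -> mcp_types.Tool:
    return mcp_types.Tool(name=name, inputSchema={"type": "object", "properties": {}})


router = ToolRouter(conflict_resolution=ConflictResolution.LAST_WINS)
router.build(
    local_tools=[tool("setup"), tool("_debug")],  # "_debug" is hidden but callable
    connections={
        "browser": FakeConnector([tool("navigate"), tool("setup")]),
        "files": FakeConnector([tool("navigate")]),
    },
    connection_order=["browser", "files"],
)

print([t.name for t in router.tools])  # ['setup', 'navigate'] - local "setup" wins
print(router.get_connection("navigate"))  # 'files' (LAST_WINS between remotes)
print(router.is_local("_debug"))  # True
```

Local "setup" shadows the remote one, the underscore-prefixed tool is routed but hidden from the listing, and under LAST_WINS the later "files" connection takes over "navigate".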