hud-python 0.4.52__py3-none-any.whl → 0.4.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/base.py +9 -2
- hud/agents/openai_chat_generic.py +15 -3
- hud/agents/tests/test_base.py +15 -0
- hud/agents/tests/test_base_runtime.py +164 -0
- hud/cli/__init__.py +6 -3
- hud/cli/build.py +35 -27
- hud/cli/dev.py +11 -29
- hud/cli/eval.py +61 -61
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +24 -2
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +134 -0
- hud/cli/tests/test_mcp_server.py +8 -7
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/utils/docker.py +120 -1
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +2 -2
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_runner.py +106 -0
- hud/datasets/tests/test_utils.py +228 -0
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_instrumentation.py +207 -0
- hud/server/tests/test_server_extra.py +2 -0
- hud/shared/exceptions.py +35 -4
- hud/shared/hints.py +25 -0
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +31 -23
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/tests/test_async_context.py +242 -0
- hud/telemetry/tests/test_instrument.py +414 -0
- hud/telemetry/tests/test_job.py +609 -0
- hud/telemetry/tests/test_trace.py +183 -5
- hud/tools/computer/settings.py +2 -2
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/types.py +7 -1
- hud/utils/agent_factories.py +1 -3
- hud/utils/mcp.py +1 -1
- hud/utils/tests/test_agent_factories.py +60 -0
- hud/utils/tests/test_mcp.py +4 -6
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tasks.py +187 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.52.dist-info → hud_python-0.4.53.dist-info}/METADATA +47 -48
- {hud_python-0.4.52.dist-info → hud_python-0.4.53.dist-info}/RECORD +69 -31
- {hud_python-0.4.52.dist-info → hud_python-0.4.53.dist-info}/WHEEL +0 -0
- {hud_python-0.4.52.dist-info → hud_python-0.4.53.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.52.dist-info → hud_python-0.4.53.dist-info}/licenses/LICENSE +0 -0
hud/agents/base.py
CHANGED
|
@@ -137,7 +137,11 @@ class MCPAgent(ABC):
|
|
|
137
137
|
"No MCPClient. Please provide one when initializing the agent or pass a Task with mcp_config." # noqa: E501
|
|
138
138
|
)
|
|
139
139
|
|
|
140
|
-
|
|
140
|
+
try:
|
|
141
|
+
client_cfg = getattr(self.mcp_client, "mcp_config", None)
|
|
142
|
+
except Exception:
|
|
143
|
+
client_cfg = None
|
|
144
|
+
await self._setup_config(client_cfg)
|
|
141
145
|
|
|
142
146
|
# Initialize client if needed
|
|
143
147
|
try:
|
|
@@ -618,8 +622,11 @@ class MCPAgent(ABC):
|
|
|
618
622
|
except Exception as e:
|
|
619
623
|
self.console.error_log(f"Response lifecycle tool failed: {e}")
|
|
620
624
|
|
|
621
|
-
async def _setup_config(self, mcp_config: dict[str, dict[str, Any]]) -> None:
|
|
625
|
+
async def _setup_config(self, mcp_config: dict[str, dict[str, Any]] | None) -> None:
|
|
622
626
|
"""Inject metadata into the metadata of the initialize request."""
|
|
627
|
+
if not isinstance(mcp_config, dict):
|
|
628
|
+
return
|
|
629
|
+
|
|
623
630
|
if self.metadata:
|
|
624
631
|
patch_mcp_config(
|
|
625
632
|
mcp_config,
|
hud/agents/openai_chat_generic.py
CHANGED
|
@@ -20,6 +20,7 @@ import logging
|
|
|
20
20
|
from typing import TYPE_CHECKING, Any, ClassVar, cast
|
|
21
21
|
|
|
22
22
|
import mcp.types as types
|
|
23
|
+
from openai import AsyncOpenAI
|
|
23
24
|
|
|
24
25
|
from hud import instrument
|
|
25
26
|
from hud.types import AgentResponse, MCPToolCall, MCPToolResult
|
|
@@ -28,7 +29,6 @@ from hud.utils.hud_console import HUDConsole
|
|
|
28
29
|
from .base import MCPAgent
|
|
29
30
|
|
|
30
31
|
if TYPE_CHECKING:
|
|
31
|
-
from openai import AsyncOpenAI
|
|
32
32
|
from openai.types.chat import ChatCompletionToolParam
|
|
33
33
|
|
|
34
34
|
logger = logging.getLogger(__name__)
|
|
@@ -42,14 +42,26 @@ class GenericOpenAIChatAgent(MCPAgent):
|
|
|
42
42
|
def __init__(
|
|
43
43
|
self,
|
|
44
44
|
*,
|
|
45
|
-
openai_client: AsyncOpenAI | None,
|
|
45
|
+
openai_client: AsyncOpenAI | None = None,
|
|
46
|
+
api_key: str | None = None,
|
|
47
|
+
base_url: str | None = None,
|
|
46
48
|
model_name: str = "gpt-4o-mini",
|
|
47
49
|
completion_kwargs: dict[str, Any] | None = None,
|
|
48
50
|
**agent_kwargs: Any,
|
|
49
51
|
) -> None:
|
|
50
52
|
# Accept base-agent settings via **agent_kwargs (e.g., mcp_client, system_prompt, etc.)
|
|
51
53
|
super().__init__(**agent_kwargs)
|
|
52
|
-
|
|
54
|
+
|
|
55
|
+
# Handle client creation - support both patterns
|
|
56
|
+
if openai_client is not None:
|
|
57
|
+
# Use provided client (backward compatibility)
|
|
58
|
+
self.oai = openai_client
|
|
59
|
+
elif api_key is not None or base_url is not None:
|
|
60
|
+
# Create client from config (new pattern, consistent with other agents)
|
|
61
|
+
self.oai = AsyncOpenAI(api_key=api_key, base_url=base_url)
|
|
62
|
+
else:
|
|
63
|
+
raise ValueError("Either openai_client or (api_key and base_url) must be provided")
|
|
64
|
+
|
|
53
65
|
self.model_name = model_name
|
|
54
66
|
self.completion_kwargs: dict[str, Any] = completion_kwargs or {}
|
|
55
67
|
self.mcp_schemas = []
|
hud/agents/tests/test_base.py
CHANGED
|
@@ -329,6 +329,21 @@ class TestBaseMCPAgent:
|
|
|
329
329
|
# call_tools doesn't validate empty names, it will return error
|
|
330
330
|
await agent.call_tools(tool_call)
|
|
331
331
|
|
|
332
|
+
def test_get_tool_schemas(self):
|
|
333
|
+
"""Test getting tool schemas."""
|
|
334
|
+
agent = MockMCPAgent()
|
|
335
|
+
|
|
336
|
+
agent._available_tools = [
|
|
337
|
+
types.Tool(name="tool1", description="Tool 1", inputSchema={"type": "object"}),
|
|
338
|
+
types.Tool(name="setup", description="Setup", inputSchema={"type": "object"}),
|
|
339
|
+
]
|
|
340
|
+
|
|
341
|
+
schemas = agent.get_tool_schemas()
|
|
342
|
+
|
|
343
|
+
# Should include non-lifecycle tools
|
|
344
|
+
assert len(schemas) == 2
|
|
345
|
+
assert schemas[0]["name"] == "tool1"
|
|
346
|
+
|
|
332
347
|
def test_get_tools_by_server(self):
|
|
333
348
|
"""Test getting tools grouped by server."""
|
|
334
349
|
agent = MockMCPAgent()
|
hud/agents/tests/test_base_runtime.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from unittest import mock
|
|
4
|
+
|
|
5
|
+
import mcp.types as types
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from hud.agents.base import MCPAgent, find_content, find_reward, text_to_blocks
|
|
9
|
+
from hud.types import AgentResponse, MCPToolCall, MCPToolResult
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DummyAgent(MCPAgent):
|
|
13
|
+
async def get_system_messages(self):
|
|
14
|
+
return [types.TextContent(text="sys", type="text")]
|
|
15
|
+
|
|
16
|
+
async def get_response(self, messages):
|
|
17
|
+
# Single step: no tool calls -> done
|
|
18
|
+
return AgentResponse(content="ok", tool_calls=[], done=True)
|
|
19
|
+
|
|
20
|
+
async def format_blocks(self, blocks):
|
|
21
|
+
# Return as-is
|
|
22
|
+
return blocks
|
|
23
|
+
|
|
24
|
+
async def format_tool_results(self, tool_calls, tool_results):
|
|
25
|
+
return [types.TextContent(text="tools", type="text")]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@pytest.mark.asyncio
|
|
29
|
+
async def test_run_with_string_prompt_auto_client(monkeypatch):
|
|
30
|
+
# Fake MCPClient with required methods
|
|
31
|
+
fake_client = mock.AsyncMock()
|
|
32
|
+
fake_client.initialize.return_value = None
|
|
33
|
+
fake_client.list_tools.return_value = []
|
|
34
|
+
fake_client.shutdown.return_value = None
|
|
35
|
+
|
|
36
|
+
# Patch MCPClient construction inside initialize()
|
|
37
|
+
with mock.patch("hud.clients.MCPClient", return_value=fake_client):
|
|
38
|
+
agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
|
|
39
|
+
result = await agent.run("hello", max_steps=1)
|
|
40
|
+
assert result.done is True and result.isError is False
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_find_reward_and_content_extractors():
|
|
44
|
+
# Structured content
|
|
45
|
+
r = MCPToolResult(
|
|
46
|
+
content=text_to_blocks("{}"), isError=False, structuredContent={"reward": 0.7}
|
|
47
|
+
)
|
|
48
|
+
assert find_reward(r) == 0.7
|
|
49
|
+
|
|
50
|
+
# Text JSON
|
|
51
|
+
r2 = MCPToolResult(content=text_to_blocks('{"score": 0.5, "content": "hi"}'), isError=False)
|
|
52
|
+
assert find_reward(r2) == 0.5
|
|
53
|
+
assert find_content(r2) == "hi"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@pytest.mark.asyncio
|
|
57
|
+
async def test_call_tools_error_paths():
|
|
58
|
+
fake_client = mock.AsyncMock()
|
|
59
|
+
# First call succeeds
|
|
60
|
+
ok_result = MCPToolResult(content=text_to_blocks("ok"), isError=False)
|
|
61
|
+
fake_client.call_tool.side_effect = [ok_result, RuntimeError("boom")]
|
|
62
|
+
agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
|
|
63
|
+
results = await agent.call_tools(
|
|
64
|
+
[MCPToolCall(name="a", arguments={}), MCPToolCall(name="b", arguments={})]
|
|
65
|
+
)
|
|
66
|
+
assert results[0].isError is False
|
|
67
|
+
assert results[1].isError is True
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@pytest.mark.asyncio
|
|
71
|
+
async def test_initialize_without_client_raises_valueerror():
|
|
72
|
+
agent = DummyAgent(mcp_client=None, auto_trace=False)
|
|
73
|
+
with pytest.raises(ValueError):
|
|
74
|
+
await agent.initialize(None)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def test_get_available_tools_before_initialize_raises():
|
|
78
|
+
agent = DummyAgent(mcp_client=mock.AsyncMock(), auto_trace=False)
|
|
79
|
+
with pytest.raises(RuntimeError):
|
|
80
|
+
agent.get_available_tools()
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@pytest.mark.asyncio
|
|
84
|
+
async def test_format_message_invalid_type_raises():
|
|
85
|
+
agent = DummyAgent(mcp_client=mock.AsyncMock(), auto_trace=False)
|
|
86
|
+
with pytest.raises(ValueError):
|
|
87
|
+
await agent.format_message({"oops": 1}) # type: ignore
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@pytest.mark.asyncio
|
|
91
|
+
async def test_call_tools_timeout_error_shutdown_called():
|
|
92
|
+
fake_client = mock.AsyncMock()
|
|
93
|
+
fake_client.call_tool.side_effect = TimeoutError("timeout")
|
|
94
|
+
fake_client.shutdown.return_value = None
|
|
95
|
+
agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
|
|
96
|
+
with pytest.raises(TimeoutError):
|
|
97
|
+
await agent.call_tools(MCPToolCall(name="x", arguments={}))
|
|
98
|
+
fake_client.shutdown.assert_awaited_once()
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def test_text_to_blocks_shapes():
|
|
102
|
+
blocks = text_to_blocks("x")
|
|
103
|
+
assert isinstance(blocks, list) and blocks and isinstance(blocks[0], types.TextContent)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@pytest.mark.asyncio
|
|
107
|
+
async def test_run_returns_connection_error_trace(monkeypatch):
|
|
108
|
+
fake_client = mock.AsyncMock()
|
|
109
|
+
fake_client.mcp_config = {}
|
|
110
|
+
fake_client.initialize.side_effect = RuntimeError("Connection refused http://localhost:1234")
|
|
111
|
+
fake_client.list_tools.return_value = []
|
|
112
|
+
fake_client.shutdown.return_value = None
|
|
113
|
+
|
|
114
|
+
class DummyCM:
|
|
115
|
+
def __exit__(self, *args, **kwargs):
|
|
116
|
+
return False
|
|
117
|
+
|
|
118
|
+
monkeypatch.setattr("hud.utils.mcp.setup_hud_telemetry", lambda *args, **kwargs: DummyCM())
|
|
119
|
+
|
|
120
|
+
agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
|
|
121
|
+
result = await agent.run("p", max_steps=1)
|
|
122
|
+
assert result.isError is True
|
|
123
|
+
assert "Could not connect" in (result.content or "")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@pytest.mark.asyncio
|
|
127
|
+
async def test_run_calls_response_tool_when_configured(monkeypatch):
|
|
128
|
+
fake_client = mock.AsyncMock()
|
|
129
|
+
fake_client.mcp_config = {}
|
|
130
|
+
fake_client.initialize.return_value = None
|
|
131
|
+
fake_client.list_tools.return_value = []
|
|
132
|
+
fake_client.shutdown.return_value = None
|
|
133
|
+
ok = MCPToolResult(content=text_to_blocks("ok"), isError=False)
|
|
134
|
+
fake_client.call_tool.return_value = ok
|
|
135
|
+
|
|
136
|
+
class DummyCM:
|
|
137
|
+
def __exit__(self, *args, **kwargs):
|
|
138
|
+
return False
|
|
139
|
+
|
|
140
|
+
monkeypatch.setattr("hud.utils.mcp.setup_hud_telemetry", lambda *args, **kwargs: DummyCM())
|
|
141
|
+
|
|
142
|
+
agent = DummyAgent(mcp_client=fake_client, auto_trace=False, response_tool_name="submit")
|
|
143
|
+
result = await agent.run("hello", max_steps=1)
|
|
144
|
+
assert result.isError is False
|
|
145
|
+
fake_client.call_tool.assert_awaited()
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@pytest.mark.asyncio
|
|
149
|
+
async def test_get_available_tools_after_initialize(monkeypatch):
|
|
150
|
+
fake_client = mock.AsyncMock()
|
|
151
|
+
fake_client.mcp_config = {}
|
|
152
|
+
fake_client.initialize.return_value = None
|
|
153
|
+
fake_client.list_tools.return_value = []
|
|
154
|
+
fake_client.shutdown.return_value = None
|
|
155
|
+
|
|
156
|
+
class DummyCM:
|
|
157
|
+
def __exit__(self, *args, **kwargs):
|
|
158
|
+
return False
|
|
159
|
+
|
|
160
|
+
monkeypatch.setattr("hud.utils.mcp.setup_hud_telemetry", lambda *args, **kwargs: DummyCM())
|
|
161
|
+
|
|
162
|
+
agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
|
|
163
|
+
await agent.initialize(None)
|
|
164
|
+
assert agent.get_available_tools() == []
|
hud/cli/__init__.py
CHANGED
|
@@ -242,15 +242,18 @@ def debug(
|
|
|
242
242
|
if build and not build_environment(directory, image_name):
|
|
243
243
|
raise typer.Exit(1)
|
|
244
244
|
|
|
245
|
-
# Build Docker command
|
|
246
|
-
from .utils.docker import
|
|
245
|
+
# Build Docker command with folder-mode envs
|
|
246
|
+
from .utils.docker import create_docker_run_command
|
|
247
247
|
|
|
248
|
-
command =
|
|
248
|
+
command = create_docker_run_command(
|
|
249
|
+
image_name, docker_args=docker_args, env_dir=directory
|
|
250
|
+
)
|
|
249
251
|
else:
|
|
250
252
|
# Assume it's an image name
|
|
251
253
|
image = first_param
|
|
252
254
|
from .utils.docker import build_run_command
|
|
253
255
|
|
|
256
|
+
# Image-only mode: do not auto-inject local .env
|
|
254
257
|
command = build_run_command(image, docker_args)
|
|
255
258
|
else:
|
|
256
259
|
console.print(
|
hud/cli/build.py
CHANGED
|
@@ -161,49 +161,42 @@ async def analyze_mcp_environment(
|
|
|
161
161
|
hud_console = HUDConsole()
|
|
162
162
|
env_vars = env_vars or {}
|
|
163
163
|
|
|
164
|
-
# Build Docker command to run the image
|
|
165
|
-
|
|
164
|
+
# Build Docker command to run the image, injecting any provided env vars
|
|
165
|
+
from hud.cli.utils.docker import build_env_flags
|
|
166
166
|
|
|
167
|
-
|
|
168
|
-
for key, value in env_vars.items():
|
|
169
|
-
docker_cmd.extend(["-e", f"{key}={value}"])
|
|
167
|
+
docker_cmd = ["docker", "run", "--rm", "-i", *build_env_flags(env_vars), image]
|
|
170
168
|
|
|
171
|
-
|
|
169
|
+
# Show full docker command being used for analysis
|
|
170
|
+
hud_console.dim_info("Command:", " ".join(docker_cmd))
|
|
172
171
|
|
|
173
|
-
# Create MCP config
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
172
|
+
# Create MCP config consistently with analyze helpers
|
|
173
|
+
from hud.cli.analyze import parse_docker_command
|
|
174
|
+
|
|
175
|
+
mcp_config = parse_docker_command(docker_cmd)
|
|
177
176
|
|
|
178
177
|
# Initialize client and measure timing
|
|
179
178
|
start_time = time.time()
|
|
180
|
-
client = MCPClient(mcp_config=
|
|
179
|
+
client = MCPClient(mcp_config=mcp_config, verbose=verbose, auto_trace=False)
|
|
181
180
|
initialized = False
|
|
182
181
|
|
|
183
182
|
try:
|
|
184
183
|
if verbose:
|
|
185
|
-
hud_console.info(
|
|
184
|
+
hud_console.info("Initializing MCP client...")
|
|
186
185
|
|
|
187
|
-
# Add timeout to fail fast instead of hanging (
|
|
186
|
+
# Add timeout to fail fast instead of hanging (60 seconds)
|
|
188
187
|
await asyncio.wait_for(client.initialize(), timeout=60.0)
|
|
189
188
|
initialized = True
|
|
190
189
|
initialize_ms = int((time.time() - start_time) * 1000)
|
|
191
190
|
|
|
192
|
-
#
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
# Extract tool information
|
|
196
|
-
tool_info = []
|
|
197
|
-
for tool in tools:
|
|
198
|
-
tool_dict = {"name": tool.name, "description": tool.description}
|
|
199
|
-
if hasattr(tool, "inputSchema") and tool.inputSchema:
|
|
200
|
-
tool_dict["inputSchema"] = tool.inputSchema
|
|
201
|
-
tool_info.append(tool_dict)
|
|
191
|
+
# Delegate to standard analysis helper for consistency
|
|
192
|
+
full_analysis = await client.analyze_environment()
|
|
202
193
|
|
|
194
|
+
# Normalize to build's expected fields
|
|
195
|
+
tools_list = full_analysis.get("tools", [])
|
|
203
196
|
return {
|
|
204
197
|
"initializeMs": initialize_ms,
|
|
205
|
-
"toolCount": len(
|
|
206
|
-
"tools":
|
|
198
|
+
"toolCount": len(tools_list),
|
|
199
|
+
"tools": tools_list,
|
|
207
200
|
"success": True,
|
|
208
201
|
}
|
|
209
202
|
except TimeoutError:
|
|
@@ -295,6 +288,10 @@ def build_environment(
|
|
|
295
288
|
hud_console.error(f"Directory not found: {directory}")
|
|
296
289
|
raise typer.Exit(1)
|
|
297
290
|
|
|
291
|
+
from hud.cli.utils.docker import require_docker_running
|
|
292
|
+
|
|
293
|
+
require_docker_running()
|
|
294
|
+
|
|
298
295
|
# Step 1: Check for hud.lock.yaml (previous build)
|
|
299
296
|
lock_path = env_dir / "hud.lock.yaml"
|
|
300
297
|
base_name = None
|
|
@@ -355,13 +352,24 @@ def build_environment(
|
|
|
355
352
|
|
|
356
353
|
hud_console.success(f"Built temporary image: {temp_tag}")
|
|
357
354
|
|
|
358
|
-
# Analyze the environment
|
|
355
|
+
# Analyze the environment (merge folder .env if present)
|
|
359
356
|
hud_console.progress_message("Analyzing MCP environment...")
|
|
360
357
|
|
|
361
358
|
loop = asyncio.new_event_loop()
|
|
362
359
|
asyncio.set_event_loop(loop)
|
|
363
360
|
try:
|
|
364
|
-
|
|
361
|
+
# Merge .env from env_dir for analysis only
|
|
362
|
+
try:
|
|
363
|
+
from hud.cli.utils.docker import load_env_vars_for_dir
|
|
364
|
+
|
|
365
|
+
env_from_file = load_env_vars_for_dir(env_dir)
|
|
366
|
+
except Exception:
|
|
367
|
+
env_from_file = {}
|
|
368
|
+
merged_env_for_analysis = {**env_from_file, **(env_vars or {})}
|
|
369
|
+
|
|
370
|
+
analysis = loop.run_until_complete(
|
|
371
|
+
analyze_mcp_environment(temp_tag, verbose, merged_env_for_analysis)
|
|
372
|
+
)
|
|
365
373
|
except Exception as e:
|
|
366
374
|
hud_console.error(f"Failed to analyze MCP environment: {e}")
|
|
367
375
|
hud_console.info("")
|
hud/cli/dev.py
CHANGED
|
@@ -504,15 +504,12 @@ def run_docker_dev_server(
|
|
|
504
504
|
base_name = image_name.replace(":", "-").replace("/", "-")
|
|
505
505
|
container_name = f"{base_name}-dev-{pid}"
|
|
506
506
|
|
|
507
|
-
# Build docker run command with volume mounts
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
"--rm",
|
|
512
|
-
"-i",
|
|
507
|
+
# Build docker run command with volume mounts and folder-mode envs
|
|
508
|
+
from .utils.docker import create_docker_run_command
|
|
509
|
+
|
|
510
|
+
base_args = [
|
|
513
511
|
"--name",
|
|
514
512
|
container_name,
|
|
515
|
-
# Mount both server and environment for hot-reload
|
|
516
513
|
"-v",
|
|
517
514
|
f"{env_dir.absolute()}/server:/app/server:rw",
|
|
518
515
|
"-v",
|
|
@@ -524,29 +521,14 @@ def run_docker_dev_server(
|
|
|
524
521
|
"-e",
|
|
525
522
|
"HUD_DEV=1",
|
|
526
523
|
]
|
|
524
|
+
combined_args = [*base_args, *docker_args] if docker_args else base_args
|
|
525
|
+
docker_cmd = create_docker_run_command(
|
|
526
|
+
image_name,
|
|
527
|
+
docker_args=combined_args,
|
|
528
|
+
env_dir=env_dir,
|
|
529
|
+
)
|
|
527
530
|
|
|
528
|
-
#
|
|
529
|
-
env_file = env_dir / ".env"
|
|
530
|
-
loaded_env_vars: dict[str, str] = {}
|
|
531
|
-
if env_file.exists():
|
|
532
|
-
try:
|
|
533
|
-
from hud.cli.utils.config import parse_env_file
|
|
534
|
-
|
|
535
|
-
env_contents = env_file.read_text(encoding="utf-8")
|
|
536
|
-
loaded_env_vars = parse_env_file(env_contents)
|
|
537
|
-
for key, value in loaded_env_vars.items():
|
|
538
|
-
docker_cmd.extend(["-e", f"{key}={value}"])
|
|
539
|
-
if verbose and loaded_env_vars:
|
|
540
|
-
hud_console.info(f"Loaded {len(loaded_env_vars)} env var(s) from .env")
|
|
541
|
-
except Exception as e:
|
|
542
|
-
hud_console.warning(f"Failed to load .env file: {e}")
|
|
543
|
-
|
|
544
|
-
# Add user-provided Docker arguments
|
|
545
|
-
if docker_args:
|
|
546
|
-
docker_cmd.extend(docker_args)
|
|
547
|
-
|
|
548
|
-
# Append the image name
|
|
549
|
-
docker_cmd.append(image_name)
|
|
531
|
+
# Env flags already injected by create_docker_run_command
|
|
550
532
|
|
|
551
533
|
# Print startup info
|
|
552
534
|
hud_console.header("HUD Development Mode (Docker)")
|
hud/cli/eval.py
CHANGED
|
@@ -68,6 +68,50 @@ def get_available_models() -> list[dict[str, str | None]]:
|
|
|
68
68
|
return []
|
|
69
69
|
|
|
70
70
|
|
|
71
|
+
def _build_vllm_config(
|
|
72
|
+
vllm_base_url: str | None,
|
|
73
|
+
model: str | None,
|
|
74
|
+
allowed_tools: list[str] | None,
|
|
75
|
+
verbose: bool,
|
|
76
|
+
) -> dict[str, Any]:
|
|
77
|
+
"""Build configuration for vLLM agent.
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
vllm_base_url: Optional base URL for vLLM server
|
|
81
|
+
model: Model name to use
|
|
82
|
+
allowed_tools: Optional list of allowed tools
|
|
83
|
+
verbose: Enable verbose output
|
|
84
|
+
|
|
85
|
+
Returns:
|
|
86
|
+
Dictionary with agent configuration
|
|
87
|
+
"""
|
|
88
|
+
# Determine base URL and API key
|
|
89
|
+
if vllm_base_url is not None:
|
|
90
|
+
base_url = vllm_base_url
|
|
91
|
+
api_key = settings.api_key if base_url.startswith(settings.hud_rl_url) else "token-abc123"
|
|
92
|
+
hud_console.info(f"Using vLLM server at {base_url}")
|
|
93
|
+
else:
|
|
94
|
+
base_url = "http://localhost:8000/v1"
|
|
95
|
+
api_key = "token-abc123"
|
|
96
|
+
|
|
97
|
+
config: dict[str, Any] = {
|
|
98
|
+
"api_key": api_key,
|
|
99
|
+
"base_url": base_url,
|
|
100
|
+
"model_name": model or "served-model",
|
|
101
|
+
"verbose": verbose,
|
|
102
|
+
"completion_kwargs": {
|
|
103
|
+
"temperature": 0.7,
|
|
104
|
+
"max_tokens": 2048,
|
|
105
|
+
"tool_choice": "auto",
|
|
106
|
+
},
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
if allowed_tools:
|
|
110
|
+
config["allowed_tools"] = allowed_tools
|
|
111
|
+
|
|
112
|
+
return config
|
|
113
|
+
|
|
114
|
+
|
|
71
115
|
def build_agent(
|
|
72
116
|
agent_type: Literal["claude", "openai", "vllm", "litellm", "integration_test"],
|
|
73
117
|
*,
|
|
@@ -86,8 +130,6 @@ def build_agent(
|
|
|
86
130
|
elif agent_type == "vllm":
|
|
87
131
|
# Create a generic OpenAI agent for vLLM server
|
|
88
132
|
try:
|
|
89
|
-
from openai import AsyncOpenAI
|
|
90
|
-
|
|
91
133
|
from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
|
|
92
134
|
except ImportError as e:
|
|
93
135
|
hud_console.error(
|
|
@@ -96,36 +138,14 @@ def build_agent(
|
|
|
96
138
|
)
|
|
97
139
|
raise typer.Exit(1) from e
|
|
98
140
|
|
|
99
|
-
#
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
api_key = (
|
|
105
|
-
settings.api_key if base_url.startswith(settings.hud_rl_url) else "token-abc123"
|
|
106
|
-
)
|
|
107
|
-
else:
|
|
108
|
-
# Default to localhost
|
|
109
|
-
base_url = "http://localhost:8000/v1"
|
|
110
|
-
api_key = "token-abc123"
|
|
111
|
-
|
|
112
|
-
# Create OpenAI client for vLLM
|
|
113
|
-
openai_client = AsyncOpenAI(
|
|
114
|
-
base_url=base_url,
|
|
115
|
-
api_key=api_key,
|
|
116
|
-
timeout=30.0,
|
|
117
|
-
)
|
|
118
|
-
|
|
119
|
-
return GenericOpenAIChatAgent(
|
|
120
|
-
openai_client=openai_client,
|
|
121
|
-
model_name=model or "served-model", # Default model name
|
|
141
|
+
# Use the shared config builder
|
|
142
|
+
config = _build_vllm_config(
|
|
143
|
+
vllm_base_url=vllm_base_url,
|
|
144
|
+
model=model,
|
|
145
|
+
allowed_tools=allowed_tools,
|
|
122
146
|
verbose=verbose,
|
|
123
|
-
completion_kwargs={
|
|
124
|
-
"temperature": 0.7,
|
|
125
|
-
"max_tokens": 2048,
|
|
126
|
-
"tool_choice": "required", # if self.actor_config.force_tool_choice else "auto",
|
|
127
|
-
},
|
|
128
147
|
)
|
|
148
|
+
return GenericOpenAIChatAgent(**config)
|
|
129
149
|
|
|
130
150
|
elif agent_type == "openai":
|
|
131
151
|
try:
|
|
@@ -257,25 +277,17 @@ async def run_single_task(
|
|
|
257
277
|
agent_config["allowed_tools"] = allowed_tools
|
|
258
278
|
elif agent_type == "vllm":
|
|
259
279
|
# Special handling for vLLM
|
|
260
|
-
|
|
261
|
-
|
|
280
|
+
from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
|
|
281
|
+
|
|
282
|
+
agent_class = GenericOpenAIChatAgent
|
|
283
|
+
|
|
284
|
+
# Use the shared config builder
|
|
285
|
+
agent_config = _build_vllm_config(
|
|
286
|
+
vllm_base_url=vllm_base_url,
|
|
262
287
|
model=model,
|
|
263
288
|
allowed_tools=allowed_tools,
|
|
264
289
|
verbose=verbose,
|
|
265
|
-
vllm_base_url=vllm_base_url,
|
|
266
290
|
)
|
|
267
|
-
agent_config = {
|
|
268
|
-
"openai_client": sample_agent.oai,
|
|
269
|
-
"model_name": sample_agent.model_name,
|
|
270
|
-
"verbose": verbose,
|
|
271
|
-
"completion_kwargs": sample_agent.completion_kwargs,
|
|
272
|
-
}
|
|
273
|
-
if allowed_tools:
|
|
274
|
-
agent_config["allowed_tools"] = allowed_tools
|
|
275
|
-
|
|
276
|
-
from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
|
|
277
|
-
|
|
278
|
-
agent_class = GenericOpenAIChatAgent
|
|
279
291
|
elif agent_type == "openai":
|
|
280
292
|
from hud.agents import OperatorAgent
|
|
281
293
|
|
|
@@ -382,6 +394,7 @@ async def run_full_dataset(
|
|
|
382
394
|
dataset_name = f"Dataset: {path.name}" if path.exists() else source.split("/")[-1]
|
|
383
395
|
|
|
384
396
|
# Build agent class + config for run_dataset
|
|
397
|
+
agent_config: dict[str, Any]
|
|
385
398
|
if agent_type == "integration_test": # --integration-test mode
|
|
386
399
|
from hud.agents.misc.integration_test_agent import IntegrationTestRunner
|
|
387
400
|
|
|
@@ -399,24 +412,13 @@ async def run_full_dataset(
|
|
|
399
412
|
)
|
|
400
413
|
raise typer.Exit(1) from e
|
|
401
414
|
|
|
402
|
-
# Use
|
|
403
|
-
|
|
404
|
-
|
|
415
|
+
# Use the shared config builder
|
|
416
|
+
agent_config = _build_vllm_config(
|
|
417
|
+
vllm_base_url=vllm_base_url,
|
|
405
418
|
model=model,
|
|
406
419
|
allowed_tools=allowed_tools,
|
|
407
420
|
verbose=verbose,
|
|
408
|
-
vllm_base_url=vllm_base_url,
|
|
409
421
|
)
|
|
410
|
-
|
|
411
|
-
# Extract the config from the sample agent
|
|
412
|
-
agent_config: dict[str, Any] = {
|
|
413
|
-
"openai_client": sample_agent.oai,
|
|
414
|
-
"model_name": sample_agent.model_name,
|
|
415
|
-
"verbose": verbose,
|
|
416
|
-
"completion_kwargs": sample_agent.completion_kwargs,
|
|
417
|
-
}
|
|
418
|
-
if allowed_tools:
|
|
419
|
-
agent_config["allowed_tools"] = allowed_tools
|
|
420
422
|
elif agent_type == "openai":
|
|
421
423
|
try:
|
|
422
424
|
from hud.agents import OperatorAgent
|
|
@@ -630,8 +632,6 @@ def eval_command(
|
|
|
630
632
|
# Run with verbose output for debugging
|
|
631
633
|
hud eval task.json --verbose
|
|
632
634
|
"""
|
|
633
|
-
from hud.settings import settings
|
|
634
|
-
|
|
635
635
|
# Always configure basic logging so agent steps can be logged
|
|
636
636
|
# Set to INFO by default for consistency with run_evaluation.py
|
|
637
637
|
if very_verbose:
|