hud-python 0.4.51__py3-none-any.whl → 0.4.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +13 -1
- hud/agents/base.py +14 -3
- hud/agents/lite_llm.py +1 -1
- hud/agents/openai_chat_generic.py +15 -3
- hud/agents/tests/test_base.py +9 -2
- hud/agents/tests/test_base_runtime.py +164 -0
- hud/cli/__init__.py +18 -25
- hud/cli/build.py +35 -27
- hud/cli/dev.py +11 -29
- hud/cli/eval.py +114 -145
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +26 -3
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +134 -0
- hud/cli/tests/test_eval.py +4 -0
- hud/cli/tests/test_mcp_server.py +8 -7
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/utils/docker.py +120 -1
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +257 -0
- hud/clients/base.py +1 -1
- hud/clients/mcp_use.py +3 -1
- hud/datasets/parallel.py +2 -2
- hud/datasets/runner.py +85 -24
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_runner.py +106 -0
- hud/datasets/tests/test_utils.py +228 -0
- hud/otel/config.py +8 -6
- hud/otel/context.py +4 -4
- hud/otel/exporters.py +231 -57
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_instrumentation.py +207 -0
- hud/rl/learner.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/shared/exceptions.py +35 -9
- hud/shared/hints.py +25 -0
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +39 -30
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +30 -6
- hud/telemetry/async_context.py +331 -0
- hud/telemetry/job.py +51 -12
- hud/telemetry/tests/test_async_context.py +242 -0
- hud/telemetry/tests/test_instrument.py +414 -0
- hud/telemetry/tests/test_job.py +609 -0
- hud/telemetry/tests/test_trace.py +184 -6
- hud/telemetry/trace.py +16 -17
- hud/tools/computer/qwen.py +4 -1
- hud/tools/computer/settings.py +2 -2
- hud/tools/executors/base.py +4 -2
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/types.py +7 -1
- hud/utils/agent_factories.py +1 -3
- hud/utils/mcp.py +1 -1
- hud/utils/task_tracking.py +223 -0
- hud/utils/tests/test_agent_factories.py +60 -0
- hud/utils/tests/test_mcp.py +4 -6
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tasks.py +187 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/METADATA +48 -48
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/RECORD +88 -47
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/WHEEL +0 -0
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/licenses/LICENSE +0 -0
hud/__init__.py
CHANGED
|
@@ -5,10 +5,22 @@ tools for building, evaluating, and training AI agents.
|
|
|
5
5
|
|
|
6
6
|
from __future__ import annotations
|
|
7
7
|
|
|
8
|
-
from .telemetry import
|
|
8
|
+
from .telemetry import (
|
|
9
|
+
Trace,
|
|
10
|
+
async_job,
|
|
11
|
+
async_trace,
|
|
12
|
+
clear_trace,
|
|
13
|
+
create_job,
|
|
14
|
+
get_trace,
|
|
15
|
+
instrument,
|
|
16
|
+
job,
|
|
17
|
+
trace,
|
|
18
|
+
)
|
|
9
19
|
|
|
10
20
|
__all__ = [
|
|
11
21
|
"Trace",
|
|
22
|
+
"async_job",
|
|
23
|
+
"async_trace",
|
|
12
24
|
"clear_trace",
|
|
13
25
|
"create_job",
|
|
14
26
|
"get_trace",
|
hud/agents/base.py
CHANGED
|
@@ -55,6 +55,7 @@ class MCPAgent(ABC):
|
|
|
55
55
|
# Filtering
|
|
56
56
|
allowed_tools: list[str] | None = None,
|
|
57
57
|
disallowed_tools: list[str] | None = None,
|
|
58
|
+
response_tool_name: str | None = None,
|
|
58
59
|
# Messages
|
|
59
60
|
system_prompt: str = GLOBAL_SYSTEM_PROMPT,
|
|
60
61
|
append_setup_output: bool = True,
|
|
@@ -74,6 +75,7 @@ class MCPAgent(ABC):
|
|
|
74
75
|
that provides `mcp_config`.
|
|
75
76
|
allowed_tools: Names of tools to allow (None means allow all).
|
|
76
77
|
disallowed_tools: Names of tools to always exclude.
|
|
78
|
+
response_tool_name: Name of the tool to use for response.
|
|
77
79
|
system_prompt: System prompt to seed the conversation.
|
|
78
80
|
append_setup_output: Whether to append setup tool output to the
|
|
79
81
|
first turn's messages.
|
|
@@ -108,7 +110,7 @@ class MCPAgent(ABC):
|
|
|
108
110
|
|
|
109
111
|
# Initialize these here so methods can be called before initialize()
|
|
110
112
|
self._tool_map: dict[str, types.Tool] = {} # Simplified: just name to tool
|
|
111
|
-
self.response_tool_name =
|
|
113
|
+
self.response_tool_name = response_tool_name
|
|
112
114
|
|
|
113
115
|
# Trace
|
|
114
116
|
self._auto_trace = auto_trace
|
|
@@ -135,7 +137,11 @@ class MCPAgent(ABC):
|
|
|
135
137
|
"No MCPClient. Please provide one when initializing the agent or pass a Task with mcp_config." # noqa: E501
|
|
136
138
|
)
|
|
137
139
|
|
|
138
|
-
|
|
140
|
+
try:
|
|
141
|
+
client_cfg = getattr(self.mcp_client, "mcp_config", None)
|
|
142
|
+
except Exception:
|
|
143
|
+
client_cfg = None
|
|
144
|
+
await self._setup_config(client_cfg)
|
|
139
145
|
|
|
140
146
|
# Initialize client if needed
|
|
141
147
|
try:
|
|
@@ -168,6 +174,8 @@ class MCPAgent(ABC):
|
|
|
168
174
|
self.disallowed_tools.extend(task.agent_config["disallowed_tools"])
|
|
169
175
|
else: # If disallowed_tools is None, we overwrite it
|
|
170
176
|
self.disallowed_tools = task.agent_config["disallowed_tools"]
|
|
177
|
+
if "response_tool_name" in task.agent_config:
|
|
178
|
+
self.response_tool_name = task.agent_config["response_tool_name"]
|
|
171
179
|
|
|
172
180
|
all_tools = await self.mcp_client.list_tools()
|
|
173
181
|
self._available_tools = []
|
|
@@ -614,8 +622,11 @@ class MCPAgent(ABC):
|
|
|
614
622
|
except Exception as e:
|
|
615
623
|
self.console.error_log(f"Response lifecycle tool failed: {e}")
|
|
616
624
|
|
|
617
|
-
async def _setup_config(self, mcp_config: dict[str, dict[str, Any]]) -> None:
|
|
625
|
+
async def _setup_config(self, mcp_config: dict[str, dict[str, Any]] | None) -> None:
|
|
618
626
|
"""Inject metadata into the metadata of the initialize request."""
|
|
627
|
+
if not isinstance(mcp_config, dict):
|
|
628
|
+
return
|
|
629
|
+
|
|
619
630
|
if self.metadata:
|
|
620
631
|
patch_mcp_config(
|
|
621
632
|
mcp_config,
|
hud/agents/lite_llm.py
CHANGED
|
@@ -47,7 +47,7 @@ class LiteAgent(GenericOpenAIChatAgent):
|
|
|
47
47
|
**agent_kwargs,
|
|
48
48
|
)
|
|
49
49
|
|
|
50
|
-
def get_tool_schemas(self) -> list[
|
|
50
|
+
def get_tool_schemas(self) -> list[Any]:
|
|
51
51
|
# Prefer LiteLLM's stricter transformer (handles Bedrock & friends)
|
|
52
52
|
if transform_mcp_tool_to_openai_tool is not None:
|
|
53
53
|
return [
|
|
hud/agents/openai_chat_generic.py
CHANGED
|
@@ -20,6 +20,7 @@ import logging
|
|
|
20
20
|
from typing import TYPE_CHECKING, Any, ClassVar, cast
|
|
21
21
|
|
|
22
22
|
import mcp.types as types
|
|
23
|
+
from openai import AsyncOpenAI
|
|
23
24
|
|
|
24
25
|
from hud import instrument
|
|
25
26
|
from hud.types import AgentResponse, MCPToolCall, MCPToolResult
|
|
@@ -28,7 +29,6 @@ from hud.utils.hud_console import HUDConsole
|
|
|
28
29
|
from .base import MCPAgent
|
|
29
30
|
|
|
30
31
|
if TYPE_CHECKING:
|
|
31
|
-
from openai import AsyncOpenAI
|
|
32
32
|
from openai.types.chat import ChatCompletionToolParam
|
|
33
33
|
|
|
34
34
|
logger = logging.getLogger(__name__)
|
|
@@ -42,14 +42,26 @@ class GenericOpenAIChatAgent(MCPAgent):
|
|
|
42
42
|
def __init__(
|
|
43
43
|
self,
|
|
44
44
|
*,
|
|
45
|
-
openai_client: AsyncOpenAI | None,
|
|
45
|
+
openai_client: AsyncOpenAI | None = None,
|
|
46
|
+
api_key: str | None = None,
|
|
47
|
+
base_url: str | None = None,
|
|
46
48
|
model_name: str = "gpt-4o-mini",
|
|
47
49
|
completion_kwargs: dict[str, Any] | None = None,
|
|
48
50
|
**agent_kwargs: Any,
|
|
49
51
|
) -> None:
|
|
50
52
|
# Accept base-agent settings via **agent_kwargs (e.g., mcp_client, system_prompt, etc.)
|
|
51
53
|
super().__init__(**agent_kwargs)
|
|
52
|
-
|
|
54
|
+
|
|
55
|
+
# Handle client creation - support both patterns
|
|
56
|
+
if openai_client is not None:
|
|
57
|
+
# Use provided client (backward compatibility)
|
|
58
|
+
self.oai = openai_client
|
|
59
|
+
elif api_key is not None or base_url is not None:
|
|
60
|
+
# Create client from config (new pattern, consistent with other agents)
|
|
61
|
+
self.oai = AsyncOpenAI(api_key=api_key, base_url=base_url)
|
|
62
|
+
else:
|
|
63
|
+
raise ValueError("Either openai_client or (api_key and base_url) must be provided")
|
|
64
|
+
|
|
53
65
|
self.model_name = model_name
|
|
54
66
|
self.completion_kwargs: dict[str, Any] = completion_kwargs or {}
|
|
55
67
|
self.mcp_schemas = []
|
hud/agents/tests/test_base.py
CHANGED
|
@@ -94,7 +94,7 @@ class TestBaseMCPAgent:
|
|
|
94
94
|
|
|
95
95
|
assert agent.mcp_client is not None
|
|
96
96
|
assert agent.allowed_tools is None
|
|
97
|
-
assert agent.disallowed_tools
|
|
97
|
+
assert agent.disallowed_tools is None
|
|
98
98
|
assert agent.initial_screenshot is True
|
|
99
99
|
assert agent.system_prompt is not None # Default system prompt is set
|
|
100
100
|
|
|
@@ -241,6 +241,13 @@ class TestBaseMCPAgent:
|
|
|
241
241
|
assert "tool2" not in tool_names # Not in allowed list
|
|
242
242
|
assert "tool3" not in tool_names # In disallowed list
|
|
243
243
|
|
|
244
|
+
# Make sure tool schemas are correct
|
|
245
|
+
schemas = agent.get_tool_schemas()
|
|
246
|
+
assert len(schemas) == 1
|
|
247
|
+
assert schemas[0]["name"] == "tool1"
|
|
248
|
+
assert schemas[0]["description"] == "Tool 1"
|
|
249
|
+
assert schemas[0]["parameters"] == {"type": "object"}
|
|
250
|
+
|
|
244
251
|
@pytest.mark.asyncio
|
|
245
252
|
async def test_call_tool_success(self):
|
|
246
253
|
"""Test successful tool call."""
|
|
@@ -334,7 +341,7 @@ class TestBaseMCPAgent:
|
|
|
334
341
|
schemas = agent.get_tool_schemas()
|
|
335
342
|
|
|
336
343
|
# Should include non-lifecycle tools
|
|
337
|
-
assert len(schemas) ==
|
|
344
|
+
assert len(schemas) == 2
|
|
338
345
|
assert schemas[0]["name"] == "tool1"
|
|
339
346
|
|
|
340
347
|
def test_get_tools_by_server(self):
|
|
hud/agents/tests/test_base_runtime.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from unittest import mock
|
|
4
|
+
|
|
5
|
+
import mcp.types as types
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from hud.agents.base import MCPAgent, find_content, find_reward, text_to_blocks
|
|
9
|
+
from hud.types import AgentResponse, MCPToolCall, MCPToolResult
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DummyAgent(MCPAgent):
|
|
13
|
+
async def get_system_messages(self):
|
|
14
|
+
return [types.TextContent(text="sys", type="text")]
|
|
15
|
+
|
|
16
|
+
async def get_response(self, messages):
|
|
17
|
+
# Single step: no tool calls -> done
|
|
18
|
+
return AgentResponse(content="ok", tool_calls=[], done=True)
|
|
19
|
+
|
|
20
|
+
async def format_blocks(self, blocks):
|
|
21
|
+
# Return as-is
|
|
22
|
+
return blocks
|
|
23
|
+
|
|
24
|
+
async def format_tool_results(self, tool_calls, tool_results):
|
|
25
|
+
return [types.TextContent(text="tools", type="text")]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@pytest.mark.asyncio
|
|
29
|
+
async def test_run_with_string_prompt_auto_client(monkeypatch):
|
|
30
|
+
# Fake MCPClient with required methods
|
|
31
|
+
fake_client = mock.AsyncMock()
|
|
32
|
+
fake_client.initialize.return_value = None
|
|
33
|
+
fake_client.list_tools.return_value = []
|
|
34
|
+
fake_client.shutdown.return_value = None
|
|
35
|
+
|
|
36
|
+
# Patch MCPClient construction inside initialize()
|
|
37
|
+
with mock.patch("hud.clients.MCPClient", return_value=fake_client):
|
|
38
|
+
agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
|
|
39
|
+
result = await agent.run("hello", max_steps=1)
|
|
40
|
+
assert result.done is True and result.isError is False
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_find_reward_and_content_extractors():
|
|
44
|
+
# Structured content
|
|
45
|
+
r = MCPToolResult(
|
|
46
|
+
content=text_to_blocks("{}"), isError=False, structuredContent={"reward": 0.7}
|
|
47
|
+
)
|
|
48
|
+
assert find_reward(r) == 0.7
|
|
49
|
+
|
|
50
|
+
# Text JSON
|
|
51
|
+
r2 = MCPToolResult(content=text_to_blocks('{"score": 0.5, "content": "hi"}'), isError=False)
|
|
52
|
+
assert find_reward(r2) == 0.5
|
|
53
|
+
assert find_content(r2) == "hi"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@pytest.mark.asyncio
|
|
57
|
+
async def test_call_tools_error_paths():
|
|
58
|
+
fake_client = mock.AsyncMock()
|
|
59
|
+
# First call succeeds
|
|
60
|
+
ok_result = MCPToolResult(content=text_to_blocks("ok"), isError=False)
|
|
61
|
+
fake_client.call_tool.side_effect = [ok_result, RuntimeError("boom")]
|
|
62
|
+
agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
|
|
63
|
+
results = await agent.call_tools(
|
|
64
|
+
[MCPToolCall(name="a", arguments={}), MCPToolCall(name="b", arguments={})]
|
|
65
|
+
)
|
|
66
|
+
assert results[0].isError is False
|
|
67
|
+
assert results[1].isError is True
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@pytest.mark.asyncio
|
|
71
|
+
async def test_initialize_without_client_raises_valueerror():
|
|
72
|
+
agent = DummyAgent(mcp_client=None, auto_trace=False)
|
|
73
|
+
with pytest.raises(ValueError):
|
|
74
|
+
await agent.initialize(None)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def test_get_available_tools_before_initialize_raises():
|
|
78
|
+
agent = DummyAgent(mcp_client=mock.AsyncMock(), auto_trace=False)
|
|
79
|
+
with pytest.raises(RuntimeError):
|
|
80
|
+
agent.get_available_tools()
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@pytest.mark.asyncio
|
|
84
|
+
async def test_format_message_invalid_type_raises():
|
|
85
|
+
agent = DummyAgent(mcp_client=mock.AsyncMock(), auto_trace=False)
|
|
86
|
+
with pytest.raises(ValueError):
|
|
87
|
+
await agent.format_message({"oops": 1}) # type: ignore
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@pytest.mark.asyncio
|
|
91
|
+
async def test_call_tools_timeout_error_shutdown_called():
|
|
92
|
+
fake_client = mock.AsyncMock()
|
|
93
|
+
fake_client.call_tool.side_effect = TimeoutError("timeout")
|
|
94
|
+
fake_client.shutdown.return_value = None
|
|
95
|
+
agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
|
|
96
|
+
with pytest.raises(TimeoutError):
|
|
97
|
+
await agent.call_tools(MCPToolCall(name="x", arguments={}))
|
|
98
|
+
fake_client.shutdown.assert_awaited_once()
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def test_text_to_blocks_shapes():
|
|
102
|
+
blocks = text_to_blocks("x")
|
|
103
|
+
assert isinstance(blocks, list) and blocks and isinstance(blocks[0], types.TextContent)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@pytest.mark.asyncio
|
|
107
|
+
async def test_run_returns_connection_error_trace(monkeypatch):
|
|
108
|
+
fake_client = mock.AsyncMock()
|
|
109
|
+
fake_client.mcp_config = {}
|
|
110
|
+
fake_client.initialize.side_effect = RuntimeError("Connection refused http://localhost:1234")
|
|
111
|
+
fake_client.list_tools.return_value = []
|
|
112
|
+
fake_client.shutdown.return_value = None
|
|
113
|
+
|
|
114
|
+
class DummyCM:
|
|
115
|
+
def __exit__(self, *args, **kwargs):
|
|
116
|
+
return False
|
|
117
|
+
|
|
118
|
+
monkeypatch.setattr("hud.utils.mcp.setup_hud_telemetry", lambda *args, **kwargs: DummyCM())
|
|
119
|
+
|
|
120
|
+
agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
|
|
121
|
+
result = await agent.run("p", max_steps=1)
|
|
122
|
+
assert result.isError is True
|
|
123
|
+
assert "Could not connect" in (result.content or "")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@pytest.mark.asyncio
|
|
127
|
+
async def test_run_calls_response_tool_when_configured(monkeypatch):
|
|
128
|
+
fake_client = mock.AsyncMock()
|
|
129
|
+
fake_client.mcp_config = {}
|
|
130
|
+
fake_client.initialize.return_value = None
|
|
131
|
+
fake_client.list_tools.return_value = []
|
|
132
|
+
fake_client.shutdown.return_value = None
|
|
133
|
+
ok = MCPToolResult(content=text_to_blocks("ok"), isError=False)
|
|
134
|
+
fake_client.call_tool.return_value = ok
|
|
135
|
+
|
|
136
|
+
class DummyCM:
|
|
137
|
+
def __exit__(self, *args, **kwargs):
|
|
138
|
+
return False
|
|
139
|
+
|
|
140
|
+
monkeypatch.setattr("hud.utils.mcp.setup_hud_telemetry", lambda *args, **kwargs: DummyCM())
|
|
141
|
+
|
|
142
|
+
agent = DummyAgent(mcp_client=fake_client, auto_trace=False, response_tool_name="submit")
|
|
143
|
+
result = await agent.run("hello", max_steps=1)
|
|
144
|
+
assert result.isError is False
|
|
145
|
+
fake_client.call_tool.assert_awaited()
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@pytest.mark.asyncio
|
|
149
|
+
async def test_get_available_tools_after_initialize(monkeypatch):
|
|
150
|
+
fake_client = mock.AsyncMock()
|
|
151
|
+
fake_client.mcp_config = {}
|
|
152
|
+
fake_client.initialize.return_value = None
|
|
153
|
+
fake_client.list_tools.return_value = []
|
|
154
|
+
fake_client.shutdown.return_value = None
|
|
155
|
+
|
|
156
|
+
class DummyCM:
|
|
157
|
+
def __exit__(self, *args, **kwargs):
|
|
158
|
+
return False
|
|
159
|
+
|
|
160
|
+
monkeypatch.setattr("hud.utils.mcp.setup_hud_telemetry", lambda *args, **kwargs: DummyCM())
|
|
161
|
+
|
|
162
|
+
agent = DummyAgent(mcp_client=fake_client, auto_trace=False)
|
|
163
|
+
await agent.initialize(None)
|
|
164
|
+
assert agent.get_available_tools() == []
|
hud/cli/__init__.py
CHANGED
|
@@ -242,15 +242,18 @@ def debug(
|
|
|
242
242
|
if build and not build_environment(directory, image_name):
|
|
243
243
|
raise typer.Exit(1)
|
|
244
244
|
|
|
245
|
-
# Build Docker command
|
|
246
|
-
from .utils.docker import
|
|
245
|
+
# Build Docker command with folder-mode envs
|
|
246
|
+
from .utils.docker import create_docker_run_command
|
|
247
247
|
|
|
248
|
-
command =
|
|
248
|
+
command = create_docker_run_command(
|
|
249
|
+
image_name, docker_args=docker_args, env_dir=directory
|
|
250
|
+
)
|
|
249
251
|
else:
|
|
250
252
|
# Assume it's an image name
|
|
251
253
|
image = first_param
|
|
252
254
|
from .utils.docker import build_run_command
|
|
253
255
|
|
|
256
|
+
# Image-only mode: do not auto-inject local .env
|
|
254
257
|
command = build_run_command(image, docker_args)
|
|
255
258
|
else:
|
|
256
259
|
console.print(
|
|
@@ -796,33 +799,19 @@ def eval(
|
|
|
796
799
|
help="Comma-separated list of allowed tools",
|
|
797
800
|
),
|
|
798
801
|
max_concurrent: int = typer.Option(
|
|
799
|
-
|
|
802
|
+
30,
|
|
800
803
|
"--max-concurrent",
|
|
801
|
-
help="
|
|
804
|
+
help="Maximum concurrent tasks (1-200 recommended, prevents rate limits)",
|
|
802
805
|
),
|
|
803
806
|
max_steps: int | None = typer.Option(
|
|
804
807
|
None,
|
|
805
808
|
"--max-steps",
|
|
806
809
|
help="Maximum steps per task (default: 10 for single, 50 for full)",
|
|
807
810
|
),
|
|
808
|
-
parallel: bool = typer.Option(
|
|
809
|
-
False,
|
|
810
|
-
"--parallel",
|
|
811
|
-
help="Use process-based parallel execution for large datasets (100+ tasks)",
|
|
812
|
-
),
|
|
813
|
-
max_workers: int | None = typer.Option(
|
|
814
|
-
None,
|
|
815
|
-
"--max-workers",
|
|
816
|
-
help="Number of worker processes for parallel mode (auto-optimized if not set)",
|
|
817
|
-
),
|
|
818
|
-
max_concurrent_per_worker: int = typer.Option(
|
|
819
|
-
20,
|
|
820
|
-
"--max-concurrent-per-worker",
|
|
821
|
-
help="Maximum concurrent tasks per worker in parallel mode",
|
|
822
|
-
),
|
|
823
811
|
verbose: bool = typer.Option(
|
|
824
812
|
False,
|
|
825
813
|
"--verbose",
|
|
814
|
+
"-v",
|
|
826
815
|
help="Enable verbose output from the agent",
|
|
827
816
|
),
|
|
828
817
|
very_verbose: bool = typer.Option(
|
|
@@ -867,14 +856,14 @@ def eval(
|
|
|
867
856
|
|
|
868
857
|
source = find_tasks_file(None, msg="Select a tasks file to run")
|
|
869
858
|
hud_console.success(f"Selected: {source}")
|
|
870
|
-
except Exception
|
|
859
|
+
except (FileNotFoundError, Exception):
|
|
871
860
|
hud_console.error(
|
|
872
861
|
"No source provided and no task/eval JSON files found in current directory"
|
|
873
862
|
)
|
|
874
863
|
hud_console.info(
|
|
875
864
|
"Usage: hud eval <source> or create a task JSON file (e.g., task.json, tasks.jsonl)"
|
|
876
865
|
)
|
|
877
|
-
raise typer.Exit(1) from
|
|
866
|
+
raise typer.Exit(1) from None
|
|
878
867
|
|
|
879
868
|
# Import eval_command lazily to avoid importing agent dependencies
|
|
880
869
|
try:
|
|
@@ -950,9 +939,6 @@ def eval(
|
|
|
950
939
|
allowed_tools=allowed_tools,
|
|
951
940
|
max_concurrent=max_concurrent,
|
|
952
941
|
max_steps=max_steps,
|
|
953
|
-
parallel=parallel,
|
|
954
|
-
max_workers=max_workers,
|
|
955
|
-
max_concurrent_per_worker=max_concurrent_per_worker,
|
|
956
942
|
verbose=verbose,
|
|
957
943
|
very_verbose=very_verbose,
|
|
958
944
|
vllm_base_url=vllm_base_url,
|
|
@@ -1126,6 +1112,13 @@ def set(
|
|
|
1126
1112
|
|
|
1127
1113
|
def main() -> None:
|
|
1128
1114
|
"""Main entry point for the CLI."""
|
|
1115
|
+
# Check for updates (including on --version command)
|
|
1116
|
+
# Skip only on help-only commands
|
|
1117
|
+
if not (len(sys.argv) == 1 or (len(sys.argv) == 2 and sys.argv[1] in ["--help", "-h"])):
|
|
1118
|
+
from .utils.version_check import display_update_prompt
|
|
1119
|
+
|
|
1120
|
+
display_update_prompt()
|
|
1121
|
+
|
|
1129
1122
|
# Handle --version flag before Typer parses args
|
|
1130
1123
|
if "--version" in sys.argv:
|
|
1131
1124
|
try:
|
hud/cli/build.py
CHANGED
|
@@ -161,49 +161,42 @@ async def analyze_mcp_environment(
|
|
|
161
161
|
hud_console = HUDConsole()
|
|
162
162
|
env_vars = env_vars or {}
|
|
163
163
|
|
|
164
|
-
# Build Docker command to run the image
|
|
165
|
-
|
|
164
|
+
# Build Docker command to run the image, injecting any provided env vars
|
|
165
|
+
from hud.cli.utils.docker import build_env_flags
|
|
166
166
|
|
|
167
|
-
|
|
168
|
-
for key, value in env_vars.items():
|
|
169
|
-
docker_cmd.extend(["-e", f"{key}={value}"])
|
|
167
|
+
docker_cmd = ["docker", "run", "--rm", "-i", *build_env_flags(env_vars), image]
|
|
170
168
|
|
|
171
|
-
|
|
169
|
+
# Show full docker command being used for analysis
|
|
170
|
+
hud_console.dim_info("Command:", " ".join(docker_cmd))
|
|
172
171
|
|
|
173
|
-
# Create MCP config
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
172
|
+
# Create MCP config consistently with analyze helpers
|
|
173
|
+
from hud.cli.analyze import parse_docker_command
|
|
174
|
+
|
|
175
|
+
mcp_config = parse_docker_command(docker_cmd)
|
|
177
176
|
|
|
178
177
|
# Initialize client and measure timing
|
|
179
178
|
start_time = time.time()
|
|
180
|
-
client = MCPClient(mcp_config=
|
|
179
|
+
client = MCPClient(mcp_config=mcp_config, verbose=verbose, auto_trace=False)
|
|
181
180
|
initialized = False
|
|
182
181
|
|
|
183
182
|
try:
|
|
184
183
|
if verbose:
|
|
185
|
-
hud_console.info(
|
|
184
|
+
hud_console.info("Initializing MCP client...")
|
|
186
185
|
|
|
187
|
-
# Add timeout to fail fast instead of hanging (
|
|
186
|
+
# Add timeout to fail fast instead of hanging (60 seconds)
|
|
188
187
|
await asyncio.wait_for(client.initialize(), timeout=60.0)
|
|
189
188
|
initialized = True
|
|
190
189
|
initialize_ms = int((time.time() - start_time) * 1000)
|
|
191
190
|
|
|
192
|
-
#
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
# Extract tool information
|
|
196
|
-
tool_info = []
|
|
197
|
-
for tool in tools:
|
|
198
|
-
tool_dict = {"name": tool.name, "description": tool.description}
|
|
199
|
-
if hasattr(tool, "inputSchema") and tool.inputSchema:
|
|
200
|
-
tool_dict["inputSchema"] = tool.inputSchema
|
|
201
|
-
tool_info.append(tool_dict)
|
|
191
|
+
# Delegate to standard analysis helper for consistency
|
|
192
|
+
full_analysis = await client.analyze_environment()
|
|
202
193
|
|
|
194
|
+
# Normalize to build's expected fields
|
|
195
|
+
tools_list = full_analysis.get("tools", [])
|
|
203
196
|
return {
|
|
204
197
|
"initializeMs": initialize_ms,
|
|
205
|
-
"toolCount": len(
|
|
206
|
-
"tools":
|
|
198
|
+
"toolCount": len(tools_list),
|
|
199
|
+
"tools": tools_list,
|
|
207
200
|
"success": True,
|
|
208
201
|
}
|
|
209
202
|
except TimeoutError:
|
|
@@ -295,6 +288,10 @@ def build_environment(
|
|
|
295
288
|
hud_console.error(f"Directory not found: {directory}")
|
|
296
289
|
raise typer.Exit(1)
|
|
297
290
|
|
|
291
|
+
from hud.cli.utils.docker import require_docker_running
|
|
292
|
+
|
|
293
|
+
require_docker_running()
|
|
294
|
+
|
|
298
295
|
# Step 1: Check for hud.lock.yaml (previous build)
|
|
299
296
|
lock_path = env_dir / "hud.lock.yaml"
|
|
300
297
|
base_name = None
|
|
@@ -355,13 +352,24 @@ def build_environment(
|
|
|
355
352
|
|
|
356
353
|
hud_console.success(f"Built temporary image: {temp_tag}")
|
|
357
354
|
|
|
358
|
-
# Analyze the environment
|
|
355
|
+
# Analyze the environment (merge folder .env if present)
|
|
359
356
|
hud_console.progress_message("Analyzing MCP environment...")
|
|
360
357
|
|
|
361
358
|
loop = asyncio.new_event_loop()
|
|
362
359
|
asyncio.set_event_loop(loop)
|
|
363
360
|
try:
|
|
364
|
-
|
|
361
|
+
# Merge .env from env_dir for analysis only
|
|
362
|
+
try:
|
|
363
|
+
from hud.cli.utils.docker import load_env_vars_for_dir
|
|
364
|
+
|
|
365
|
+
env_from_file = load_env_vars_for_dir(env_dir)
|
|
366
|
+
except Exception:
|
|
367
|
+
env_from_file = {}
|
|
368
|
+
merged_env_for_analysis = {**env_from_file, **(env_vars or {})}
|
|
369
|
+
|
|
370
|
+
analysis = loop.run_until_complete(
|
|
371
|
+
analyze_mcp_environment(temp_tag, verbose, merged_env_for_analysis)
|
|
372
|
+
)
|
|
365
373
|
except Exception as e:
|
|
366
374
|
hud_console.error(f"Failed to analyze MCP environment: {e}")
|
|
367
375
|
hud_console.info("")
|
hud/cli/dev.py
CHANGED
|
@@ -504,15 +504,12 @@ def run_docker_dev_server(
|
|
|
504
504
|
base_name = image_name.replace(":", "-").replace("/", "-")
|
|
505
505
|
container_name = f"{base_name}-dev-{pid}"
|
|
506
506
|
|
|
507
|
-
# Build docker run command with volume mounts
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
"--rm",
|
|
512
|
-
"-i",
|
|
507
|
+
# Build docker run command with volume mounts and folder-mode envs
|
|
508
|
+
from .utils.docker import create_docker_run_command
|
|
509
|
+
|
|
510
|
+
base_args = [
|
|
513
511
|
"--name",
|
|
514
512
|
container_name,
|
|
515
|
-
# Mount both server and environment for hot-reload
|
|
516
513
|
"-v",
|
|
517
514
|
f"{env_dir.absolute()}/server:/app/server:rw",
|
|
518
515
|
"-v",
|
|
@@ -524,29 +521,14 @@ def run_docker_dev_server(
|
|
|
524
521
|
"-e",
|
|
525
522
|
"HUD_DEV=1",
|
|
526
523
|
]
|
|
524
|
+
combined_args = [*base_args, *docker_args] if docker_args else base_args
|
|
525
|
+
docker_cmd = create_docker_run_command(
|
|
526
|
+
image_name,
|
|
527
|
+
docker_args=combined_args,
|
|
528
|
+
env_dir=env_dir,
|
|
529
|
+
)
|
|
527
530
|
|
|
528
|
-
#
|
|
529
|
-
env_file = env_dir / ".env"
|
|
530
|
-
loaded_env_vars: dict[str, str] = {}
|
|
531
|
-
if env_file.exists():
|
|
532
|
-
try:
|
|
533
|
-
from hud.cli.utils.config import parse_env_file
|
|
534
|
-
|
|
535
|
-
env_contents = env_file.read_text(encoding="utf-8")
|
|
536
|
-
loaded_env_vars = parse_env_file(env_contents)
|
|
537
|
-
for key, value in loaded_env_vars.items():
|
|
538
|
-
docker_cmd.extend(["-e", f"{key}={value}"])
|
|
539
|
-
if verbose and loaded_env_vars:
|
|
540
|
-
hud_console.info(f"Loaded {len(loaded_env_vars)} env var(s) from .env")
|
|
541
|
-
except Exception as e:
|
|
542
|
-
hud_console.warning(f"Failed to load .env file: {e}")
|
|
543
|
-
|
|
544
|
-
# Add user-provided Docker arguments
|
|
545
|
-
if docker_args:
|
|
546
|
-
docker_cmd.extend(docker_args)
|
|
547
|
-
|
|
548
|
-
# Append the image name
|
|
549
|
-
docker_cmd.append(image_name)
|
|
531
|
+
# Env flags already injected by create_docker_run_command
|
|
550
532
|
|
|
551
533
|
# Print startup info
|
|
552
534
|
hud_console.header("HUD Development Mode (Docker)")
|