hud-python 0.5.1__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. hud/__init__.py +1 -1
  2. hud/agents/__init__.py +65 -6
  3. hud/agents/base.py +33 -15
  4. hud/agents/claude.py +60 -31
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +15 -26
  7. hud/agents/gemini_cua.py +6 -17
  8. hud/agents/misc/response_agent.py +7 -0
  9. hud/agents/openai.py +16 -29
  10. hud/agents/openai_chat.py +3 -19
  11. hud/agents/operator.py +5 -17
  12. hud/agents/resolver.py +70 -0
  13. hud/agents/tests/test_claude.py +2 -4
  14. hud/agents/tests/test_openai.py +2 -1
  15. hud/agents/tests/test_resolver.py +192 -0
  16. hud/agents/types.py +148 -0
  17. hud/cli/__init__.py +34 -3
  18. hud/cli/build.py +37 -5
  19. hud/cli/dev.py +11 -2
  20. hud/cli/eval.py +51 -39
  21. hud/cli/flows/init.py +1 -1
  22. hud/cli/pull.py +1 -1
  23. hud/cli/push.py +9 -2
  24. hud/cli/tests/test_build.py +2 -2
  25. hud/cli/tests/test_push.py +1 -1
  26. hud/cli/utils/metadata.py +1 -1
  27. hud/cli/utils/tests/test_metadata.py +1 -1
  28. hud/clients/mcp_use.py +6 -1
  29. hud/datasets/loader.py +17 -18
  30. hud/datasets/runner.py +16 -10
  31. hud/datasets/tests/test_loader.py +15 -15
  32. hud/environment/__init__.py +5 -3
  33. hud/environment/connection.py +58 -6
  34. hud/environment/connectors/mcp_config.py +29 -1
  35. hud/environment/environment.py +218 -77
  36. hud/environment/router.py +175 -24
  37. hud/environment/scenarios.py +313 -186
  38. hud/environment/tests/test_connectors.py +10 -23
  39. hud/environment/tests/test_environment.py +432 -0
  40. hud/environment/tests/test_local_connectors.py +81 -40
  41. hud/environment/tests/test_scenarios.py +820 -14
  42. hud/eval/context.py +63 -10
  43. hud/eval/instrument.py +4 -2
  44. hud/eval/manager.py +79 -12
  45. hud/eval/task.py +36 -4
  46. hud/eval/tests/test_eval.py +1 -1
  47. hud/eval/tests/test_task.py +147 -1
  48. hud/eval/types.py +2 -0
  49. hud/eval/utils.py +14 -3
  50. hud/patches/mcp_patches.py +178 -21
  51. hud/telemetry/instrument.py +8 -1
  52. hud/telemetry/tests/test_eval_telemetry.py +8 -8
  53. hud/tools/__init__.py +2 -0
  54. hud/tools/agent.py +223 -0
  55. hud/tools/computer/__init__.py +34 -5
  56. hud/tools/shell.py +3 -3
  57. hud/tools/tests/test_agent_tool.py +355 -0
  58. hud/types.py +62 -34
  59. hud/utils/hud_console.py +30 -17
  60. hud/utils/strict_schema.py +1 -1
  61. hud/utils/tests/test_version.py +1 -1
  62. hud/version.py +1 -1
  63. {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/METADATA +2 -2
  64. {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/RECORD +67 -61
  65. {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/WHEEL +0 -0
  66. {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  67. {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,355 @@
1
+ """Tests for AgentTool - scenario-to-agent composition."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import inspect
6
+ from unittest.mock import AsyncMock, MagicMock, patch
7
+
8
+ import pytest
9
+
10
+ from hud.environment import Environment
11
+ from hud.eval.task import Task
12
+ from hud.tools.agent import AgentTool, _is_eval_only
13
+
14
+
15
+ class TestIsEvalOnly:
16
+ """Tests for _is_eval_only helper function."""
17
+
18
+ def test_required_param_not_eval_only(self) -> None:
19
+ """Required params (no default) are not eval-only."""
20
+
21
+ def fn(x: str) -> None:
22
+ pass
23
+
24
+ sig = inspect.signature(fn)
25
+ param = sig.parameters["x"]
26
+ assert not _is_eval_only(param)
27
+
28
+ def test_optional_with_value_not_eval_only(self) -> None:
29
+ """Optional params with non-None default are not eval-only."""
30
+
31
+ def fn(x: str = "default") -> None:
32
+ pass
33
+
34
+ sig = inspect.signature(fn)
35
+ param = sig.parameters["x"]
36
+ assert not _is_eval_only(param)
37
+
38
+ def test_optional_none_without_union_not_eval_only(self) -> None:
39
+ """Optional with None default but no None in type is not eval-only."""
40
+
41
+ def fn(x: str = None) -> None: # type: ignore[assignment] # noqa: RUF013
42
+ pass
43
+
44
+ sig = inspect.signature(fn)
45
+ param = sig.parameters["x"]
46
+ assert not _is_eval_only(param)
47
+
48
+ def test_optional_none_with_union_is_eval_only(self) -> None:
49
+ """Params with `X | None = None` pattern are eval-only."""
50
+
51
+ def fn(x: str | None = None) -> None:
52
+ pass
53
+
54
+ sig = inspect.signature(fn)
55
+ param = sig.parameters["x"]
56
+ assert _is_eval_only(param)
57
+
58
+ def test_optional_int_none_is_eval_only(self) -> None:
59
+ """Works with int | None = None too."""
60
+
61
+ def fn(x: int | None = None) -> None:
62
+ pass
63
+
64
+ sig = inspect.signature(fn)
65
+ param = sig.parameters["x"]
66
+ assert _is_eval_only(param)
67
+
68
+ def test_string_annotation_with_none_union(self) -> None:
69
+ """Handles string annotations like 'str | None'."""
70
+ # Simulate string annotation
71
+ param = inspect.Parameter(
72
+ "x",
73
+ inspect.Parameter.POSITIONAL_OR_KEYWORD,
74
+ default=None,
75
+ annotation="str | None",
76
+ )
77
+ assert _is_eval_only(param)
78
+
79
+ def test_string_annotation_without_none(self) -> None:
80
+ """String annotations without None are not eval-only."""
81
+ param = inspect.Parameter(
82
+ "x",
83
+ inspect.Parameter.POSITIONAL_OR_KEYWORD,
84
+ default=None,
85
+ annotation="str",
86
+ )
87
+ assert not _is_eval_only(param)
88
+
89
+
90
+ class TestAgentToolInit:
91
+ """Tests for AgentTool initialization."""
92
+
93
+ def test_requires_model_or_agent(self) -> None:
94
+ """Must provide either model or agent."""
95
+ task = Task(args={})
96
+
97
+ with pytest.raises(ValueError, match="Must provide either"):
98
+ AgentTool(task)
99
+
100
+ def test_cannot_provide_both_model_and_agent(self) -> None:
101
+ """Cannot provide both model and agent."""
102
+ task = Task(args={})
103
+ mock_agent = MagicMock()
104
+
105
+ with pytest.raises(ValueError, match="Cannot provide both"):
106
+ AgentTool(task, model="claude", agent=mock_agent) # type: ignore[arg-type]
107
+
108
+ def test_accepts_model_string(self) -> None:
109
+ """Can create with model string."""
110
+ task = Task(scenario="test", args={})
111
+ tool = AgentTool(task, model="claude")
112
+
113
+ assert tool._model == "claude"
114
+ assert tool._agent_cls is None
115
+
116
+ def test_accepts_agent_class(self) -> None:
117
+ """Can create with custom agent class."""
118
+ task = Task(scenario="test", args={})
119
+ mock_agent_cls = MagicMock()
120
+ tool = AgentTool(task, agent=mock_agent_cls) # type: ignore[arg-type]
121
+
122
+ assert tool._model is None
123
+ assert tool._agent_cls is mock_agent_cls
124
+
125
+ def test_name_defaults_to_scenario(self) -> None:
126
+ """Tool name defaults to scenario name."""
127
+ task = Task(scenario="investigate", args={})
128
+ tool = AgentTool(task, model="claude")
129
+
130
+ assert tool.name == "investigate"
131
+
132
+ def test_name_can_be_overridden(self) -> None:
133
+ """Tool name can be overridden."""
134
+ task = Task(scenario="investigate", args={})
135
+ tool = AgentTool(task, model="claude", name="custom_name")
136
+
137
+ assert tool.name == "custom_name"
138
+
139
+
140
+ class TestAgentToolParamFiltering:
141
+ """Tests for parameter filtering (eval-only params hidden)."""
142
+
143
+ def test_filters_eval_only_params(self) -> None:
144
+ """Eval-only params (| None = None) are filtered from visible_params."""
145
+ env = Environment("test")
146
+
147
+ # Use Union syntax for consistency across Python versions
148
+ @env.scenario()
149
+ async def investigate(
150
+ issue_id: str,
151
+ include_traces: bool = True,
152
+ expected_cause: str | None = None, # Eval only
153
+ ):
154
+ yield {"task": f"Investigate {issue_id}"}
155
+
156
+ task = env("investigate")
157
+ tool = AgentTool(task, model="claude")
158
+
159
+ # visible_params should only have issue_id and include_traces
160
+ assert "issue_id" in tool._visible_params
161
+ assert "include_traces" in tool._visible_params
162
+ assert "expected_cause" not in tool._visible_params
163
+
164
+ def test_all_required_params_visible(self) -> None:
165
+ """All required params are visible."""
166
+ env = Environment("test")
167
+
168
+ @env.scenario()
169
+ async def search(query: str, limit: int):
170
+ yield {"task": f"Search: {query}"}
171
+
172
+ task = env("search")
173
+ tool = AgentTool(task, model="claude")
174
+
175
+ assert "query" in tool._visible_params
176
+ assert "limit" in tool._visible_params
177
+
178
+ def test_optional_with_default_visible(self) -> None:
179
+ """Optional params with non-None defaults are visible."""
180
+ env = Environment("test")
181
+
182
+ @env.scenario()
183
+ async def fetch(url: str, request_timeout: int = 30, retries: int = 3):
184
+ yield {"task": f"Fetch {url}"}
185
+
186
+ task = env("fetch")
187
+ tool = AgentTool(task, model="claude")
188
+
189
+ assert "url" in tool._visible_params
190
+ assert "request_timeout" in tool._visible_params
191
+ assert "retries" in tool._visible_params
192
+
193
+
194
+ class TestAgentToolSchema:
195
+ """Tests for JSON schema generation."""
196
+
197
+ def test_builds_json_schema(self) -> None:
198
+ """Builds proper JSON schema from visible params."""
199
+ env = Environment("test")
200
+
201
+ @env.scenario()
202
+ async def investigate(issue_id: str, verbose: bool = False):
203
+ yield {"task": f"Investigate {issue_id}"}
204
+
205
+ task = env("investigate")
206
+ tool = AgentTool(task, model="claude")
207
+
208
+ schema = tool._param_schema
209
+ assert schema is not None
210
+ assert schema["type"] == "object"
211
+ assert "issue_id" in schema["properties"]
212
+ assert "verbose" in schema["properties"]
213
+ assert "issue_id" in schema["required"]
214
+ assert "verbose" not in schema["required"] # Has default
215
+
216
+ def test_schema_excludes_eval_only(self) -> None:
217
+ """Schema excludes eval-only params."""
218
+ env = Environment("test")
219
+
220
+ @env.scenario()
221
+ async def check(
222
+ item_id: str,
223
+ expected_status: str | None = None, # Eval only
224
+ ):
225
+ yield {"task": f"Check {item_id}"}
226
+
227
+ task = env("check")
228
+ tool = AgentTool(task, model="claude")
229
+
230
+ schema = tool._param_schema
231
+ assert schema is not None
232
+ assert "item_id" in schema["properties"]
233
+ assert "expected_status" not in schema["properties"]
234
+
235
+
236
+ class TestAgentToolMCP:
237
+ """Tests for MCP tool integration."""
238
+
239
+ def test_mcp_property_returns_tool(self) -> None:
240
+ """The mcp property returns a FastMCP FunctionTool."""
241
+ from fastmcp.tools import FunctionTool
242
+
243
+ env = Environment("test")
244
+
245
+ @env.scenario()
246
+ async def greet(name: str):
247
+ yield {"task": f"Greet {name}"}
248
+
249
+ task = env("greet")
250
+ tool = AgentTool(task, model="claude")
251
+
252
+ mcp_tool = tool.mcp
253
+ assert isinstance(mcp_tool, FunctionTool)
254
+
255
+ def test_mcp_has_filtered_parameters(self) -> None:
256
+ """MCP tool has filtered parameter schema."""
257
+ env = Environment("test")
258
+
259
+ @env.scenario()
260
+ async def analyze(
261
+ data: str,
262
+ expected_result: str | None = None, # Eval only
263
+ ):
264
+ yield {"task": f"Analyze {data}"}
265
+
266
+ task = env("analyze")
267
+ tool = AgentTool(task, model="claude")
268
+
269
+ mcp_tool = tool.mcp
270
+ params = mcp_tool.parameters # FunctionTool uses 'parameters'
271
+
272
+ assert "data" in params["properties"]
273
+ assert "expected_result" not in params["properties"]
274
+
275
+
276
+ class TestAgentToolCall:
277
+ """Tests for AgentTool.__call__."""
278
+
279
+ @pytest.mark.asyncio
280
+ async def test_filters_kwargs_to_visible_only(self) -> None:
281
+ """Call filters kwargs to visible params only."""
282
+ # Import modules first so patches work
283
+ import hud.agents
284
+ import hud.eval.manager # noqa: F401
285
+
286
+ env = Environment("test")
287
+
288
+ @env.scenario()
289
+ async def process(item: str, expected: str | None = None):
290
+ yield {"task": f"Process {item}"}
291
+
292
+ task = env("process")
293
+ tool = AgentTool(task, model="claude")
294
+
295
+ # Mock the eval context and agent
296
+ with (
297
+ patch("hud.eval.manager.run_eval") as mock_run_eval,
298
+ patch("hud.agents.create_agent") as mock_create_agent,
299
+ ):
300
+ mock_ctx = AsyncMock()
301
+ mock_ctx.__aenter__ = AsyncMock(return_value=mock_ctx)
302
+ mock_ctx.__aexit__ = AsyncMock(return_value=None)
303
+ mock_run_eval.return_value = mock_ctx
304
+
305
+ mock_agent = MagicMock()
306
+ mock_agent.run = AsyncMock(return_value=MagicMock(content="result"))
307
+ mock_create_agent.return_value = mock_agent
308
+
309
+ # Call with both visible and eval-only params
310
+ await tool(item="test", expected="should_be_filtered")
311
+
312
+ # Check that task was created with filtered args
313
+ call_args = mock_run_eval.call_args
314
+ task_arg = call_args[0][0]
315
+ assert "item" in task_arg.args
316
+ assert "expected" not in task_arg.args # Filtered out
317
+
318
+ @pytest.mark.asyncio
319
+ async def test_merges_template_args(self) -> None:
320
+ """Call merges kwargs with template args."""
321
+ # Import modules first so patches work
322
+ import hud.agents
323
+ import hud.eval.manager # noqa: F401
324
+
325
+ env = Environment("test")
326
+
327
+ @env.scenario()
328
+ async def search(query: str, limit: int = 10):
329
+ yield {"task": f"Search {query}"}
330
+
331
+ # Create template with some args pre-filled
332
+ task = env("search", limit=5)
333
+ tool = AgentTool(task, model="claude")
334
+
335
+ with (
336
+ patch("hud.eval.manager.run_eval") as mock_run_eval,
337
+ patch("hud.agents.create_agent") as mock_create_agent,
338
+ ):
339
+ mock_ctx = AsyncMock()
340
+ mock_ctx.__aenter__ = AsyncMock(return_value=mock_ctx)
341
+ mock_ctx.__aexit__ = AsyncMock(return_value=None)
342
+ mock_run_eval.return_value = mock_ctx
343
+
344
+ mock_agent = MagicMock()
345
+ mock_agent.run = AsyncMock(return_value=MagicMock(content="result"))
346
+ mock_create_agent.return_value = mock_agent
347
+
348
+ # Call with additional arg
349
+ await tool(query="test query")
350
+
351
+ # Check merged args
352
+ call_args = mock_run_eval.call_args
353
+ task_arg = call_args[0][0]
354
+ assert task_arg.args["query"] == "test query"
355
+ assert task_arg.args["limit"] == 5 # From template
hud/types.py CHANGED
@@ -8,7 +8,7 @@ from typing import Any, Literal
8
8
 
9
9
  import mcp.types as types
10
10
  from mcp.types import CallToolRequestParams, CallToolResult
11
- from pydantic import AliasChoices, BaseModel, ConfigDict, Field, field_validator
11
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
12
12
 
13
13
  from hud.settings import settings
14
14
  from hud.utils.env import resolve_env_vars as _resolve_env_vars
@@ -31,59 +31,87 @@ class AgentType(str, Enum):
31
31
 
32
32
  @property
33
33
  def cls(self) -> type:
34
- from hud.agents import OpenAIAgent, OperatorAgent
35
- from hud.agents.claude import ClaudeAgent
36
- from hud.agents.gemini import GeminiAgent
37
- from hud.agents.gemini_cua import GeminiCUAAgent
38
- from hud.agents.openai_chat import OpenAIChatAgent
34
+ if self == AgentType.CLAUDE:
35
+ from hud.agents.claude import ClaudeAgent
39
36
 
40
- mapping: dict[AgentType, type] = {
41
- AgentType.CLAUDE: ClaudeAgent,
42
- AgentType.OPENAI: OpenAIAgent,
43
- AgentType.OPERATOR: OperatorAgent,
44
- AgentType.GEMINI: GeminiAgent,
45
- AgentType.GEMINI_CUA: GeminiCUAAgent,
46
- AgentType.OPENAI_COMPATIBLE: OpenAIChatAgent,
47
- }
48
- if self == AgentType.INTEGRATION_TEST:
37
+ return ClaudeAgent
38
+ elif self == AgentType.OPENAI:
39
+ from hud.agents import OpenAIAgent
40
+
41
+ return OpenAIAgent
42
+ elif self == AgentType.OPERATOR:
43
+ from hud.agents import OperatorAgent
44
+
45
+ return OperatorAgent
46
+ elif self == AgentType.GEMINI:
47
+ from hud.agents.gemini import GeminiAgent
48
+
49
+ return GeminiAgent
50
+ elif self == AgentType.GEMINI_CUA:
51
+ from hud.agents.gemini_cua import GeminiCUAAgent
52
+
53
+ return GeminiCUAAgent
54
+ elif self == AgentType.OPENAI_COMPATIBLE:
55
+ from hud.agents.openai_chat import OpenAIChatAgent
56
+
57
+ return OpenAIChatAgent
58
+ elif self == AgentType.INTEGRATION_TEST:
49
59
  from hud.agents.misc.integration_test_agent import IntegrationTestRunner
50
60
 
51
61
  return IntegrationTestRunner
52
- if self not in mapping:
62
+ else:
53
63
  raise ValueError(f"Unsupported agent type: {self}")
64
+
65
+ @property
66
+ def config_cls(self) -> type:
67
+ """Get config class without importing agent (avoids SDK dependency)."""
68
+ from hud.agents.types import (
69
+ ClaudeConfig,
70
+ GeminiConfig,
71
+ GeminiCUAConfig,
72
+ OpenAIChatConfig,
73
+ OpenAIConfig,
74
+ OperatorConfig,
75
+ )
76
+
77
+ mapping: dict[AgentType, type] = {
78
+ AgentType.CLAUDE: ClaudeConfig,
79
+ AgentType.OPENAI: OpenAIConfig,
80
+ AgentType.OPERATOR: OperatorConfig,
81
+ AgentType.GEMINI: GeminiConfig,
82
+ AgentType.GEMINI_CUA: GeminiCUAConfig,
83
+ AgentType.OPENAI_COMPATIBLE: OpenAIChatConfig,
84
+ AgentType.INTEGRATION_TEST: BaseAgentConfig,
85
+ }
86
+ if self not in mapping:
87
+ raise ValueError(f"Unsupported agent type for config: {self}")
54
88
  return mapping[self]
55
89
 
56
90
 
57
91
  class BaseAgentConfig(BaseModel):
58
92
  """Agent configuration for LLM-specific settings.
59
93
 
60
- Note: allowed_tools, disallowed_tools, append_setup_output, and initial_screenshot
61
- are kept for backwards compatibility with v4 task configs but are no longer applied
62
- at the agent level. These should be configured on the Environment/Task instead.
94
+ Note: allowed_tools, disallowed_tools, response_tool_name, append_setup_output,
95
+ and initial_screenshot are kept for backwards compatibility with v4 task configs
96
+ but are no longer applied at the agent level. These should be configured on the
97
+ Environment/Task instead.
63
98
  """
64
99
 
65
100
  model_config = ConfigDict(arbitrary_types_allowed=True, extra="forbid", populate_by_name=True)
66
101
 
67
- # Model identifier - use 'model' (preferred) or 'checkpoint_name' (alias)
68
- model: str | None = Field(
69
- default=None, validation_alias=AliasChoices("model", "checkpoint_name")
70
- )
71
- model_name: str = "Agent" # Human-readable display name
72
-
73
102
  # LLM-specific setting
74
103
  system_prompt: str | None = None
75
104
 
76
- # Deprecated: kept for backwards compat with v4 task configs, not applied by agent
105
+ # Deprecated: kept for backwards compat with v4 task configs
106
+ # allowed_tools/disallowed_tools are applied at Environment level
107
+ # append_setup_output is applied by EvalContext -> agent
108
+ # response_tool_name and initial_screenshot are parsed but NOT implemented
77
109
  allowed_tools: list[str] | None = None
78
110
  disallowed_tools: list[str] | None = None
79
- append_setup_output: bool = True
80
- append_setup_tool: bool = True # Alias for append_setup_output (backwards compat)
81
- initial_screenshot: bool = True
82
-
83
- @property
84
- def checkpoint_name(self) -> str | None:
85
- """Alias for model (for backwards compatibility)."""
86
- return self.model
111
+ response_tool_name: str | None = None # Not implemented
112
+ append_setup_output: bool = False
113
+ append_setup_tool: bool = False # Alias for append_setup_output
114
+ initial_screenshot: bool = False # Not implemented
87
115
 
88
116
 
89
117
  class LegacyTask(BaseModel):
hud/utils/hud_console.py CHANGED
@@ -21,6 +21,7 @@ import traceback
21
21
  from typing import TYPE_CHECKING, Any, Literal, Self
22
22
 
23
23
  from rich.console import Console
24
+ from rich.markup import escape
24
25
  from rich.panel import Panel
25
26
  from rich.table import Table
26
27
 
@@ -95,7 +96,7 @@ class HUDConsole:
95
96
  stderr: If True, output to stderr (default), otherwise stdout
96
97
  """
97
98
  console = self._stderr_console if stderr else self._stdout_console
98
- console.print(f"[{GREEN}]✅ {message}[/{GREEN}]")
99
+ console.print(f"[{GREEN}]✅ {escape(message)}[/{GREEN}]")
99
100
 
100
101
  def error(self, message: str, stderr: bool = True) -> None:
101
102
  """Print an error message.
@@ -106,10 +107,12 @@ class HUDConsole:
106
107
  """
107
108
  console = self._stderr_console if stderr else self._stdout_console
108
109
  tb = traceback.format_exc()
110
+ escaped_message = escape(message)
109
111
  if "NoneType: None" not in tb:
110
- console.print(f"[{RED} not bold]❌ {message}\n{tb}[/{RED} not bold]")
112
+ escaped_tb = escape(tb)
113
+ console.print(f"[{RED} not bold]❌ {escaped_message}\n{escaped_tb}[/{RED} not bold]")
111
114
  else:
112
- console.print(f"[{RED} not bold]❌ {message}[/{RED} not bold]")
115
+ console.print(f"[{RED} not bold]❌ {escaped_message}[/{RED} not bold]")
113
116
 
114
117
  def warning(self, message: str, stderr: bool = True) -> None:
115
118
  """Print a warning message.
@@ -119,7 +122,7 @@ class HUDConsole:
119
122
  stderr: If True, output to stderr (default), otherwise stdout
120
123
  """
121
124
  console = self._stderr_console if stderr else self._stdout_console
122
- console.print(f"⚠️ [{YELLOW} not bold]{message}[/{YELLOW} not bold]")
125
+ console.print(f"⚠️ [{YELLOW} not bold]{escape(message)}[/{YELLOW} not bold]")
123
126
 
124
127
  def info(self, message: str, stderr: bool = True) -> None:
125
128
  """Print an info message.
@@ -129,7 +132,7 @@ class HUDConsole:
129
132
  stderr: If True, output to stderr (default), otherwise stdout
130
133
  """
131
134
  console = self._stderr_console if stderr else self._stdout_console
132
- console.print(f"[{TEXT} not bold]{message}[/{TEXT} not bold]")
135
+ console.print(f"[{TEXT} not bold]{escape(message)}[/{TEXT} not bold]")
133
136
 
134
137
  def print(self, message: str, stderr: bool = True) -> None:
135
138
  """Print a message.
@@ -151,7 +154,7 @@ class HUDConsole:
151
154
  """
152
155
  console = self._stderr_console if stderr else self._stdout_console
153
156
  console.print(
154
- f"[{DIM} not bold][default]{label}[/default][/{DIM} not bold] [default]{value}[/default]" # noqa: E501
157
+ f"[{DIM} not bold][default]{escape(label)}[/default][/{DIM} not bold] [default]{escape(value)}[/default]" # noqa: E501
155
158
  )
156
159
 
157
160
  def link(self, url: str, stderr: bool = True) -> None:
@@ -162,7 +165,7 @@ class HUDConsole:
162
165
  stderr: If True, output to stderr (default), otherwise stdout
163
166
  """
164
167
  console = self._stderr_console if stderr else self._stdout_console
165
- console.print(f"[{SECONDARY} underline]{url}[/{SECONDARY} underline]")
168
+ console.print(f"[{SECONDARY} underline]{escape(url)}[/{SECONDARY} underline]")
166
169
 
167
170
  def json_config(self, json_str: str, stderr: bool = True) -> None:
168
171
  """Print JSON configuration with neutral theme.
@@ -173,7 +176,7 @@ class HUDConsole:
173
176
  """
174
177
  # Print JSON with neutral grey text
175
178
  console = self._stderr_console if stderr else self._stdout_console
176
- console.print(f"[{TEXT}]{json_str}[/{TEXT}]")
179
+ console.print(f"[{TEXT}]{escape(json_str)}[/{TEXT}]")
177
180
 
178
181
  def key_value_table(
179
182
  self, data: dict[str, str | int | float], show_header: bool = False, stderr: bool = True
@@ -203,7 +206,7 @@ class HUDConsole:
203
206
  stderr: If True, output to stderr (default), otherwise stdout
204
207
  """
205
208
  console = self._stderr_console if stderr else self._stdout_console
206
- console.print(f"[{DIM}]{message}[/{DIM}]")
209
+ console.print(f"[{DIM}]{escape(message)}[/{DIM}]")
207
210
 
208
211
  def phase(self, phase_num: int, title: str, stderr: bool = True) -> None:
209
212
  """Print a phase header (for debug command).
@@ -236,7 +239,7 @@ class HUDConsole:
236
239
  stderr: If True, output to stderr (default), otherwise stdout
237
240
  """
238
241
  console = self._stderr_console if stderr else self._stdout_console
239
- console.print(f"[rgb(181,137,0)]💡 Hint: {hint}[/rgb(181,137,0)]")
242
+ console.print(f"[rgb(181,137,0)]💡 Hint: {escape(hint)}[/rgb(181,137,0)]")
240
243
 
241
244
  def status_item(
242
245
  self,
@@ -265,10 +268,14 @@ class HUDConsole:
265
268
  indicator = indicators.get(status, indicators["info"])
266
269
  console = self._stderr_console if stderr else self._stdout_console
267
270
 
271
+ escaped_label = escape(label)
272
+ escaped_value = escape(value)
268
273
  if primary:
269
- console.print(f"{indicator} {label}: [bold {SECONDARY}]{value}[/bold {SECONDARY}]")
274
+ console.print(
275
+ f"{indicator} {escaped_label}: [bold {SECONDARY}]{escaped_value}[/bold {SECONDARY}]"
276
+ )
270
277
  else:
271
- console.print(f"{indicator} {label}: [{TEXT}]{value}[/{TEXT}]")
278
+ console.print(f"{indicator} {escaped_label}: [{TEXT}]{escaped_value}[/{TEXT}]")
272
279
 
273
280
  def command_example(
274
281
  self, command: str, description: str | None = None, stderr: bool = True
@@ -546,7 +553,12 @@ class HUDConsole:
546
553
  except (TypeError, ValueError):
547
554
  args_str = str(arguments)[:60]
548
555
 
549
- return f"[{GOLD}]→[/{GOLD}] [bold {TEXT}]{name}[/bold {TEXT}][{DIM}]({args_str})[/{DIM}]"
556
+ escaped_name = escape(name)
557
+ escaped_args = escape(args_str)
558
+ return (
559
+ f"[{GOLD}]→[/{GOLD}] [bold {TEXT}]{escaped_name}[/bold {TEXT}]"
560
+ f"[{DIM}]({escaped_args})[/{DIM}]"
561
+ )
550
562
 
551
563
  def format_tool_result(self, content: str, is_error: bool = False) -> str:
552
564
  """Format a tool result in compact HUD style.
@@ -562,11 +574,12 @@ class HUDConsole:
562
574
  if len(content) > 80:
563
575
  content = content[:77] + "..."
564
576
 
577
+ escaped_content = escape(content)
565
578
  # Format with status using HUD colors
566
579
  if is_error:
567
- return f" [{RED}]✗[/{RED}] [{DIM}]{content}[/{DIM}]"
580
+ return f" [{RED}]✗[/{RED}] [{DIM}]{escaped_content}[/{DIM}]"
568
581
  else:
569
- return f" [{GREEN}]✓[/{GREEN}] [{TEXT}]{content}[/{TEXT}]"
582
+ return f" [{GREEN}]✓[/{GREEN}] [{TEXT}]{escaped_content}[/{TEXT}]"
570
583
 
571
584
  def confirm(self, message: str, default: bool = True) -> bool:
572
585
  """Print a confirmation message.
@@ -590,12 +603,12 @@ class HUDConsole:
590
603
  stderr: If True, output to stderr
591
604
  """
592
605
  console = self._stderr_console if stderr else self._stdout_console
593
- console.print(f"[{color}]{symbol}[/{color}] {message}")
606
+ console.print(f"[{color}]{symbol}[/{color}] {escape(message)}")
594
607
 
595
608
  def detail(self, message: str, stderr: bool = True) -> None:
596
609
  """Print an indented detail line with gold pointer symbol."""
597
610
  console = self._stderr_console if stderr else self._stdout_console
598
- console.print(f" [{GOLD}]{Symbols.ITEM}[/{GOLD}] {message}")
611
+ console.print(f" [{GOLD}]{Symbols.ITEM}[/{GOLD}] {escape(message)}")
599
612
 
600
613
  def flow(self, message: str, stderr: bool = True) -> None:
601
614
  """Print a flow/transition message with wave symbol."""
@@ -118,7 +118,7 @@ def _ensure_strict_json_schema(
118
118
  if "default" in json_schema:
119
119
  json_schema.pop("default")
120
120
 
121
- for keyword in ("title", "examples"):
121
+ for keyword in ("title", "examples", "format"):
122
122
  json_schema.pop(keyword, None)
123
123
 
124
124
  ref = json_schema.get("$ref")
@@ -5,4 +5,4 @@ def test_import():
5
5
  """Test that the package can be imported."""
6
6
  import hud
7
7
 
8
- assert hud.__version__ == "0.5.1"
8
+ assert hud.__version__ == "0.5.13"
hud/version.py CHANGED
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.5.1"
7
+ __version__ = "0.5.13"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.5.1
3
+ Version: 0.5.13
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -91,7 +91,7 @@ Requires-Dist: pyright==1.1.407; extra == 'dev'
91
91
  Requires-Dist: pytest-asyncio; extra == 'dev'
92
92
  Requires-Dist: pytest-cov; extra == 'dev'
93
93
  Requires-Dist: pytest-mock; extra == 'dev'
94
- Requires-Dist: pytest<9,>=8.1.1; extra == 'dev'
94
+ Requires-Dist: pytest>=8.1.1; extra == 'dev'
95
95
  Requires-Dist: ruff>=0.11.8; extra == 'dev'
96
96
  Requires-Dist: tornado>=6.5.2; extra == 'dev'
97
97
  Description-Content-Type: text/markdown