hud-python 0.4.51__py3-none-any.whl → 0.4.53__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (88)
  1. hud/__init__.py +13 -1
  2. hud/agents/base.py +14 -3
  3. hud/agents/lite_llm.py +1 -1
  4. hud/agents/openai_chat_generic.py +15 -3
  5. hud/agents/tests/test_base.py +9 -2
  6. hud/agents/tests/test_base_runtime.py +164 -0
  7. hud/cli/__init__.py +18 -25
  8. hud/cli/build.py +35 -27
  9. hud/cli/dev.py +11 -29
  10. hud/cli/eval.py +114 -145
  11. hud/cli/tests/test_analyze_module.py +120 -0
  12. hud/cli/tests/test_build.py +26 -3
  13. hud/cli/tests/test_build_failure.py +41 -0
  14. hud/cli/tests/test_build_module.py +50 -0
  15. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  16. hud/cli/tests/test_cli_root.py +134 -0
  17. hud/cli/tests/test_eval.py +4 -0
  18. hud/cli/tests/test_mcp_server.py +8 -7
  19. hud/cli/tests/test_push_happy.py +74 -0
  20. hud/cli/tests/test_push_wrapper.py +23 -0
  21. hud/cli/utils/docker.py +120 -1
  22. hud/cli/utils/runner.py +1 -1
  23. hud/cli/utils/tasks.py +4 -1
  24. hud/cli/utils/tests/__init__.py +0 -0
  25. hud/cli/utils/tests/test_config.py +58 -0
  26. hud/cli/utils/tests/test_docker.py +93 -0
  27. hud/cli/utils/tests/test_docker_hints.py +71 -0
  28. hud/cli/utils/tests/test_env_check.py +74 -0
  29. hud/cli/utils/tests/test_environment.py +42 -0
  30. hud/cli/utils/tests/test_interactive_module.py +60 -0
  31. hud/cli/utils/tests/test_local_runner.py +50 -0
  32. hud/cli/utils/tests/test_logging_utils.py +23 -0
  33. hud/cli/utils/tests/test_metadata.py +49 -0
  34. hud/cli/utils/tests/test_package_runner.py +35 -0
  35. hud/cli/utils/tests/test_registry_utils.py +49 -0
  36. hud/cli/utils/tests/test_remote_runner.py +25 -0
  37. hud/cli/utils/tests/test_runner_modules.py +52 -0
  38. hud/cli/utils/tests/test_source_hash.py +36 -0
  39. hud/cli/utils/tests/test_tasks.py +80 -0
  40. hud/cli/utils/version_check.py +257 -0
  41. hud/clients/base.py +1 -1
  42. hud/clients/mcp_use.py +3 -1
  43. hud/datasets/parallel.py +2 -2
  44. hud/datasets/runner.py +85 -24
  45. hud/datasets/tests/__init__.py +0 -0
  46. hud/datasets/tests/test_runner.py +106 -0
  47. hud/datasets/tests/test_utils.py +228 -0
  48. hud/otel/config.py +8 -6
  49. hud/otel/context.py +4 -4
  50. hud/otel/exporters.py +231 -57
  51. hud/otel/tests/__init__.py +0 -1
  52. hud/otel/tests/test_instrumentation.py +207 -0
  53. hud/rl/learner.py +1 -1
  54. hud/server/tests/test_server_extra.py +2 -0
  55. hud/shared/exceptions.py +35 -9
  56. hud/shared/hints.py +25 -0
  57. hud/shared/requests.py +15 -3
  58. hud/shared/tests/test_exceptions.py +39 -30
  59. hud/shared/tests/test_hints.py +167 -0
  60. hud/telemetry/__init__.py +30 -6
  61. hud/telemetry/async_context.py +331 -0
  62. hud/telemetry/job.py +51 -12
  63. hud/telemetry/tests/test_async_context.py +242 -0
  64. hud/telemetry/tests/test_instrument.py +414 -0
  65. hud/telemetry/tests/test_job.py +609 -0
  66. hud/telemetry/tests/test_trace.py +184 -6
  67. hud/telemetry/trace.py +16 -17
  68. hud/tools/computer/qwen.py +4 -1
  69. hud/tools/computer/settings.py +2 -2
  70. hud/tools/executors/base.py +4 -2
  71. hud/tools/tests/test_submit.py +85 -0
  72. hud/tools/tests/test_types.py +193 -0
  73. hud/types.py +7 -1
  74. hud/utils/agent_factories.py +1 -3
  75. hud/utils/mcp.py +1 -1
  76. hud/utils/task_tracking.py +223 -0
  77. hud/utils/tests/test_agent_factories.py +60 -0
  78. hud/utils/tests/test_mcp.py +4 -6
  79. hud/utils/tests/test_pretty_errors.py +186 -0
  80. hud/utils/tests/test_tasks.py +187 -0
  81. hud/utils/tests/test_tool_shorthand.py +154 -0
  82. hud/utils/tests/test_version.py +1 -1
  83. hud/version.py +1 -1
  84. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/METADATA +48 -48
  85. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/RECORD +88 -47
  86. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/WHEEL +0 -0
  87. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/entry_points.txt +0 -0
  88. {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/licenses/LICENSE +0 -0
@@ -2,9 +2,11 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from unittest.mock import patch
5
+ from unittest.mock import AsyncMock, patch
6
6
 
7
- from hud.telemetry.trace import trace
7
+ import pytest
8
+
9
+ from hud.telemetry.trace import Trace, trace
8
10
 
9
11
 
10
12
  class TestTraceAPI:
@@ -19,11 +21,11 @@ class TestTraceAPI:
19
21
  patch("hud.settings.get_settings", return_value=mock_settings),
20
22
  patch("hud.telemetry.trace.OtelTrace") as mock_otel_trace,
21
23
  ):
22
- mock_otel_trace.return_value.__enter__.return_value = "custom-otlp-trace"
24
+ mock_otel_trace.return_value.__enter__.return_value = "1234567890"
23
25
 
24
26
  with trace("test-trace") as task_run_id:
25
27
  # Should use placeholder ID for custom backends
26
- assert task_run_id.id == "custom-otlp-trace"
28
+ assert len(task_run_id.id) == 36
27
29
 
28
30
  def test_trace_with_enabled_telemetry_and_api_key(self):
29
31
  """Test trace behavior when telemetry is enabled with API key."""
@@ -59,5 +61,181 @@ class TestTraceAPI:
59
61
  mock_otel_trace.return_value.__enter__.return_value = "custom-otlp-trace"
60
62
 
61
63
  with trace("test-trace") as task_run_id:
62
- # Should use custom backend placeholder
63
- assert task_run_id.id == "custom-otlp-trace"
64
+ # In absence of HUD API key, ID should still be a string
65
+ assert isinstance(task_run_id.id, str)
66
+
67
+ def test_trace_with_job_id(self):
68
+ """Test trace with job_id parameter."""
69
+ mock_settings = type("Settings", (), {"telemetry_enabled": True, "api_key": "test-key"})()
70
+
71
+ with (
72
+ patch("hud.settings.get_settings", return_value=mock_settings),
73
+ patch("hud.telemetry.trace.OtelTrace") as mock_otel_trace,
74
+ trace("test-trace", job_id="job-123") as trace_obj,
75
+ ):
76
+ assert trace_obj.job_id == "job-123"
77
+
78
+ # Check OtelTrace was called with job_id
79
+ call_kwargs = mock_otel_trace.call_args[1]
80
+ assert call_kwargs["job_id"] == "job-123"
81
+
82
+ def test_trace_with_task_id(self):
83
+ """Test trace with task_id parameter."""
84
+ mock_settings = type("Settings", (), {"telemetry_enabled": True, "api_key": "test-key"})()
85
+
86
+ with (
87
+ patch("hud.settings.get_settings", return_value=mock_settings),
88
+ patch("hud.telemetry.trace.OtelTrace"),
89
+ trace("test-trace", task_id="task-456") as trace_obj,
90
+ ):
91
+ assert trace_obj.task_id == "task-456"
92
+
93
+ def test_trace_with_attributes(self):
94
+ """Test trace with custom attributes."""
95
+ mock_settings = type("Settings", (), {"telemetry_enabled": True, "api_key": "test-key"})()
96
+
97
+ with (
98
+ patch("hud.settings.get_settings", return_value=mock_settings),
99
+ patch("hud.telemetry.trace.OtelTrace") as mock_otel_trace,
100
+ trace("test-trace", attrs={"custom": "value"}),
101
+ ):
102
+ # Check OtelTrace was called with attributes
103
+ call_kwargs = mock_otel_trace.call_args[1]
104
+ assert call_kwargs["attributes"] == {"custom": "value"}
105
+
106
+ def test_trace_non_root(self):
107
+ """Test trace with root=False."""
108
+ mock_settings = type("Settings", (), {"telemetry_enabled": True, "api_key": "test-key"})()
109
+
110
+ with (
111
+ patch("hud.settings.get_settings", return_value=mock_settings),
112
+ patch("hud.telemetry.trace.OtelTrace") as mock_otel_trace,
113
+ trace("test-trace", root=False),
114
+ ):
115
+ # Check OtelTrace was called with is_root=False
116
+ call_kwargs = mock_otel_trace.call_args[1]
117
+ assert call_kwargs["is_root"] is False
118
+
119
+
120
+ class TestTraceClass:
121
+ """Tests for Trace class."""
122
+
123
+ def test_trace_initialization(self):
124
+ """Test Trace initialization."""
125
+ trace_obj = Trace(
126
+ trace_id="test-id",
127
+ name="Test Trace",
128
+ job_id="job-123",
129
+ task_id="task-456",
130
+ )
131
+
132
+ assert trace_obj.id == "test-id"
133
+ assert trace_obj.name == "Test Trace"
134
+ assert trace_obj.job_id == "job-123"
135
+ assert trace_obj.task_id == "task-456"
136
+ assert trace_obj.created_at is not None
137
+
138
+ @pytest.mark.asyncio
139
+ async def test_trace_log(self):
140
+ """Test Trace async log method."""
141
+ trace_obj = Trace("test-id", "Test")
142
+
143
+ with (
144
+ patch("hud.telemetry.trace.settings") as mock_settings,
145
+ patch("hud.telemetry.trace.make_request", new_callable=AsyncMock) as mock_request,
146
+ ):
147
+ mock_settings.telemetry_enabled = True
148
+ mock_settings.api_key = "test-key"
149
+ mock_settings.hud_telemetry_url = "https://test.com"
150
+
151
+ await trace_obj.log({"metric": 1.0})
152
+
153
+ mock_request.assert_called_once()
154
+ call_kwargs = mock_request.call_args[1]
155
+ assert call_kwargs["json"]["metrics"] == {"metric": 1.0}
156
+
157
+ @pytest.mark.asyncio
158
+ async def test_trace_log_telemetry_disabled(self):
159
+ """Test Trace log when telemetry is disabled."""
160
+ trace_obj = Trace("test-id", "Test")
161
+
162
+ with (
163
+ patch("hud.telemetry.trace.settings") as mock_settings,
164
+ patch("hud.telemetry.trace.make_request", new_callable=AsyncMock) as mock_request,
165
+ ):
166
+ mock_settings.telemetry_enabled = False
167
+
168
+ await trace_obj.log({"metric": 1.0})
169
+
170
+ mock_request.assert_not_called()
171
+
172
+ @pytest.mark.asyncio
173
+ async def test_trace_log_error(self):
174
+ """Test Trace log handles errors gracefully."""
175
+ trace_obj = Trace("test-id", "Test")
176
+
177
+ with (
178
+ patch("hud.telemetry.trace.settings") as mock_settings,
179
+ patch("hud.telemetry.trace.make_request", new_callable=AsyncMock) as mock_request,
180
+ ):
181
+ mock_settings.telemetry_enabled = True
182
+ mock_settings.api_key = "test-key"
183
+ mock_settings.hud_telemetry_url = "https://test.com"
184
+ mock_request.side_effect = Exception("Network error")
185
+
186
+ # Should not raise
187
+ await trace_obj.log({"metric": 1.0})
188
+
189
+ def test_trace_log_sync(self):
190
+ """Test Trace sync log method."""
191
+ trace_obj = Trace("test-id", "Test")
192
+
193
+ with (
194
+ patch("hud.telemetry.trace.settings") as mock_settings,
195
+ patch("hud.telemetry.trace.make_request_sync") as mock_request,
196
+ ):
197
+ mock_settings.telemetry_enabled = True
198
+ mock_settings.api_key = "test-key"
199
+ mock_settings.hud_telemetry_url = "https://test.com"
200
+
201
+ trace_obj.log_sync({"metric": 1.0})
202
+
203
+ mock_request.assert_called_once()
204
+
205
+ def test_trace_log_sync_telemetry_disabled(self):
206
+ """Test Trace sync log when telemetry is disabled."""
207
+ trace_obj = Trace("test-id", "Test")
208
+
209
+ with (
210
+ patch("hud.telemetry.trace.settings") as mock_settings,
211
+ patch("hud.telemetry.trace.make_request_sync") as mock_request,
212
+ ):
213
+ mock_settings.telemetry_enabled = False
214
+
215
+ trace_obj.log_sync({"metric": 1.0})
216
+
217
+ mock_request.assert_not_called()
218
+
219
+ def test_trace_log_sync_error(self):
220
+ """Test Trace sync log handles errors gracefully."""
221
+ trace_obj = Trace("test-id", "Test")
222
+
223
+ with (
224
+ patch("hud.telemetry.trace.settings") as mock_settings,
225
+ patch("hud.telemetry.trace.make_request_sync") as mock_request,
226
+ ):
227
+ mock_settings.telemetry_enabled = True
228
+ mock_settings.api_key = "test-key"
229
+ mock_settings.hud_telemetry_url = "https://test.com"
230
+ mock_request.side_effect = Exception("Network error")
231
+
232
+ # Should not raise
233
+ trace_obj.log_sync({"metric": 1.0})
234
+
235
+ def test_trace_repr(self):
236
+ """Test Trace __repr__."""
237
+ trace_obj = Trace("test-id", "Test Trace")
238
+
239
+ repr_str = repr(trace_obj)
240
+ assert "test-id" in repr_str
241
+ assert "Test Trace" in repr_str
hud/telemetry/trace.py CHANGED
@@ -94,7 +94,7 @@ def trace(
94
94
  job_id: str | None = None,
95
95
  task_id: str | None = None,
96
96
  ) -> Generator[Trace, None, None]:
97
- """Start a HUD trace context.
97
+ """Start a HUD trace context for telemetry tracking.
98
98
 
99
99
  A unique task_run_id is automatically generated for each trace.
100
100
 
@@ -108,22 +108,21 @@ def trace(
108
108
  Yields:
109
109
  Trace: The trace object with logging capabilities
110
110
 
111
- Usage:
112
- import hud
113
-
114
- # Basic usage
115
- with hud.trace("My Task") as trace:
116
- # Your code here
117
- trace.log_sync({"step": 1, "progress": 0.5})
118
-
119
- # Async logging
120
- async with hud.trace("Async Task") as trace:
121
- await trace.log({"loss": 0.23, "accuracy": 0.95})
122
-
123
- # With job association
124
- with hud.job("Training Run") as job:
125
- with hud.trace("Epoch 1", job_id=job.id) as trace:
126
- trace.log_sync({"epoch": 1, "loss": 0.5})
111
+ Example:
112
+ >>> import hud
113
+ >>> # Synchronous code
114
+ >>> with hud.trace("My Task") as trace:
115
+ ... do_work()
116
+ ... trace.log_sync({"step": 1, "progress": 0.5})
117
+ >>> # For async code with HIGH CONCURRENCY (200+ tasks), use async_trace
118
+ >>> async with hud.async_trace("My Async Task") as trace:
119
+ ... await do_async_work()
120
+ ... await trace.log({"loss": 0.23, "accuracy": 0.95})
121
+
122
+ Note:
123
+ For simple async code (< 30 parallel tasks), this context manager works fine
124
+ with `async with`. Use `hud.async_trace()` only for high-concurrency scenarios
125
+ (200+ parallel tasks) where event loop blocking becomes an issue.
127
126
  """
128
127
  # Ensure telemetry is configured
129
128
  configure_telemetry()
@@ -424,7 +424,10 @@ coordinate on the screen.
424
424
  # Rescale screenshot if requested
425
425
  screenshot_base64 = await self._rescale_screenshot(screenshot_base64)
426
426
  result = ContentResult(
427
- output=result.output, error=result.error, base64_image=screenshot_base64
427
+ # note: we suppress the output since it's not useful
428
+ output="",
429
+ error=result.error,
430
+ base64_image=screenshot_base64,
428
431
  )
429
432
 
430
433
  # Convert to content blocks
@@ -63,12 +63,12 @@ class ComputerSettings(BaseSettings):
63
63
  )
64
64
 
65
65
  QWEN_COMPUTER_WIDTH: int = Field(
66
- default=1920,
66
+ default=700,
67
67
  description="Width of the display to use for the Qwen computer tools",
68
68
  validation_alias="QWEN_COMPUTER_WIDTH",
69
69
  )
70
70
  QWEN_COMPUTER_HEIGHT: int = Field(
71
- default=1080,
71
+ default=448,
72
72
  description="Height of the display to use for the Qwen computer tools",
73
73
  validation_alias="QWEN_COMPUTER_HEIGHT",
74
74
  )
@@ -280,7 +280,7 @@ class BaseExecutor:
280
280
 
281
281
  # ===== Utility Actions =====
282
282
 
283
- async def wait(self, time: int) -> ContentResult:
283
+ async def wait(self, time: int, take_screenshot: bool = True) -> ContentResult:
284
284
  """
285
285
  Wait for specified time.
286
286
 
@@ -289,7 +289,9 @@ class BaseExecutor:
289
289
  """
290
290
  duration_seconds = time / 1000.0
291
291
  await asyncio.sleep(duration_seconds)
292
- return ContentResult(output=f"Waited {time}ms")
292
+ # take screenshot
293
+ screenshot = await self.screenshot() if take_screenshot else None
294
+ return ContentResult(output=f"Waited {time}ms", base64_image=screenshot)
293
295
 
294
296
  async def screenshot(self) -> str | None:
295
297
  """
@@ -0,0 +1,85 @@
1
+ from __future__ import annotations
2
+
3
+ import pytest
4
+ from mcp.types import TextContent
5
+
6
+ from hud.tools.submit import SubmitTool, get_submission, set_submission
7
+
8
+
9
+ @pytest.fixture(autouse=True)
10
+ def reset_submission():
11
+ """Reset submission before each test."""
12
+ set_submission(None)
13
+ yield
14
+ set_submission(None)
15
+
16
+
17
+ def test_set_and_get_submission():
18
+ """Test setting and getting submission value."""
19
+ assert get_submission() is None
20
+
21
+ set_submission("test value")
22
+ assert get_submission() == "test value"
23
+
24
+ set_submission("another value")
25
+ assert get_submission() == "another value"
26
+
27
+ set_submission(None)
28
+ assert get_submission() is None
29
+
30
+
31
+ @pytest.mark.asyncio
32
+ async def test_submit_tool_with_response():
33
+ """Test SubmitTool with a response string."""
34
+ tool = SubmitTool()
35
+
36
+ result = await tool(response="Test response")
37
+
38
+ assert get_submission() == "Test response"
39
+ assert len(result) == 1
40
+ assert isinstance(result[0], TextContent)
41
+ assert result[0].text == "Test response"
42
+
43
+
44
+ @pytest.mark.asyncio
45
+ async def test_submit_tool_with_none():
46
+ """Test SubmitTool with None response."""
47
+ tool = SubmitTool()
48
+
49
+ result = await tool(response=None)
50
+
51
+ assert get_submission() is None
52
+ assert len(result) == 0
53
+
54
+
55
+ @pytest.mark.asyncio
56
+ async def test_submit_tool_with_empty_string():
57
+ """Test SubmitTool with empty string."""
58
+ tool = SubmitTool()
59
+
60
+ result = await tool(response="")
61
+
62
+ assert get_submission() == ""
63
+ assert len(result) == 0
64
+
65
+
66
+ @pytest.mark.asyncio
67
+ async def test_submit_tool_overwrite():
68
+ """Test that submitting overwrites previous submission."""
69
+ tool = SubmitTool()
70
+
71
+ await tool(response="First submission")
72
+ assert get_submission() == "First submission"
73
+
74
+ await tool(response="Second submission")
75
+ assert get_submission() == "Second submission"
76
+
77
+
78
+ @pytest.mark.asyncio
79
+ async def test_submit_tool_properties():
80
+ """Test SubmitTool properties."""
81
+ tool = SubmitTool()
82
+
83
+ assert tool.name == "response"
84
+ assert tool.title == "Submit Tool"
85
+ assert "final response" in tool.description.lower()
@@ -0,0 +1,193 @@
1
+ from __future__ import annotations
2
+
3
+ import pytest
4
+ from mcp.types import ImageContent, TextContent
5
+
6
+ from hud.tools.types import ContentResult, EvaluationResult, ToolError
7
+
8
+
9
+ def test_evaluation_result_defaults():
10
+ """Test EvaluationResult with default values."""
11
+ result = EvaluationResult()
12
+
13
+ assert result.reward == 0.0
14
+ assert result.done is False
15
+ assert result.content is None
16
+ assert result.info == {}
17
+ assert result.isError is False
18
+
19
+
20
+ def test_evaluation_result_with_values():
21
+ """Test EvaluationResult with custom values."""
22
+ result = EvaluationResult(
23
+ reward=0.95,
24
+ done=True,
25
+ content="Task completed successfully",
26
+ info={"steps": 5},
27
+ isError=False,
28
+ )
29
+
30
+ assert result.reward == 0.95
31
+ assert result.done is True
32
+ assert result.content == "Task completed successfully"
33
+ assert result.info == {"steps": 5}
34
+ assert result.isError is False
35
+
36
+
37
+ def test_content_result_defaults():
38
+ """Test ContentResult with default values."""
39
+ result = ContentResult()
40
+
41
+ assert result.output is None
42
+ assert result.error is None
43
+ assert result.base64_image is None
44
+ assert result.system is None
45
+
46
+
47
+ def test_content_result_with_values():
48
+ """Test ContentResult with custom values."""
49
+ result = ContentResult(
50
+ output="Command executed",
51
+ error="No errors",
52
+ base64_image="base64data",
53
+ system="System message",
54
+ )
55
+
56
+ assert result.output == "Command executed"
57
+ assert result.error == "No errors"
58
+ assert result.base64_image == "base64data"
59
+ assert result.system == "System message"
60
+
61
+
62
+ def test_content_result_add_both_output():
63
+ """Test adding two ContentResults with output."""
64
+ result1 = ContentResult(output="Part 1")
65
+ result2 = ContentResult(output=" Part 2")
66
+
67
+ combined = result1 + result2
68
+
69
+ assert combined.output == "Part 1 Part 2"
70
+ assert combined.error is None
71
+ assert combined.base64_image is None
72
+
73
+
74
+ def test_content_result_add_both_error():
75
+ """Test adding two ContentResults with errors."""
76
+ result1 = ContentResult(error="Error 1")
77
+ result2 = ContentResult(error=" Error 2")
78
+
79
+ combined = result1 + result2
80
+
81
+ assert combined.error == "Error 1 Error 2"
82
+ assert combined.output is None
83
+
84
+
85
+ def test_content_result_add_both_system():
86
+ """Test adding two ContentResults with system messages."""
87
+ result1 = ContentResult(system="System 1")
88
+ result2 = ContentResult(system=" System 2")
89
+
90
+ combined = result1 + result2
91
+
92
+ assert combined.system == "System 1 System 2"
93
+
94
+
95
+ def test_content_result_add_one_sided():
96
+ """Test adding ContentResults where only one has values."""
97
+ result1 = ContentResult(output="Output")
98
+ result2 = ContentResult(error="Error")
99
+
100
+ combined = result1 + result2
101
+
102
+ assert combined.output == "Output"
103
+ assert combined.error == "Error"
104
+
105
+
106
+ def test_content_result_add_images_raises_error():
107
+ """Test that combining two results with images raises an error."""
108
+ result1 = ContentResult(base64_image="image1")
109
+ result2 = ContentResult(base64_image="image2")
110
+
111
+ with pytest.raises(ValueError, match="Cannot combine tool results"):
112
+ _ = result1 + result2
113
+
114
+
115
+ def test_content_result_add_one_image():
116
+ """Test adding ContentResults where only one has an image."""
117
+ result1 = ContentResult(base64_image="image1")
118
+ result2 = ContentResult(output="Output")
119
+
120
+ combined = result1 + result2
121
+
122
+ assert combined.base64_image == "image1"
123
+ assert combined.output == "Output"
124
+
125
+
126
+ def test_content_result_to_content_blocks_output():
127
+ """Test converting ContentResult with output to content blocks."""
128
+ result = ContentResult(output="Test output")
129
+
130
+ blocks = result.to_content_blocks()
131
+
132
+ assert len(blocks) == 1
133
+ assert isinstance(blocks[0], TextContent)
134
+ assert blocks[0].text == "Test output"
135
+
136
+
137
+ def test_content_result_to_content_blocks_error():
138
+ """Test converting ContentResult with error to content blocks."""
139
+ result = ContentResult(error="Test error")
140
+
141
+ blocks = result.to_content_blocks()
142
+
143
+ assert len(blocks) == 1
144
+ assert isinstance(blocks[0], TextContent)
145
+ assert blocks[0].text == "Test error"
146
+
147
+
148
+ def test_content_result_to_content_blocks_image():
149
+ """Test converting ContentResult with image to content blocks."""
150
+ result = ContentResult(base64_image="base64data")
151
+
152
+ blocks = result.to_content_blocks()
153
+
154
+ assert len(blocks) == 1
155
+ assert isinstance(blocks[0], ImageContent)
156
+ assert blocks[0].data == "base64data"
157
+ assert blocks[0].mimeType == "image/png"
158
+
159
+
160
+ def test_content_result_to_content_blocks_all():
161
+ """Test converting ContentResult with all fields to content blocks."""
162
+ result = ContentResult(
163
+ output="Output",
164
+ error="Error",
165
+ base64_image="image",
166
+ )
167
+
168
+ blocks = result.to_content_blocks()
169
+
170
+ assert len(blocks) == 3
171
+ assert isinstance(blocks[0], TextContent)
172
+ assert blocks[0].text == "Output"
173
+ assert isinstance(blocks[1], TextContent)
174
+ assert blocks[1].text == "Error"
175
+ assert isinstance(blocks[2], ImageContent)
176
+ assert blocks[2].data == "image"
177
+
178
+
179
+ def test_content_result_to_content_blocks_empty():
180
+ """Test converting empty ContentResult to content blocks."""
181
+ result = ContentResult()
182
+
183
+ blocks = result.to_content_blocks()
184
+
185
+ assert len(blocks) == 0
186
+
187
+
188
+ def test_tool_error():
189
+ """Test ToolError exception."""
190
+ error = ToolError("Test error message")
191
+
192
+ assert isinstance(error, Exception)
193
+ assert str(error) == "Test error message"
hud/types.py CHANGED
@@ -17,6 +17,9 @@ from hud.utils.tool_shorthand import normalize_to_tool_call_dict
17
17
 
18
18
  logger = logging.getLogger(__name__)
19
19
 
20
+ # Guard to ensure we only log missing HUD_API_KEY once
21
+ _missing_api_key_error_logged: bool = False
22
+
20
23
 
21
24
  class Task(BaseModel):
22
25
  """
@@ -119,7 +122,10 @@ class Task(BaseModel):
119
122
  if settings.api_key:
120
123
  mapping["HUD_API_KEY"] = settings.api_key
121
124
  else:
122
- logger.error("HUD_API_KEY is not set, tracing and remote training will not work")
125
+ global _missing_api_key_error_logged
126
+ if not _missing_api_key_error_logged:
127
+ logger.error("HUD_API_KEY is not set, tracing and remote training will not work")
128
+ _missing_api_key_error_logged = True
123
129
 
124
130
  def substitute_in_value(obj: Any) -> Any:
125
131
  """Recursively substitute variables in nested structures."""
@@ -36,9 +36,7 @@ def create_openai_agent(**kwargs: Any) -> GenericOpenAIChatAgent:
36
36
  api_key = kwargs.pop("api_key", None)
37
37
  base_url = kwargs.pop("base_url", None)
38
38
 
39
- openai_client = AsyncOpenAI(api_key=api_key, base_url=base_url)
40
-
41
- return GenericOpenAIChatAgent(openai_client=openai_client, **kwargs)
39
+ return GenericOpenAIChatAgent(api_key=api_key, base_url=base_url, **kwargs)
42
40
 
43
41
 
44
42
  def create_grounded_agent(**kwargs: Any) -> GroundedOpenAIChatAgent:
hud/utils/mcp.py CHANGED
@@ -46,7 +46,7 @@ def setup_hud_telemetry(
46
46
  The auto-created trace context manager if one was created, None otherwise.
47
47
  Caller is responsible for exiting the context manager.
48
48
  """
49
- if not mcp_config:
49
+ if mcp_config is None:
50
50
  raise ValueError("Please run initialize() before setting up client-side telemetry")
51
51
 
52
52
  # Check if there are any HUD servers to setup telemetry for