hud-python 0.4.52__py3-none-any.whl → 0.4.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/base.py +9 -2
- hud/agents/openai_chat_generic.py +15 -3
- hud/agents/tests/test_base.py +15 -0
- hud/agents/tests/test_base_runtime.py +164 -0
- hud/cli/__init__.py +6 -3
- hud/cli/build.py +35 -27
- hud/cli/dev.py +11 -29
- hud/cli/eval.py +61 -61
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +24 -2
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +134 -0
- hud/cli/tests/test_mcp_server.py +8 -7
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/utils/docker.py +120 -1
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +2 -2
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_runner.py +106 -0
- hud/datasets/tests/test_utils.py +228 -0
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_instrumentation.py +207 -0
- hud/server/tests/test_server_extra.py +2 -0
- hud/shared/exceptions.py +35 -4
- hud/shared/hints.py +25 -0
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +31 -23
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/tests/test_async_context.py +242 -0
- hud/telemetry/tests/test_instrument.py +414 -0
- hud/telemetry/tests/test_job.py +609 -0
- hud/telemetry/tests/test_trace.py +183 -5
- hud/tools/computer/settings.py +2 -2
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/types.py +7 -1
- hud/utils/agent_factories.py +1 -3
- hud/utils/mcp.py +1 -1
- hud/utils/tests/test_agent_factories.py +60 -0
- hud/utils/tests/test_mcp.py +4 -6
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tasks.py +187 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.52.dist-info → hud_python-0.4.53.dist-info}/METADATA +47 -48
- {hud_python-0.4.52.dist-info → hud_python-0.4.53.dist-info}/RECORD +69 -31
- {hud_python-0.4.52.dist-info → hud_python-0.4.53.dist-info}/WHEEL +0 -0
- {hud_python-0.4.52.dist-info → hud_python-0.4.53.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.52.dist-info → hud_python-0.4.53.dist-info}/licenses/LICENSE +0 -0
|
@@ -2,9 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from unittest.mock import patch
|
|
5
|
+
from unittest.mock import AsyncMock, patch
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from hud.telemetry.trace import Trace, trace
|
|
8
10
|
|
|
9
11
|
|
|
10
12
|
class TestTraceAPI:
|
|
@@ -19,11 +21,11 @@ class TestTraceAPI:
|
|
|
19
21
|
patch("hud.settings.get_settings", return_value=mock_settings),
|
|
20
22
|
patch("hud.telemetry.trace.OtelTrace") as mock_otel_trace,
|
|
21
23
|
):
|
|
22
|
-
mock_otel_trace.return_value.__enter__.return_value = "
|
|
24
|
+
mock_otel_trace.return_value.__enter__.return_value = "1234567890"
|
|
23
25
|
|
|
24
26
|
with trace("test-trace") as task_run_id:
|
|
25
|
-
#
|
|
26
|
-
assert
|
|
27
|
+
# Should use placeholder ID for custom backends
|
|
28
|
+
assert len(task_run_id.id) == 36
|
|
27
29
|
|
|
28
30
|
def test_trace_with_enabled_telemetry_and_api_key(self):
|
|
29
31
|
"""Test trace behavior when telemetry is enabled with API key."""
|
|
@@ -61,3 +63,179 @@ class TestTraceAPI:
|
|
|
61
63
|
with trace("test-trace") as task_run_id:
|
|
62
64
|
# In absence of HUD API key, ID should still be a string
|
|
63
65
|
assert isinstance(task_run_id.id, str)
|
|
66
|
+
|
|
67
|
+
def test_trace_with_job_id(self):
|
|
68
|
+
"""Test trace with job_id parameter."""
|
|
69
|
+
mock_settings = type("Settings", (), {"telemetry_enabled": True, "api_key": "test-key"})()
|
|
70
|
+
|
|
71
|
+
with (
|
|
72
|
+
patch("hud.settings.get_settings", return_value=mock_settings),
|
|
73
|
+
patch("hud.telemetry.trace.OtelTrace") as mock_otel_trace,
|
|
74
|
+
trace("test-trace", job_id="job-123") as trace_obj,
|
|
75
|
+
):
|
|
76
|
+
assert trace_obj.job_id == "job-123"
|
|
77
|
+
|
|
78
|
+
# Check OtelTrace was called with job_id
|
|
79
|
+
call_kwargs = mock_otel_trace.call_args[1]
|
|
80
|
+
assert call_kwargs["job_id"] == "job-123"
|
|
81
|
+
|
|
82
|
+
def test_trace_with_task_id(self):
|
|
83
|
+
"""Test trace with task_id parameter."""
|
|
84
|
+
mock_settings = type("Settings", (), {"telemetry_enabled": True, "api_key": "test-key"})()
|
|
85
|
+
|
|
86
|
+
with (
|
|
87
|
+
patch("hud.settings.get_settings", return_value=mock_settings),
|
|
88
|
+
patch("hud.telemetry.trace.OtelTrace"),
|
|
89
|
+
trace("test-trace", task_id="task-456") as trace_obj,
|
|
90
|
+
):
|
|
91
|
+
assert trace_obj.task_id == "task-456"
|
|
92
|
+
|
|
93
|
+
def test_trace_with_attributes(self):
|
|
94
|
+
"""Test trace with custom attributes."""
|
|
95
|
+
mock_settings = type("Settings", (), {"telemetry_enabled": True, "api_key": "test-key"})()
|
|
96
|
+
|
|
97
|
+
with (
|
|
98
|
+
patch("hud.settings.get_settings", return_value=mock_settings),
|
|
99
|
+
patch("hud.telemetry.trace.OtelTrace") as mock_otel_trace,
|
|
100
|
+
trace("test-trace", attrs={"custom": "value"}),
|
|
101
|
+
):
|
|
102
|
+
# Check OtelTrace was called with attributes
|
|
103
|
+
call_kwargs = mock_otel_trace.call_args[1]
|
|
104
|
+
assert call_kwargs["attributes"] == {"custom": "value"}
|
|
105
|
+
|
|
106
|
+
def test_trace_non_root(self):
|
|
107
|
+
"""Test trace with root=False."""
|
|
108
|
+
mock_settings = type("Settings", (), {"telemetry_enabled": True, "api_key": "test-key"})()
|
|
109
|
+
|
|
110
|
+
with (
|
|
111
|
+
patch("hud.settings.get_settings", return_value=mock_settings),
|
|
112
|
+
patch("hud.telemetry.trace.OtelTrace") as mock_otel_trace,
|
|
113
|
+
trace("test-trace", root=False),
|
|
114
|
+
):
|
|
115
|
+
# Check OtelTrace was called with is_root=False
|
|
116
|
+
call_kwargs = mock_otel_trace.call_args[1]
|
|
117
|
+
assert call_kwargs["is_root"] is False
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class TestTraceClass:
|
|
121
|
+
"""Tests for Trace class."""
|
|
122
|
+
|
|
123
|
+
def test_trace_initialization(self):
|
|
124
|
+
"""Test Trace initialization."""
|
|
125
|
+
trace_obj = Trace(
|
|
126
|
+
trace_id="test-id",
|
|
127
|
+
name="Test Trace",
|
|
128
|
+
job_id="job-123",
|
|
129
|
+
task_id="task-456",
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
assert trace_obj.id == "test-id"
|
|
133
|
+
assert trace_obj.name == "Test Trace"
|
|
134
|
+
assert trace_obj.job_id == "job-123"
|
|
135
|
+
assert trace_obj.task_id == "task-456"
|
|
136
|
+
assert trace_obj.created_at is not None
|
|
137
|
+
|
|
138
|
+
@pytest.mark.asyncio
|
|
139
|
+
async def test_trace_log(self):
|
|
140
|
+
"""Test Trace async log method."""
|
|
141
|
+
trace_obj = Trace("test-id", "Test")
|
|
142
|
+
|
|
143
|
+
with (
|
|
144
|
+
patch("hud.telemetry.trace.settings") as mock_settings,
|
|
145
|
+
patch("hud.telemetry.trace.make_request", new_callable=AsyncMock) as mock_request,
|
|
146
|
+
):
|
|
147
|
+
mock_settings.telemetry_enabled = True
|
|
148
|
+
mock_settings.api_key = "test-key"
|
|
149
|
+
mock_settings.hud_telemetry_url = "https://test.com"
|
|
150
|
+
|
|
151
|
+
await trace_obj.log({"metric": 1.0})
|
|
152
|
+
|
|
153
|
+
mock_request.assert_called_once()
|
|
154
|
+
call_kwargs = mock_request.call_args[1]
|
|
155
|
+
assert call_kwargs["json"]["metrics"] == {"metric": 1.0}
|
|
156
|
+
|
|
157
|
+
@pytest.mark.asyncio
|
|
158
|
+
async def test_trace_log_telemetry_disabled(self):
|
|
159
|
+
"""Test Trace log when telemetry is disabled."""
|
|
160
|
+
trace_obj = Trace("test-id", "Test")
|
|
161
|
+
|
|
162
|
+
with (
|
|
163
|
+
patch("hud.telemetry.trace.settings") as mock_settings,
|
|
164
|
+
patch("hud.telemetry.trace.make_request", new_callable=AsyncMock) as mock_request,
|
|
165
|
+
):
|
|
166
|
+
mock_settings.telemetry_enabled = False
|
|
167
|
+
|
|
168
|
+
await trace_obj.log({"metric": 1.0})
|
|
169
|
+
|
|
170
|
+
mock_request.assert_not_called()
|
|
171
|
+
|
|
172
|
+
@pytest.mark.asyncio
|
|
173
|
+
async def test_trace_log_error(self):
|
|
174
|
+
"""Test Trace log handles errors gracefully."""
|
|
175
|
+
trace_obj = Trace("test-id", "Test")
|
|
176
|
+
|
|
177
|
+
with (
|
|
178
|
+
patch("hud.telemetry.trace.settings") as mock_settings,
|
|
179
|
+
patch("hud.telemetry.trace.make_request", new_callable=AsyncMock) as mock_request,
|
|
180
|
+
):
|
|
181
|
+
mock_settings.telemetry_enabled = True
|
|
182
|
+
mock_settings.api_key = "test-key"
|
|
183
|
+
mock_settings.hud_telemetry_url = "https://test.com"
|
|
184
|
+
mock_request.side_effect = Exception("Network error")
|
|
185
|
+
|
|
186
|
+
# Should not raise
|
|
187
|
+
await trace_obj.log({"metric": 1.0})
|
|
188
|
+
|
|
189
|
+
def test_trace_log_sync(self):
|
|
190
|
+
"""Test Trace sync log method."""
|
|
191
|
+
trace_obj = Trace("test-id", "Test")
|
|
192
|
+
|
|
193
|
+
with (
|
|
194
|
+
patch("hud.telemetry.trace.settings") as mock_settings,
|
|
195
|
+
patch("hud.telemetry.trace.make_request_sync") as mock_request,
|
|
196
|
+
):
|
|
197
|
+
mock_settings.telemetry_enabled = True
|
|
198
|
+
mock_settings.api_key = "test-key"
|
|
199
|
+
mock_settings.hud_telemetry_url = "https://test.com"
|
|
200
|
+
|
|
201
|
+
trace_obj.log_sync({"metric": 1.0})
|
|
202
|
+
|
|
203
|
+
mock_request.assert_called_once()
|
|
204
|
+
|
|
205
|
+
def test_trace_log_sync_telemetry_disabled(self):
|
|
206
|
+
"""Test Trace sync log when telemetry is disabled."""
|
|
207
|
+
trace_obj = Trace("test-id", "Test")
|
|
208
|
+
|
|
209
|
+
with (
|
|
210
|
+
patch("hud.telemetry.trace.settings") as mock_settings,
|
|
211
|
+
patch("hud.telemetry.trace.make_request_sync") as mock_request,
|
|
212
|
+
):
|
|
213
|
+
mock_settings.telemetry_enabled = False
|
|
214
|
+
|
|
215
|
+
trace_obj.log_sync({"metric": 1.0})
|
|
216
|
+
|
|
217
|
+
mock_request.assert_not_called()
|
|
218
|
+
|
|
219
|
+
def test_trace_log_sync_error(self):
|
|
220
|
+
"""Test Trace sync log handles errors gracefully."""
|
|
221
|
+
trace_obj = Trace("test-id", "Test")
|
|
222
|
+
|
|
223
|
+
with (
|
|
224
|
+
patch("hud.telemetry.trace.settings") as mock_settings,
|
|
225
|
+
patch("hud.telemetry.trace.make_request_sync") as mock_request,
|
|
226
|
+
):
|
|
227
|
+
mock_settings.telemetry_enabled = True
|
|
228
|
+
mock_settings.api_key = "test-key"
|
|
229
|
+
mock_settings.hud_telemetry_url = "https://test.com"
|
|
230
|
+
mock_request.side_effect = Exception("Network error")
|
|
231
|
+
|
|
232
|
+
# Should not raise
|
|
233
|
+
trace_obj.log_sync({"metric": 1.0})
|
|
234
|
+
|
|
235
|
+
def test_trace_repr(self):
|
|
236
|
+
"""Test Trace __repr__."""
|
|
237
|
+
trace_obj = Trace("test-id", "Test Trace")
|
|
238
|
+
|
|
239
|
+
repr_str = repr(trace_obj)
|
|
240
|
+
assert "test-id" in repr_str
|
|
241
|
+
assert "Test Trace" in repr_str
|
hud/tools/computer/settings.py
CHANGED
|
@@ -63,12 +63,12 @@ class ComputerSettings(BaseSettings):
|
|
|
63
63
|
)
|
|
64
64
|
|
|
65
65
|
QWEN_COMPUTER_WIDTH: int = Field(
|
|
66
|
-
default=
|
|
66
|
+
default=700,
|
|
67
67
|
description="Width of the display to use for the Qwen computer tools",
|
|
68
68
|
validation_alias="QWEN_COMPUTER_WIDTH",
|
|
69
69
|
)
|
|
70
70
|
QWEN_COMPUTER_HEIGHT: int = Field(
|
|
71
|
-
default=
|
|
71
|
+
default=448,
|
|
72
72
|
description="Height of the display to use for the Qwen computer tools",
|
|
73
73
|
validation_alias="QWEN_COMPUTER_HEIGHT",
|
|
74
74
|
)
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from mcp.types import TextContent
|
|
5
|
+
|
|
6
|
+
from hud.tools.submit import SubmitTool, get_submission, set_submission
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@pytest.fixture(autouse=True)
|
|
10
|
+
def reset_submission():
|
|
11
|
+
"""Reset submission before each test."""
|
|
12
|
+
set_submission(None)
|
|
13
|
+
yield
|
|
14
|
+
set_submission(None)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_set_and_get_submission():
|
|
18
|
+
"""Test setting and getting submission value."""
|
|
19
|
+
assert get_submission() is None
|
|
20
|
+
|
|
21
|
+
set_submission("test value")
|
|
22
|
+
assert get_submission() == "test value"
|
|
23
|
+
|
|
24
|
+
set_submission("another value")
|
|
25
|
+
assert get_submission() == "another value"
|
|
26
|
+
|
|
27
|
+
set_submission(None)
|
|
28
|
+
assert get_submission() is None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@pytest.mark.asyncio
|
|
32
|
+
async def test_submit_tool_with_response():
|
|
33
|
+
"""Test SubmitTool with a response string."""
|
|
34
|
+
tool = SubmitTool()
|
|
35
|
+
|
|
36
|
+
result = await tool(response="Test response")
|
|
37
|
+
|
|
38
|
+
assert get_submission() == "Test response"
|
|
39
|
+
assert len(result) == 1
|
|
40
|
+
assert isinstance(result[0], TextContent)
|
|
41
|
+
assert result[0].text == "Test response"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@pytest.mark.asyncio
|
|
45
|
+
async def test_submit_tool_with_none():
|
|
46
|
+
"""Test SubmitTool with None response."""
|
|
47
|
+
tool = SubmitTool()
|
|
48
|
+
|
|
49
|
+
result = await tool(response=None)
|
|
50
|
+
|
|
51
|
+
assert get_submission() is None
|
|
52
|
+
assert len(result) == 0
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@pytest.mark.asyncio
|
|
56
|
+
async def test_submit_tool_with_empty_string():
|
|
57
|
+
"""Test SubmitTool with empty string."""
|
|
58
|
+
tool = SubmitTool()
|
|
59
|
+
|
|
60
|
+
result = await tool(response="")
|
|
61
|
+
|
|
62
|
+
assert get_submission() == ""
|
|
63
|
+
assert len(result) == 0
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@pytest.mark.asyncio
|
|
67
|
+
async def test_submit_tool_overwrite():
|
|
68
|
+
"""Test that submitting overwrites previous submission."""
|
|
69
|
+
tool = SubmitTool()
|
|
70
|
+
|
|
71
|
+
await tool(response="First submission")
|
|
72
|
+
assert get_submission() == "First submission"
|
|
73
|
+
|
|
74
|
+
await tool(response="Second submission")
|
|
75
|
+
assert get_submission() == "Second submission"
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@pytest.mark.asyncio
|
|
79
|
+
async def test_submit_tool_properties():
|
|
80
|
+
"""Test SubmitTool properties."""
|
|
81
|
+
tool = SubmitTool()
|
|
82
|
+
|
|
83
|
+
assert tool.name == "response"
|
|
84
|
+
assert tool.title == "Submit Tool"
|
|
85
|
+
assert "final response" in tool.description.lower()
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from mcp.types import ImageContent, TextContent
|
|
5
|
+
|
|
6
|
+
from hud.tools.types import ContentResult, EvaluationResult, ToolError
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_evaluation_result_defaults():
|
|
10
|
+
"""Test EvaluationResult with default values."""
|
|
11
|
+
result = EvaluationResult()
|
|
12
|
+
|
|
13
|
+
assert result.reward == 0.0
|
|
14
|
+
assert result.done is False
|
|
15
|
+
assert result.content is None
|
|
16
|
+
assert result.info == {}
|
|
17
|
+
assert result.isError is False
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_evaluation_result_with_values():
|
|
21
|
+
"""Test EvaluationResult with custom values."""
|
|
22
|
+
result = EvaluationResult(
|
|
23
|
+
reward=0.95,
|
|
24
|
+
done=True,
|
|
25
|
+
content="Task completed successfully",
|
|
26
|
+
info={"steps": 5},
|
|
27
|
+
isError=False,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
assert result.reward == 0.95
|
|
31
|
+
assert result.done is True
|
|
32
|
+
assert result.content == "Task completed successfully"
|
|
33
|
+
assert result.info == {"steps": 5}
|
|
34
|
+
assert result.isError is False
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_content_result_defaults():
|
|
38
|
+
"""Test ContentResult with default values."""
|
|
39
|
+
result = ContentResult()
|
|
40
|
+
|
|
41
|
+
assert result.output is None
|
|
42
|
+
assert result.error is None
|
|
43
|
+
assert result.base64_image is None
|
|
44
|
+
assert result.system is None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_content_result_with_values():
|
|
48
|
+
"""Test ContentResult with custom values."""
|
|
49
|
+
result = ContentResult(
|
|
50
|
+
output="Command executed",
|
|
51
|
+
error="No errors",
|
|
52
|
+
base64_image="base64data",
|
|
53
|
+
system="System message",
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
assert result.output == "Command executed"
|
|
57
|
+
assert result.error == "No errors"
|
|
58
|
+
assert result.base64_image == "base64data"
|
|
59
|
+
assert result.system == "System message"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_content_result_add_both_output():
|
|
63
|
+
"""Test adding two ContentResults with output."""
|
|
64
|
+
result1 = ContentResult(output="Part 1")
|
|
65
|
+
result2 = ContentResult(output=" Part 2")
|
|
66
|
+
|
|
67
|
+
combined = result1 + result2
|
|
68
|
+
|
|
69
|
+
assert combined.output == "Part 1 Part 2"
|
|
70
|
+
assert combined.error is None
|
|
71
|
+
assert combined.base64_image is None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def test_content_result_add_both_error():
|
|
75
|
+
"""Test adding two ContentResults with errors."""
|
|
76
|
+
result1 = ContentResult(error="Error 1")
|
|
77
|
+
result2 = ContentResult(error=" Error 2")
|
|
78
|
+
|
|
79
|
+
combined = result1 + result2
|
|
80
|
+
|
|
81
|
+
assert combined.error == "Error 1 Error 2"
|
|
82
|
+
assert combined.output is None
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def test_content_result_add_both_system():
|
|
86
|
+
"""Test adding two ContentResults with system messages."""
|
|
87
|
+
result1 = ContentResult(system="System 1")
|
|
88
|
+
result2 = ContentResult(system=" System 2")
|
|
89
|
+
|
|
90
|
+
combined = result1 + result2
|
|
91
|
+
|
|
92
|
+
assert combined.system == "System 1 System 2"
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def test_content_result_add_one_sided():
|
|
96
|
+
"""Test adding ContentResults where only one has values."""
|
|
97
|
+
result1 = ContentResult(output="Output")
|
|
98
|
+
result2 = ContentResult(error="Error")
|
|
99
|
+
|
|
100
|
+
combined = result1 + result2
|
|
101
|
+
|
|
102
|
+
assert combined.output == "Output"
|
|
103
|
+
assert combined.error == "Error"
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def test_content_result_add_images_raises_error():
|
|
107
|
+
"""Test that combining two results with images raises an error."""
|
|
108
|
+
result1 = ContentResult(base64_image="image1")
|
|
109
|
+
result2 = ContentResult(base64_image="image2")
|
|
110
|
+
|
|
111
|
+
with pytest.raises(ValueError, match="Cannot combine tool results"):
|
|
112
|
+
_ = result1 + result2
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def test_content_result_add_one_image():
|
|
116
|
+
"""Test adding ContentResults where only one has an image."""
|
|
117
|
+
result1 = ContentResult(base64_image="image1")
|
|
118
|
+
result2 = ContentResult(output="Output")
|
|
119
|
+
|
|
120
|
+
combined = result1 + result2
|
|
121
|
+
|
|
122
|
+
assert combined.base64_image == "image1"
|
|
123
|
+
assert combined.output == "Output"
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def test_content_result_to_content_blocks_output():
|
|
127
|
+
"""Test converting ContentResult with output to content blocks."""
|
|
128
|
+
result = ContentResult(output="Test output")
|
|
129
|
+
|
|
130
|
+
blocks = result.to_content_blocks()
|
|
131
|
+
|
|
132
|
+
assert len(blocks) == 1
|
|
133
|
+
assert isinstance(blocks[0], TextContent)
|
|
134
|
+
assert blocks[0].text == "Test output"
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def test_content_result_to_content_blocks_error():
|
|
138
|
+
"""Test converting ContentResult with error to content blocks."""
|
|
139
|
+
result = ContentResult(error="Test error")
|
|
140
|
+
|
|
141
|
+
blocks = result.to_content_blocks()
|
|
142
|
+
|
|
143
|
+
assert len(blocks) == 1
|
|
144
|
+
assert isinstance(blocks[0], TextContent)
|
|
145
|
+
assert blocks[0].text == "Test error"
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def test_content_result_to_content_blocks_image():
|
|
149
|
+
"""Test converting ContentResult with image to content blocks."""
|
|
150
|
+
result = ContentResult(base64_image="base64data")
|
|
151
|
+
|
|
152
|
+
blocks = result.to_content_blocks()
|
|
153
|
+
|
|
154
|
+
assert len(blocks) == 1
|
|
155
|
+
assert isinstance(blocks[0], ImageContent)
|
|
156
|
+
assert blocks[0].data == "base64data"
|
|
157
|
+
assert blocks[0].mimeType == "image/png"
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def test_content_result_to_content_blocks_all():
|
|
161
|
+
"""Test converting ContentResult with all fields to content blocks."""
|
|
162
|
+
result = ContentResult(
|
|
163
|
+
output="Output",
|
|
164
|
+
error="Error",
|
|
165
|
+
base64_image="image",
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
blocks = result.to_content_blocks()
|
|
169
|
+
|
|
170
|
+
assert len(blocks) == 3
|
|
171
|
+
assert isinstance(blocks[0], TextContent)
|
|
172
|
+
assert blocks[0].text == "Output"
|
|
173
|
+
assert isinstance(blocks[1], TextContent)
|
|
174
|
+
assert blocks[1].text == "Error"
|
|
175
|
+
assert isinstance(blocks[2], ImageContent)
|
|
176
|
+
assert blocks[2].data == "image"
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def test_content_result_to_content_blocks_empty():
|
|
180
|
+
"""Test converting empty ContentResult to content blocks."""
|
|
181
|
+
result = ContentResult()
|
|
182
|
+
|
|
183
|
+
blocks = result.to_content_blocks()
|
|
184
|
+
|
|
185
|
+
assert len(blocks) == 0
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def test_tool_error():
|
|
189
|
+
"""Test ToolError exception."""
|
|
190
|
+
error = ToolError("Test error message")
|
|
191
|
+
|
|
192
|
+
assert isinstance(error, Exception)
|
|
193
|
+
assert str(error) == "Test error message"
|
hud/types.py
CHANGED
|
@@ -17,6 +17,9 @@ from hud.utils.tool_shorthand import normalize_to_tool_call_dict
|
|
|
17
17
|
|
|
18
18
|
logger = logging.getLogger(__name__)
|
|
19
19
|
|
|
20
|
+
# Guard to ensure we only log missing HUD_API_KEY once
|
|
21
|
+
_missing_api_key_error_logged: bool = False
|
|
22
|
+
|
|
20
23
|
|
|
21
24
|
class Task(BaseModel):
|
|
22
25
|
"""
|
|
@@ -119,7 +122,10 @@ class Task(BaseModel):
|
|
|
119
122
|
if settings.api_key:
|
|
120
123
|
mapping["HUD_API_KEY"] = settings.api_key
|
|
121
124
|
else:
|
|
122
|
-
|
|
125
|
+
global _missing_api_key_error_logged
|
|
126
|
+
if not _missing_api_key_error_logged:
|
|
127
|
+
logger.error("HUD_API_KEY is not set, tracing and remote training will not work")
|
|
128
|
+
_missing_api_key_error_logged = True
|
|
123
129
|
|
|
124
130
|
def substitute_in_value(obj: Any) -> Any:
|
|
125
131
|
"""Recursively substitute variables in nested structures."""
|
hud/utils/agent_factories.py
CHANGED
|
@@ -36,9 +36,7 @@ def create_openai_agent(**kwargs: Any) -> GenericOpenAIChatAgent:
|
|
|
36
36
|
api_key = kwargs.pop("api_key", None)
|
|
37
37
|
base_url = kwargs.pop("base_url", None)
|
|
38
38
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
return GenericOpenAIChatAgent(openai_client=openai_client, **kwargs)
|
|
39
|
+
return GenericOpenAIChatAgent(api_key=api_key, base_url=base_url, **kwargs)
|
|
42
40
|
|
|
43
41
|
|
|
44
42
|
def create_grounded_agent(**kwargs: Any) -> GroundedOpenAIChatAgent:
|
hud/utils/mcp.py
CHANGED
|
@@ -46,7 +46,7 @@ def setup_hud_telemetry(
|
|
|
46
46
|
The auto-created trace context manager if one was created, None otherwise.
|
|
47
47
|
Caller is responsible for exiting the context manager.
|
|
48
48
|
"""
|
|
49
|
-
if
|
|
49
|
+
if mcp_config is None:
|
|
50
50
|
raise ValueError("Please run initialize() before setting up client-side telemetry")
|
|
51
51
|
|
|
52
52
|
# Check if there are any HUD servers to setup telemetry for
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from unittest.mock import MagicMock, patch
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_create_openai_agent():
|
|
7
|
+
from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
|
|
8
|
+
from hud.utils.agent_factories import create_openai_agent
|
|
9
|
+
|
|
10
|
+
agent = create_openai_agent(
|
|
11
|
+
api_key="test_key", model_name="test_model", completion_kwargs={"temperature": 0.5}
|
|
12
|
+
)
|
|
13
|
+
assert isinstance(agent, GenericOpenAIChatAgent)
|
|
14
|
+
assert agent.model_name == "test_model"
|
|
15
|
+
assert agent.completion_kwargs["temperature"] == 0.5
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_create_grounded_agent():
|
|
19
|
+
with (
|
|
20
|
+
patch("hud.utils.agent_factories.AsyncOpenAI") as mock_async_openai,
|
|
21
|
+
patch("hud.utils.agent_factories.GrounderConfig"),
|
|
22
|
+
patch("hud.utils.agent_factories.GroundedOpenAIChatAgent") as mock_agent_class,
|
|
23
|
+
):
|
|
24
|
+
mock_agent = MagicMock()
|
|
25
|
+
mock_agent_class.return_value = mock_agent
|
|
26
|
+
|
|
27
|
+
from hud.utils.agent_factories import create_grounded_agent
|
|
28
|
+
|
|
29
|
+
agent = create_grounded_agent(
|
|
30
|
+
api_key="test_key",
|
|
31
|
+
grounder_api_key="grounder_key",
|
|
32
|
+
model_name="test_model",
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
assert agent == mock_agent
|
|
36
|
+
mock_async_openai.assert_called_with(api_key="test_key", base_url=None)
|
|
37
|
+
mock_agent_class.assert_called_once()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_create_grounded_agent_custom_grounder():
|
|
41
|
+
with (
|
|
42
|
+
patch("hud.utils.agent_factories.AsyncOpenAI"),
|
|
43
|
+
patch("hud.utils.agent_factories.GrounderConfig") as mock_grounder_config,
|
|
44
|
+
patch("hud.utils.agent_factories.GroundedOpenAIChatAgent"),
|
|
45
|
+
):
|
|
46
|
+
from hud.utils.agent_factories import create_grounded_agent
|
|
47
|
+
|
|
48
|
+
create_grounded_agent(
|
|
49
|
+
api_key="test_key",
|
|
50
|
+
grounder_api_key="grounder_key",
|
|
51
|
+
model_name="test_model",
|
|
52
|
+
grounder_api_base="https://custom.api",
|
|
53
|
+
grounder_model="custom/model",
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
mock_grounder_config.assert_called_with(
|
|
57
|
+
api_base="https://custom.api",
|
|
58
|
+
model="custom/model",
|
|
59
|
+
api_key="grounder_key",
|
|
60
|
+
)
|
hud/utils/tests/test_mcp.py
CHANGED
|
@@ -90,12 +90,10 @@ class TestPatchMCPConfig:
|
|
|
90
90
|
class TestSetupHUDTelemetry:
|
|
91
91
|
"""Tests for setup_hud_telemetry function."""
|
|
92
92
|
|
|
93
|
-
def
|
|
94
|
-
"""Test that empty config
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
):
|
|
98
|
-
setup_hud_telemetry({})
|
|
93
|
+
def test_empty_config_returns_none(self):
|
|
94
|
+
"""Test that empty config returns None (no servers to set up telemetry for)."""
|
|
95
|
+
result = setup_hud_telemetry({})
|
|
96
|
+
assert result is None
|
|
99
97
|
|
|
100
98
|
def test_none_config_raises_error(self):
|
|
101
99
|
"""Test that None config raises ValueError."""
|