hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -22
- hud/agents/__init__.py +13 -15
- hud/agents/base.py +599 -599
- hud/agents/claude.py +373 -373
- hud/agents/langchain.py +261 -250
- hud/agents/misc/__init__.py +7 -7
- hud/agents/misc/response_agent.py +82 -80
- hud/agents/openai.py +352 -352
- hud/agents/openai_chat_generic.py +154 -154
- hud/agents/tests/__init__.py +1 -1
- hud/agents/tests/test_base.py +742 -742
- hud/agents/tests/test_claude.py +324 -324
- hud/agents/tests/test_client.py +363 -363
- hud/agents/tests/test_openai.py +237 -237
- hud/cli/__init__.py +617 -617
- hud/cli/__main__.py +8 -8
- hud/cli/analyze.py +371 -371
- hud/cli/analyze_metadata.py +230 -230
- hud/cli/build.py +498 -427
- hud/cli/clone.py +185 -185
- hud/cli/cursor.py +92 -92
- hud/cli/debug.py +392 -392
- hud/cli/docker_utils.py +83 -83
- hud/cli/init.py +280 -281
- hud/cli/interactive.py +353 -353
- hud/cli/mcp_server.py +764 -756
- hud/cli/pull.py +330 -336
- hud/cli/push.py +404 -370
- hud/cli/remote_runner.py +311 -311
- hud/cli/runner.py +160 -160
- hud/cli/tests/__init__.py +3 -3
- hud/cli/tests/test_analyze.py +284 -284
- hud/cli/tests/test_cli_init.py +265 -265
- hud/cli/tests/test_cli_main.py +27 -27
- hud/cli/tests/test_clone.py +142 -142
- hud/cli/tests/test_cursor.py +253 -253
- hud/cli/tests/test_debug.py +453 -453
- hud/cli/tests/test_mcp_server.py +139 -139
- hud/cli/tests/test_utils.py +388 -388
- hud/cli/utils.py +263 -263
- hud/clients/README.md +143 -143
- hud/clients/__init__.py +16 -16
- hud/clients/base.py +378 -379
- hud/clients/fastmcp.py +222 -222
- hud/clients/mcp_use.py +298 -278
- hud/clients/tests/__init__.py +1 -1
- hud/clients/tests/test_client_integration.py +111 -111
- hud/clients/tests/test_fastmcp.py +342 -342
- hud/clients/tests/test_protocol.py +188 -188
- hud/clients/utils/__init__.py +1 -1
- hud/clients/utils/retry_transport.py +160 -160
- hud/datasets.py +327 -322
- hud/misc/__init__.py +1 -1
- hud/misc/claude_plays_pokemon.py +292 -292
- hud/otel/__init__.py +35 -35
- hud/otel/collector.py +142 -142
- hud/otel/config.py +164 -164
- hud/otel/context.py +536 -536
- hud/otel/exporters.py +366 -366
- hud/otel/instrumentation.py +97 -97
- hud/otel/processors.py +118 -118
- hud/otel/tests/__init__.py +1 -1
- hud/otel/tests/test_processors.py +197 -197
- hud/server/__init__.py +5 -5
- hud/server/context.py +114 -114
- hud/server/helper/__init__.py +5 -5
- hud/server/low_level.py +132 -132
- hud/server/server.py +170 -166
- hud/server/tests/__init__.py +3 -3
- hud/settings.py +73 -73
- hud/shared/__init__.py +5 -5
- hud/shared/exceptions.py +180 -180
- hud/shared/requests.py +264 -264
- hud/shared/tests/test_exceptions.py +157 -157
- hud/shared/tests/test_requests.py +275 -275
- hud/telemetry/__init__.py +25 -25
- hud/telemetry/instrument.py +379 -379
- hud/telemetry/job.py +309 -309
- hud/telemetry/replay.py +74 -74
- hud/telemetry/trace.py +83 -83
- hud/tools/__init__.py +33 -33
- hud/tools/base.py +365 -365
- hud/tools/bash.py +161 -161
- hud/tools/computer/__init__.py +15 -15
- hud/tools/computer/anthropic.py +437 -437
- hud/tools/computer/hud.py +376 -376
- hud/tools/computer/openai.py +295 -295
- hud/tools/computer/settings.py +82 -82
- hud/tools/edit.py +314 -314
- hud/tools/executors/__init__.py +30 -30
- hud/tools/executors/base.py +539 -539
- hud/tools/executors/pyautogui.py +621 -621
- hud/tools/executors/tests/__init__.py +1 -1
- hud/tools/executors/tests/test_base_executor.py +338 -338
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
- hud/tools/executors/xdo.py +511 -511
- hud/tools/playwright.py +412 -412
- hud/tools/tests/__init__.py +3 -3
- hud/tools/tests/test_base.py +282 -282
- hud/tools/tests/test_bash.py +158 -158
- hud/tools/tests/test_bash_extended.py +197 -197
- hud/tools/tests/test_computer.py +425 -425
- hud/tools/tests/test_computer_actions.py +34 -34
- hud/tools/tests/test_edit.py +259 -259
- hud/tools/tests/test_init.py +27 -27
- hud/tools/tests/test_playwright_tool.py +183 -183
- hud/tools/tests/test_tools.py +145 -145
- hud/tools/tests/test_utils.py +156 -156
- hud/tools/types.py +72 -72
- hud/tools/utils.py +50 -50
- hud/types.py +136 -136
- hud/utils/__init__.py +10 -10
- hud/utils/async_utils.py +65 -65
- hud/utils/design.py +236 -168
- hud/utils/mcp.py +55 -55
- hud/utils/progress.py +149 -149
- hud/utils/telemetry.py +66 -66
- hud/utils/tests/test_async_utils.py +173 -173
- hud/utils/tests/test_init.py +17 -17
- hud/utils/tests/test_progress.py +261 -261
- hud/utils/tests/test_telemetry.py +82 -82
- hud/utils/tests/test_version.py +8 -8
- hud/version.py +7 -7
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/METADATA +10 -8
- hud_python-0.4.3.dist-info/RECORD +131 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/licenses/LICENSE +21 -21
- hud/agents/art.py +0 -101
- hud_python-0.4.1.dist-info/RECORD +0 -132
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/WHEEL +0 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/entry_points.txt +0 -0
hud/tools/tests/test_utils.py
CHANGED
|
@@ -1,156 +1,156 @@
|
|
|
1
|
-
"""Tests for tools utils."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import asyncio
|
|
6
|
-
from unittest.mock import AsyncMock, patch
|
|
7
|
-
|
|
8
|
-
import pytest
|
|
9
|
-
|
|
10
|
-
from hud.tools.utils import maybe_truncate, run
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class TestRun:
|
|
14
|
-
"""Tests for the run function."""
|
|
15
|
-
|
|
16
|
-
@pytest.mark.asyncio
|
|
17
|
-
async def test_run_string_command_success(self):
|
|
18
|
-
"""Test running a string command successfully."""
|
|
19
|
-
mock_proc = AsyncMock()
|
|
20
|
-
mock_proc.returncode = 0
|
|
21
|
-
mock_proc.communicate = AsyncMock(return_value=(b"output", b""))
|
|
22
|
-
|
|
23
|
-
with patch("asyncio.create_subprocess_shell", return_value=mock_proc) as mock_shell:
|
|
24
|
-
return_code, stdout, stderr = await run("echo test")
|
|
25
|
-
|
|
26
|
-
assert return_code == 0
|
|
27
|
-
assert stdout == "output"
|
|
28
|
-
assert stderr == ""
|
|
29
|
-
mock_shell.assert_called_once()
|
|
30
|
-
|
|
31
|
-
@pytest.mark.asyncio
|
|
32
|
-
async def test_run_list_command_success(self):
|
|
33
|
-
"""Test running a list command successfully."""
|
|
34
|
-
mock_proc = AsyncMock()
|
|
35
|
-
mock_proc.returncode = 0
|
|
36
|
-
mock_proc.communicate = AsyncMock(return_value=(b"hello world", b""))
|
|
37
|
-
|
|
38
|
-
with patch("asyncio.create_subprocess_exec", return_value=mock_proc) as mock_exec:
|
|
39
|
-
return_code, stdout, stderr = await run(["echo", "hello", "world"])
|
|
40
|
-
|
|
41
|
-
assert return_code == 0
|
|
42
|
-
assert stdout == "hello world"
|
|
43
|
-
assert stderr == ""
|
|
44
|
-
mock_exec.assert_called_once_with(
|
|
45
|
-
"echo",
|
|
46
|
-
"hello",
|
|
47
|
-
"world",
|
|
48
|
-
stdin=None,
|
|
49
|
-
stdout=asyncio.subprocess.PIPE,
|
|
50
|
-
stderr=asyncio.subprocess.PIPE,
|
|
51
|
-
)
|
|
52
|
-
|
|
53
|
-
@pytest.mark.asyncio
|
|
54
|
-
async def test_run_with_input(self):
|
|
55
|
-
"""Test running a command with input."""
|
|
56
|
-
mock_proc = AsyncMock()
|
|
57
|
-
mock_proc.returncode = 0
|
|
58
|
-
mock_proc.communicate = AsyncMock(return_value=(b"processed", b""))
|
|
59
|
-
|
|
60
|
-
with patch("asyncio.create_subprocess_shell", return_value=mock_proc):
|
|
61
|
-
return_code, stdout, stderr = await run("cat", input="test input")
|
|
62
|
-
|
|
63
|
-
assert return_code == 0
|
|
64
|
-
assert stdout == "processed"
|
|
65
|
-
mock_proc.communicate.assert_called_once_with(input=b"test input")
|
|
66
|
-
|
|
67
|
-
@pytest.mark.asyncio
|
|
68
|
-
async def test_run_with_error(self):
|
|
69
|
-
"""Test running a command that returns an error."""
|
|
70
|
-
mock_proc = AsyncMock()
|
|
71
|
-
mock_proc.returncode = 1
|
|
72
|
-
mock_proc.communicate = AsyncMock(return_value=(b"", b"error message"))
|
|
73
|
-
|
|
74
|
-
with patch("asyncio.create_subprocess_shell", return_value=mock_proc):
|
|
75
|
-
return_code, stdout, stderr = await run("false")
|
|
76
|
-
|
|
77
|
-
assert return_code == 1
|
|
78
|
-
assert stdout == ""
|
|
79
|
-
assert stderr == "error message"
|
|
80
|
-
|
|
81
|
-
@pytest.mark.asyncio
|
|
82
|
-
async def test_run_with_timeout(self):
|
|
83
|
-
"""Test running a command with custom timeout."""
|
|
84
|
-
mock_proc = AsyncMock()
|
|
85
|
-
mock_proc.returncode = 0
|
|
86
|
-
mock_proc.communicate = AsyncMock(return_value=(b"done", b""))
|
|
87
|
-
|
|
88
|
-
with (
|
|
89
|
-
patch("asyncio.create_subprocess_shell", return_value=mock_proc),
|
|
90
|
-
patch("asyncio.wait_for") as mock_wait_for,
|
|
91
|
-
):
|
|
92
|
-
mock_wait_for.return_value = (b"done", b"")
|
|
93
|
-
|
|
94
|
-
return_code, stdout, stderr = await run("sleep 1", timeout=5.0)
|
|
95
|
-
|
|
96
|
-
# Check that wait_for was called with the correct timeout
|
|
97
|
-
mock_wait_for.assert_called_once()
|
|
98
|
-
assert mock_wait_for.call_args[1]["timeout"] == 5.0
|
|
99
|
-
|
|
100
|
-
@pytest.mark.asyncio
|
|
101
|
-
async def test_run_timeout_exception(self):
|
|
102
|
-
"""Test running a command that times out."""
|
|
103
|
-
mock_proc = AsyncMock()
|
|
104
|
-
|
|
105
|
-
with (
|
|
106
|
-
patch("asyncio.create_subprocess_shell", return_value=mock_proc),
|
|
107
|
-
patch("asyncio.wait_for", side_effect=TimeoutError()),
|
|
108
|
-
pytest.raises(asyncio.TimeoutError),
|
|
109
|
-
):
|
|
110
|
-
await run("sleep infinity", timeout=0.1)
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
class TestMaybeTruncate:
|
|
114
|
-
"""Tests for the maybe_truncate function."""
|
|
115
|
-
|
|
116
|
-
def test_maybe_truncate_short_text(self):
|
|
117
|
-
"""Test that short text is not truncated."""
|
|
118
|
-
text = "This is a short text"
|
|
119
|
-
result = maybe_truncate(text)
|
|
120
|
-
assert result == text
|
|
121
|
-
|
|
122
|
-
def test_maybe_truncate_long_text_default(self):
|
|
123
|
-
"""Test that long text is truncated with default limit."""
|
|
124
|
-
text = "x" * 30000 # Much longer than default limit
|
|
125
|
-
result = maybe_truncate(text)
|
|
126
|
-
|
|
127
|
-
assert len(result) < len(text)
|
|
128
|
-
assert result.endswith("... (truncated)")
|
|
129
|
-
assert len(result) == 20480 + len("... (truncated)")
|
|
130
|
-
|
|
131
|
-
def test_maybe_truncate_custom_limit(self):
|
|
132
|
-
"""Test truncation with custom limit."""
|
|
133
|
-
text = "abcdefghijklmnopqrstuvwxyz"
|
|
134
|
-
result = maybe_truncate(text, max_length=10)
|
|
135
|
-
|
|
136
|
-
assert result == "abcdefghij... (truncated)"
|
|
137
|
-
|
|
138
|
-
def test_maybe_truncate_exact_limit(self):
|
|
139
|
-
"""Test text exactly at limit is not truncated."""
|
|
140
|
-
text = "x" * 100
|
|
141
|
-
result = maybe_truncate(text, max_length=100)
|
|
142
|
-
|
|
143
|
-
assert result == text
|
|
144
|
-
|
|
145
|
-
def test_maybe_truncate_empty_string(self):
|
|
146
|
-
"""Test empty string handling."""
|
|
147
|
-
result = maybe_truncate("")
|
|
148
|
-
assert result == ""
|
|
149
|
-
|
|
150
|
-
def test_maybe_truncate_unicode(self):
|
|
151
|
-
"""Test truncation with unicode characters."""
|
|
152
|
-
text = "🎉" * 5000
|
|
153
|
-
result = maybe_truncate(text, max_length=10)
|
|
154
|
-
|
|
155
|
-
assert len(result) > 10 # Because of "... (truncated)" suffix
|
|
156
|
-
assert result.endswith("... (truncated)")
|
|
1
|
+
"""Tests for tools utils."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from unittest.mock import AsyncMock, patch
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
|
|
10
|
+
from hud.tools.utils import maybe_truncate, run
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TestRun:
    """Exercise ``run`` against patched asyncio subprocess factories."""

    @staticmethod
    def _fake_process(returncode: int, out: bytes, err: bytes) -> AsyncMock:
        """Build a stand-in process whose ``communicate`` yields ``(out, err)``."""
        process = AsyncMock()
        process.returncode = returncode
        process.communicate = AsyncMock(return_value=(out, err))
        return process

    @pytest.mark.asyncio
    async def test_run_string_command_success(self):
        """A string command is dispatched to the shell factory and succeeds."""
        process = self._fake_process(0, b"output", b"")

        with patch("asyncio.create_subprocess_shell", return_value=process) as shell_factory:
            code, out, err = await run("echo test")

            assert (code, out, err) == (0, "output", "")
            shell_factory.assert_called_once()

    @pytest.mark.asyncio
    async def test_run_list_command_success(self):
        """A list command is dispatched to the exec factory, one argument per item."""
        process = self._fake_process(0, b"hello world", b"")

        with patch("asyncio.create_subprocess_exec", return_value=process) as exec_factory:
            code, out, err = await run(["echo", "hello", "world"])

            assert (code, out, err) == (0, "hello world", "")
            # No input was given, so stdin must not be piped.
            expected_kwargs = {
                "stdin": None,
                "stdout": asyncio.subprocess.PIPE,
                "stderr": asyncio.subprocess.PIPE,
            }
            exec_factory.assert_called_once_with("echo", "hello", "world", **expected_kwargs)

    @pytest.mark.asyncio
    async def test_run_with_input(self):
        """Text passed as ``input`` reaches ``communicate`` as bytes."""
        process = self._fake_process(0, b"processed", b"")

        with patch("asyncio.create_subprocess_shell", return_value=process):
            code, out, _err = await run("cat", input="test input")

            assert (code, out) == (0, "processed")
            process.communicate.assert_called_once_with(input=b"test input")

    @pytest.mark.asyncio
    async def test_run_with_error(self):
        """A non-zero exit code and stderr text are passed through."""
        process = self._fake_process(1, b"", b"error message")

        with patch("asyncio.create_subprocess_shell", return_value=process):
            code, out, err = await run("false")

            assert (code, out, err) == (1, "", "error message")

    @pytest.mark.asyncio
    async def test_run_with_timeout(self):
        """A custom timeout is forwarded to ``asyncio.wait_for``."""
        process = self._fake_process(0, b"done", b"")

        with patch("asyncio.create_subprocess_shell", return_value=process):
            with patch("asyncio.wait_for", return_value=(b"done", b"")) as wait_for_mock:
                _code, _out, _err = await run("sleep 1", timeout=5.0)

                # The timeout must arrive as a keyword argument.
                wait_for_mock.assert_called_once()
                assert wait_for_mock.call_args.kwargs["timeout"] == 5.0

    @pytest.mark.asyncio
    async def test_run_timeout_exception(self):
        """A timeout raised by ``asyncio.wait_for`` propagates out of ``run``."""
        process = AsyncMock()

        # NOTE: the builtin TimeoutError raised here only satisfies
        # pytest.raises(asyncio.TimeoutError) where the two names are the
        # same class (Python 3.11+).
        with patch("asyncio.create_subprocess_shell", return_value=process):
            with patch("asyncio.wait_for", side_effect=TimeoutError()):
                with pytest.raises(asyncio.TimeoutError):
                    await run("sleep infinity", timeout=0.1)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class TestMaybeTruncate:
    """Check ``maybe_truncate`` below, at and above its length limit."""

    # Marker the function appends to shortened text.
    SUFFIX = "... (truncated)"

    def test_maybe_truncate_short_text(self):
        """Text under the default limit comes back unchanged."""
        original = "This is a short text"
        assert maybe_truncate(original) == original

    def test_maybe_truncate_long_text_default(self):
        """Text over the default limit is cut to 20480 characters plus the suffix."""
        original = "x" * 30000  # Much longer than default limit
        shortened = maybe_truncate(original)

        assert len(shortened) < len(original)
        assert shortened.endswith(self.SUFFIX)
        assert len(shortened) == 20480 + len(self.SUFFIX)

    def test_maybe_truncate_custom_limit(self):
        """A caller-supplied ``max_length`` decides where the cut happens."""
        shortened = maybe_truncate("abcdefghijklmnopqrstuvwxyz", max_length=10)

        assert shortened == "abcdefghij... (truncated)"

    def test_maybe_truncate_exact_limit(self):
        """Text whose length equals the limit is left alone."""
        original = "x" * 100

        assert maybe_truncate(original, max_length=100) == original

    def test_maybe_truncate_empty_string(self):
        """The empty string passes through untouched."""
        assert maybe_truncate("") == ""

    def test_maybe_truncate_unicode(self):
        """Non-ASCII text is truncated and suffixed like any other text."""
        shortened = maybe_truncate("🎉" * 5000, max_length=10)

        assert len(shortened) > 10  # Because of "... (truncated)" suffix
        assert shortened.endswith(self.SUFFIX)
|
hud/tools/types.py
CHANGED
|
@@ -1,72 +1,72 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from typing import Any
|
|
4
|
-
|
|
5
|
-
from mcp.types import ContentBlock, ImageContent, TextContent
|
|
6
|
-
from pydantic import BaseModel, ConfigDict, Field
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class EvaluationResult(BaseModel):
|
|
10
|
-
"""Standard evaluation result format."""
|
|
11
|
-
|
|
12
|
-
reward: float = Field(default=0.0, description="Usually a value between 0.0 and 1.0")
|
|
13
|
-
done: bool = Field(default=False, description="Whether the task/episode is complete")
|
|
14
|
-
content: str | None = Field(default=None, description="Additional information")
|
|
15
|
-
info: dict[str, Any] = Field(default_factory=dict, description="Additional information")
|
|
16
|
-
isError: bool = Field(default=False, description="Whether the evaluation failed")
|
|
17
|
-
|
|
18
|
-
model_config = ConfigDict(extra="allow")
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class ContentResult(BaseModel):
|
|
22
|
-
"""Represents the intermediate result of a tool execution.
|
|
23
|
-
|
|
24
|
-
Often useful for tools that need to return multiple types of content.
|
|
25
|
-
"""
|
|
26
|
-
|
|
27
|
-
output: str | None = Field(default=None, description="Output text")
|
|
28
|
-
error: str | None = Field(default=None, description="Error message")
|
|
29
|
-
base64_image: str | None = Field(default=None, description="Base64-encoded image")
|
|
30
|
-
system: str | None = Field(default=None, description="System message")
|
|
31
|
-
|
|
32
|
-
def __add__(self, other: ContentResult) -> ContentResult:
|
|
33
|
-
def combine_fields(
|
|
34
|
-
field: str | None, other_field: str | None, concatenate: bool = True
|
|
35
|
-
) -> str | None:
|
|
36
|
-
if field and other_field:
|
|
37
|
-
if concatenate:
|
|
38
|
-
return field + other_field
|
|
39
|
-
raise ValueError("Cannot combine tool results")
|
|
40
|
-
return field or other_field
|
|
41
|
-
|
|
42
|
-
return ContentResult(
|
|
43
|
-
output=combine_fields(self.output, other.output),
|
|
44
|
-
error=combine_fields(self.error, other.error),
|
|
45
|
-
base64_image=combine_fields(self.base64_image, other.base64_image, False),
|
|
46
|
-
system=combine_fields(self.system, other.system),
|
|
47
|
-
)
|
|
48
|
-
|
|
49
|
-
def to_content_blocks(self) -> list[ContentBlock]:
|
|
50
|
-
"""Helper method to convert ContentResult to content blocks.
|
|
51
|
-
|
|
52
|
-
Subclasses can use this when they work with ContentResult internally.
|
|
53
|
-
|
|
54
|
-
Args:
|
|
55
|
-
result: ContentResult to convert
|
|
56
|
-
|
|
57
|
-
Returns:
|
|
58
|
-
List of ContentBlock
|
|
59
|
-
"""
|
|
60
|
-
blocks: list[ContentBlock] = []
|
|
61
|
-
|
|
62
|
-
if self.output:
|
|
63
|
-
blocks.append(TextContent(text=self.output, type="text"))
|
|
64
|
-
if self.error:
|
|
65
|
-
blocks.append(TextContent(text=self.error, type="text"))
|
|
66
|
-
if self.base64_image:
|
|
67
|
-
blocks.append(ImageContent(data=self.base64_image, mimeType="image/png", type="image"))
|
|
68
|
-
return blocks
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
class ToolError(Exception):
|
|
72
|
-
"""An error raised by a tool."""
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from mcp.types import ContentBlock, ImageContent, TextContent
|
|
6
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class EvaluationResult(BaseModel):
    """Common shape for the outcome of an evaluation.

    Every field has a default, and keys beyond the declared ones are
    accepted because the model is configured with ``extra="allow"``.
    """

    # Accept undeclared keys in addition to the fields below.
    model_config = ConfigDict(extra="allow")

    # Score for the evaluation.
    reward: float = Field(default=0.0, description="Usually a value between 0.0 and 1.0")
    # Completion flag for the task/episode.
    done: bool = Field(default=False, description="Whether the task/episode is complete")
    # Optional free-form text.
    content: str | None = Field(default=None, description="Additional information")
    # Structured extras; a fresh dict is created per instance.
    info: dict[str, Any] = Field(default_factory=dict, description="Additional information")
    # Failure flag for the evaluation itself.
    isError: bool = Field(default=False, description="Whether the evaluation failed")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ContentResult(BaseModel):
    """Intermediate result of a tool execution.

    Bundles the optional pieces a tool may produce (text output, error text,
    a base64 image, a system message) so that results can be merged with
    ``+`` and converted to content blocks.
    """

    output: str | None = Field(default=None, description="Output text")
    error: str | None = Field(default=None, description="Error message")
    base64_image: str | None = Field(default=None, description="Base64-encoded image")
    system: str | None = Field(default=None, description="System message")

    def __add__(self, other: ContentResult) -> ContentResult:
        """Merge two results field by field into a new ``ContentResult``.

        Text fields set on both sides are concatenated (left, then right).
        A field set on only one side is carried over as is.

        Raises:
            ValueError: If both results carry an image.
        """

        def combine_fields(
            field: str | None, other_field: str | None, concatenate: bool = True
        ) -> str | None:
            # At most one side has a value: keep whichever is set.
            if not (field and other_field):
                return field or other_field
            # Both sides are set but this field cannot be joined.
            if not concatenate:
                raise ValueError("Cannot combine tool results")
            return field + other_field

        merged = {
            "output": combine_fields(self.output, other.output),
            "error": combine_fields(self.error, other.error),
            # Two images cannot be concatenated into one.
            "base64_image": combine_fields(self.base64_image, other.base64_image, False),
            "system": combine_fields(self.system, other.system),
        }
        return ContentResult(**merged)

    def to_content_blocks(self) -> list[ContentBlock]:
        """Convert this result into a list of content blocks.

        Subclasses can use this when they work with ContentResult internally.

        Emits a text block for ``output``, then one for ``error``, then an
        image block (``image/png``) for ``base64_image``; unset or empty
        fields are skipped. The ``system`` field is not converted.

        Returns:
            List of ContentBlock
        """
        blocks: list[ContentBlock] = [
            TextContent(text=text, type="text") for text in (self.output, self.error) if text
        ]
        if self.base64_image:
            image = ImageContent(data=self.base64_image, mimeType="image/png", type="image")
            blocks.append(image)
        return blocks
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class ToolError(Exception):
    """Exception type for errors raised by a tool."""
|
hud/tools/utils.py
CHANGED
|
@@ -1,50 +1,50 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
import subprocess
|
|
5
|
-
|
|
6
|
-
# Default timeout for running commands
|
|
7
|
-
DEFAULT_TIMEOUT = 10.0
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
async def run(
|
|
11
|
-
command: str | list[str],
|
|
12
|
-
input: str | None = None,
|
|
13
|
-
timeout: float | None = DEFAULT_TIMEOUT, # noqa: ASYNC109
|
|
14
|
-
) -> tuple[int, str, str]:
|
|
15
|
-
"""
|
|
16
|
-
Run a command asynchronously and return the result.
|
|
17
|
-
|
|
18
|
-
Args:
|
|
19
|
-
command: Command to run (string or list of strings)
|
|
20
|
-
input: Optional input to send to stdin
|
|
21
|
-
timeout: Timeout in seconds
|
|
22
|
-
|
|
23
|
-
Returns:
|
|
24
|
-
Tuple of (return_code, stdout, stderr)
|
|
25
|
-
"""
|
|
26
|
-
if isinstance(command, str):
|
|
27
|
-
proc = await asyncio.create_subprocess_shell(
|
|
28
|
-
command,
|
|
29
|
-
stdin=subprocess.PIPE if input else None,
|
|
30
|
-
stdout=subprocess.PIPE,
|
|
31
|
-
stderr=subprocess.PIPE,
|
|
32
|
-
)
|
|
33
|
-
else:
|
|
34
|
-
proc = await asyncio.create_subprocess_exec(
|
|
35
|
-
*command,
|
|
36
|
-
stdin=subprocess.PIPE if input else None,
|
|
37
|
-
stdout=subprocess.PIPE,
|
|
38
|
-
stderr=subprocess.PIPE,
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
stdout, stderr = await asyncio.wait_for(
|
|
42
|
-
proc.communicate(input=input.encode() if input else None), timeout=timeout
|
|
43
|
-
)
|
|
44
|
-
|
|
45
|
-
return proc.returncode or 0, stdout.decode(), stderr.decode()
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def maybe_truncate(text: str, max_length: int = 2048 * 10) -> str:
|
|
49
|
-
"""Truncate output if too long."""
|
|
50
|
-
return text if len(text) <= max_length else text[:max_length] + "... (truncated)"
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import subprocess
|
|
5
|
+
|
|
6
|
+
# Default timeout for running commands
|
|
7
|
+
DEFAULT_TIMEOUT = 10.0
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
async def run(
|
|
11
|
+
command: str | list[str],
|
|
12
|
+
input: str | None = None,
|
|
13
|
+
timeout: float | None = DEFAULT_TIMEOUT, # noqa: ASYNC109
|
|
14
|
+
) -> tuple[int, str, str]:
|
|
15
|
+
"""
|
|
16
|
+
Run a command asynchronously and return the result.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
command: Command to run (string or list of strings)
|
|
20
|
+
input: Optional input to send to stdin
|
|
21
|
+
timeout: Timeout in seconds
|
|
22
|
+
|
|
23
|
+
Returns:
|
|
24
|
+
Tuple of (return_code, stdout, stderr)
|
|
25
|
+
"""
|
|
26
|
+
if isinstance(command, str):
|
|
27
|
+
proc = await asyncio.create_subprocess_shell(
|
|
28
|
+
command,
|
|
29
|
+
stdin=subprocess.PIPE if input else None,
|
|
30
|
+
stdout=subprocess.PIPE,
|
|
31
|
+
stderr=subprocess.PIPE,
|
|
32
|
+
)
|
|
33
|
+
else:
|
|
34
|
+
proc = await asyncio.create_subprocess_exec(
|
|
35
|
+
*command,
|
|
36
|
+
stdin=subprocess.PIPE if input else None,
|
|
37
|
+
stdout=subprocess.PIPE,
|
|
38
|
+
stderr=subprocess.PIPE,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
stdout, stderr = await asyncio.wait_for(
|
|
42
|
+
proc.communicate(input=input.encode() if input else None), timeout=timeout
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
return proc.returncode or 0, stdout.decode(), stderr.decode()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def maybe_truncate(text: str, max_length: int = 2048 * 10) -> str:
    """Return ``text``, cut to ``max_length`` characters plus a marker if longer."""
    if len(text) <= max_length:
        return text
    return f"{text[:max_length]}... (truncated)"
|