hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +11 -5
- hud/agents/base.py +220 -500
- hud/agents/claude.py +200 -240
- hud/agents/gemini.py +275 -0
- hud/agents/gemini_cua.py +335 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +41 -36
- hud/agents/openai.py +291 -292
- hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
- hud/agents/operator.py +211 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +379 -210
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +376 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/cli/__init__.py +461 -545
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +664 -110
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +882 -734
- hud/cli/eval.py +782 -668
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/push.py +29 -11
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +108 -6
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +69 -0
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +40 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +327 -0
- hud/datasets/runner.py +192 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +50 -0
- hud/environment/connection.py +206 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +109 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +694 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +112 -0
- hud/environment/scenarios.py +493 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +218 -0
- hud/environment/tests/test_environment.py +161 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +201 -0
- hud/environment/tests/test_scenarios.py +280 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +674 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +185 -0
- hud/eval/manager.py +466 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +340 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +145 -0
- hud/eval/types.py +63 -0
- hud/eval/utils.py +183 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +151 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +158 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +16 -2
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +4 -0
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +167 -57
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +61 -3
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.1.dist-info/METADATA +264 -0
- hud_python-0.5.1.dist-info/RECORD +299 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,596 @@
|
|
|
1
|
+
"""Tests for shell tool."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from unittest.mock import AsyncMock, MagicMock, patch
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from hud.tools.shell import (
|
|
10
|
+
ShellCallOutcome,
|
|
11
|
+
ShellCommandOutput,
|
|
12
|
+
ShellResult,
|
|
13
|
+
ShellTool,
|
|
14
|
+
_BashSession,
|
|
15
|
+
)
|
|
16
|
+
from hud.tools.types import ToolError
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TestShellCallOutcome:
    """Serialization checks for ShellCallOutcome.to_dict()."""

    def test_to_dict_exit(self):
        """An exit outcome with code 0 serializes both its type and exit code."""
        serialized = ShellCallOutcome(type="exit", exit_code=0).to_dict()
        assert serialized == {"type": "exit", "exit_code": 0}

    def test_to_dict_exit_with_error_code(self):
        """A non-zero exit code is carried through unchanged."""
        serialized = ShellCallOutcome(type="exit", exit_code=1).to_dict()
        assert serialized == {"type": "exit", "exit_code": 1}

    def test_to_dict_timeout(self):
        """A timeout outcome serializes to its type only (no exit_code key)."""
        serialized = ShellCallOutcome(type="timeout").to_dict()
        assert serialized == {"type": "timeout"}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class TestShellCommandOutput:
    """Serialization checks for ShellCommandOutput.to_dict()."""

    def test_to_dict(self):
        """stdout, stderr and the nested outcome all appear in the dict form."""
        exit_ok = ShellCallOutcome(type="exit", exit_code=0)
        serialized = ShellCommandOutput(
            stdout="hello", stderr="", outcome=exit_ok
        ).to_dict()

        # The outcome is expected in its own serialized (dict) form.
        expected_fields = {
            "stdout": "hello",
            "stderr": "",
            "outcome": {"type": "exit", "exit_code": 0},
        }
        for field_name, expected_value in expected_fields.items():
            assert serialized[field_name] == expected_value
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class TestShellResult:
    """Serialization checks for ShellResult.to_dict()."""

    def test_to_dict_without_max_output_length(self):
        """max_output_length is left out of the dict when it was never set."""
        single_entry = ShellCommandOutput(
            stdout="test",
            stderr="",
            outcome=ShellCallOutcome(type="exit", exit_code=0),
        )
        serialized = ShellResult(output=[single_entry]).to_dict()

        assert "output" in serialized
        assert len(serialized["output"]) == 1
        assert "max_output_length" not in serialized

    def test_to_dict_with_max_output_length(self):
        """An explicit max_output_length is included in the dict."""
        single_entry = ShellCommandOutput(
            stdout="test",
            stderr="",
            outcome=ShellCallOutcome(type="exit", exit_code=0),
        )
        shell_result = ShellResult(output=[single_entry], max_output_length=1024)

        assert shell_result.to_dict()["max_output_length"] == 1024
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class TestBashSession:
    """Tests for _BashSession start/stop, liveness and command execution."""

    @staticmethod
    def _started_session(returncode, timed_out=None):
        """Return ``(session, process)`` for a session flagged as started.

        The mock process reports ``returncode``. ``_timed_out`` is written
        only when ``timed_out`` is given; otherwise the attribute is left
        exactly as ``_BashSession()`` created it.
        """
        bash = _BashSession()
        bash._started = True
        if timed_out is not None:
            bash._timed_out = timed_out
        proc = MagicMock()
        proc.returncode = returncode
        bash._process = proc
        return bash, proc

    @staticmethod
    def _scripted_process(stdout_text, stderr_text):
        """Build a still-running mock process whose stream buffers decode to fixed text."""
        proc = MagicMock()
        proc.returncode = None
        proc.stdin = MagicMock()
        proc.stdin.write = MagicMock()
        proc.stdin.drain = AsyncMock()

        # These tests feed output to run() through each stream's private
        # ``_buffer`` mock (its decode() result), so that is what gets wired.
        for stream_name, decoded in (("stdout", stdout_text), ("stderr", stderr_text)):
            raw_buffer = MagicMock()
            raw_buffer.decode.return_value = decoded
            raw_buffer.clear = MagicMock()
            stream = MagicMock()
            stream._buffer = raw_buffer
            setattr(proc, stream_name, stream)
        return proc

    def test_init(self):
        """A fresh session is neither started nor timed out."""
        bash = _BashSession()
        assert bash._started is False
        assert bash._timed_out is False

    @pytest.mark.asyncio
    async def test_start(self):
        """start() spawns the subprocess once and records it on the session."""
        bash = _BashSession()
        spawned = MagicMock()

        with patch("asyncio.create_subprocess_shell") as spawn:
            spawn.return_value = spawned

            await bash.start()

            assert bash._started is True
            assert bash._process == spawned
            spawn.assert_called_once()

    @pytest.mark.asyncio
    async def test_start_already_started(self):
        """start() on an already-started session does not spawn again."""
        bash = _BashSession()
        bash._started = True

        with patch("asyncio.create_subprocess_shell") as spawn:
            await bash.start()
            spawn.assert_not_called()

    def test_stop_not_started(self):
        """stop() on a never-started session must not raise."""
        _BashSession().stop()

    def test_stop_already_exited(self):
        """stop() does not terminate a process that already has a return code."""
        bash, proc = self._started_session(returncode=0)

        bash.stop()

        proc.terminate.assert_not_called()

    def test_stop_running(self):
        """stop() terminates a process that is still running."""
        bash, proc = self._started_session(returncode=None)

        bash.stop()

        proc.terminate.assert_called_once()

    def test_is_alive_not_started(self):
        """A session that was never started is not alive."""
        assert _BashSession().is_alive() is False

    def test_is_alive_running(self):
        """Started, not timed out, process still running -> alive."""
        bash, _ = self._started_session(returncode=None, timed_out=False)
        assert bash.is_alive() is True

    def test_is_alive_timed_out(self):
        """A timed-out session is not alive even while its process runs."""
        bash, _ = self._started_session(returncode=None, timed_out=True)
        assert bash.is_alive() is False

    def test_is_alive_process_exited(self):
        """A session whose process has exited is not alive."""
        bash, _ = self._started_session(returncode=0, timed_out=False)
        assert bash.is_alive() is False

    @pytest.mark.asyncio
    async def test_run_not_started(self):
        """run() before start() raises ToolError with an explanatory message."""
        bash = _BashSession()

        with pytest.raises(ToolError) as raised:
            await bash.run("echo test")

        assert "Session has not started" in str(raised.value)

    @pytest.mark.asyncio
    async def test_run_success(self):
        """run() separates command output from the ``<<exit>>`` marker and code."""
        bash = _BashSession()
        bash._started = True
        bash._process = self._scripted_process("Hello World\n<<exit>>0\n", "")

        # Replace asyncio.sleep so the test does not actually wait.
        with patch("asyncio.sleep", new_callable=AsyncMock):
            command_output = await bash.run("echo Hello World")

        assert command_output.stdout == "Hello World"
        assert command_output.stderr == ""
        assert command_output.outcome.type == "exit"
        assert command_output.outcome.exit_code == 0

    @pytest.mark.asyncio
    async def test_run_with_exit_code(self):
        """A non-zero code after the ``<<exit>>`` marker is reported as exit_code."""
        bash = _BashSession()
        bash._started = True
        bash._process = self._scripted_process("<<exit>>127\n", "command not found")

        with patch("asyncio.sleep", new_callable=AsyncMock):
            command_output = await bash.run("nonexistent_command")

        assert command_output.outcome.type == "exit"
        assert command_output.outcome.exit_code == 127
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
class TestShellTool:
    """Tests for ShellTool: validation, session reuse and auto-restart."""

    # Patch target for the session class as looked up by the shell module.
    _SESSION_PATH = "hud.tools.shell._BashSession"

    @staticmethod
    def _command_output(stdout, stderr="", exit_code=0):
        """Build a ShellCommandOutput with an ``exit`` outcome."""
        return ShellCommandOutput(
            stdout=stdout,
            stderr=stderr,
            outcome=ShellCallOutcome(type="exit", exit_code=exit_code),
        )

    @staticmethod
    def _live_session(run):
        """Build a mock session that reports alive and uses ``run`` as its run()."""
        fake = MagicMock()
        fake.is_alive.return_value = True
        fake.run = run
        fake.start = AsyncMock()
        return fake

    @staticmethod
    def _dead_session(timed_out, returncode):
        """Build a mock session that reports not alive, with the given end state."""
        stale = MagicMock()
        stale._timed_out = timed_out
        stale._process = MagicMock()
        stale._process.returncode = returncode
        stale.is_alive.return_value = False
        stale.stop = MagicMock()
        return stale

    def test_init(self):
        """A new tool holds no session."""
        assert ShellTool()._session is None

    @pytest.mark.asyncio
    async def test_call_no_commands(self):
        """Calling with no arguments raises ToolError."""
        shell = ShellTool()

        with pytest.raises(ToolError) as raised:
            await shell()

        assert "No commands provided" in str(raised.value)

    @pytest.mark.asyncio
    async def test_call_empty_commands(self):
        """Calling with an empty commands list raises ToolError."""
        shell = ShellTool()

        with pytest.raises(ToolError) as raised:
            await shell(commands=[])

        assert "No commands provided" in str(raised.value)

    @pytest.mark.asyncio
    async def test_call_with_command(self):
        """One command starts a session, runs once, and yields one output."""
        shell = ShellTool()
        fake = self._live_session(
            AsyncMock(return_value=self._command_output("test output"))
        )

        with patch(self._SESSION_PATH, return_value=fake):
            outcome = await shell(commands=["echo test"])

        assert isinstance(outcome, ShellResult)
        assert len(outcome.output) == 1
        assert outcome.output[0].stdout == "test output"
        fake.start.assert_called_once()
        # With no timeout_ms given, run() receives None as its second argument.
        fake.run.assert_called_once_with("echo test", None)

    @pytest.mark.asyncio
    async def test_call_with_timeout(self):
        """timeout_ms is forwarded to run(); max_output_length stays None."""
        shell = ShellTool()
        fake = self._live_session(
            AsyncMock(return_value=self._command_output("output"))
        )

        with patch(self._SESSION_PATH, return_value=fake):
            outcome = await shell(commands=["sleep 1"], timeout_ms=5000)

        fake.run.assert_called_once_with("sleep 1", 5000)
        assert outcome.max_output_length is None

    @pytest.mark.asyncio
    async def test_call_with_max_output_length(self):
        """max_output_length is copied onto the returned result."""
        shell = ShellTool()
        fake = self._live_session(
            AsyncMock(return_value=self._command_output("output"))
        )

        with patch(self._SESSION_PATH, return_value=fake):
            outcome = await shell(commands=["echo test"], max_output_length=2048)

        assert outcome.max_output_length == 2048

    @pytest.mark.asyncio
    async def test_call_multiple_commands(self):
        """Each command yields its own output entry, in order."""
        shell = ShellTool()
        scripted_run = AsyncMock(
            side_effect=[
                self._command_output("first"),
                self._command_output("second"),
            ]
        )
        fake = self._live_session(scripted_run)

        with patch(self._SESSION_PATH, return_value=fake):
            outcome = await shell(commands=["echo first", "echo second"])

        assert len(outcome.output) == 2
        assert outcome.output[0].stdout == "first"
        assert outcome.output[1].stdout == "second"

    @pytest.mark.asyncio
    async def test_call_reuses_session(self):
        """Two consecutive calls construct the session class only once."""
        shell = ShellTool()
        fake = self._live_session(
            AsyncMock(return_value=self._command_output("output"))
        )

        with patch(self._SESSION_PATH, return_value=fake) as session_factory:
            await shell(commands=["echo first"])
            await shell(commands=["echo second"])

            assert session_factory.call_count == 1

    @pytest.mark.asyncio
    async def test_auto_restart_on_timeout(self):
        """A timed-out session is stopped, replaced, and reported in stderr."""
        shell = ShellTool()
        stale = self._dead_session(timed_out=True, returncode=None)
        shell._session = stale

        replacement = self._live_session(
            AsyncMock(return_value=self._command_output("output"))
        )

        with patch(self._SESSION_PATH, return_value=replacement):
            outcome = await shell(commands=["echo test"])

        stale.stop.assert_called_once()
        replacement.start.assert_called_once()
        # The restart notice is surfaced through the first command's stderr.
        first_stderr = outcome.output[0].stderr
        assert "timed out" in first_stderr
        assert "auto-restarted" in first_stderr

    @pytest.mark.asyncio
    async def test_auto_restart_on_exit(self):
        """The restart notice names the exit code of the session that exited."""
        shell = ShellTool()
        shell._session = self._dead_session(timed_out=False, returncode=1)

        replacement = self._live_session(
            AsyncMock(return_value=self._command_output("output"))
        )

        with patch(self._SESSION_PATH, return_value=replacement):
            outcome = await shell(commands=["echo test"])

        assert "exited with code 1" in outcome.output[0].stderr

    @pytest.mark.asyncio
    async def test_command_execution_error(self):
        """An exception from run() becomes an output with exit code 1."""
        shell = ShellTool()
        fake = self._live_session(AsyncMock(side_effect=Exception("Test error")))

        with patch(self._SESSION_PATH, return_value=fake):
            outcome = await shell(commands=["failing command"])

        assert len(outcome.output) == 1
        assert "Test error" in outcome.output[0].stderr
        assert outcome.output[0].outcome.exit_code == 1

    @pytest.mark.asyncio
    async def test_restart_message_added_to_existing_stderr(self):
        """The restart notice and the command's own stderr are both kept."""
        shell = ShellTool()
        shell._session = self._dead_session(timed_out=True, returncode=None)

        replacement = self._live_session(
            AsyncMock(
                return_value=self._command_output(
                    "output", stderr="original error", exit_code=1
                )
            )
        )

        with patch(self._SESSION_PATH, return_value=replacement):
            outcome = await shell(commands=["echo test"])

        combined_stderr = outcome.output[0].stderr
        assert "timed out" in combined_stderr
        assert "original error" in combined_stderr

    @pytest.mark.asyncio
    async def test_session_dies_mid_execution(self):
        """Both commands still yield output when liveness drops between them."""
        shell = ShellTool()

        flaky = MagicMock()
        # Scripted liveness answers: alive, then dead, then alive again.
        flaky.is_alive.side_effect = [True, False, True]
        flaky.run = AsyncMock(
            side_effect=[
                self._command_output("first"),
                self._command_output("second"),
            ]
        )
        flaky.start = AsyncMock()
        flaky._timed_out = True
        flaky._process = MagicMock()
        flaky._process.returncode = None
        flaky.stop = MagicMock()

        with patch(self._SESSION_PATH, return_value=flaky):
            outcome = await shell(commands=["echo first", "echo second"])

        assert len(outcome.output) == 2
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from mcp.types import TextContent
|
|
5
|
+
|
|
6
|
+
from hud.tools.submit import SubmitTool, get_submission, set_submission
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@pytest.fixture(autouse=True)
|
|
10
|
+
def reset_submission():
|
|
11
|
+
"""Reset submission before each test."""
|
|
12
|
+
set_submission(None)
|
|
13
|
+
yield
|
|
14
|
+
set_submission(None)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_set_and_get_submission():
|
|
18
|
+
"""Test setting and getting submission value."""
|
|
19
|
+
assert get_submission() is None
|
|
20
|
+
|
|
21
|
+
set_submission("test value")
|
|
22
|
+
assert get_submission() == "test value"
|
|
23
|
+
|
|
24
|
+
set_submission("another value")
|
|
25
|
+
assert get_submission() == "another value"
|
|
26
|
+
|
|
27
|
+
set_submission(None)
|
|
28
|
+
assert get_submission() is None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@pytest.mark.asyncio
|
|
32
|
+
async def test_submit_tool_with_response():
|
|
33
|
+
"""Test SubmitTool with a response string."""
|
|
34
|
+
tool = SubmitTool()
|
|
35
|
+
|
|
36
|
+
result = await tool(response="Test response")
|
|
37
|
+
|
|
38
|
+
assert get_submission() == "Test response"
|
|
39
|
+
assert len(result) == 1
|
|
40
|
+
assert isinstance(result[0], TextContent)
|
|
41
|
+
assert result[0].text == "Test response"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@pytest.mark.asyncio
|
|
45
|
+
async def test_submit_tool_with_none():
|
|
46
|
+
"""Test SubmitTool with None response."""
|
|
47
|
+
tool = SubmitTool()
|
|
48
|
+
|
|
49
|
+
result = await tool(response=None)
|
|
50
|
+
|
|
51
|
+
assert get_submission() is None
|
|
52
|
+
assert len(result) == 0
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@pytest.mark.asyncio
|
|
56
|
+
async def test_submit_tool_with_empty_string():
|
|
57
|
+
"""Test SubmitTool with empty string."""
|
|
58
|
+
tool = SubmitTool()
|
|
59
|
+
|
|
60
|
+
result = await tool(response="")
|
|
61
|
+
|
|
62
|
+
assert get_submission() == ""
|
|
63
|
+
assert len(result) == 0
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@pytest.mark.asyncio
|
|
67
|
+
async def test_submit_tool_overwrite():
|
|
68
|
+
"""Test that submitting overwrites previous submission."""
|
|
69
|
+
tool = SubmitTool()
|
|
70
|
+
|
|
71
|
+
await tool(response="First submission")
|
|
72
|
+
assert get_submission() == "First submission"
|
|
73
|
+
|
|
74
|
+
await tool(response="Second submission")
|
|
75
|
+
assert get_submission() == "Second submission"
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@pytest.mark.asyncio
|
|
79
|
+
async def test_submit_tool_properties():
|
|
80
|
+
"""Test SubmitTool properties."""
|
|
81
|
+
tool = SubmitTool()
|
|
82
|
+
|
|
83
|
+
assert tool.name == "response"
|
|
84
|
+
assert tool.title == "Submit Tool"
|
|
85
|
+
assert "final response" in tool.description.lower()
|