hud-python 0.4.51__py3-none-any.whl → 0.4.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +13 -1
- hud/agents/base.py +14 -3
- hud/agents/lite_llm.py +1 -1
- hud/agents/openai_chat_generic.py +15 -3
- hud/agents/tests/test_base.py +9 -2
- hud/agents/tests/test_base_runtime.py +164 -0
- hud/cli/__init__.py +18 -25
- hud/cli/build.py +35 -27
- hud/cli/dev.py +11 -29
- hud/cli/eval.py +114 -145
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +26 -3
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +134 -0
- hud/cli/tests/test_eval.py +4 -0
- hud/cli/tests/test_mcp_server.py +8 -7
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/utils/docker.py +120 -1
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +257 -0
- hud/clients/base.py +1 -1
- hud/clients/mcp_use.py +3 -1
- hud/datasets/parallel.py +2 -2
- hud/datasets/runner.py +85 -24
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_runner.py +106 -0
- hud/datasets/tests/test_utils.py +228 -0
- hud/otel/config.py +8 -6
- hud/otel/context.py +4 -4
- hud/otel/exporters.py +231 -57
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_instrumentation.py +207 -0
- hud/rl/learner.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/shared/exceptions.py +35 -9
- hud/shared/hints.py +25 -0
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +39 -30
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +30 -6
- hud/telemetry/async_context.py +331 -0
- hud/telemetry/job.py +51 -12
- hud/telemetry/tests/test_async_context.py +242 -0
- hud/telemetry/tests/test_instrument.py +414 -0
- hud/telemetry/tests/test_job.py +609 -0
- hud/telemetry/tests/test_trace.py +184 -6
- hud/telemetry/trace.py +16 -17
- hud/tools/computer/qwen.py +4 -1
- hud/tools/computer/settings.py +2 -2
- hud/tools/executors/base.py +4 -2
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/types.py +7 -1
- hud/utils/agent_factories.py +1 -3
- hud/utils/mcp.py +1 -1
- hud/utils/task_tracking.py +223 -0
- hud/utils/tests/test_agent_factories.py +60 -0
- hud/utils/tests/test_mcp.py +4 -6
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tasks.py +187 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/METADATA +48 -48
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/RECORD +88 -47
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/WHEEL +0 -0
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/licenses/LICENSE +0 -0
hud/utils/task_tracking.py
ADDED
@@ -0,0 +1,223 @@
+"""Task tracking for async telemetry operations.
+
+This module provides infrastructure to track async tasks created during
+telemetry operations (status updates, metric logging) to ensure they
+complete before process shutdown, preventing telemetry loss.
+
+The task tracker maintains strong references to tasks and explicitly cleans
+them up when they complete via callbacks. This ensures tasks are not garbage
+collected before they finish executing.
+
+Thread Safety:
+    Uses threading.Lock (not asyncio.Lock) because done callbacks run
+    synchronously and need to modify the task set safely.
+
+Race Condition Prevention:
+    The wait_all() method uses a multi-pass approach to catch tasks that
+    are created while waiting for existing tasks to complete.
+
+This is an internal module used by async context managers and cleanup
+routines. Users typically don't interact with it directly.
+"""
+
+import asyncio
+import contextlib
+import logging
+import threading
+from collections.abc import Coroutine
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+# Module exports
+__all__ = ["TaskTracker", "track_task", "wait_all_tasks"]
+
+# Global singleton task tracker
+_global_tracker: "TaskTracker | None" = None
+
+
+class TaskTracker:
+    """Tracks async tasks to ensure completion before shutdown.
+
+    Maintains a set of tasks with thread-safe access for both async code
+    and synchronous callbacks. Tasks are automatically removed when they
+    complete via done callbacks.
+    """
+
+    def __init__(self) -> None:
+        self._tasks: set[asyncio.Task] = set()
+        # Use threading.Lock for synchronous access from done callbacks
+        self._lock = threading.Lock()
+
+    def track_task(self, coro: Coroutine[Any, Any, Any], name: str = "task") -> asyncio.Task | None:
+        """Create and track an async task.
+
+        Args:
+            coro: The coroutine to run
+            name: Descriptive name for debugging and logging
+
+        Returns:
+            The created asyncio.Task, or None if no event loop is available
+        """
+        try:
+            task = asyncio.create_task(coro, name=name)
+
+            # Add task to tracking set (thread-safe)
+            with self._lock:
+                self._tasks.add(task)
+                task_count = len(self._tasks)
+
+            # Setup cleanup callback
+            def cleanup_callback(completed_task: asyncio.Task) -> None:
+                """Remove completed task from tracking set and log failures."""
+                with self._lock:
+                    self._tasks.discard(completed_task)
+
+                # Log exceptions outside lock to avoid blocking
+                with contextlib.suppress(Exception):
+                    if not completed_task.cancelled():
+                        with contextlib.suppress(Exception):
+                            exc = completed_task.exception()
+                            if exc:
+                                logger.warning("Task '%s' failed: %s", name, exc)
+
+            task.add_done_callback(cleanup_callback)
+            logger.debug("Tracking task '%s' (total active: %d)", name, task_count)
+            return task
+
+        except RuntimeError as e:
+            # No event loop - fall back to fire_and_forget
+            logger.warning("Cannot track task '%s': %s", name, e)
+            from hud.utils.async_utils import fire_and_forget
+
+            fire_and_forget(coro, name)
+            return None
+
+    async def wait_all(self, *, timeout_seconds: float = 30.0) -> int:
+        """Wait for all tracked tasks to complete.
+
+        Uses a multi-pass approach to handle race conditions where tasks are
+        added while waiting for existing tasks to complete. This ensures that
+        status updates created near the end of execution are still waited for.
+
+        Args:
+            timeout_seconds: Maximum time to wait in seconds
+
+        Returns:
+            Number of tasks that completed
+        """
+        total_completed = 0
+        time_remaining = timeout_seconds
+        max_passes = 10  # Prevent infinite loops if tasks keep spawning
+
+        for pass_num in range(max_passes):
+            # Get snapshot of pending tasks (thread-safe)
+            with self._lock:
+                pending = [t for t in self._tasks if not t.done()]
+
+            if not pending:
+                if pass_num == 0:
+                    logger.debug("No pending tasks to wait for")
+                else:
+                    logger.debug("All tasks completed after %d passes", pass_num)
+                break
+
+            # Log progress
+            if pass_num == 0:
+                logger.info("Waiting for %d pending tasks...", len(pending))
+            else:
+                logger.debug("Pass %d: Waiting for %d tasks", pass_num + 1, len(pending))
+
+            # Wait for this batch (max 5s per pass to check for new tasks)
+            batch_timeout = min(time_remaining, 5.0) if time_remaining > 0 else 5.0
+            start_time = asyncio.get_event_loop().time()
+
+            try:
+                done, still_pending = await asyncio.wait(
+                    pending, timeout=batch_timeout, return_when=asyncio.ALL_COMPLETED
+                )
+            except Exception as e:
+                logger.error("Error waiting for tasks: %s", e)
+                break
+
+            # Update timing
+            elapsed = asyncio.get_event_loop().time() - start_time
+            time_remaining -= elapsed
+            total_completed += len(done)
+
+            # Handle timeout
+            if still_pending:
+                if time_remaining <= 0:
+                    logger.warning(
+                        "%d tasks still pending after %ss timeout - cancelling",
+                        len(still_pending),
+                        timeout_seconds,
+                    )
+                    for task in still_pending:
+                        task.cancel()
+                    break
+                # Otherwise continue to next pass
+            else:
+                # All tasks from this batch completed, check for new ones
+                with self._lock:
+                    new_pending = [t for t in self._tasks if not t.done()]
+
+                if not new_pending:
+                    # No new tasks were added - we're done
+                    break
+                # Otherwise loop to wait for the new tasks
+
+        if total_completed > 0:
+            logger.info("Completed %d tasks", total_completed)
+
+        return total_completed
+
+    def get_pending_count(self) -> int:
+        """Get number of pending tasks (thread-safe)."""
+        with self._lock:
+            return sum(1 for t in self._tasks if not t.done())
+
+
+def get_global_tracker() -> TaskTracker:
+    """Get or create the global task tracker."""
+    global _global_tracker
+    if _global_tracker is None:
+        _global_tracker = TaskTracker()
+    return _global_tracker
+
+
+def track_task(coro: Coroutine[Any, Any, Any], name: str = "task") -> asyncio.Task | None:
+    """Create and track an async task for telemetry operations.
+
+    This is a convenience function that uses the global tracker to ensure
+    the task completes before shutdown. Used internally by async context
+    managers for status updates and metric logging.
+
+    Args:
+        coro: The coroutine to track
+        name: Descriptive name for debugging
+
+    Returns:
+        The created task, or None if no event loop is available
+    """
+    tracker = get_global_tracker()
+    return tracker.track_task(coro, name)
+
+
+async def wait_all_tasks(*, timeout_seconds: float = 30.0) -> int:
+    """Wait for all tracked telemetry tasks to complete.
+
+    Ensures that all async telemetry operations (status updates, logs)
+    complete before the calling function returns, preventing telemetry loss.
+
+    Uses a multi-pass approach to handle race conditions where status updates
+    are created while waiting for other tasks to complete.
+
+    Args:
+        timeout_seconds: Maximum time to wait for tasks in seconds
+
+    Returns:
+        Number of tasks that completed
+    """
+    tracker = get_global_tracker()
+    return await tracker.wait_all(timeout_seconds=timeout_seconds)
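
To make the intended call pattern concrete, here is a minimal usage sketch of the new module; send_status_update is a made-up stand-in for a real telemetry coroutine, and only the track_task / wait_all_tasks API from the hunk above is assumed.

import asyncio

from hud.utils.task_tracking import track_task, wait_all_tasks


async def send_status_update() -> None:
    """Illustrative stand-in for a real telemetry call."""
    await asyncio.sleep(0.1)


async def main() -> None:
    # Schedule the update without awaiting it inline; the global tracker
    # holds a strong reference so the task cannot be garbage collected
    # before it finishes.
    track_task(send_status_update(), name="status-update")

    # At shutdown, drain every tracked task (multi-pass, bounded by timeout).
    completed = await wait_all_tasks(timeout_seconds=10.0)
    print(f"{completed} telemetry task(s) completed")


asyncio.run(main())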
hud/utils/tests/test_agent_factories.py
ADDED
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+from unittest.mock import MagicMock, patch
+
+
+def test_create_openai_agent():
+    from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
+    from hud.utils.agent_factories import create_openai_agent
+
+    agent = create_openai_agent(
+        api_key="test_key", model_name="test_model", completion_kwargs={"temperature": 0.5}
+    )
+    assert isinstance(agent, GenericOpenAIChatAgent)
+    assert agent.model_name == "test_model"
+    assert agent.completion_kwargs["temperature"] == 0.5
+
+
+def test_create_grounded_agent():
+    with (
+        patch("hud.utils.agent_factories.AsyncOpenAI") as mock_async_openai,
+        patch("hud.utils.agent_factories.GrounderConfig"),
+        patch("hud.utils.agent_factories.GroundedOpenAIChatAgent") as mock_agent_class,
+    ):
+        mock_agent = MagicMock()
+        mock_agent_class.return_value = mock_agent
+
+        from hud.utils.agent_factories import create_grounded_agent
+
+        agent = create_grounded_agent(
+            api_key="test_key",
+            grounder_api_key="grounder_key",
+            model_name="test_model",
+        )
+
+        assert agent == mock_agent
+        mock_async_openai.assert_called_with(api_key="test_key", base_url=None)
+        mock_agent_class.assert_called_once()
+
+
+def test_create_grounded_agent_custom_grounder():
+    with (
+        patch("hud.utils.agent_factories.AsyncOpenAI"),
+        patch("hud.utils.agent_factories.GrounderConfig") as mock_grounder_config,
+        patch("hud.utils.agent_factories.GroundedOpenAIChatAgent"),
+    ):
+        from hud.utils.agent_factories import create_grounded_agent
+
+        create_grounded_agent(
+            api_key="test_key",
+            grounder_api_key="grounder_key",
+            model_name="test_model",
+            grounder_api_base="https://custom.api",
+            grounder_model="custom/model",
+        )
+
+        mock_grounder_config.assert_called_with(
+            api_base="https://custom.api",
+            model="custom/model",
+            api_key="grounder_key",
+        )
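
Read off the assertions above, the factory call shape looks like the sketch below; the key and model values are placeholders, not documented defaults.

from hud.utils.agent_factories import create_openai_agent

# Placeholder values throughout; argument names mirror the test calls above.
agent = create_openai_agent(
    api_key="sk-placeholder",
    model_name="gpt-4o-mini",
    completion_kwargs={"temperature": 0.5},  # exposed as agent.completion_kwargs per the first test
)
assert agent.model_name == "gpt-4o-mini"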
hud/utils/tests/test_mcp.py
CHANGED
@@ -90,12 +90,10 @@ class TestPatchMCPConfig:
 class TestSetupHUDTelemetry:
     """Tests for setup_hud_telemetry function."""
 
-    def
-        """Test that empty config
-
-
-    ):
-        setup_hud_telemetry({})
+    def test_empty_config_returns_none(self):
+        """Test that empty config returns None (no servers to set up telemetry for)."""
+        result = setup_hud_telemetry({})
+        assert result is None
 
     def test_none_config_raises_error(self):
         """Test that None config raises ValueError."""
hud/utils/tests/test_pretty_errors.py
ADDED
@@ -0,0 +1,186 @@
+from __future__ import annotations
+
+import sys
+from unittest.mock import MagicMock, patch
+
+from hud.utils.pretty_errors import (
+    _async_exception_handler,
+    _render_and_fallback,
+    install_pretty_errors,
+)
+
+
+def test_render_and_fallback_hud_exception():
+    """Test _render_and_fallback with HudException."""
+    from hud.shared.exceptions import HudException
+
+    exc = HudException("Test error")
+
+    with (
+        patch("sys.__excepthook__") as mock_excepthook,
+        patch("hud.utils.pretty_errors.hud_console") as mock_console,
+        patch("sys.stderr.flush"),
+    ):
+        _render_and_fallback(HudException, exc, None)
+
+        mock_excepthook.assert_called_once()
+        mock_console.render_exception.assert_called_once_with(exc)
+
+
+def test_render_and_fallback_non_hud_exception():
+    """Test _render_and_fallback with non-HudException."""
+    exc = ValueError("Test error")
+
+    with (
+        patch("sys.__excepthook__") as mock_excepthook,
+        patch("hud.utils.pretty_errors.hud_console") as mock_console,
+    ):
+        _render_and_fallback(ValueError, exc, None)
+
+        mock_excepthook.assert_called_once()
+        # Should not render for non-HudException
+        mock_console.render_exception.assert_not_called()
+
+
+def test_render_and_fallback_rendering_error():
+    """Test _render_and_fallback handles rendering errors gracefully."""
+    from hud.shared.exceptions import HudException
+
+    exc = HudException("Test error")
+
+    with (
+        patch("sys.__excepthook__") as mock_excepthook,
+        patch("hud.utils.pretty_errors.hud_console") as mock_console,
+    ):
+        mock_console.render_exception.side_effect = Exception("Render failed")
+
+        # Should not raise
+        _render_and_fallback(HudException, exc, None)
+
+        mock_excepthook.assert_called_once()
+
+
+def test_async_exception_handler_with_exception():
+    """Test _async_exception_handler with exception in context."""
+    mock_loop = MagicMock()
+    context = {"exception": ValueError("Test error")}
+
+    with patch("hud.utils.pretty_errors.hud_console") as mock_console:
+        _async_exception_handler(mock_loop, context)
+
+        mock_console.render_exception.assert_called_once()
+        mock_loop.default_exception_handler.assert_called_once_with(context)
+
+
+def test_async_exception_handler_with_message():
+    """Test _async_exception_handler with message only."""
+    mock_loop = MagicMock()
+    context = {"message": "Error message"}
+
+    with patch("hud.utils.pretty_errors.hud_console") as mock_console:
+        _async_exception_handler(mock_loop, context)
+
+        mock_console.error.assert_called_once_with("Error message")
+        mock_console.render_support_hint.assert_called_once()
+        mock_loop.default_exception_handler.assert_called_once()
+
+
+def test_async_exception_handler_rendering_error():
+    """Test _async_exception_handler handles rendering errors."""
+    mock_loop = MagicMock()
+    context = {"exception": ValueError("Test")}
+
+    with patch("hud.utils.pretty_errors.hud_console") as mock_console:
+        mock_console.render_exception.side_effect = Exception("Render failed")
+
+        # Should not raise, should call default handler
+        _async_exception_handler(mock_loop, context)
+
+        mock_loop.default_exception_handler.assert_called_once()
+
+
+def test_install_pretty_errors_with_running_loop():
+    """Test install_pretty_errors with a running event loop."""
+    mock_loop = MagicMock()
+
+    with patch("asyncio.get_running_loop", return_value=mock_loop):
+        install_pretty_errors()
+
+        assert sys.excepthook == _render_and_fallback
+        mock_loop.set_exception_handler.assert_called_once_with(_async_exception_handler)
+
+
+def test_install_pretty_errors_no_running_loop():
+    """Test install_pretty_errors without a running loop."""
+    with (
+        patch("asyncio.get_running_loop", side_effect=RuntimeError("No running loop")),
+        patch("asyncio.new_event_loop") as mock_new_loop,
+    ):
+        mock_loop = MagicMock()
+        mock_new_loop.return_value = mock_loop
+
+        install_pretty_errors()
+
+        assert sys.excepthook == _render_and_fallback
+        mock_loop.set_exception_handler.assert_called_once()
+
+
+def test_install_pretty_errors_new_loop_fails():
+    """Test install_pretty_errors when creating new loop fails."""
+    with (
+        patch("asyncio.get_running_loop", side_effect=RuntimeError("No running loop")),
+        patch("asyncio.new_event_loop", side_effect=Exception("Can't create loop")),
+    ):
+        # Should not raise
+        install_pretty_errors()
+
+        assert sys.excepthook == _render_and_fallback
+
+
+def test_install_pretty_errors_set_handler_fails():
+    """Test install_pretty_errors when set_exception_handler fails."""
+    mock_loop = MagicMock()
+    mock_loop.set_exception_handler.side_effect = Exception("Can't set handler")
+
+    with patch("asyncio.get_running_loop", return_value=mock_loop):
+        # Should not raise
+        install_pretty_errors()
+
+        assert sys.excepthook == _render_and_fallback
+
+
+def test_async_exception_handler_no_exception_or_message():
+    """Test _async_exception_handler with empty context."""
+    mock_loop = MagicMock()
+    context = {}
+
+    with patch("hud.utils.pretty_errors.hud_console") as mock_console:
+        _async_exception_handler(mock_loop, context)
+
+        mock_console.render_exception.assert_not_called()
+        mock_console.error.assert_not_called()
+        mock_loop.default_exception_handler.assert_called_once()
+
+
+def test_render_and_fallback_with_traceback():
+    """Test _render_and_fallback includes traceback."""
+    from hud.shared.exceptions import HudException
+
+    exc = HudException("Test error")
+
+    # Create a fake traceback
+    try:
+        raise exc
+    except HudException as e:
+        tb = e.__traceback__
+
+    with (
+        patch("sys.__excepthook__") as mock_excepthook,
+        patch("hud.utils.pretty_errors.hud_console"),
+        patch("sys.stderr.flush"),
+    ):
+        _render_and_fallback(HudException, exc, tb)
+
+        # Should call excepthook with traceback
+        call_args = mock_excepthook.call_args[0]
+        assert call_args[2] == tb
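
Taken together, these tests suggest the startup wiring sketched below; it assumes only the behavior the tests assert (excepthook replacement, loop handler registration, and swallowed wiring failures), and do_work is a hypothetical entry point.

import asyncio

from hud.utils.pretty_errors import install_pretty_errors


async def do_work() -> None:
    ...  # application code; uncaught HudExceptions get pretty rendering


def main() -> None:
    # Per the tests: replaces sys.excepthook with _render_and_fallback and,
    # when an event loop can be obtained, registers _async_exception_handler.
    # Wiring failures are swallowed, so this is safe to call unconditionally.
    install_pretty_errors()
    asyncio.run(do_work())


if __name__ == "__main__":
    main()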
hud/utils/tests/test_tasks.py
ADDED
@@ -0,0 +1,187 @@
+from __future__ import annotations
+
+import json
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from hud.types import Task
+from hud.utils.tasks import load_tasks
+
+
+def test_load_tasks_from_list():
+    """Test loading tasks from a list of dictionaries."""
+    task_dicts = [
+        {"id": "1", "prompt": "Test task 1", "mcp_config": {}},
+        {"id": "2", "prompt": "Test task 2", "mcp_config": {}},
+    ]
+
+    tasks = load_tasks(task_dicts)
+
+    assert len(tasks) == 2
+    assert all(isinstance(t, Task) for t in tasks)
+    assert tasks[0].prompt == "Test task 1"  # type: ignore
+    assert tasks[1].prompt == "Test task 2"  # type: ignore
+
+
+def test_load_tasks_from_list_raw():
+    """Test loading tasks from a list in raw mode."""
+    task_dicts = [
+        {"id": "1", "prompt": "Test task 1", "mcp_config": {}},
+        {"id": "2", "prompt": "Test task 2", "mcp_config": {}},
+    ]
+
+    tasks = load_tasks(task_dicts, raw=True)
+
+    assert len(tasks) == 2
+    assert all(isinstance(t, dict) for t in tasks)
+    assert tasks[0]["prompt"] == "Test task 1"  # type: ignore
+
+
+def test_load_tasks_from_json_file():
+    """Test loading tasks from a JSON file."""
+    task_dicts = [
+        {"id": "1", "prompt": "Test task 1", "mcp_config": {}},
+        {"id": "2", "prompt": "Test task 2", "mcp_config": {}},
+    ]
+
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False, encoding="utf-8") as f:
+        json.dump(task_dicts, f)
+        temp_path = f.name
+
+    try:
+        tasks = load_tasks(temp_path)
+
+        assert len(tasks) == 2
+        assert all(isinstance(t, Task) for t in tasks)
+        assert tasks[0].prompt == "Test task 1"  # type: ignore
+    finally:
+        Path(temp_path).unlink()
+
+
+def test_load_tasks_from_json_file_raw():
+    """Test loading tasks from a JSON file in raw mode."""
+    task_dicts = [
+        {"id": "1", "prompt": "Test task 1", "mcp_config": {}},
+        {"id": "2", "prompt": "Test task 2", "mcp_config": {}},
+    ]
+
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False, encoding="utf-8") as f:
+        json.dump(task_dicts, f)
+        temp_path = f.name
+
+    try:
+        tasks = load_tasks(temp_path, raw=True)
+
+        assert len(tasks) == 2
+        assert all(isinstance(t, dict) for t in tasks)
+    finally:
+        Path(temp_path).unlink()
+
+
+def test_load_tasks_from_jsonl_file():
+    """Test loading tasks from a JSONL file."""
+    task_dicts = [
+        {"id": "1", "prompt": "Test task 1", "mcp_config": {}},
+        {"id": "2", "prompt": "Test task 2", "mcp_config": {}},
+    ]
+
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".jsonl", delete=False, encoding="utf-8"
+    ) as f:
+        for task_dict in task_dicts:
+            f.write(json.dumps(task_dict) + "\n")
+        temp_path = f.name
+
+    try:
+        tasks = load_tasks(temp_path)
+
+        assert len(tasks) == 2
+        assert all(isinstance(t, Task) for t in tasks)
+        assert tasks[0].prompt == "Test task 1"  # type: ignore
+    finally:
+        Path(temp_path).unlink()
+
+
+def test_load_tasks_from_jsonl_file_with_empty_lines():
+    """Test loading tasks from a JSONL file with empty lines."""
+    task_dicts = [
+        {"id": "1", "prompt": "Test task 1", "mcp_config": {}},
+        {"id": "2", "prompt": "Test task 2", "mcp_config": {}},
+    ]
+
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".jsonl", delete=False, encoding="utf-8"
+    ) as f:
+        f.write(json.dumps(task_dicts[0]) + "\n")
+        f.write("\n")  # Empty line
+        f.write(json.dumps(task_dicts[1]) + "\n")
+        temp_path = f.name
+
+    try:
+        tasks = load_tasks(temp_path)
+
+        assert len(tasks) == 2
+        assert all(isinstance(t, Task) for t in tasks)
+    finally:
+        Path(temp_path).unlink()
+
+
+def test_load_tasks_from_jsonl_file_with_list():
+    """Test loading tasks from a JSONL file where a line contains a list."""
+    task_dict = {"id": "1", "prompt": "Test task 1", "mcp_config": {}}
+
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".jsonl", delete=False, encoding="utf-8"
+    ) as f:
+        f.write(json.dumps([task_dict, task_dict]) + "\n")
+        temp_path = f.name
+
+    try:
+        tasks = load_tasks(temp_path)
+
+        assert len(tasks) == 2
+        assert all(isinstance(t, Task) for t in tasks)
+    finally:
+        Path(temp_path).unlink()
+
+
+def test_load_tasks_json_not_array_error():
+    """Test that loading from JSON file with non-array raises error."""
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False, encoding="utf-8") as f:
+        json.dump({"not": "an array"}, f)
+        temp_path = f.name
+
+    try:
+        with pytest.raises(ValueError, match="JSON file must contain an array"):
+            load_tasks(temp_path)
+    finally:
+        Path(temp_path).unlink()
+
+
+def test_load_tasks_invalid_jsonl_format():
+    """Test that loading from JSONL with invalid format raises error."""
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".jsonl", delete=False, encoding="utf-8"
+    ) as f:
+        f.write(json.dumps("invalid") + "\n")
+        temp_path = f.name
+
+    try:
+        with pytest.raises(ValueError, match="Invalid JSONL format"):
+            load_tasks(temp_path)
+    finally:
+        Path(temp_path).unlink()
+
+
+def test_load_tasks_invalid_input_type():
+    """Test that invalid input type raises TypeError."""
+    with pytest.raises(TypeError, match="tasks_input must be str or list"):
+        load_tasks(123)  # type: ignore
+
+
+def test_load_tasks_nonexistent_file():
+    """Test that loading from nonexistent file raises error."""
+    with pytest.raises(ValueError, match="neither a file path nor a HuggingFace dataset"):
+        load_tasks("nonexistent_file_without_slash")
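
For reference, the input shapes these tests cover reduce to three load_tasks call patterns; the file names below are placeholders.

from hud.utils.tasks import load_tasks

# 1. In-memory list of dicts -> list[Task]; raw=True keeps plain dicts.
tasks = load_tasks([{"id": "1", "prompt": "Test task 1", "mcp_config": {}}])
raw_dicts = load_tasks([{"id": "1", "prompt": "Test task 1", "mcp_config": {}}], raw=True)

# 2. Path to a JSON file whose top level is an array of task objects.
tasks = load_tasks("tasks.json")

# 3. Path to a JSONL file: one object (or array of objects) per line;
#    blank lines are skipped.
tasks = load_tasks("tasks.jsonl")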