hud-python 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic.
- hud/__init__.py +5 -3
- hud/adapters/__init__.py +2 -1
- hud/adapters/claude/adapter.py +13 -17
- hud/adapters/common/adapter.py +3 -3
- hud/adapters/common/tests/__init__.py +0 -0
- hud/adapters/common/tests/test_adapter.py +277 -0
- hud/adapters/common/types.py +3 -6
- hud/adapters/operator/adapter.py +22 -29
- hud/agent/__init__.py +9 -1
- hud/agent/base.py +28 -28
- hud/agent/claude.py +69 -60
- hud/agent/langchain.py +204 -0
- hud/agent/operator.py +75 -67
- hud/env/__init__.py +5 -5
- hud/env/client.py +2 -2
- hud/env/docker_client.py +37 -39
- hud/env/environment.py +91 -66
- hud/env/local_docker_client.py +5 -7
- hud/env/remote_client.py +40 -29
- hud/env/remote_docker_client.py +13 -3
- hud/evaluators/__init__.py +2 -3
- hud/evaluators/base.py +4 -3
- hud/evaluators/inspect.py +3 -8
- hud/evaluators/judge.py +34 -58
- hud/evaluators/match.py +42 -49
- hud/evaluators/remote.py +13 -26
- hud/evaluators/tests/__init__.py +0 -0
- hud/evaluators/tests/test_inspect.py +12 -0
- hud/evaluators/tests/test_judge.py +231 -0
- hud/evaluators/tests/test_match.py +115 -0
- hud/evaluators/tests/test_remote.py +98 -0
- hud/exceptions.py +167 -0
- hud/gym.py +12 -10
- hud/job.py +525 -47
- hud/server/__init__.py +2 -2
- hud/server/requests.py +148 -186
- hud/server/tests/__init__.py +0 -0
- hud/server/tests/test_requests.py +275 -0
- hud/settings.py +3 -2
- hud/task.py +12 -22
- hud/taskset.py +44 -11
- hud/trajectory.py +6 -9
- hud/types.py +14 -9
- hud/utils/__init__.py +2 -2
- hud/utils/common.py +37 -13
- hud/utils/config.py +44 -29
- hud/utils/progress.py +149 -0
- hud/utils/telemetry.py +10 -11
- hud/utils/tests/__init__.py +0 -0
- hud/utils/tests/test_common.py +52 -0
- hud/utils/tests/test_config.py +129 -0
- hud/utils/tests/test_progress.py +225 -0
- hud/utils/tests/test_telemetry.py +37 -0
- hud/utils/tests/test_version.py +8 -0
- {hud_python-0.2.1.dist-info → hud_python-0.2.3.dist-info}/METADATA +44 -21
- hud_python-0.2.3.dist-info/RECORD +62 -0
- hud_python-0.2.1.dist-info/RECORD +0 -44
- {hud_python-0.2.1.dist-info → hud_python-0.2.3.dist-info}/WHEEL +0 -0
- {hud_python-0.2.1.dist-info → hud_python-0.2.3.dist-info}/licenses/LICENSE +0 -0
hud/utils/progress.py
ADDED
@@ -0,0 +1,149 @@
+from __future__ import annotations
+
+import time
+from collections import defaultdict
+
+
+class StepProgressTracker:
+    """
+    Tracks progress across potentially parallel async tasks based on steps completed.
+    Provides estimates assuming tasks run up to max_steps_per_task.
+    """
+
+    def __init__(self, total_tasks: int, max_steps_per_task: int) -> None:
+        """
+        Initialize the StepProgressTracker.
+
+        Args:
+            total_tasks: The total number of tasks to track.
+            max_steps_per_task: The maximum number of steps per task.
+
+        Raises:
+            ValueError: If total_tasks or max_steps_per_task is not positive.
+        """
+        if total_tasks <= 0:
+            raise ValueError("total_tasks must be positive")
+        if max_steps_per_task <= 0:
+            raise ValueError("max_steps_per_task must be positive")
+
+        self.total_tasks = total_tasks
+        self.max_steps_per_task = max_steps_per_task
+        self.total_potential_steps = total_tasks * max_steps_per_task
+
+        # Use asyncio.Lock for potentially concurrent updates/reads if needed,
+        # but start without for simplicity in single-threaded asyncio.
+        # self._lock = asyncio.Lock()
+        self._task_steps: dict[str, int] = defaultdict(int)
+        self._finished_tasks: dict[str, bool] = defaultdict(bool)
+        self._tasks_started = 0
+        self._tasks_finished = 0
+
+        self.start_time: float | None = None
+        self.current_total_steps = 0
+
+    def start_task(self, task_id: str) -> None:
+        # async with self._lock: # If using lock
+        if self.start_time is None:
+            self.start_time = time.monotonic()
+        self._task_steps[task_id] = 0
+        self._finished_tasks[task_id] = False
+        self._tasks_started += 1
+
+    def increment_step(self, task_id: str) -> None:
+        # async with self._lock:
+        if (
+            not self._finished_tasks[task_id]
+            and self._task_steps[task_id] < self.max_steps_per_task
+        ):
+            self._task_steps[task_id] += 1
+            # Update overall progress immediately
+            self._update_total_steps()
+
+    def finish_task(self, task_id: str) -> None:
+        # async with self._lock:
+        if not self._finished_tasks[task_id]:
+            # For calculation, consider a finished task as having completed max steps
+            self._task_steps[task_id] = self.max_steps_per_task
+            self._finished_tasks[task_id] = True
+            self._tasks_finished += 1
+            # Update overall progress
+            self._update_total_steps()
+
+    def _update_total_steps(self) -> None:
+        # This could be expensive if called extremely frequently.
+        # Called after increment or finish.
+        # async with self._lock:
+        self.current_total_steps = sum(self._task_steps.values())
+
+    def get_progress(self) -> tuple[int, int, float]:
+        """Returns (current_steps, total_potential_steps, percentage)."""
+        # async with self._lock:
+        # Recalculate here for safety, though _update_total_steps should keep it current
+        # current_steps = sum(self._task_steps.values())
+        current_steps = self.current_total_steps
+
+        percentage = 0.0
+        if self.total_potential_steps > 0:
+            percentage = (current_steps / self.total_potential_steps) * 100
+        return current_steps, self.total_potential_steps, percentage
+
+    def get_stats(self) -> tuple[float, float | None]:
+        """Returns (rate_steps_per_minute, eta_seconds_upper_bound)."""
+        # async with self._lock:
+        if self.start_time is None or self._tasks_started == 0:
+            return 0.0, None  # No rate or ETA yet
+
+        elapsed_time = time.monotonic() - self.start_time
+        current_steps = self.current_total_steps
+
+        rate_sec = 0.0
+        if elapsed_time > 0:
+            rate_sec = current_steps / elapsed_time
+
+        rate_min = rate_sec * 60  # Convert rate to steps per minute
+
+        eta = None
+        # ETA calculation still uses rate_sec (steps/second) for time estimation in seconds
+        if rate_sec > 0:
+            remaining_steps = self.total_potential_steps - current_steps
+            eta = remaining_steps / rate_sec if remaining_steps > 0 else 0.0
+
+        return rate_min, eta  # Return rate in steps/min
+
+    def is_finished(self) -> bool:
+        # async with self._lock:
+        return self._tasks_finished >= self.total_tasks
+
+    def display(self, bar_length: int = 40) -> str:
+        """Generates a progress string similar to tqdm."""
+        current_steps, total_steps, percentage = self.get_progress()
+        rate_min, eta = self.get_stats()  # Rate is now per minute
+
+        # Ensure valid values for display
+        current_steps = min(current_steps, total_steps)
+        percentage = max(0.0, min(100.0, percentage))
+
+        filled_length = int(bar_length * current_steps // total_steps) if total_steps else 0
+        bar = "█" * filled_length + "-" * (bar_length - filled_length)
+
+        # Format time
+        elapsed_str = "0:00"
+        eta_str = "??:??"
+        if self.start_time:
+            elapsed_seconds = int(time.monotonic() - self.start_time)
+            elapsed_str = f"{elapsed_seconds // 60}:{elapsed_seconds % 60:02d}"
+        if eta is not None:
+            eta_seconds = int(eta)
+            eta_str = f"{eta_seconds // 60}:{eta_seconds % 60:02d}"
+        elif self.is_finished():
+            eta_str = "0:00"
+
+        # Update rate string format
+        rate_str = f"{rate_min:.1f} steps/min" if rate_min > 0 else "?? steps/min"
+
+        # Format steps - use K/M for large numbers if desired, keep simple for now
+        steps_str = f"{current_steps}/{total_steps}"
+
+        # tasks_str = f" {self._tasks_finished}/{self.total_tasks} tasks" # Optional tasks counter
+
+        return f"{percentage:3.0f}%|{bar}| {steps_str} [{elapsed_str}<{eta_str}, {rate_str}]"
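
For orientation, a minimal usage sketch of the StepProgressTracker added above. The task ids and the driving loop are illustrative only; inside the package the tracker is presumably updated by hud's own task execution code, and display() is what a caller would print or log periodically:

    from hud.utils.progress import StepProgressTracker

    tracker = StepProgressTracker(total_tasks=2, max_steps_per_task=10)
    for task_id in ("task-a", "task-b"):
        tracker.start_task(task_id)

    # One increment_step call per agent step actually executed.
    for _ in range(4):
        tracker.increment_step("task-a")
    # A finished task counts as max_steps_per_task toward the estimate.
    tracker.finish_task("task-b")

    print(tracker.display())  # e.g. " 70%|████...----| 14/20 [0:00<0:00, ... steps/min]"
    current, total, pct = tracker.get_progress()      # (14, 20, 70.0)
    rate_per_min, eta_seconds = tracker.get_stats()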
hud/utils/telemetry.py
CHANGED
@@ -4,12 +4,11 @@ import logging
 
 logger = logging.getLogger(__name__)
 
-def stream(live_url: str | None = None) -> str:
+
+def stream(live_url: str) -> str:
     """
     Display a stream in the HUD system.
     """
-    if live_url is None:
-        raise ValueError("live_url cannot be None")
     from IPython.display import HTML, display
 
     html_content = f"""
@@ -24,44 +23,44 @@ def stream(live_url: str | None = None) -> str:
         display(HTML(html_content))
     except Exception as e:
         logger.warning(e)
-
+
     return html_content
 
 
 def display_screenshot(base64_image: str, width: int = 960, height: int = 540) -> str:
     """
     Display a base64-encoded screenshot image.
-
+
     Args:
         base64_image: Base64-encoded image string (without the data URI prefix)
         width: Display width in pixels
        height: Display height in pixels
-
+
     Returns:
         The HTML string used to display the image
-
+
     Note:
         This function will both display the image in IPython environments
        and return the HTML string for other contexts.
     """
     from IPython.display import HTML, display
-
+
     # Ensure the base64 image doesn't already have the data URI prefix
     if base64_image.startswith("data:image"):
         img_src = base64_image
     else:
         img_src = f"data:image/png;base64,{base64_image}"
-
+
     html_content = f"""
     <div style="width: {width}px; height: {height}px; overflow: hidden; margin: 10px 0; border: 1px solid #ddd;">
         <img src="{img_src}" style="max-width: 100%; max-height: 100%;">
     </div>
     """ # noqa: E501
-
+
     # Display in IPython environments
     try:
         display(HTML(html_content))
     except Exception as e:
         logger.warning(e)
-
+
     return html_content
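
The change above makes live_url a required string: the None check and its ValueError were dropped, so stream() now assumes a URL is always passed. A hedged sketch of calling both helpers (requires IPython; the URL is a placeholder, and the base64 string is the 1x1 PNG reused from the tests below):

    from hud.utils.telemetry import display_screenshot, stream

    html = stream("https://example.com/live")  # must be a str now, not None
    png_b64 = (
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQ"
        "AAABJRU5ErkJggg=="
    )
    shot_html = display_screenshot(png_b64, width=800, height=600)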
hud/utils/tests/__init__.py
File without changes
hud/utils/tests/test_common.py
ADDED
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+import io
+import tarfile
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import pytest
+
+from hud.utils.common import directory_to_tar_bytes, get_gym_id
+
+if TYPE_CHECKING:
+    import pytest_mock
+
+
+def test_directory_to_tar_bytes(tmpdir_factory: pytest.TempdirFactory):
+    """Test that a directory can be converted to a tar bytes object."""
+    temp_dir = tmpdir_factory.mktemp("test_dir")
+    temp_dir_path = Path(temp_dir)
+
+    (temp_dir_path / "test.txt").write_text("test content")
+
+    nested_dir = temp_dir_path / "nested"
+    nested_dir.mkdir(exist_ok=True)
+    (nested_dir / "file.txt").write_text("nested content")
+
+    tar_bytes = directory_to_tar_bytes(temp_dir_path)
+    assert tar_bytes is not None
+    assert len(tar_bytes) > 0
+
+    with tarfile.open(fileobj=io.BytesIO(tar_bytes), mode="r:*") as tar:
+        members = tar.getmembers()
+        member_names = {m.name for m in members}
+
+        assert "test.txt" in member_names
+        assert "nested/file.txt" in member_names
+
+        test_content = tar.extractfile("test.txt")
+        assert test_content is not None
+        assert test_content.read().decode() == "test content"
+
+        nested_content = tar.extractfile("nested/file.txt")
+        assert nested_content is not None
+        assert nested_content.read().decode() == "nested content"
+
+
+@pytest.mark.asyncio
+async def test_get_gym_id(mocker: pytest_mock.MockerFixture):
+    """Test that the gym ID can be retrieved."""
+    mocker.patch("hud.utils.common.make_request", return_value={"id": "test_gym_id"})
+    gym_id = await get_gym_id("test_gym")
+    assert gym_id == "test_gym_id"
hud/utils/tests/test_config.py
ADDED
@@ -0,0 +1,129 @@
+from __future__ import annotations
+
+import pytest
+
+from hud.utils.common import FunctionConfig
+from hud.utils.config import (
+    _is_list_of_configs,
+    _is_valid_python_name,
+    _split_and_validate_path,
+    _validate_hud_config,
+    expand_config,
+)
+
+
+@pytest.mark.parametrize(
+    "config, expected",
+    [
+        ("test", [{"function": "test", "args": [], "id": None}]),
+        (("test",), [{"function": "test", "args": [], "id": None}]),
+        (
+            [FunctionConfig(function="test", args=[])],
+            [{"function": "test", "args": [], "id": None}],
+        ),
+        ({"function": "test", "args": []}, [{"function": "test", "args": [], "id": None}]),
+        (
+            {"function": "test", "args": ["arg1"]},
+            [{"function": "test", "args": ["arg1"], "id": None}],
+        ),
+        (
+            {"function": "test", "args": ["arg1"], "id": "test_id"},
+            [{"function": "test", "args": ["arg1"], "id": "test_id"}],
+        ),
+        (("test", "arg1", "arg2"), [{"function": "test", "args": ["arg1", "arg2"], "id": None}]),
+    ],
+)
+def test_expand_config(config, expected):
+    result = expand_config(config)
+    assert len(result) == len(expected)
+    for i, item in enumerate(result):
+        assert item.function == expected[i]["function"]
+        assert item.args == expected[i]["args"]
+        assert item.id == expected[i]["id"]
+
+
+@pytest.mark.parametrize(
+    "name, expected",
+    [
+        ("valid_name", True),
+        ("ValidName", True),
+        ("valid_name_123", True),
+        ("_valid_name", True),
+        ("123_invalid", False),
+        ("invalid-name", False),
+        ("", False),
+    ],
+)
+def test_is_valid_python_name(name, expected):
+    assert _is_valid_python_name(name) == expected
+
+
+def test_validate_hud_config_valid():
+    config = {"function": "test.func", "args": ["arg1", "arg2"]}
+    result = _validate_hud_config(config)
+    assert result.function == "test.func"
+    assert result.args == ["arg1", "arg2"]
+    assert result.id is None
+
+    # Test with single arg (not in a list)
+    config = {"function": "test.func", "args": "arg1"}
+    result = _validate_hud_config(config)
+    assert result.function == "test.func"
+    assert result.args == ["arg1"]
+
+    # Test with ID
+    config = {"function": "test.func", "args": [], "id": "test_id"}
+    result = _validate_hud_config(config)
+    assert result.id == "test_id"
+
+
+def test_validate_hud_config_invalid():
+    with pytest.raises(ValueError, match="function must be a string"):
+        _validate_hud_config({"args": []})
+
+    with pytest.raises(ValueError, match="function must be a string"):
+        _validate_hud_config({"function": 123, "args": []})
+
+
+def test_split_and_validate_path_valid():
+    # none should raise
+    _split_and_validate_path("module.submodule.function")
+    _split_and_validate_path("function")
+    _split_and_validate_path("Module_123.function_456")
+
+
+def test_split_and_validate_path_invalid():
+    with pytest.raises(ValueError, match="Invalid Python identifier in path"):
+        _split_and_validate_path("invalid-module.function")
+
+
+def test_is_list_of_configs():
+    valid_list = [
+        FunctionConfig(function="test1", args=[]),
+        FunctionConfig(function="test2", args=["arg1"]),
+    ]
+    assert _is_list_of_configs(valid_list) is True
+
+    # Empty list
+    assert _is_list_of_configs([]) is True
+
+    # Invalid: not a list
+    assert _is_list_of_configs("not_a_list") is False
+
+    # Invalid: list with non-FunctionConfig items
+    invalid_list = [FunctionConfig(function="test", args=[]), "not_a_function_config"]
+    assert _is_list_of_configs(invalid_list) is False
+
+
+def test_expand_config_errors():
+    with pytest.raises(ValueError):
+        empty_tuple = ()
+        expand_config(empty_tuple) # type: ignore
+
+    with pytest.raises(ValueError):
+        invalid_tuple = (123, "arg1")
+        expand_config(invalid_tuple) # type: ignore
+
+    with pytest.raises(ValueError, match="Unknown configuration type"):
+        invalid_value = 123
+        expand_config(invalid_value) # type: ignore
hud/utils/tests/test_progress.py
ADDED
@@ -0,0 +1,225 @@
+"""Tests for the progress tracking utilities."""
+
+from __future__ import annotations
+
+import pytest
+
+from hud.utils.progress import StepProgressTracker
+
+
+@pytest.fixture
+def tracker():
+    return StepProgressTracker(total_tasks=2, max_steps_per_task=10)
+
+
+def test_invalid_inputs_init():
+    with pytest.raises(ValueError, match="total_tasks must be positive"):
+        StepProgressTracker(total_tasks=0, max_steps_per_task=10)
+
+    with pytest.raises(ValueError, match="max_steps_per_task must be positive"):
+        StepProgressTracker(total_tasks=5, max_steps_per_task=0)
+
+
+def test_start_task(tracker):
+    assert tracker.start_time is None
+    assert tracker._tasks_started == 0
+
+    tracker.start_task("task1")
+
+    assert tracker.start_time is not None
+    assert tracker._tasks_started == 1
+    assert tracker._task_steps["task1"] == 0
+    assert not tracker._finished_tasks["task1"]
+
+    tracker.start_task("task2")
+    assert tracker._tasks_started == 2
+    assert tracker._task_steps["task2"] == 0
+    assert not tracker._finished_tasks["task2"]
+
+
+def test_increment_step(tracker):
+    tracker.start_task("task1")
+    assert tracker.current_total_steps == 0
+
+    tracker.increment_step("task1")
+    assert tracker._task_steps["task1"] == 1
+    assert tracker.current_total_steps == 1
+
+    tracker.increment_step("task1")
+    tracker.increment_step("task1")
+    assert tracker._task_steps["task1"] == 3
+    assert tracker.current_total_steps == 3
+
+    tracker.start_task("task2")
+    tracker.increment_step("task2")
+    assert tracker._task_steps["task2"] == 1
+    assert tracker.current_total_steps == 4
+
+    tracker.finish_task("task1")
+    initial_steps = tracker.current_total_steps
+    tracker.increment_step("task1")
+    assert tracker.current_total_steps == initial_steps
+
+    for _ in range(15):
+        tracker.increment_step("task2")
+    assert tracker._task_steps["task2"] <= tracker.max_steps_per_task
+
+
+def test_finish_task(tracker):
+    tracker.start_task("task1")
+    tracker.start_task("task2")
+
+    tracker.increment_step("task1")
+    tracker.increment_step("task1")
+    initial_steps = tracker._task_steps["task1"]
+
+    tracker.finish_task("task1")
+
+    assert tracker._finished_tasks["task1"]
+    assert tracker._tasks_finished == 1
+    assert tracker._task_steps["task1"] == tracker.max_steps_per_task
+    assert tracker.current_total_steps > initial_steps
+
+    current_steps = tracker.current_total_steps
+    tracker.finish_task("task1")
+    assert tracker._tasks_finished == 1
+    assert tracker.current_total_steps == current_steps
+
+
+def test_get_progress(tracker):
+    steps, total, percentage = tracker.get_progress()
+    assert steps == 0
+    assert total == tracker.total_potential_steps
+    assert percentage == 0.0
+
+    tracker.start_task("task1")
+    tracker.increment_step("task1")
+    steps, total, percentage = tracker.get_progress()
+    assert steps == 1
+    assert total == tracker.total_potential_steps
+    assert percentage == (1 / tracker.total_potential_steps) * 100
+
+    tracker.finish_task("task1")
+    steps, total, percentage = tracker.get_progress()
+    assert steps == tracker.max_steps_per_task
+    assert total == tracker.total_potential_steps
+    assert percentage == (tracker.max_steps_per_task / tracker.total_potential_steps) * 100
+
+    tracker.start_task("task2")
+    tracker.finish_task("task2")
+    steps, total, percentage = tracker.get_progress()
+    assert steps == tracker.total_potential_steps
+    assert percentage == 100.0
+
+
+def test_get_stats_no_progress(tracker, mocker):
+    rate, eta = tracker.get_stats()
+    assert rate == 0.0
+    assert eta is None
+
+    mocker.patch("time.monotonic", return_value=100.0)
+    tracker.start_task("task1")
+
+    mocker.patch("time.monotonic", return_value=100.0)
+    rate, eta = tracker.get_stats()
+    assert rate == 0.0
+    assert eta is None
+
+
+def test_get_stats_with_progress(mocker):
+    mock_time = mocker.patch("time.monotonic")
+    mock_time.return_value = 100.0
+
+    tracker = StepProgressTracker(total_tasks=1, max_steps_per_task=10)
+    tracker.start_task("task1")
+
+    mock_time.return_value = 160.0
+    for _ in range(5):
+        tracker.increment_step("task1")
+
+    rate, eta = tracker.get_stats()
+
+    assert rate == pytest.approx(5.0)
+    assert eta == pytest.approx(60.0)
+
+    for _ in range(5):
+        tracker.increment_step("task1")
+
+    rate, eta = tracker.get_stats()
+    assert rate == pytest.approx(10.0)
+    assert eta == pytest.approx(0.0)
+
+
+def test_is_finished(tracker):
+    assert not tracker.is_finished()
+
+    tracker.start_task("task1")
+    tracker.finish_task("task1")
+    assert not tracker.is_finished()
+
+    tracker.start_task("task2")
+    tracker.finish_task("task2")
+    assert tracker.is_finished()
+
+
+def test_display(tracker, mocker):
+    mock_time = mocker.patch("time.monotonic")
+    mock_time.return_value = 100.0
+    tracker.start_task("task1")
+
+    mock_time.return_value = 130.0
+    tracker.increment_step("task1")
+    tracker.increment_step("task1")
+
+    display_str = tracker.display()
+
+    assert "%" in display_str
+    assert "2/20" in display_str
+    assert "0:30" in display_str
+    assert "steps/min" in display_str
+
+    tracker.finish_task("task1")
+    display_str = tracker.display()
+    assert "10/20" in display_str
+
+    tracker.start_task("task2")
+    tracker.finish_task("task2")
+    display_str = tracker.display()
+    assert "100%" in display_str
+    assert "20/20" in display_str
+
+
+def test_complex_workflow():
+    tracker = StepProgressTracker(total_tasks=5, max_steps_per_task=20)
+
+    for i in range(5):
+        tracker.start_task(f"task{i}")
+
+    for _ in range(10):
+        tracker.increment_step("task0")
+
+    for _ in range(5):
+        tracker.increment_step("task1")
+
+    tracker.finish_task("task2")
+
+    for _ in range(15):
+        tracker.increment_step("task3")
+
+    tracker.finish_task("task3")
+
+    steps, total, percentage = tracker.get_progress()
+    expected_steps = 10 + 5 + 20 + 20 + 0
+    assert steps == expected_steps
+    assert total == 5 * 20
+    assert percentage == (expected_steps / total) * 100
+
+    assert tracker._tasks_finished == 2
+    assert not tracker.is_finished()
+
+    tracker.finish_task("task0")
+    tracker.finish_task("task1")
+    tracker.finish_task("task4")
+
+    assert tracker.is_finished()
+    assert tracker.get_progress()[2] == 100.0
hud/utils/tests/test_telemetry.py
ADDED
@@ -0,0 +1,37 @@
+from __future__ import annotations
+
+from hud.utils.telemetry import stream
+
+
+def test_stream():
+    html_content = stream("https://example.com")
+    assert html_content is not None
+    assert "<div style=" in html_content
+    assert 'src="https://example.com"' in html_content
+
+
+def test_display_screenshot():
+    from hud.utils.telemetry import display_screenshot
+
+    # This is a simple 1x1 transparent PNG image in base64 format
+    base64_image = (
+        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQ"
+        "AAABJRU5ErkJggg=="
+    )
+
+    html_content = display_screenshot(base64_image)
+    assert html_content is not None
+    assert "<div style=" in html_content
+    assert "width: 960px" in html_content
+    assert "height: 540px" in html_content
+    assert f"data:image/png;base64,{base64_image}" in html_content
+
+    # Test with custom dimensions
+    custom_html = display_screenshot(base64_image, width=800, height=600)
+    assert "width: 800px" in custom_html
+    assert "height: 600px" in custom_html
+
+    # Test with data URI already included
+    data_uri = f"data:image/png;base64,{base64_image}"
+    uri_html = display_screenshot(data_uri)
+    assert data_uri in uri_html