hud-python 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +5 -3
- hud/adapters/__init__.py +2 -1
- hud/adapters/claude/adapter.py +13 -17
- hud/adapters/common/adapter.py +3 -3
- hud/adapters/common/tests/__init__.py +0 -0
- hud/adapters/common/tests/test_adapter.py +277 -0
- hud/adapters/common/types.py +3 -6
- hud/adapters/operator/adapter.py +22 -29
- hud/agent/__init__.py +9 -1
- hud/agent/base.py +28 -28
- hud/agent/claude.py +69 -60
- hud/agent/langchain.py +204 -0
- hud/agent/operator.py +75 -67
- hud/env/__init__.py +5 -5
- hud/env/client.py +2 -2
- hud/env/docker_client.py +37 -39
- hud/env/environment.py +91 -66
- hud/env/local_docker_client.py +5 -7
- hud/env/remote_client.py +40 -29
- hud/env/remote_docker_client.py +13 -3
- hud/evaluators/__init__.py +2 -3
- hud/evaluators/base.py +4 -3
- hud/evaluators/inspect.py +3 -8
- hud/evaluators/judge.py +34 -58
- hud/evaluators/match.py +42 -49
- hud/evaluators/remote.py +13 -26
- hud/evaluators/tests/__init__.py +0 -0
- hud/evaluators/tests/test_inspect.py +12 -0
- hud/evaluators/tests/test_judge.py +231 -0
- hud/evaluators/tests/test_match.py +115 -0
- hud/evaluators/tests/test_remote.py +98 -0
- hud/exceptions.py +167 -0
- hud/gym.py +12 -10
- hud/job.py +525 -47
- hud/server/__init__.py +2 -2
- hud/server/requests.py +148 -186
- hud/server/tests/__init__.py +0 -0
- hud/server/tests/test_requests.py +275 -0
- hud/settings.py +3 -2
- hud/task.py +12 -22
- hud/taskset.py +44 -11
- hud/trajectory.py +6 -9
- hud/types.py +14 -9
- hud/utils/__init__.py +2 -2
- hud/utils/common.py +37 -13
- hud/utils/config.py +44 -29
- hud/utils/progress.py +149 -0
- hud/utils/telemetry.py +10 -11
- hud/utils/tests/__init__.py +0 -0
- hud/utils/tests/test_common.py +52 -0
- hud/utils/tests/test_config.py +129 -0
- hud/utils/tests/test_progress.py +225 -0
- hud/utils/tests/test_telemetry.py +37 -0
- hud/utils/tests/test_version.py +8 -0
- {hud_python-0.2.1.dist-info → hud_python-0.2.3.dist-info}/METADATA +44 -21
- hud_python-0.2.3.dist-info/RECORD +62 -0
- hud_python-0.2.1.dist-info/RECORD +0 -44
- {hud_python-0.2.1.dist-info → hud_python-0.2.3.dist-info}/WHEEL +0 -0
- {hud_python-0.2.1.dist-info → hud_python-0.2.3.dist-info}/licenses/LICENSE +0 -0
hud/settings.py
CHANGED
|
@@ -25,19 +25,20 @@ class Settings(BaseSettings):
|
|
|
25
25
|
description="API key for authentication with the HUD API",
|
|
26
26
|
validation_alias="HUD_API_KEY",
|
|
27
27
|
)
|
|
28
|
-
|
|
28
|
+
|
|
29
29
|
anthropic_api_key: str | None = Field(
|
|
30
30
|
default=None,
|
|
31
31
|
description="API key for Anthropic models",
|
|
32
32
|
validation_alias="ANTHROPIC_API_KEY",
|
|
33
33
|
)
|
|
34
|
-
|
|
34
|
+
|
|
35
35
|
openai_api_key: str | None = Field(
|
|
36
36
|
default=None,
|
|
37
37
|
description="API key for OpenAI models",
|
|
38
38
|
validation_alias="OPENAI_API_KEY",
|
|
39
39
|
)
|
|
40
40
|
|
|
41
|
+
|
|
41
42
|
# Create a singleton instance
|
|
42
43
|
settings = Settings()
|
|
43
44
|
|
hud/task.py
CHANGED
|
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any
|
|
|
5
5
|
from pydantic import BaseModel
|
|
6
6
|
|
|
7
7
|
from hud.types import CustomGym, Gym
|
|
8
|
-
from hud.utils.common import
|
|
8
|
+
from hud.utils.common import FunctionConfig, FunctionConfigs
|
|
9
9
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
11
|
from inspect_ai.dataset import Sample
|
|
@@ -17,12 +17,12 @@ if TYPE_CHECKING:
|
|
|
17
17
|
UBUNTU_DOCKERFILE = "ubuntu:latest"
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
def convert_inspect_setup(setup: str) -> list[
|
|
20
|
+
def convert_inspect_setup(setup: str) -> list[FunctionConfig]:
|
|
21
21
|
"""
|
|
22
22
|
Inspect setup is a single bash string to run in the environment.
|
|
23
|
-
We convert this into a single
|
|
23
|
+
We convert this into a single FunctionConfig using the exec command
|
|
24
24
|
"""
|
|
25
|
-
return [
|
|
25
|
+
return [FunctionConfig(function="bash", args=[setup])]
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
class Task(BaseModel):
|
|
@@ -52,16 +52,9 @@ class Task(BaseModel):
|
|
|
52
52
|
|
|
53
53
|
id: str | None = None
|
|
54
54
|
prompt: str
|
|
55
|
-
setup:
|
|
56
|
-
evaluate:
|
|
55
|
+
setup: FunctionConfigs | None = None
|
|
56
|
+
evaluate: FunctionConfigs | None = None
|
|
57
57
|
gym: Gym | None = None
|
|
58
|
-
|
|
59
|
-
target: str | list[str] | None = None
|
|
60
|
-
|
|
61
|
-
choices: list[str] | None = None
|
|
62
|
-
files: dict[str, str] | None = None
|
|
63
|
-
metadata: dict[str, Any] | None = None
|
|
64
|
-
|
|
65
58
|
config: dict[str, Any] | None = None
|
|
66
59
|
|
|
67
60
|
@classmethod
|
|
@@ -75,7 +68,7 @@ class Task(BaseModel):
|
|
|
75
68
|
|
|
76
69
|
Returns:
|
|
77
70
|
Task instance
|
|
78
|
-
|
|
71
|
+
|
|
79
72
|
The Inspect Sample has these fields:
|
|
80
73
|
- input (str | list[ChatMessage]): The input to be submitted to the model
|
|
81
74
|
- choices (list[str] | None): Optional multiple choice answer list
|
|
@@ -103,8 +96,8 @@ class Task(BaseModel):
|
|
|
103
96
|
evaluate_config = ("match_all", sample.target)
|
|
104
97
|
|
|
105
98
|
task_gym: Gym | None = None
|
|
106
|
-
task_setup:
|
|
107
|
-
|
|
99
|
+
task_setup: FunctionConfigs | None = None
|
|
100
|
+
|
|
108
101
|
sandbox = sample.sandbox
|
|
109
102
|
dockerfile = None
|
|
110
103
|
use_qa_gym = True
|
|
@@ -112,7 +105,7 @@ class Task(BaseModel):
|
|
|
112
105
|
if sandbox:
|
|
113
106
|
if isinstance(sandbox, str):
|
|
114
107
|
if sandbox == "docker":
|
|
115
|
-
dockerfile = UBUNTU_DOCKERFILE
|
|
108
|
+
dockerfile = UBUNTU_DOCKERFILE
|
|
116
109
|
use_qa_gym = False
|
|
117
110
|
elif isinstance(sandbox, tuple) and len(sandbox) == 2:
|
|
118
111
|
sandbox_type, sandbox_config = sandbox
|
|
@@ -122,7 +115,7 @@ class Task(BaseModel):
|
|
|
122
115
|
|
|
123
116
|
if use_qa_gym:
|
|
124
117
|
task_gym = "qa"
|
|
125
|
-
task_setup = None
|
|
118
|
+
task_setup = None
|
|
126
119
|
else:
|
|
127
120
|
task_gym = CustomGym(
|
|
128
121
|
dockerfile=dockerfile or UBUNTU_DOCKERFILE,
|
|
@@ -131,14 +124,11 @@ class Task(BaseModel):
|
|
|
131
124
|
task_setup = [x for x in convert_inspect_setup(sample.setup)] if sample.setup else None
|
|
132
125
|
# TODO: Handle sample.files for CustomGym case if needed
|
|
133
126
|
|
|
134
|
-
|
|
135
127
|
return cls(
|
|
136
128
|
id=None,
|
|
137
129
|
prompt=prompt,
|
|
138
130
|
setup=task_setup,
|
|
139
|
-
|
|
140
|
-
choices=sample.choices,
|
|
141
|
-
evaluate=evaluate_config,
|
|
131
|
+
evaluate=evaluate_config,
|
|
142
132
|
gym=task_gym,
|
|
143
133
|
# files=sample.files, # TODO: Decide how/if to handle files
|
|
144
134
|
)
|
hud/taskset.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from typing import TYPE_CHECKING
|
|
4
|
+
from venv import logger
|
|
4
5
|
|
|
5
6
|
from pydantic import BaseModel
|
|
6
7
|
|
|
@@ -23,10 +24,11 @@ class TaskSet(BaseModel):
|
|
|
23
24
|
description: Description of the taskset
|
|
24
25
|
tasks: List of Task objects in the taskset
|
|
25
26
|
"""
|
|
27
|
+
|
|
26
28
|
id: str | None = None
|
|
27
29
|
description: str | None = None
|
|
28
30
|
tasks: list[Task] = []
|
|
29
|
-
|
|
31
|
+
|
|
30
32
|
def __getitem__(self, index: int) -> Task:
|
|
31
33
|
"""
|
|
32
34
|
Allows accessing tasks by index using square bracket notation.
|
|
@@ -41,7 +43,7 @@ class TaskSet(BaseModel):
|
|
|
41
43
|
IndexError: If the index is out of range
|
|
42
44
|
"""
|
|
43
45
|
return self.tasks[index]
|
|
44
|
-
|
|
46
|
+
|
|
45
47
|
def __len__(self) -> int:
|
|
46
48
|
"""
|
|
47
49
|
Returns the number of tasks in the taskset.
|
|
@@ -50,14 +52,40 @@ class TaskSet(BaseModel):
|
|
|
50
52
|
int: The number of tasks in the taskset
|
|
51
53
|
"""
|
|
52
54
|
return len(self.tasks)
|
|
53
|
-
|
|
55
|
+
|
|
54
56
|
def __iter__(self) -> Iterator[Task]:
|
|
55
57
|
"""
|
|
56
58
|
Returns an iterator over the tasks in the taskset.
|
|
57
59
|
"""
|
|
58
60
|
return iter(self.tasks)
|
|
59
61
|
|
|
60
|
-
|
|
62
|
+
async def upload(
|
|
63
|
+
self,
|
|
64
|
+
name: str,
|
|
65
|
+
description: str | None = None,
|
|
66
|
+
api_key: str | None = None,
|
|
67
|
+
) -> None:
|
|
68
|
+
"""
|
|
69
|
+
Uploads the taskset to the server.
|
|
70
|
+
"""
|
|
71
|
+
if api_key is None:
|
|
72
|
+
api_key = settings.api_key
|
|
73
|
+
|
|
74
|
+
await make_request(
|
|
75
|
+
method="POST",
|
|
76
|
+
url=f"{settings.base_url}/v2/tasksets",
|
|
77
|
+
api_key=api_key,
|
|
78
|
+
json={
|
|
79
|
+
"name": name,
|
|
80
|
+
"description": description,
|
|
81
|
+
"tasks": [task.model_dump() for task in self.tasks],
|
|
82
|
+
},
|
|
83
|
+
)
|
|
84
|
+
logger.info(
|
|
85
|
+
"[HUD] Taskset %s uploaded successfully, see it on app.hud.so/tasksets/%s", name, name
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
|
|
61
89
|
async def load_taskset(taskset_id: str, api_key: str | None = None) -> TaskSet:
|
|
62
90
|
"""
|
|
63
91
|
Loads a TaskSet by its ID.
|
|
@@ -69,20 +97,25 @@ async def load_taskset(taskset_id: str, api_key: str | None = None) -> TaskSet:
|
|
|
69
97
|
Returns:
|
|
70
98
|
TaskSet: The loaded taskset
|
|
71
99
|
"""
|
|
72
|
-
|
|
100
|
+
|
|
73
101
|
if api_key is None:
|
|
74
102
|
api_key = settings.api_key
|
|
75
|
-
|
|
103
|
+
|
|
76
104
|
data = await make_request(
|
|
77
105
|
method="GET",
|
|
78
106
|
url=f"{settings.base_url}/v2/tasksets/{taskset_id}/tasks",
|
|
79
107
|
api_key=api_key,
|
|
80
108
|
)
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
109
|
+
|
|
110
|
+
logger.info(f"[HUD] Taskset {taskset_id} loaded successfully")
|
|
111
|
+
|
|
112
|
+
return TaskSet.model_validate(
|
|
113
|
+
{
|
|
114
|
+
"id": taskset_id,
|
|
115
|
+
"tasks": data["evalset"],
|
|
116
|
+
}
|
|
117
|
+
)
|
|
118
|
+
|
|
86
119
|
|
|
87
120
|
def load_from_inspect(dataset: Dataset) -> TaskSet:
|
|
88
121
|
"""
|
hud/trajectory.py
CHANGED
|
@@ -29,9 +29,7 @@ class Trajectory(BaseModel):
|
|
|
29
29
|
def display(self) -> None:
|
|
30
30
|
trajectory_start_timestamp_str = self.trajectory[0].start_timestamp
|
|
31
31
|
t_start_dt = (
|
|
32
|
-
datetime.datetime.fromisoformat(
|
|
33
|
-
trajectory_start_timestamp_str.replace("Z", "+00:00")
|
|
34
|
-
)
|
|
32
|
+
datetime.datetime.fromisoformat(trajectory_start_timestamp_str.replace("Z", "+00:00"))
|
|
35
33
|
if trajectory_start_timestamp_str
|
|
36
34
|
else None
|
|
37
35
|
)
|
|
@@ -48,16 +46,15 @@ class Trajectory(BaseModel):
|
|
|
48
46
|
display(Markdown(f"[Image Link]({step.observation_url})"))
|
|
49
47
|
except Exception as e:
|
|
50
48
|
print(f" [Error processing image: {e}]")
|
|
51
|
-
elif not step.observation_text:
|
|
52
|
-
|
|
53
|
-
|
|
49
|
+
elif not step.observation_text: # Only print if no image AND no text
|
|
50
|
+
print(" No visual or text observation provided.")
|
|
54
51
|
|
|
55
52
|
# Observation Text
|
|
56
53
|
if step.observation_text:
|
|
57
54
|
print(f" Observation Text: {step.observation_text}")
|
|
58
55
|
|
|
59
56
|
# Actions
|
|
60
|
-
print(f"\n Actions: {step.actions}")
|
|
57
|
+
print(f"\n Actions: {step.actions}") # Added newline for spacing
|
|
61
58
|
|
|
62
59
|
# Duration
|
|
63
60
|
duration_str = "N/A"
|
|
@@ -84,7 +81,7 @@ class Trajectory(BaseModel):
|
|
|
84
81
|
total_seconds = total_duration.total_seconds() % 60
|
|
85
82
|
total_duration_str = f"{total_minutes}m {total_seconds:.2f}s"
|
|
86
83
|
except ValueError:
|
|
87
|
-
duration_str = "Error parsing timestamps"
|
|
84
|
+
duration_str = "Error parsing timestamps" # Handle potential format issues
|
|
88
85
|
print(f" Step Duration: {duration_str}")
|
|
89
86
|
print(f" Total Duration: {total_duration_str}")
|
|
90
|
-
display(Markdown("---"))
|
|
87
|
+
display(Markdown("---")) # Use Markdown horizontal rule
|
hud/types.py
CHANGED
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import enum
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import Any, Literal
|
|
5
|
+
from typing import Any, Literal, TypeAlias
|
|
6
6
|
|
|
7
7
|
from pydantic import BaseModel
|
|
8
8
|
|
|
@@ -10,13 +10,14 @@ from pydantic import BaseModel
|
|
|
10
10
|
class CustomGym(BaseModel):
|
|
11
11
|
"""
|
|
12
12
|
Public environment specification with a dockerfile and controller.
|
|
13
|
-
|
|
13
|
+
|
|
14
14
|
If the location is remote, the env will be created on the server.
|
|
15
15
|
If the location is dev, the env will be created locally via docker.
|
|
16
|
-
|
|
16
|
+
|
|
17
17
|
The dockerfile can be specified directly or automatically found in the controller_source_dir.
|
|
18
18
|
If neither is provided, an error will be raised during validation.
|
|
19
19
|
"""
|
|
20
|
+
|
|
20
21
|
type: Literal["public"] = "public"
|
|
21
22
|
dockerfile: str | None = None
|
|
22
23
|
location: Literal["local", "remote"]
|
|
@@ -25,27 +26,25 @@ class CustomGym(BaseModel):
|
|
|
25
26
|
# If none, then the controller must be installed in the environment through the dockerfile
|
|
26
27
|
# Can be provided as a string or Path object
|
|
27
28
|
controller_source_dir: str | Path | None = None
|
|
28
|
-
|
|
29
|
+
|
|
29
30
|
def model_post_init(self, __context: Any, /) -> None:
|
|
30
31
|
"""Validate and set up dockerfile if not explicitly provided."""
|
|
31
32
|
# Convert string path to Path object if needed
|
|
32
33
|
if isinstance(self.controller_source_dir, str):
|
|
33
34
|
self.controller_source_dir = Path(self.controller_source_dir)
|
|
34
|
-
|
|
35
|
+
|
|
35
36
|
if self.dockerfile is None:
|
|
36
37
|
if self.controller_source_dir is None:
|
|
37
38
|
raise ValueError("Either dockerfile or controller_source_dir must be provided")
|
|
38
|
-
|
|
39
|
+
|
|
39
40
|
# Look for Dockerfile in the controller_source_dir
|
|
40
41
|
dockerfile_path = self.controller_source_dir / "Dockerfile"
|
|
41
42
|
if not dockerfile_path.exists():
|
|
42
43
|
raise ValueError(f"Dockerfile not found in {self.controller_source_dir}")
|
|
43
|
-
|
|
44
|
+
|
|
44
45
|
# Read the Dockerfile content
|
|
45
46
|
self.dockerfile = dockerfile_path.read_text()
|
|
46
47
|
|
|
47
|
-
# Strings are identifiers for gyms on the HUD server
|
|
48
|
-
Gym = CustomGym | str
|
|
49
48
|
|
|
50
49
|
class EnvironmentStatus(str, enum.Enum):
|
|
51
50
|
"""
|
|
@@ -63,3 +62,9 @@ class EnvironmentStatus(str, enum.Enum):
|
|
|
63
62
|
COMPLETED = "completed"
|
|
64
63
|
ERROR = "error"
|
|
65
64
|
|
|
65
|
+
|
|
66
|
+
# Available HUD gyms
|
|
67
|
+
ServerGym: TypeAlias = Literal["qa", "hud-browser", "hud-ubuntu", "OSWorld-Ubuntu"]
|
|
68
|
+
|
|
69
|
+
# Gyms can be either custom or server-side
|
|
70
|
+
Gym: TypeAlias = CustomGym | ServerGym
|
hud/utils/__init__.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from .common import ExecuteResult
|
|
4
|
-
from .config import
|
|
4
|
+
from .config import FunctionConfig, FunctionConfigs, expand_config
|
|
5
5
|
from .telemetry import stream
|
|
6
6
|
|
|
7
|
-
__all__ = ["ExecuteResult", "
|
|
7
|
+
__all__ = ["ExecuteResult", "FunctionConfig", "FunctionConfigs", "expand_config", "stream"]
|
hud/utils/common.py
CHANGED
|
@@ -16,29 +16,52 @@ if TYPE_CHECKING:
|
|
|
16
16
|
|
|
17
17
|
logger = logging.getLogger("hud.utils.common")
|
|
18
18
|
|
|
19
|
-
|
|
19
|
+
|
|
20
|
+
class FunctionConfig(BaseModel):
|
|
20
21
|
function: str # Format: "x.y.z"
|
|
21
|
-
args: list[Any]
|
|
22
|
+
args: list[Any] # Must be json serializable
|
|
22
23
|
|
|
23
|
-
id: str | None = None
|
|
24
|
+
id: str | None = None # Optional id for remote execution
|
|
24
25
|
|
|
25
26
|
def __len__(self) -> int:
|
|
26
27
|
return len(self.args)
|
|
27
28
|
|
|
28
29
|
def __getitem__(self, index: int) -> Any:
|
|
29
30
|
return self.args[index]
|
|
30
|
-
|
|
31
|
+
|
|
31
32
|
def __iter__(self) -> Iterator[Any]:
|
|
32
33
|
return iter(self.args)
|
|
33
|
-
|
|
34
|
+
|
|
34
35
|
def __str__(self) -> str:
|
|
35
36
|
return f"{self.function}: {', '.join(str(arg) for arg in self.args)}"
|
|
36
37
|
|
|
38
|
+
|
|
37
39
|
# Type alias for the shorthand config, which just converts to function name and args
|
|
38
40
|
ShorthandConfig = tuple[str | dict[str, Any] | list[str] | list[dict[str, Any]], ...]
|
|
39
41
|
|
|
40
42
|
# Type alias for multiple config formats
|
|
41
|
-
|
|
43
|
+
FunctionConfigs = (
|
|
44
|
+
ShorthandConfig
|
|
45
|
+
| FunctionConfig
|
|
46
|
+
| list[FunctionConfig]
|
|
47
|
+
| list[ShorthandConfig]
|
|
48
|
+
| dict[str, Any]
|
|
49
|
+
| str
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class Observation(BaseModel):
|
|
54
|
+
"""
|
|
55
|
+
Observation from the environment.
|
|
56
|
+
|
|
57
|
+
Attributes:
|
|
58
|
+
screenshot: Base64 encoded PNG string of the screen
|
|
59
|
+
text: Text observation, if available
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
screenshot: str | None = None # base64 string png
|
|
63
|
+
text: str | None = None
|
|
64
|
+
|
|
42
65
|
|
|
43
66
|
class ExecuteResult(TypedDict):
|
|
44
67
|
"""
|
|
@@ -49,26 +72,27 @@ class ExecuteResult(TypedDict):
|
|
|
49
72
|
stderr: Standard error from the command
|
|
50
73
|
exit_code: Exit code of the command
|
|
51
74
|
"""
|
|
75
|
+
|
|
52
76
|
stdout: bytes
|
|
53
77
|
stderr: bytes
|
|
54
78
|
exit_code: int
|
|
55
|
-
|
|
56
|
-
|
|
79
|
+
|
|
80
|
+
|
|
57
81
|
def directory_to_tar_bytes(directory_path: Path) -> bytes:
|
|
58
82
|
"""
|
|
59
83
|
Converts a directory to a tar archive and returns it as bytes.
|
|
60
|
-
|
|
84
|
+
|
|
61
85
|
This function creates a tar archive of the specified directory in memory,
|
|
62
86
|
without writing to a temporary file on disk.
|
|
63
|
-
|
|
87
|
+
|
|
64
88
|
Args:
|
|
65
89
|
path: Path to the directory to convert
|
|
66
|
-
|
|
90
|
+
|
|
67
91
|
Returns:
|
|
68
92
|
Bytes of the tar archive
|
|
69
93
|
"""
|
|
70
94
|
output = io.BytesIO()
|
|
71
|
-
|
|
95
|
+
|
|
72
96
|
with tarfile.open(fileobj=output, mode="w") as tar:
|
|
73
97
|
# Walk through the directory
|
|
74
98
|
for file_path in directory_path.rglob("*"):
|
|
@@ -77,7 +101,7 @@ def directory_to_tar_bytes(directory_path: Path) -> bytes:
|
|
|
77
101
|
rel_path = file_path.relative_to(directory_path)
|
|
78
102
|
logger.debug("Adding %s to tar archive", rel_path)
|
|
79
103
|
tar.add(file_path, arcname=str(rel_path))
|
|
80
|
-
|
|
104
|
+
|
|
81
105
|
# Get the bytes from the BytesIO object
|
|
82
106
|
output.seek(0)
|
|
83
107
|
return output.getvalue()
|
hud/utils/config.py
CHANGED
|
@@ -2,8 +2,12 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import re
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
5
6
|
|
|
6
|
-
from hud.utils.common import
|
|
7
|
+
from hud.utils.common import FunctionConfig, FunctionConfigs
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from typing import TypeGuard
|
|
7
11
|
|
|
8
12
|
logger = logging.getLogger("hud.utils.config")
|
|
9
13
|
|
|
@@ -11,22 +15,27 @@ REMOTE_FUNCTION_PREFIX = "private_"
|
|
|
11
15
|
REMOTE_SETUP = "setup"
|
|
12
16
|
REMOTE_EVALUATE = "evaluate"
|
|
13
17
|
|
|
18
|
+
LOCAL_EVALUATORS = ["response_is", "response_includes", "response_match"]
|
|
19
|
+
|
|
20
|
+
|
|
14
21
|
def _is_valid_python_name(name: str) -> bool:
|
|
15
22
|
"""Check if a string is a valid Python identifier."""
|
|
16
23
|
return bool(re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", name))
|
|
17
24
|
|
|
18
|
-
|
|
19
|
-
|
|
25
|
+
|
|
26
|
+
def _validate_hud_config(config: dict) -> FunctionConfig:
|
|
27
|
+
"""Validate and convert a dictionary to an FunctionConfig."""
|
|
20
28
|
if not isinstance(config.get("function"), str):
|
|
21
29
|
raise ValueError("function must be a string")
|
|
22
|
-
|
|
30
|
+
|
|
23
31
|
# Validate function path components
|
|
24
32
|
_split_and_validate_path(config["function"])
|
|
25
33
|
|
|
26
34
|
args = config["args"] if isinstance(config.get("args"), list) else [config["args"]]
|
|
27
|
-
|
|
28
|
-
# Create a proper
|
|
29
|
-
return
|
|
35
|
+
|
|
36
|
+
# Create a proper FunctionConfig object instead of using cast
|
|
37
|
+
return FunctionConfig(function=config["function"], args=args, id=config.get("id"))
|
|
38
|
+
|
|
30
39
|
|
|
31
40
|
def _split_and_validate_path(path: str) -> None:
|
|
32
41
|
"""Split a function path into components, validating each part."""
|
|
@@ -34,46 +43,52 @@ def _split_and_validate_path(path: str) -> None:
|
|
|
34
43
|
|
|
35
44
|
if not parts:
|
|
36
45
|
raise ValueError("Empty function path")
|
|
37
|
-
|
|
46
|
+
|
|
38
47
|
# Validate each part
|
|
39
48
|
for part in parts:
|
|
40
49
|
if not _is_valid_python_name(part):
|
|
41
50
|
raise ValueError(f"Invalid Python identifier in path: {part}")
|
|
42
51
|
|
|
43
|
-
|
|
52
|
+
|
|
53
|
+
def _is_list_of_configs(config: FunctionConfigs) -> TypeGuard[list[FunctionConfig]]:
|
|
54
|
+
"""Check if a config is a list of FunctionConfig objects."""
|
|
55
|
+
return isinstance(config, list) and all(isinstance(item, FunctionConfig) for item in config)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def expand_config(config: FunctionConfigs) -> list[FunctionConfig]:
|
|
44
59
|
"""
|
|
45
|
-
Process a config into a standardized list of
|
|
46
|
-
|
|
60
|
+
Process a config into a standardized list of FunctionConfig objects.
|
|
61
|
+
|
|
47
62
|
Args:
|
|
48
63
|
config: Can be:
|
|
49
64
|
- A tuple where first element is function name and rest are args
|
|
50
|
-
- A
|
|
65
|
+
- A FunctionConfig object
|
|
51
66
|
- A dictionary with "function" and "args" keys
|
|
52
|
-
- A list of
|
|
53
|
-
|
|
67
|
+
- A list of FunctionConfig objects
|
|
68
|
+
|
|
54
69
|
Returns:
|
|
55
|
-
list[
|
|
56
|
-
|
|
70
|
+
list[FunctionConfig]: List of standardized configurations
|
|
71
|
+
|
|
57
72
|
Raises:
|
|
58
73
|
ValueError: If the configuration format is invalid
|
|
59
74
|
"""
|
|
60
75
|
logger.debug("Processing config: %s", config)
|
|
61
76
|
|
|
62
|
-
# If it's already a
|
|
63
|
-
if isinstance(config,
|
|
77
|
+
# If it's already a FunctionConfig, just wrap it in a list
|
|
78
|
+
if isinstance(config, FunctionConfig):
|
|
64
79
|
return [config]
|
|
65
|
-
|
|
66
|
-
# If it's a list of
|
|
67
|
-
if
|
|
80
|
+
|
|
81
|
+
# If it's a list of FunctionConfigs, return as is
|
|
82
|
+
if _is_list_of_configs(config):
|
|
68
83
|
return config
|
|
69
|
-
|
|
84
|
+
|
|
70
85
|
# Handle dictionary configuration
|
|
71
86
|
if isinstance(config, dict):
|
|
72
87
|
return [_validate_hud_config(config)]
|
|
73
|
-
|
|
88
|
+
|
|
74
89
|
if isinstance(config, str):
|
|
75
|
-
return [
|
|
76
|
-
|
|
90
|
+
return [FunctionConfig(function=config, args=[])]
|
|
91
|
+
|
|
77
92
|
# Handle tuple format
|
|
78
93
|
if isinstance(config, tuple):
|
|
79
94
|
if len(config) < 1 or not isinstance(config[0], str):
|
|
@@ -81,13 +96,13 @@ def expand_config(config: HudStyleConfigs) -> list[HudStyleConfig]:
|
|
|
81
96
|
"Expected tuple[str, ...], got: {type(config)}"
|
|
82
97
|
logger.error(error_msg)
|
|
83
98
|
raise ValueError(error_msg)
|
|
84
|
-
|
|
99
|
+
|
|
85
100
|
# First element is the function name, rest are args
|
|
86
101
|
function_name = config[0]
|
|
87
102
|
args = list(config[1:]) if len(config) > 1 else []
|
|
88
|
-
|
|
89
|
-
return [
|
|
90
|
-
|
|
103
|
+
|
|
104
|
+
return [FunctionConfig(function=function_name, args=args)]
|
|
105
|
+
|
|
91
106
|
# Unknown configuration type
|
|
92
107
|
error_msg = f"Unknown configuration type: {type(config)}"
|
|
93
108
|
logger.error(error_msg)
|