hud-python 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (46) hide show
  1. hud/__init__.py +16 -12
  2. hud/adapters/__init__.py +4 -2
  3. hud/adapters/claude/adapter.py +0 -1
  4. hud/adapters/common/adapter.py +11 -10
  5. hud/adapters/common/types.py +27 -13
  6. hud/adapters/operator/__init__.py +5 -0
  7. hud/adapters/operator/adapter.py +93 -0
  8. hud/agent/__init__.py +7 -0
  9. hud/agent/base.py +109 -0
  10. hud/agent/claude.py +187 -0
  11. hud/agent/operator.py +190 -0
  12. hud/env/__init__.py +11 -0
  13. hud/env/client.py +35 -0
  14. hud/env/docker_client.py +306 -0
  15. hud/env/environment.py +181 -0
  16. hud/env/local_docker_client.py +249 -0
  17. hud/env/remote_client.py +185 -0
  18. hud/env/remote_docker_client.py +221 -0
  19. hud/evaluators/__init__.py +10 -0
  20. hud/evaluators/base.py +31 -0
  21. hud/evaluators/inspect.py +29 -0
  22. hud/evaluators/judge.py +213 -0
  23. hud/evaluators/match.py +163 -0
  24. hud/evaluators/remote.py +78 -0
  25. hud/gym.py +101 -15
  26. hud/job.py +185 -0
  27. hud/server/__init__.py +2 -2
  28. hud/server/requests.py +87 -0
  29. hud/settings.py +13 -2
  30. hud/task.py +133 -0
  31. hud/taskset.py +95 -0
  32. hud/trajectory.py +90 -0
  33. hud/types.py +65 -0
  34. hud/utils/__init__.py +4 -2
  35. hud/utils/common.py +69 -0
  36. hud/utils/config.py +182 -4
  37. hud/utils/telemetry.py +67 -0
  38. hud_python-0.2.0.dist-info/METADATA +188 -0
  39. hud_python-0.2.0.dist-info/RECORD +44 -0
  40. {hud_python-0.1.4.dist-info → hud_python-0.2.0.dist-info}/licenses/LICENSE +1 -1
  41. hud/client.py +0 -200
  42. hud/environment.py +0 -317
  43. hud/run.py +0 -208
  44. hud_python-0.1.4.dist-info/METADATA +0 -125
  45. hud_python-0.1.4.dist-info/RECORD +0 -21
  46. {hud_python-0.1.4.dist-info → hud_python-0.2.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,78 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from typing import Any
5
+
6
+ from hud.evaluators.base import EvaluationResult
7
+ from hud.server import make_request
8
+ from hud.settings import settings
9
+
10
+
11
async def _remote_eval_call(
    response: Any,
    answer: Any,
    eval_type: str,
    config: dict[str, Any] | None = None
) -> dict[str, Any]:
    """POST a single evaluation request to the HUD server.

    The request goes to ``{settings.base_url}/evaluations/evaluate`` and is
    authenticated with ``settings.api_key``. This coroutine never raises for
    request failures: any exception is turned into a failure payload.

    Args:
        response: The response to evaluate
        answer: The reference answer to compare against
        eval_type: Type of evaluation (e.g., "match", "judge", "agent")
        config: Optional configuration parameters; ``None`` is sent as ``{}``

    Returns:
        The server's reply, or a dictionary with ``score`` of ``-1.0``, a
        ``reason`` describing the failure and empty ``details`` when the
        request could not be completed
    """
    try:
        # Everything, including payload construction, stays inside the try so
        # that any failure is reported through the fallback payload below.
        payload = {
            "response": response,
            "answer": answer,
            "type": eval_type,
            "config": config or {}
        }
        endpoint = f"{settings.base_url}/evaluations/evaluate"
        return await make_request(
            method="POST",
            url=endpoint,
            json=payload,
            api_key=settings.api_key,
        )
    except Exception as exc:
        failure = {
            "score": -1.0,
            "reason": f"Remote evaluation failed: {exc!s}",
            "details": {}
        }
        return failure
47
+
48
+
49
def remote_evaluate(
    response: Any,
    answer: Any,
    eval_type: str = "default",
    config: dict[str, Any] | None = None
) -> EvaluationResult:
    """Evaluate a response using remote evaluation services.

    Synchronous wrapper around the async remote call. It is safe to call both
    from plain synchronous code and from code that is already running inside
    an event loop: ``asyncio.run`` refuses to start when a loop is running in
    the current thread, so in that case the coroutine is executed on a
    short-lived worker thread with its own loop. Either way this function
    blocks the calling thread until the remote call finishes.

    Args:
        response: The response to evaluate
        answer: The reference answer to compare against
        eval_type: Type of evaluation to perform
        config: Optional configuration for the evaluation

    Returns:
        EvaluationResult containing the evaluation results. Missing fields in
        the server reply fall back to a score of -1.0, the reason
        "Remote evaluation completed" and empty criteria scores.
    """
    import concurrent.futures

    def _run_blocking() -> dict[str, Any]:
        # asyncio.run creates (and closes) a private event loop for this call.
        return asyncio.run(_remote_eval_call(
            response=response,
            answer=answer,
            eval_type=eval_type,
            config=config
        ))

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: asyncio.run can be used directly.
        result = _run_blocking()
    else:
        # A loop is already running here; asyncio.run would raise RuntimeError,
        # so hand the coroutine to a worker thread that owns its own loop.
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
            result = executor.submit(_run_blocking).result()

    return EvaluationResult(
        score=result.get("score", -1.0),
        reason=result.get("reason", "Remote evaluation completed"),
        mode=eval_type,
        criteria_scores=result.get("details", {})
    )
hud/gym.py CHANGED
@@ -1,22 +1,108 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import logging
4
+ from pathlib import Path
5
+ from typing import TYPE_CHECKING, Any
3
6
 
4
- class Gym:
5
- """
6
- Represents a simulation environment in the HUD system.
7
+ from hud.env.environment import Environment
8
+ from hud.env.local_docker_client import LocalDockerClient
9
+ from hud.env.remote_client import RemoteClient
10
+ from hud.env.remote_docker_client import RemoteDockerClient
11
+ from hud.task import Task
12
+ from hud.types import CustomGym, Gym
13
+ from hud.utils.common import get_gym_id
14
+
15
+ if TYPE_CHECKING:
16
+ from hud.job import Job
17
+
18
+ logger = logging.getLogger("hud.gym")
7
19
 
8
- Attributes:
9
- id: Unique identifier for the gym
10
- name: Human-readable name of the gym
20
async def make(
    env_src: Gym | Task,
    *,
    job: Job | None = None,
    job_id: str | None = None,
    metadata: dict[str, Any] | None = None,
) -> Environment:
    """
    Create an environment from a gym specification or a Task object.

    Args:
        env_src: A gym (a CustomGym or a string gym identifier) or a Task
            whose ``gym`` attribute names the gym to build
        job: Job object to associate with this environment
        job_id: ID of job to associate with this environment (deprecated, use job instead)
        metadata: Additional metadata for the environment

    Returns:
        The created Environment. When ``env_src`` is a Task, the environment's
        setup step has already been awaited.

    Raises:
        ValueError: If a custom gym has no dockerfile or an unknown location,
            or if the gym is neither a CustomGym nor a string
    """
    if metadata is None:
        metadata = {}

    # Resolve the job id: explicit job, then explicit job_id, then whatever
    # job the @job decorator has made active further up the call stack.
    effective_job_id = None
    if job is not None:
        effective_job_id = job.id
    elif job_id is not None:
        effective_job_id = job_id
    else:
        try:
            from hud.job import get_active_job
            active_job = get_active_job()
            if active_job:
                effective_job_id = active_job.id
        except ImportError:
            pass  # Module not available, skip

    # Dispatch on Task (a concrete class) rather than on the Gym type alias:
    # isinstance() against an alias can raise TypeError depending on how the
    # alias is declared. Anything that is not a Task is treated as the gym
    # itself and validated by the concrete checks below.
    if isinstance(env_src, Task):
        gym = env_src.gym
        task = env_src
    else:
        gym = env_src
        task = None

    if isinstance(gym, CustomGym):
        # Create the environment (depending on location)
        if gym.dockerfile is None:
            raise ValueError("Dockerfile is required for custom environments")
        if gym.location == "local":
            logger.info("Creating local environment")
            client, build_data = await LocalDockerClient.create(gym.dockerfile)
        elif gym.location == "remote":
            logger.info("Creating remote environment")
            client, build_data = await RemoteDockerClient.create(
                dockerfile=gym.dockerfile,
                job_id=effective_job_id,
                task_id=task.id if task else None,
                metadata=metadata,
            )
        else:
            raise ValueError(f"Invalid environment location: {gym.location}")

        # Set up the environment with a source path
        if gym.controller_source_dir:
            logger.info("Setting source path")
            client.set_source_path(Path(gym.controller_source_dir))
    elif isinstance(gym, str):
        logger.info("Creating private environment")
        # Note: the gym string is a unique identifier, but it is not a true
        # gym_id for the purposes of building the environment;
        # we therefore fetch the gym_id from the HUD API here
        true_gym_id = await get_gym_id(gym)

        # Create the environment
        client, build_data = await RemoteClient.create(
            gym_id=true_gym_id,
            job_id=effective_job_id,
            task_id=task.id if task else None,
            metadata=metadata,
        )
    else:
        raise ValueError(f"Invalid gym source: {gym}")

    # Create the environment itself
    environment = Environment(client=client, metadata=metadata, task=task, build_data=build_data)

    # Only tasks carry setup configuration, so setup runs only for them.
    if task:
        await environment._setup()

    return environment
hud/job.py ADDED
@@ -0,0 +1,185 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime
4
+ import functools
5
+ import inspect
6
+ import logging
7
+ from collections.abc import Callable
8
+ from typing import Any, TypeVar, cast
9
+
10
+ from pydantic import BaseModel, TypeAdapter
11
+
12
+ from hud.server import make_request
13
+ from hud.settings import settings
14
+ from hud.trajectory import Trajectory
15
+
16
+ logger = logging.getLogger("hud.job")
17
+
18
+ # Type variable for the decorator
19
+ T = TypeVar("T", bound=Callable)
20
+
21
+ # Global registry to store active jobs created by decorators
22
+ _ACTIVE_JOBS = {}
23
+
24
class Job(BaseModel):
    """
    Groups a set of related trajectories under one identifier.

    The model carries the job's metadata and offers helpers for fetching
    job data from the HUD API. Prefer `create_job` or `load_job` over
    constructing instances by hand.
    """

    id: str
    name: str
    metadata: dict[str, Any] | None = None
    created_at: datetime.datetime
    status: str

    async def load_trajectories(self, *, api_key: str | None = None) -> list[Trajectory]:
        """
        Fetch every trajectory that belongs to this job.

        Args:
            api_key: API key to use; falls back to ``settings.api_key``

        Returns:
            List[Trajectory]: The trajectories in the job
        """
        effective_key = api_key or settings.api_key
        endpoint = f"{settings.base_url}/v2/jobs/{self.id}/trajectories"

        payload = await make_request(method="GET", url=endpoint, api_key=effective_key)

        # Validate the raw response into Trajectory models in one pass.
        adapter = TypeAdapter(list[Trajectory])
        return adapter.validate_python(payload)
53
+
54
+
55
async def create_job(name: str, gym_id: str | None = None,
                     evalset_id: str | None = None,
                     metadata: dict[str, Any] | None = None) -> Job:
    """
    Register a new job with the HUD API and return it.

    Args:
        name: The name of the job
        gym_id: Optional gym identifier forwarded to the API
        evalset_id: Optional evalset identifier forwarded to the API
        metadata: Metadata for the job; ``None`` is sent as an empty dict

    Returns:
        Job: The created job instance
    """
    request_body = {
        "name": name,
        "metadata": metadata or {},
        "gym_id": gym_id,
        "evalset_id": evalset_id,
    }

    # The creation endpoint is expected to echo back the full job record
    # (id, name, metadata, created_at, status); a follow-up GET would be
    # needed if it ever stops doing so.
    job_data = await make_request(
        method="POST",
        url=f"{settings.base_url}/v2/jobs",
        json=request_body,
        api_key=settings.api_key,
    )

    fields = {
        "id": job_data["id"],
        "name": job_data["name"],
        "metadata": job_data.get("metadata", {}),  # default to a dict when absent
        "created_at": datetime.datetime.fromisoformat(job_data["created_at"]),
        "status": job_data["status"],
    }
    return Job(**fields)
95
+
96
+
97
async def load_job(job_id: str, api_key: str | None = None) -> Job:
    """
    Fetch an existing job from the HUD API.

    Args:
        job_id: The ID of the job to retrieve
        api_key: API key to use; falls back to ``settings.api_key``

    Returns:
        Job: The retrieved job instance

    Raises:
        ValueError: If the API returns an empty response for ``job_id``
    """
    effective_key = api_key or settings.api_key
    endpoint = f"{settings.base_url}/v2/jobs/{job_id}"

    payload = await make_request(method="GET", url=endpoint, api_key=effective_key)

    if payload:
        # Let pydantic validate the raw record and build the Job.
        return Job.model_validate(payload)

    raise ValueError(f"Job {job_id} not found")
120
+
121
+
122
def job(
    name: str,
    metadata: dict[str, Any] | None = None
) -> Callable[[T], T]:
    """
    Decorator to automatically create and associate a job with all environments
    created within the decorated function.

    A fresh job is created on every call of the decorated (async) function.
    While the call is in progress the job is registered in ``_ACTIVE_JOBS``
    and can be found by ``get_active_job``, which walks the call stack looking
    for the wrapper's ``_job_call_id`` local.

    Args:
        name: The name of the job
        metadata: Additional metadata for the job

    Returns:
        A decorator function that creates a job and associates it with environments
    """
    def decorator(func: T) -> T:
        @functools.wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> Any:
            # Create a job for this function call
            active_job = await create_job(
                name=name,
                metadata=metadata
            )

            # The registry key must be unique per *call*, not per decorated
            # function: id(wrapper) is identical for every call, so concurrent
            # calls would overwrite and then delete each other's entry.
            # id(active_job) is unique for as long as the job is registered.
            #
            # The key is held in a real local variable named `_job_call_id`
            # so that get_active_job can find it in this frame's f_locals;
            # a genuine local is reliably visible there, whereas writing into
            # frame.f_locals was not guaranteed to persist before PEP 667.
            _job_call_id = f"{func.__module__}.{func.__qualname__}_{id(active_job)}"
            _ACTIVE_JOBS[_job_call_id] = active_job

            try:
                # Run the decorated function
                return await func(*args, **kwargs)
            finally:
                # Clean up, tolerating an entry that is already gone
                _ACTIVE_JOBS.pop(_job_call_id, None)

        return cast(T, wrapper)
    return decorator
166
+
167
+
168
def get_active_job() -> Job | None:
    """
    Look up the job that is active for the current call stack, if any.

    Starting at this function's own frame, each caller frame is inspected for
    a ``_job_call_id`` local; the first one whose value is a key of
    ``_ACTIVE_JOBS`` determines the result. gym.make relies on this to attach
    environments to jobs automatically.

    Returns:
        The active job or None if no job is active
    """
    current = inspect.currentframe()
    while current:
        frame_locals = current.f_locals
        if "_job_call_id" in frame_locals:
            registry_key = frame_locals["_job_call_id"]
            # A stale marker (job already cleaned up) is skipped and the
            # search continues with the outer frames.
            if registry_key in _ACTIVE_JOBS:
                return _ACTIVE_JOBS[registry_key]
        current = current.f_back

    return None
hud/server/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- from .requests import RequestError, make_request
3
+ from .requests import RequestError, make_request, make_request_sync
4
4
 
5
- __all__ = ["RequestError", "make_request"]
5
+ __all__ = ["RequestError", "make_request", "make_request_sync"]
hud/server/requests.py CHANGED
@@ -6,6 +6,7 @@ from __future__ import annotations
6
6
 
7
7
  import asyncio
8
8
  import logging
9
+ import time
9
10
  from typing import Any
10
11
 
11
12
  import httpx
@@ -191,3 +192,89 @@ async def make_request(
191
192
  except Exception as e:
192
193
  raise RequestError(f"Unexpected error: {e!s}") from None
193
194
  raise RequestError(f"Request failed after {max_retries} retries with unknown error")
195
+
196
+
197
+ def make_request_sync(
198
+ method: str,
199
+ url: str,
200
+ json: Any | None = None,
201
+ api_key: str | None = None,
202
+ max_retries: int = 4,
203
+ retry_delay: float = 2.0,
204
+ ) -> dict[str, Any]:
205
+ """
206
+ Make a synchronous HTTP request to the HUD API.
207
+
208
+ Args:
209
+ method: HTTP method (GET, POST, etc.)
210
+ url: Full URL for the request
211
+ json: Optional JSON serializable data
212
+ api_key: API key for authentication
213
+ max_retries: Maximum number of retries
214
+ retry_delay: Delay between retries
215
+ Returns:
216
+ dict: JSON response from the server
217
+
218
+ Raises:
219
+ RequestError: If API key is missing or request fails
220
+ """
221
+ if not api_key:
222
+ raise RequestError("API key is required but not provided")
223
+
224
+ headers = {"Authorization": f"Bearer {api_key}"}
225
+ retry_status_codes = [502, 503, 504]
226
+ attempt = 0
227
+
228
+ while attempt <= max_retries:
229
+ attempt += 1
230
+
231
+ try:
232
+ with httpx.Client(
233
+ timeout=600.0, # Long running requests can take up to 10 minutes
234
+ limits=httpx.Limits(
235
+ max_connections=1000,
236
+ max_keepalive_connections=1000,
237
+ keepalive_expiry=10.0,
238
+ ),
239
+ ) as client:
240
+ response = client.request(
241
+ method=method, url=url, json=json, headers=headers
242
+ )
243
+
244
+ # Check if we got a retriable status code
245
+ if response.status_code in retry_status_codes and attempt <= max_retries:
246
+ retry_time = retry_delay * (2 ** (attempt - 1)) # Exponential backoff
247
+ logger.warning(
248
+ "Received status %d from %s, retrying in %.2f seconds (attempt %d/%d)",
249
+ response.status_code,
250
+ url,
251
+ retry_time,
252
+ attempt,
253
+ max_retries,
254
+ )
255
+ time.sleep(retry_time)
256
+ continue
257
+
258
+ response.raise_for_status()
259
+ result = response.json()
260
+ return result
261
+ except httpx.HTTPStatusError as e:
262
+ raise RequestError.from_http_error(e) from None
263
+ except httpx.RequestError as e:
264
+ if attempt <= max_retries:
265
+ retry_time = retry_delay * (2 ** (attempt - 1))
266
+ logger.warning(
267
+ "Network error %s from %s, retrying in %.2f seconds (attempt %d/%d)",
268
+ str(e),
269
+ url,
270
+ retry_time,
271
+ attempt,
272
+ max_retries,
273
+ )
274
+ time.sleep(retry_time)
275
+ continue
276
+ else:
277
+ raise RequestError(f"Network error: {e!s}") from None
278
+ except Exception as e:
279
+ raise RequestError(f"Unexpected error: {e!s}") from None
280
+ raise RequestError(f"Request failed after {max_retries} retries with unknown error")
hud/settings.py CHANGED
@@ -15,7 +15,7 @@ class Settings(BaseSettings):
15
15
  model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="allow")
16
16
 
17
17
  base_url: str = Field(
18
- default="https://orchestrator.hud.live/hud-gym/api/v1",
18
+ default="https://orcstaging.hud.so/hud-gym/api",
19
19
  description="Base URL for the HUD API",
20
20
  validation_alias="base_url",
21
21
  )
@@ -25,7 +25,18 @@ class Settings(BaseSettings):
25
25
  description="API key for authentication with the HUD API",
26
26
  validation_alias="HUD_API_KEY",
27
27
  )
28
-
28
+
29
+ anthropic_api_key: str | None = Field(
30
+ default=None,
31
+ description="API key for Anthropic models",
32
+ validation_alias="ANTHROPIC_API_KEY",
33
+ )
34
+
35
+ openai_api_key: str | None = Field(
36
+ default=None,
37
+ description="API key for OpenAI models",
38
+ validation_alias="OPENAI_API_KEY",
39
+ )
29
40
 
30
41
  # Create a singleton instance
31
42
  settings = Settings()
hud/task.py ADDED
@@ -0,0 +1,133 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
4
+
5
+ from pydantic import BaseModel
6
+
7
+ from hud.types import CustomGym, Gym
8
+ from hud.utils import HudStyleConfig
9
+ from hud.utils.config import HudStyleConfigs
10
+
11
+ if TYPE_CHECKING:
12
+ from inspect_ai.dataset import Sample
13
+
14
+ # Environment specifications:
15
+ # These represent the environment as a whole, including both the controller
16
+ # and the environment type (eg, what os, which services are running)
17
+
18
+ UBUNTU_DOCKERFILE = "ubuntu:latest"
19
+
20
+
21
def convert_inspect_setup(setup: str) -> list[HudStyleConfig]:
    """
    Wrap an Inspect setup script in HUD's configuration format.

    Inspect describes setup as one bash string to run in the environment;
    the result is a one-element list holding a HudStyleConfig that calls the
    "bash" function with that string as its only argument.
    """
    bash_step = HudStyleConfig(function="bash", args=[setup])
    return [bash_step]
27
+
28
+
29
class Task(BaseModel):
    """A task that can be executed and evaluated.

    A Task represents a specific activity to be performed in an environment.
    It contains the prompt describing the task and configurations for
    setting up and evaluating the environment.

    The setup and evaluate configurations can be in several formats:
    - String (function name): "chrome.maximize"
    - String (function with args): "chrome.activate_tab 5"
    - Dict: {"function": "chrome.navigate", "args": ["https://example.com"]}
    - List of the above: ["chrome.maximize", {"function": "chrome.navigate", "args": ["https://example.com"]}]

    Attributes:
        id: The remote task ID (optional if local-only)
        prompt: The task prompt or instruction
        setup: Environment setup configuration (optional)
        evaluate: Configuration for evaluating responses
        metadata: Additional task metadata
        choices: Multiple choice answer list (for Inspect compatibility)
        target: Ideal target output (for Inspect compatibility)
        files: Files that go along with the task (for Inspect compatibility)
        gym: Environment specification
        config: Free-form configuration dictionary (optional)
    """

    id: str | None = None
    prompt: str
    setup: HudStyleConfigs | None = None
    evaluate: HudStyleConfigs | None = None
    gym: Gym | None = None

    target: str | list[str] | None = None

    choices: list[str] | None = None
    files: dict[str, str] | None = None
    metadata: dict[str, Any] | None = None

    config: dict[str, Any] | None = None

    @classmethod
    def from_inspect_sample(cls, sample: Sample) -> Task:
        """Create a Task from an Inspect dataset sample.
        The task's sandbox is a local docker container (ubuntu unless the
        sample supplies its own sandbox config) using the standard controller.
        The sample's files are carried over on the task's ``files`` field.

        Args:
            sample: An Inspect dataset Sample object

        Returns:
            Task instance

        Raises:
            ValueError: If the sample's sandbox is not a docker sandbox or is
                malformed

        The Inspect Sample has these fields:
        - input (str | list[ChatMessage]): The input to be submitted to the model
        - choices (list[str] | None): Optional multiple choice answer list
        - target (str | list[str] | None): Optional ideal target output
        - id (str | None): Optional unique identifier for sample
        - metadata (dict[str, Any] | None): Optional arbitrary metadata
        - sandbox (str | tuple[str, str]): Optional sandbox environment type
        - files (dict[str, str] | None): Optional files that go with the sample
        - setup (str | None): Optional setup script to run for sample
        """
        # Extract the input as prompt
        prompt = sample.input
        if isinstance(prompt, list):  # Handle ChatMessage format
            # Flatten the chat into "Role: content" paragraphs
            prompt = "\n\n".join(
                f"{message.role.capitalize()}: {message.content}" for message in prompt
            )

        # Map sandbox from Inspect to our envspec
        sandbox = sample.sandbox
        dockerfile = None
        if sandbox:
            if isinstance(sandbox, str):
                if sandbox != "docker":
                    raise ValueError("docker is the only supported sandbox")
            elif isinstance(sandbox, tuple) and len(sandbox) == 2:
                sandbox_type, sandbox_config = sandbox
                if sandbox_type != "docker":
                    raise ValueError("docker is the only supported sandbox")
                dockerfile = sandbox_config
            else:
                raise ValueError("Invalid sandbox configuration")

        gym = CustomGym(
            dockerfile=dockerfile or UBUNTU_DOCKERFILE,
            location="local",
        )

        return cls(
            id=str(sample.id) if sample.id else None,
            prompt=prompt,
            setup=convert_inspect_setup(sample.setup) if sample.setup else [],
            metadata=sample.metadata,
            choices=sample.choices,
            target=sample.target,
            # Previously dropped: without this the sample's files never reach
            # the task even though the model has a field for them.
            files=sample.files,
            gym=gym,
        )

    def convert_sdk01(self) -> None:
        """Overwrite setup and evaluate with the fixed "reset"/"evaluate" calls.

        Setup becomes a single "reset" call carrying this task's id, and
        evaluate becomes a single argument-less "evaluate" call.
        """
        self.setup = [HudStyleConfig(function="reset", args=[{"task_id": self.id}])]
        self.evaluate = [HudStyleConfig(function="evaluate", args=[])]