PyPI - hud-python - Versions diffs - 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl - Mend

hud-python 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (38)

hud/__init__.py +13 -10
hud/adapters/claude/adapter.py +30 -18
hud/adapters/common/adapter.py +0 -1
hud/adapters/common/types.py +129 -4
hud/adapters/operator/adapter.py +23 -13
hud/agent/base.py +5 -4
hud/agent/claude.py +65 -13
hud/agent/claude_plays_pokemon.py +3 -2
hud/agent/langchain.py +8 -2
hud/agent/operator.py +36 -11
hud/agent/tests/test_base.py +2 -2
hud/env/docker_client.py +24 -2
hud/env/environment.py +86 -40
hud/env/local_docker_client.py +50 -4
hud/env/remote_client.py +22 -4
hud/env/remote_docker_client.py +8 -4
hud/gym.py +15 -4
hud/job.py +100 -35
hud/server/requests.py +26 -4
hud/settings.py +7 -1
hud/task.py +84 -6
hud/taskset.py +79 -12
hud/telemetry/context.py +33 -57
hud/telemetry/exporter.py +4 -6
hud/telemetry/instrumentation/mcp.py +0 -3
hud/telemetry/tests/test_context.py +7 -3
hud/trajectory.py +3 -0
hud/types.py +28 -2
hud/utils/agent.py +37 -0
hud/utils/common.py +142 -26
hud/utils/config.py +11 -0
hud/utils/tests/test_common.py +225 -0
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.2.5.dist-info → hud_python-0.2.7.dist-info}/METADATA +26 -23
{hud_python-0.2.5.dist-info → hud_python-0.2.7.dist-info}/RECORD +38 -37
{hud_python-0.2.5.dist-info → hud_python-0.2.7.dist-info}/WHEEL +0 -0
{hud_python-0.2.5.dist-info → hud_python-0.2.7.dist-info}/licenses/LICENSE +0 -0

hud/env/remote_client.py CHANGED Viewed

@@ -2,7 +2,9 @@ from __future__ import annotations
 import logging
 from base64 import b64decode
-from typing import TYPE_CHECKING, Any
+from typing import Any
+from pydantic import BaseModel
 from hud.env.client import Client
 from hud.exceptions import HudResponseError
@@ -10,13 +12,18 @@ from hud.server import make_request
 from hud.settings import settings
 from hud.types import EnvironmentStatus
 from hud.utils import ExecuteResult
-if TYPE_CHECKING:
-    from hud.utils.config import FunctionConfig
+from hud.utils.config import FunctionConfig
 logger = logging.getLogger("hud.env.remote_env_client")
+class SetupRequest(BaseModel):
+    task_id: str | None = None
+    setup: FunctionConfig | None = None
+    config: dict[str, Any] | None = None
+    metadata: dict[str, Any] | None = None
 class RemoteClient(Client):
     """
     Remote environment client implementation.
@@ -183,6 +190,17 @@ class RemoteClient(Client):
         return data["result"], b64decode(data["stdout"]), b64decode(data["stderr"])
+    async def setup(self, setup_request: SetupRequest) -> dict[str, Any]:
+        """
+        Setup the environment.
+        """
+        return await make_request(
+            method="POST",
+            url=f"{settings.base_url}/v1/environments/{self.env_id}/reset",
+            json=setup_request.model_dump(),
+            api_key=settings.api_key,
+        )
     async def close(self) -> None:
         """
         Close the remote environment by making a request to the server.

hud/env/remote_docker_client.py CHANGED Viewed

@@ -20,10 +20,14 @@ if TYPE_CHECKING:
 logger = logging.getLogger("hud.env.remote_env_client")
-async def upload_bytes_to_presigned_url(presigned_url: str, data_bytes: bytes) -> None:
+async def upload_bytes_to_presigned_url(
+    presigned_url: str,
+    data_bytes: bytes,
+    timeout: float = 600,
+) -> None:
     try:
         async with httpx.AsyncClient() as client:
-            response = await client.put(presigned_url, content=data_bytes)
+            response = await client.put(presigned_url, content=data_bytes, timeout=timeout)
             response.raise_for_status()
     except httpx.HTTPStatusError as e:
         logger.exception("Failed to upload to presigned URL")
@@ -113,8 +117,8 @@ class RemoteDockerClient(DockerClient):
         logger.info("Creating remote environment")
-        true_gym_id = await get_gym_id("local-docker")
-        # true_gym_id = await get_gym_id("docker")
+        # true_gym_id = await get_gym_id("local-docker")
+        true_gym_id = await get_gym_id("docker")
         # augment metadata with dockerfile
         if "environment_config" not in metadata:

hud/gym.py CHANGED Viewed

@@ -9,13 +9,13 @@ from hud.env.local_docker_client import LocalDockerClient
 from hud.env.remote_client import RemoteClient
 from hud.env.remote_docker_client import RemoteDockerClient
 from hud.exceptions import GymMakeException
+from hud.task import Task
 from hud.telemetry.context import get_current_task_run_id
 from hud.types import CustomGym, Gym
 from hud.utils.common import get_gym_id
 if TYPE_CHECKING:
     from hud.job import Job
-    from hud.task import Task
 logger = logging.getLogger("hud.gym")
@@ -39,9 +39,11 @@ async def make(
     task = None
     if isinstance(env_src, str | CustomGym):
         gym = env_src
-    else:
+    elif isinstance(env_src, Task):
         gym = env_src.gym
         task = env_src
+    else:
+        raise GymMakeException(f"Invalid gym source: {env_src}", {})
     effective_job_id = None
     if job is not None:
@@ -89,9 +91,18 @@ async def make(
             if gym.location == "local":
                 logger.info("Creating local environment")
-                client = await LocalDockerClient.create(uri)
+                if gym.host_config:
+                    logger.info("Using host config: %s", gym.host_config)
+                    client = await LocalDockerClient.create(uri, gym.host_config)
+                else:
+                    client = await LocalDockerClient.create(uri)
             elif gym.location == "remote":
                 logger.info("Creating remote environment")
+                if gym.host_config:
+                    raise ValueError("host_config is not supported for remote environments")
                 client = await RemoteDockerClient.create(
                     image_uri=uri,
                     job_id=effective_job_id,
@@ -105,7 +116,7 @@ async def make(
                 logger.info("Setting source path %s", gym.image_or_build_context)
                 client.set_source_path(gym.image_or_build_context)
         elif isinstance(gym, str):
-            logger.info("Creating private environment")
+            logger.debug("Creating private environment")
             true_gym_id = await get_gym_id(gym)
             client, build_data = await RemoteClient.create(
                 gym_id=true_gym_id,

hud/job.py CHANGED Viewed

@@ -1,12 +1,12 @@
 from __future__ import annotations
 import asyncio
-import datetime
 import functools
 import inspect
 import logging
 import sys
 from collections.abc import Callable, Coroutine
+from datetime import datetime
 from typing import TYPE_CHECKING, Any, TypeVar, cast
 from pydantic import BaseModel, PrivateAttr, TypeAdapter
@@ -18,12 +18,12 @@ from hud.settings import settings
 from hud.task import Task
 from hud.taskset import TaskSet
 from hud.trajectory import Trajectory
-from hud.utils.common import Observation
 from hud.utils.progress import StepProgressTracker
 if TYPE_CHECKING:
     from hud.adapters.common import Adapter
     from hud.agent.base import Agent
+    from hud.utils.common import Observation
 logger = logging.getLogger("hud.job")
@@ -44,7 +44,7 @@ class Job(BaseModel):
     id: str
     name: str
     metadata: dict[str, Any] | None = None
-    created_at: datetime.datetime
+    created_at: datetime
     status: str
     # Internal cache for trajectories
@@ -164,13 +164,15 @@ async def create_job(
     # If not, we might need to make a subsequent GET request
     job_data = data  # Adjust if the API response structure is different
+    created_at = datetime.fromisoformat(job_data["created_at"].replace("Z", "+00:00"))
     logger.info("View job at https://app.hud.so/jobs/%s.", job_data["id"])
     return Job(
         id=job_data["id"],
         name=job_data["name"],
         metadata=job_data.get("metadata", {}),  # Ensure metadata is dict
-        created_at=datetime.datetime.fromisoformat(job_data["created_at"]),  # Parse datetime
+        created_at=created_at,  # Parse datetime
         status=job_data["status"],
     )
@@ -273,7 +275,7 @@ async def _maybe_resample_action(
                 decision = await response_agent.determine_response(response_text)
                 if decision == "CONTINUE":
                     logger.info("ResponseAgent indicated CONTINUE. Retrying...")
-                    obs = Observation(text="Please continue.")
+                    obs.text = "Please continue."
                     return obs, False
                 elif decision == "CONTINUE":
                     logger.warning("Max continue retries reached. Stopping despite CONTINUE.")
@@ -319,6 +321,12 @@ async def _execute_task(
         if agent_instance is None:
             raise RuntimeError("Agent could not be instantiated")
+        agent_name = agent_instance.name
+        logger.info("Using agent: %s", agent_name)
+        if task.metadata is None or not isinstance(task.metadata, dict):
+            task.metadata = {}
+        task.metadata["agent_name"] = agent_name
         # Environment creation with semaphore
         if env_creation_semaphore:
             async with env_creation_semaphore:
@@ -326,6 +334,9 @@ async def _execute_task(
         else:
             env = await gym.make(task, job=job)
+        if not env:
+            raise ValueError(f"Environment creation failed for task {task_id}")
         obs_tuple = await env.reset()
         if obs_tuple is None:
             raise ValueError(f"env.reset() returned None for task {task_id}")
@@ -333,24 +344,45 @@ async def _execute_task(
         step_error = None
+        resampled_actions = 0
         for step in range(max_steps_per_task):
             action, done = (None, False)
             try:
                 # Agent prediction with semaphore
-                if agent_predict_semaphore:
-                    async with agent_predict_semaphore:
+                try:
+                    if agent_predict_semaphore:
+                        async with agent_predict_semaphore:
+                            action, done = await agent_instance.predict(obs)
+                    else:
                         action, done = await agent_instance.predict(obs)
-                else:
-                    action, done = await agent_instance.predict(obs)
+                except Exception as e:
+                    # if agent prediction fails, pass back the error to the agent
+                    logger.exception("[TR: %s] Agent prediction failed: %s", task_id, e)
+                    resampled_actions += 1
+                    if resampled_actions > 5:
+                        logger.warning(
+                            "[TR: %s] Resampled action %d times. Stopping.",
+                            task_id,
+                            resampled_actions,
+                        )
+                        break
+                    continue
                 if tracker:
                     tracker.increment_step(task_id)
-                if action is None and not done:
-                    done = True
-                if done and response_agent:
+                finish = False
+                if done and response_agent and action and len(action) > 0:
                     obs, finish = await _maybe_resample_action(obs, action[-1], response_agent)
+                    resampled_actions += 1
+                    if resampled_actions > 5:
+                        logger.warning(
+                            "[TR: %s] Resampled action %d times. Stopping.",
+                            task_id,
+                            resampled_actions,
+                        )
+                        break
                     if not finish:
                         continue
@@ -359,14 +391,12 @@ async def _execute_task(
                     terminated = True
                 else:
                     obs, _, terminated, _ = step_result
-                if terminated or done:
+                if terminated or done or finish:
                     break
             except Exception as agent_step_err:
                 logger.exception(
-                    "[Job: %s/%s, Task: %s] Step %d Error: %s",
-                    job.name,
-                    job.id,
+                    "[TR: %s] Step %d Error: %s",
                     task_id,
                     step + 1,
                     agent_step_err,
@@ -379,12 +409,12 @@ async def _execute_task(
                         "type": "step_error",
                         "step": step + 1,
                         "error": str(agent_step_err),
-                        "timestamp": datetime.datetime.now().isoformat(),
+                        "timestamp": datetime.now().isoformat(),
                     }
                 )
                 continue
         else:
-            logger.warning("[Job: %s/%s, Task: %s] Max steps reached.", job.name, job.id, task_id)
+            logger.warning("[TR: %s] Max steps reached.", task_id)
         # --- Evaluate Task ---
         evaluation_result = None
@@ -399,9 +429,7 @@ async def _execute_task(
                 # logger.info("Evaluation result: %s", evaluation_result)
             except Exception as eval_err:
                 logger.exception(
-                    "[Job: %s/%s, Task: %s] Evaluation Error: %s",
-                    job.name,
-                    job.id,
+                    "[TR: %s] Evaluation Error: %s",
                     task_id,
                     eval_err,
                 )
@@ -413,12 +441,12 @@ async def _execute_task(
                         "task_id": task_id,
                         "type": "evaluation_error",
                         "error": str(eval_err),
-                        "timestamp": datetime.datetime.now().isoformat(),
+                        "timestamp": datetime.now().isoformat(),
                     }
                 )
     except Exception as e:
-        logger.exception("[Job: %s/%s, Task: %s] Setup/Run Error: %s", job.name, job.id, task_id, e)
+        logger.exception("[TR: %s] Setup/Run Error: %s", task_id, e)
         status = "error"
         error_msg = str(e)
         # Store setup/initialization error in job
@@ -427,7 +455,7 @@ async def _execute_task(
                 "task_id": task_id,
                 "type": "setup_error",
                 "error": str(e),
-                "timestamp": datetime.datetime.now().isoformat(),
+                "timestamp": datetime.now().isoformat(),
             }
         )
@@ -438,24 +466,20 @@ async def _execute_task(
             try:
                 await env.close()
             except Exception as close_err:
-                logger.exception(
-                    "[Job: %s/%s, Task: %s] Close Error: %s", job.name, job.id, task_id, close_err
-                )
+                logger.exception("[TR: %s] Close Error: %s", task_id, close_err)
                 # Store environment close error in job
                 job.errors.append(
                     {
                         "task_id": task_id,
                         "type": "env_close_error",
                         "error": str(close_err),
-                        "timestamp": datetime.datetime.now().isoformat(),
+                        "timestamp": datetime.now().isoformat(),
                     }
                 )
     log_suffix = f" Error: {error_msg}" if status == "error" else f" Eval: {evaluation_result}"
     logger.info(
-        "[Job: %s/%s, Task: %s] Finished local execution. Status: %s.%s",
-        job.name,
-        job.id,
+        "[TR: %s] Finished local execution. Status: %s.%s",
         task_id,
         status,
         log_suffix,
@@ -497,6 +521,7 @@ async def run_job(
     run_parallel: bool = True,
     job_metadata: dict[str, Any] | None = None,
     show_progress: bool = True,
+    verbose: bool = False,
     # Concurrency control with semaphores
     max_concurrent_env_creations: int | None = 30,  # Limits gym.make calls
     max_concurrent_agent_predictions: int | None = None,  # No limit on LLM calls
@@ -532,16 +557,20 @@ async def run_job(
     Returns:
         The created Job object with errors stored in job.errors.
     """
-    hud_logger = logging.getLogger("hud")
-    hud_logger.setLevel(logging.CRITICAL)
     tasks_to_run: list[Task] = []
     created_job: Job | None = None
+    # Get hud logger
+    if not verbose:
+        logger = logging.getLogger("hud")
+        logger.setLevel(logging.CRITICAL)
+    logger = logging.getLogger("hud.job")
     evalset_id = None
     if isinstance(task_or_taskset, TaskSet):
         evalset_id = task_or_taskset.id
-        await task_or_taskset.fit(agent_cls)
+        task_or_taskset.fit(agent_cls)
     gym_id = None
     if isinstance(task_or_taskset, Task):
@@ -706,3 +735,39 @@ async def run_job(
         num_tasks,
     )
     return created_job
+"""
+c7f85f7d-3730-4c9a-85a3-a1dc436c3bd2
+de12c3cc-9d9c-4e90-82cc-1d71d30ede54
+59104743-0a63-4569-a8b5-1eda1a1b55ac
+ff759429-056c-4cde-8851-11e26729ff03
+7b98ea22-e243-4eeb-a6db-79f4a76da2b3
+7aad3f7b-d74f-470d-826d-d817f95fdd67
+e356ede6-074a-49ef-9fcd-69e5bcfbdec9
+26cd1192-3991-4d1b-b599-b2bed1bcb606
+31ece277-970f-4763-b0c8-bf19a56f56c7
+f9b722a0-5f33-466b-bce0-8ece101f2bc6
+33d1af33-8952-4945-b901-229bcfd88354
+6c3d6557-e745-44ab-bc10-300180a81c79
+6c3d6557-e745-44ab-bc10-300180a81c79
+502e02b5-9939-4e57-91af-4fcbcb90a979
+7aad3f7b-d74f-470d-826d-d817f95fdd67
+31ece277-970f-4763-b0c8-bf19a56f56c7
+e356ede6-074a-49ef-9fcd-69e5bcfbdec9"""

hud/server/requests.py CHANGED Viewed

@@ -6,6 +6,7 @@ from __future__ import annotations
 import asyncio
 import logging
+import ssl
 import time
 from typing import Any
@@ -20,7 +21,7 @@ from hud.exceptions import (
 # Set up logger
 logger = logging.getLogger("hud.http")
-logger.setLevel(logging.DEBUG)
+logger.setLevel(logging.INFO)
 # Long running requests can take up to 10 minutes.
@@ -37,7 +38,7 @@ async def _handle_retry(
 ) -> None:
     """Helper function to handle retry logic and logging."""
     retry_time = retry_delay * (2 ** (attempt - 1))  # Exponential backoff
-    logger.warning(
+    logger.debug(
         "%s from %s, retrying in %.2f seconds (attempt %d/%d)",
         error_msg,
         url,
@@ -140,6 +141,12 @@ async def make_request(
                     continue
                 else:
                     raise HudNetworkError(f"Network error: {e!s}") from None
+            except ssl.SSLError as e:
+                if attempt <= max_retries:
+                    await _handle_retry(attempt, max_retries, retry_delay, url, f"SSL error: {e}")
+                    continue
+                else:
+                    raise HudNetworkError(f"SSL error: {e!s}") from None
             except Exception as e:
                 raise HudRequestError(f"Unexpected error: {e!s}") from None
         raise HudRequestError(f"Request failed after {max_retries} retries with unknown error")
@@ -201,7 +208,7 @@ def make_request_sync(
                 # Check if we got a retriable status code
                 if response.status_code in retry_status_codes and attempt <= max_retries:
                     retry_time = retry_delay * (2 ** (attempt - 1))  # Exponential backoff
-                    logger.warning(
+                    logger.debug(
                         "Received status %d from %s, retrying in %.2f seconds (attempt %d/%d)",
                         response.status_code,
                         url,
@@ -222,7 +229,7 @@ def make_request_sync(
             except httpx.RequestError as e:
                 if attempt <= max_retries:
                     retry_time = retry_delay * (2 ** (attempt - 1))
-                    logger.warning(
+                    logger.debug(
                         "Network error %s from %s, retrying in %.2f seconds (attempt %d/%d)",
                         str(e),
                         url,
@@ -234,6 +241,21 @@ def make_request_sync(
                     continue
                 else:
                     raise HudNetworkError(f"Network error: {e!s}") from None
+            except ssl.SSLError as e:
+                if attempt <= max_retries:
+                    retry_time = retry_delay * (2 ** (attempt - 1))  # Exponential backoff
+                    logger.debug(
+                        "SSL error %s from %s, retrying in %.2f seconds (attempt %d/%d)",
+                        str(e),
+                        url,
+                        retry_time,
+                        attempt,
+                        max_retries,
+                    )
+                    time.sleep(retry_time)
+                    continue
+                else:
+                    raise HudNetworkError(f"SSL error: {e!s}") from None
             except Exception as e:
                 raise HudRequestError(f"Unexpected error: {e!s}") from None
         raise HudRequestError(f"Request failed after {max_retries} retries with unknown error")

hud/settings.py CHANGED Viewed

@@ -15,7 +15,7 @@ class Settings(BaseSettings):
     model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="allow")
     base_url: str = Field(
-        default="https://orcstaging.hud.so/hud-gym/api",
+        default="https://orchestration.hud.so/hud-gym/api",
         description="Base URL for the HUD API",
         validation_alias="base_url",
     )
@@ -44,6 +44,12 @@ class Settings(BaseSettings):
         validation_alias="TELEMETRY_ENABLED",
     )
+    fancy_logging: bool = Field(
+        default=True,
+        description="Enable fancy logging for the HUD SDK",
+        validation_alias="FANCY_LOGGING",
+    )
 # Create a singleton instance
 settings = Settings()

hud/task.py CHANGED Viewed

@@ -2,12 +2,12 @@ from __future__ import annotations
 import tempfile
 from pathlib import Path
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Literal, cast
 from inspect_ai.util._sandbox import SandboxEnvironmentSpec
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
-from hud.types import CustomGym, Gym
+from hud.types import CustomGym, Gym, MetadataKeys, SensitiveData
 from hud.utils.common import FunctionConfig, FunctionConfigs
 if TYPE_CHECKING:
@@ -40,28 +40,78 @@ class Task(BaseModel):
     Attributes:
         id: The remote task ID (optional if local-only)
         prompt: The task prompt or instruction
+        system_prompt: The system prompt for the evalset (optional)
         setup: Environment setup configuration (optional)
         evaluate: Configuration for evaluating responses
         metadata: Additional task metadata
+        sensitive_data: Sensitive data such as API keys, passwords, etc.
         choices: Multiple choice answer list (for Inspect compatibility)
         target: Ideal target output (for Inspect compatibility)
         files: Files that go along with the task (for Inspect compatibility)
         gym: Environment specification
     """
-    id: str | None = None
-    prompt: str
+    id: str | None = None  # Remote task ID (optional if local-only)
+    prompt: str  # Task prompt or instruction
+    system_prompt: str | None = None  # System prompt for the evalset (optional)
+    gym: Gym | None = None  # Environment specification
+    # Setup and evaluate configurations for the environment (environment specific)
     setup: FunctionConfigs | None = None
     evaluate: FunctionConfigs | None = None
-    gym: Gym | None = None
+    # Overflow configuration for environments that don't conform to the standard
     config: dict[str, Any] | None = None
+    # Sensitive data such as API keys, passwords, etc.
+    sensitive_data: SensitiveData = Field(default_factory=dict)
+    # Metadata for the task evaluation, information about the agent (see MetadataKeys)
+    metadata: dict[MetadataKeys, Any] = Field(default_factory=dict)
+    # Description of the task, for extra information about its purpose and context
     description: str | None = None
     @classmethod
     def from_dict(cls, data: dict[str, Any]) -> Task:
         return cls(**data)
+    @classmethod
+    def from_serialized(cls, data: dict[str, Any]) -> Task:
+        gym_data = data.get("gym")
+        parsed_gym: Gym | None = gym_data
+        parsed_setup = [(param, entry) for param, entry in data.get("setup", [])]
+        parsed_evaluate = [(param, entry) for param, entry in data.get("evaluate", [])]
+        # Convert dict gym data to CustomGym if needed
+        if (
+            isinstance(gym_data, dict)
+            and gym_data.get("type") == "public"
+            and gym_data.get("location") in ("local", "remote")
+            and gym_data.get("image_or_build_context") is not None
+        ):
+            parsed_gym = CustomGym(
+                type=cast("Literal['public']", gym_data["type"]),
+                location=cast("Literal['local', 'remote']", gym_data["location"]),
+                image_or_build_context=Path(gym_data["image_or_build_context"]),
+            )
+        return cls(
+            id=data.get("id"),
+            prompt=data.get("prompt", ""),
+            system_prompt=data.get("system_prompt"),
+            setup=parsed_setup,
+            evaluate=parsed_evaluate,
+            gym=parsed_gym,
+            config=data.get("config"),
+            description=data.get("description"),
+            sensitive_data=data.get("sensitive_data", {}),
+            metadata=data.get("metadata", {}),
+        )
     @classmethod
     def from_inspect_sample(cls, sample: Sample) -> Task:
         """Create a Task from an Inspect dataset sample.
@@ -144,3 +194,31 @@ class Task(BaseModel):
         if self.gym is None:
             return
         self.gym = agent.transfer_gyms.get(self.gym, self.gym)
+    def serialize(self) -> dict[str, Any]:
+        if isinstance(self.setup, list):
+            parsed_setup = [[param, entry] for param, entry in self.setup]
+        else:
+            parsed_setup = self.setup
+        if isinstance(self.evaluate, list):
+            parsed_evaluate = [[param, entry] for param, entry in self.evaluate]
+        else:
+            parsed_evaluate = self.evaluate
+        if isinstance(self.gym, CustomGym):
+            parsed_gym = self.gym.model_dump()
+            parsed_gym["image_or_build_context"] = str(parsed_gym["image_or_build_context"])
+        else:  # is ServerGym
+            parsed_gym = self.gym
+        return {
+            "id": self.id,
+            "prompt": self.prompt,
+            "config": self.config,
+            "description": self.description,
+            "setup": parsed_setup,
+            "evaluate": parsed_evaluate,
+            "gym": parsed_gym,
+            "sensitive_data": self.sensitive_data,
+            "metadata": self.metadata,
+        }

hud-python 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

Potentially problematic release.

hud-python 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl