PyPI - hud-python - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

hud-python 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (26)

hud/__init__.py +3 -2
hud/adapters/__init__.py +2 -1
hud/adapters/claude/adapter.py +15 -2
hud/adapters/common/types.py +7 -3
hud/adapters/operator/adapter.py +10 -6
hud/agent/__init__.py +2 -1
hud/agent/claude.py +22 -2
hud/agent/langchain.py +198 -0
hud/agent/operator.py +35 -17
hud/env/docker_client.py +1 -1
hud/env/environment.py +182 -9
hud/env/local_docker_client.py +3 -1
hud/env/remote_client.py +4 -0
hud/gym.py +3 -3
hud/job.py +420 -12
hud/task.py +41 -30
hud/taskset.py +8 -0
hud/types.py +5 -3
hud/utils/common.py +31 -1
hud/utils/config.py +2 -93
hud/utils/progress.py +136 -0
{hud_python-0.2.0.dist-info → hud_python-0.2.2.dist-info}/METADATA +52 -39
hud_python-0.2.2.dist-info/RECORD +46 -0
hud_python-0.2.0.dist-info/RECORD +0 -44
{hud_python-0.2.0.dist-info → hud_python-0.2.2.dist-info}/WHEEL +0 -0
{hud_python-0.2.0.dist-info → hud_python-0.2.2.dist-info}/licenses/LICENSE +0 -0

hud/env/environment.py CHANGED Viewed

@@ -10,14 +10,13 @@ from pydantic import BaseModel
 from hud.env.client import Client
 from hud.env.remote_client import RemoteClient
 from hud.task import Task
-from hud.utils import HudStyleConfigs, expand_config
-from hud.utils.config import REMOTE_EVALUATE, REMOTE_SETUP, HudStyleConfig, create_remote_config
-if TYPE_CHECKING:
-    from hud.adapters.common import CLA
+from hud.utils.common import HudStyleConfig, HudStyleConfigs
+from hud.utils.config import REMOTE_EVALUATE, REMOTE_FUNCTION_PREFIX, REMOTE_SETUP, expand_config
 logger = logging.getLogger("hud.environment")
+if TYPE_CHECKING:
+    from hud.adapters.common import CLA
 class Observation(BaseModel):
     """
@@ -46,6 +45,9 @@ class Environment(BaseModel):
     task: Task | None = None
     build_data: dict[str, Any]
+    # final response
+    final_response: str | None = None
     async def _invoke_all(self, configs: HudStyleConfigs) -> list[Any]:
         # Execute each config and collect results
         configs_all = [configs] if not isinstance(configs, list) else configs
@@ -76,7 +78,7 @@ class Environment(BaseModel):
             config: The configuration to use for the setup
         """
         if isinstance(self.client, RemoteClient):
-            await self._invoke_all(create_remote_config(self.task, config, REMOTE_SETUP))
+            await self._invoke_all(create_remote_config(self, config, REMOTE_SETUP))
         else:
             if config is not None:
                 await self._invoke_all(config)
@@ -97,7 +99,7 @@ class Environment(BaseModel):
         """
         if isinstance(self.client, RemoteClient):
             results = await self._invoke_all(
-                create_remote_config(self.task, config, REMOTE_EVALUATE))
+                create_remote_config(self, config, REMOTE_EVALUATE))
         else:
             if config is not None:
                 results = await self._invoke_all(config)
@@ -143,9 +145,14 @@ class Environment(BaseModel):
         """
         if actions is None or len(actions) == 0:
             actions = []
+        args = [[action.model_dump() for action in actions]]
+        # TODO: Move this into the server side
+        if self._maybe_store_response(actions):
+            return Observation(text=self.final_response), 0, False, {}
         result, stdout, stderr = await self.client.invoke(
-            HudStyleConfig(function="step", args=[[action.model_dump() for action in actions]])
+            HudStyleConfig(function="step", args=args)
         )
         if stdout:
             logger.info("Step produced stdout: %s", stdout.decode())
@@ -156,6 +163,21 @@ class Environment(BaseModel):
         observation = Observation.model_validate(result["observation"], strict=True)
         return observation, 0, False, {}
+    def _maybe_store_response(self, actions: list[CLA]) -> bool:
+        """Store the final response into the environment.
+        Args:
+            actions: The action(s) to check
+        Returns:
+            bool: True if the response was submitted, False otherwise
+        """
+        if len(actions) > 0 and actions[-1].type == "response":
+            self.final_response = actions[-1].text
+            return True
+        return False
     async def get_urls(self) -> dict[str, Any]:
         """Get URLs for the environment.
@@ -179,3 +201,154 @@ class Environment(BaseModel):
         This should release any resources and clean up the environment.
         """
         await self.client.close()
+def create_remote_config(
+    env: Environment | None = None,
+    config: HudStyleConfigs | None = None,
+    function: str | None = None,
+) -> list[HudStyleConfig]:
+    """
+    Create a remote configuration for setup or evaluate, determining the final
+    function call structure based on the provided task or explicit config.
+    This function orchestrates how setup and evaluate steps defined in a Task
+    or passed directly are prepared for remote execution via `env._invoke_all`.
+    Args:
+        env: Environment object, potentially containing a task definition.
+             Used to access `env.task` and `env.final_response`.
+        config: Direct configuration override (e.g., passed to `env.evaluate(config=...)`).
+                Can be in various HudStyleConfigs formats.
+        function: The top-level function context, typically "setup" or "evaluate".
+    Returns:
+        list[HudStyleConfig]: A list containing a single HudStyleConfig object
+                              ready for remote invocation via `client.invoke`.
+                              The specific function/arguments are chosen based on this priority:
+                              1. Explicit `config` parameter (if provided).
+                              2. Specific `task` attribute (e.g., `task.evaluate`).
+                              3. General `task.config` dictionary.
+                              4. Default private function using `task.id`
+                              (e.g., `private_evaluate(task.id)`).
+                              5. Base `function` name with minimal/default arguments.
+    Logic & Examples (Assuming `function="evaluate"` for examples):
+        1) Explicit `config` provided: The `config` is expanded and becomes the `args`
+           for the top-level `function` call. If the environment has a final_response,
+           it's appended to these args.
+           - Example Input:
+             `env` (with `final_response="Paris"`)
+             `config=("contains_text", "Paris")`
+             `function="evaluate"`
+           - Example Output:
+             `[HudStyleConfig(function='evaluate', args=[
+                HudStyleConfig(function='contains_text', args=['Paris', 'Paris'])
+             ])]`
+        2) No explicit `config`, Task has the attribute (e.g., `task.evaluate`):
+           The Task's attribute value (e.g., `task.evaluate`) is expanded and becomes the `args`
+           for the top-level `function` call. Task ID is added if present. `final_response` is
+           appended if present.
+           - Example Input:
+             `env` (`task=Task(id="t1", evaluate=("check_answer",), ...)`, `final_response="42"`)
+             `config=None`
+             `function="evaluate"`
+           - Example Output:
+             `[HudStyleConfig(function='evaluate', args=[HudStyleConfig(function='check_answer',
+                args=['42'], id='t1')])]`
+        3) No explicit `config`, no specific Task attribute, Task has `task.config`:
+           The `task.config` dictionary becomes the single argument for the top-level
+           `function` call. Task ID is added to the config dict if present. `final_response` is
+           appended if present.
+           - Example Input:
+             `env` (with `task=Task(id="t2", config={"expected": "val"}, ...)`)
+             `config=None`
+             `function="evaluate"`
+           - Example Output:
+             `[HudStyleConfig(function='evaluate', args=[{"expected": "val", "id": "t2"}])]`
+        4) No explicit `config`, no specific Task attribute, no `task.config`, Task has `task.id`:
+           Calls a private function (`private_<function>`) on the remote end, passing
+           the `task.id` as the only argument.
+           - Example Input:
+             `env` (with `task=Task(id="t3", ...)`)
+             `config=None`
+             `function="evaluate"`
+           - Example Output:
+             `[HudStyleConfig(function='private_evaluate', args=['t3'])]`
+        5) No explicit `config` and no relevant Task info:
+           Calls the top-level `function` with empty args.
+           - Example Input:
+             `env` (with `task=Task(...)`)
+             `config=None`
+             `function="evaluate"`
+           - Example Output:
+             `[HudStyleConfig(function='evaluate', args=[])]`
+    """
+    # If no function provided, just expand the config and return it directly
+    if function is None:
+        if config:
+            return expand_config(config)
+        raise ValueError("Either function or config must be provided")
+    # Case 1: Explicit config provided
+    if config:
+        expanded_configs = expand_config(config)
+        if env and env.final_response:
+            # Ensure args is a list before appending
+            if not isinstance(expanded_configs[0].args, list):
+                 expanded_configs[0].args = [expanded_configs[0].args]
+            expanded_configs[0].args.append(env.final_response) # for remote responses
+        return [HudStyleConfig(function=function, args=expanded_configs)]
+    # Otherwise, use the environment's task
+    task = env.task if env else None
+    # Must have a task for the remaining cases
+    if task is None:
+        raise ValueError("Either task or config must be provided")
+    # Case 2: Task has the specified function attribute
+    task_config = getattr(task, function, None)
+    if task_config:
+        expanded_configs = expand_config(task_config)
+        if task.id:
+            expanded_configs[0].id = task.id # for remote IDs
+        elif env and env.final_response:
+            # Ensure args is a list before appending
+            if not isinstance(expanded_configs[0].args, list):
+                 expanded_configs[0].args = [expanded_configs[0].args]
+            expanded_configs[0].args.append(env.final_response) # for remote responses
+        return [HudStyleConfig(function=function, args=expanded_configs)]
+    # Case 3: Check for task.config
+    if hasattr(task, "config") and task.config:
+        # Ensure task.config is a dictionary before adding id
+        final_args = task.config.copy() if isinstance(task.config, dict) else {}
+        if task.id:
+            final_args["id"] = task.id # for remote IDs
+        if env and env.final_response:
+            # Append response, ensuring args exists and is a list
+            if "args" not in final_args:
+                final_args["args"] = []
+            if not isinstance(final_args["args"], list):
+                final_args["args"] = [final_args["args"]]
+            final_args["args"].append(env.final_response)
+        return [HudStyleConfig(function=function, args=[final_args])]
+    # Case 4: Use task.id
+    if task.id:
+        args_list = [task.id]
+        if env and env.final_response:
+             args_list.append(env.final_response) # Append final response
+        return [HudStyleConfig(function=f"{REMOTE_FUNCTION_PREFIX}{function}", args=args_list)]
+    # Case 5: No valid configuration found
+    args_list = []
+    if env and env.final_response:
+        args_list.append(env.final_response)
+    return [HudStyleConfig(function=function, args=args_list)]

hud/env/local_docker_client.py CHANGED Viewed

@@ -25,7 +25,9 @@ class LocalDockerClient(DockerClient):
     """
     @classmethod
-    async def create(cls, dockerfile: str, ports: list[int] | None = None) -> tuple[LocalDockerClient, dict[str, Any]]:
+    async def create(cls, dockerfile: str, ports: list[int] | None = None) -> tuple[
+            LocalDockerClient, dict[str, Any]
+        ]:
         """
         Creates a Docker environment client from a dockerfile.

hud/env/remote_client.py CHANGED Viewed

@@ -74,6 +74,10 @@ class RemoteClient(Client):
         build_data = response.get("metadata", {})
+        if response.get("readme"):
+            logger.info("[HUD] %s gym created, see how to use it at %s", gym_id,
+                        response.get("readme"))
         return controller, build_data
     def __init__(self, env_id: str) -> None:

hud/gym.py CHANGED Viewed

@@ -8,12 +8,12 @@ from hud.env.environment import Environment
 from hud.env.local_docker_client import LocalDockerClient
 from hud.env.remote_client import RemoteClient
 from hud.env.remote_docker_client import RemoteDockerClient
-from hud.task import Task
 from hud.types import CustomGym, Gym
 from hud.utils.common import get_gym_id
 if TYPE_CHECKING:
     from hud.job import Job
+    from hud.task import Task
 logger = logging.getLogger("hud.gym")
@@ -54,9 +54,9 @@ async def make(
     gym = None
     task = None
-    if isinstance(env_src, Gym):
+    if isinstance(env_src, str | CustomGym):
         gym = env_src
-    elif isinstance(env_src, Task):
+    else:
         gym = env_src.gym
         task = env_src

hud-python 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

Potentially problematic release.

hud-python 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl