PyPI - hud-python - Versions diffs - 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

hud-python 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (192) hide show

hud/__init__.py +22 -89
hud/agents/__init__.py +15 -0
hud/agents/art.py +101 -0
hud/agents/base.py +599 -0
hud/{mcp → agents}/claude.py +373 -321
hud/{mcp → agents}/langchain.py +250 -250
hud/agents/misc/__init__.py +7 -0
hud/{agent → agents}/misc/response_agent.py +80 -80
hud/{mcp → agents}/openai.py +352 -334
hud/agents/openai_chat_generic.py +154 -0
hud/{mcp → agents}/tests/__init__.py +1 -1
hud/agents/tests/test_base.py +742 -0
hud/agents/tests/test_claude.py +324 -0
hud/{mcp → agents}/tests/test_client.py +363 -324
hud/{mcp → agents}/tests/test_openai.py +237 -238
hud/cli/__init__.py +617 -0
hud/cli/__main__.py +8 -0
hud/cli/analyze.py +371 -0
hud/cli/analyze_metadata.py +230 -0
hud/cli/build.py +427 -0
hud/cli/clone.py +185 -0
hud/cli/cursor.py +92 -0
hud/cli/debug.py +392 -0
hud/cli/docker_utils.py +83 -0
hud/cli/init.py +281 -0
hud/cli/interactive.py +353 -0
hud/cli/mcp_server.py +756 -0
hud/cli/pull.py +336 -0
hud/cli/push.py +370 -0
hud/cli/remote_runner.py +311 -0
hud/cli/runner.py +160 -0
hud/cli/tests/__init__.py +3 -0
hud/cli/tests/test_analyze.py +284 -0
hud/cli/tests/test_cli_init.py +265 -0
hud/cli/tests/test_cli_main.py +27 -0
hud/cli/tests/test_clone.py +142 -0
hud/cli/tests/test_cursor.py +253 -0
hud/cli/tests/test_debug.py +453 -0
hud/cli/tests/test_mcp_server.py +139 -0
hud/cli/tests/test_utils.py +388 -0
hud/cli/utils.py +263 -0
hud/clients/README.md +143 -0
hud/clients/__init__.py +16 -0
hud/clients/base.py +379 -0
hud/clients/fastmcp.py +222 -0
hud/clients/mcp_use.py +278 -0
hud/clients/tests/__init__.py +1 -0
hud/clients/tests/test_client_integration.py +111 -0
hud/clients/tests/test_fastmcp.py +342 -0
hud/clients/tests/test_protocol.py +188 -0
hud/clients/utils/__init__.py +1 -0
hud/clients/utils/retry_transport.py +160 -0
hud/datasets.py +322 -192
hud/misc/__init__.py +1 -0
hud/{agent → misc}/claude_plays_pokemon.py +292 -283
hud/otel/__init__.py +35 -0
hud/otel/collector.py +142 -0
hud/otel/config.py +164 -0
hud/otel/context.py +536 -0
hud/otel/exporters.py +366 -0
hud/otel/instrumentation.py +97 -0
hud/otel/processors.py +118 -0
hud/otel/tests/__init__.py +1 -0
hud/otel/tests/test_processors.py +197 -0
hud/server/__init__.py +5 -5
hud/server/context.py +114 -0
hud/server/helper/__init__.py +5 -0
hud/server/low_level.py +132 -0
hud/server/server.py +166 -0
hud/server/tests/__init__.py +3 -0
hud/settings.py +73 -79
hud/shared/__init__.py +5 -0
hud/{exceptions.py → shared/exceptions.py} +180 -180
hud/{server → shared}/requests.py +264 -264
hud/shared/tests/test_exceptions.py +157 -0
hud/{server → shared}/tests/test_requests.py +275 -275
hud/telemetry/__init__.py +25 -30
hud/telemetry/instrument.py +379 -0
hud/telemetry/job.py +309 -141
hud/telemetry/replay.py +74 -0
hud/telemetry/trace.py +83 -0
hud/tools/__init__.py +33 -34
hud/tools/base.py +365 -65
hud/tools/bash.py +161 -137
hud/tools/computer/__init__.py +15 -13
hud/tools/computer/anthropic.py +437 -420
hud/tools/computer/hud.py +376 -334
hud/tools/computer/openai.py +295 -292
hud/tools/computer/settings.py +82 -0
hud/tools/edit.py +314 -290
hud/tools/executors/__init__.py +30 -30
hud/tools/executors/base.py +539 -532
hud/tools/executors/pyautogui.py +621 -619
hud/tools/executors/tests/__init__.py +1 -1
hud/tools/executors/tests/test_base_executor.py +338 -338
hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
hud/tools/executors/xdo.py +511 -503
hud/tools/{playwright_tool.py → playwright.py} +412 -379
hud/tools/tests/__init__.py +3 -3
hud/tools/tests/test_base.py +282 -0
hud/tools/tests/test_bash.py +158 -152
hud/tools/tests/test_bash_extended.py +197 -0
hud/tools/tests/test_computer.py +425 -52
hud/tools/tests/test_computer_actions.py +34 -34
hud/tools/tests/test_edit.py +259 -240
hud/tools/tests/test_init.py +27 -27
hud/tools/tests/test_playwright_tool.py +183 -183
hud/tools/tests/test_tools.py +145 -157
hud/tools/tests/test_utils.py +156 -156
hud/tools/types.py +72 -0
hud/tools/utils.py +50 -50
hud/types.py +136 -89
hud/utils/__init__.py +10 -16
hud/utils/async_utils.py +65 -0
hud/utils/design.py +168 -0
hud/utils/mcp.py +55 -0
hud/utils/progress.py +149 -149
hud/utils/telemetry.py +66 -66
hud/utils/tests/test_async_utils.py +173 -0
hud/utils/tests/test_init.py +17 -21
hud/utils/tests/test_progress.py +261 -225
hud/utils/tests/test_telemetry.py +82 -37
hud/utils/tests/test_version.py +8 -8
hud/version.py +7 -7
hud_python-0.4.1.dist-info/METADATA +476 -0
hud_python-0.4.1.dist-info/RECORD +132 -0
hud_python-0.4.1.dist-info/entry_points.txt +3 -0
{hud_python-0.3.5.dist-info → hud_python-0.4.1.dist-info}/licenses/LICENSE +21 -21
hud/adapters/__init__.py +0 -8
hud/adapters/claude/__init__.py +0 -5
hud/adapters/claude/adapter.py +0 -180
hud/adapters/claude/tests/__init__.py +0 -1
hud/adapters/claude/tests/test_adapter.py +0 -519
hud/adapters/common/__init__.py +0 -6
hud/adapters/common/adapter.py +0 -178
hud/adapters/common/tests/test_adapter.py +0 -289
hud/adapters/common/types.py +0 -446
hud/adapters/operator/__init__.py +0 -5
hud/adapters/operator/adapter.py +0 -108
hud/adapters/operator/tests/__init__.py +0 -1
hud/adapters/operator/tests/test_adapter.py +0 -370
hud/agent/__init__.py +0 -19
hud/agent/base.py +0 -126
hud/agent/claude.py +0 -271
hud/agent/langchain.py +0 -215
hud/agent/misc/__init__.py +0 -3
hud/agent/operator.py +0 -268
hud/agent/tests/__init__.py +0 -1
hud/agent/tests/test_base.py +0 -202
hud/env/__init__.py +0 -11
hud/env/client.py +0 -35
hud/env/docker_client.py +0 -349
hud/env/environment.py +0 -446
hud/env/local_docker_client.py +0 -358
hud/env/remote_client.py +0 -212
hud/env/remote_docker_client.py +0 -292
hud/gym.py +0 -130
hud/job.py +0 -773
hud/mcp/__init__.py +0 -17
hud/mcp/base.py +0 -631
hud/mcp/client.py +0 -312
hud/mcp/tests/test_base.py +0 -512
hud/mcp/tests/test_claude.py +0 -294
hud/task.py +0 -149
hud/taskset.py +0 -237
hud/telemetry/_trace.py +0 -347
hud/telemetry/context.py +0 -230
hud/telemetry/exporter.py +0 -575
hud/telemetry/instrumentation/__init__.py +0 -3
hud/telemetry/instrumentation/mcp.py +0 -259
hud/telemetry/instrumentation/registry.py +0 -59
hud/telemetry/mcp_models.py +0 -270
hud/telemetry/tests/__init__.py +0 -1
hud/telemetry/tests/test_context.py +0 -210
hud/telemetry/tests/test_trace.py +0 -312
hud/tools/helper/README.md +0 -56
hud/tools/helper/__init__.py +0 -9
hud/tools/helper/mcp_server.py +0 -78
hud/tools/helper/server_initialization.py +0 -115
hud/tools/helper/utils.py +0 -58
hud/trajectory.py +0 -94
hud/utils/agent.py +0 -37
hud/utils/common.py +0 -256
hud/utils/config.py +0 -120
hud/utils/deprecation.py +0 -115
hud/utils/misc.py +0 -53
hud/utils/tests/test_common.py +0 -277
hud/utils/tests/test_config.py +0 -129
hud_python-0.3.5.dist-info/METADATA +0 -284
hud_python-0.3.5.dist-info/RECORD +0 -120
/hud/{adapters/common → shared}/tests/__init__.py +0 -0
{hud_python-0.3.5.dist-info → hud_python-0.4.1.dist-info}/WHEEL +0 -0

hud/env/environment.py DELETED Viewed

@@ -1,446 +0,0 @@
-"""Base classes for environment implementations."""
-from __future__ import annotations
-import logging
-from typing import TYPE_CHECKING, Any
-from pydantic import BaseModel
-from hud.env.client import Client
-from hud.env.remote_client import RemoteClient, SetupRequest
-from hud.task import Task
-from hud.utils.agent import format_agent_prompt
-from hud.utils.common import FunctionConfig, FunctionConfigs, Observation
-from hud.utils.config import (
-    LOCAL_EVALUATORS,
-    REMOTE_EVALUATE,
-    REMOTE_FUNCTION_PREFIX,
-    expand_config,
-)
-from hud.utils.telemetry import stream
-logger = logging.getLogger("hud.environment")
-if TYPE_CHECKING:
-    from hud.adapters.common import CLA
-    from hud.agent import Agent
-class Environment(BaseModel):
-    """
-    Environment base class that provides common functionality for all environment implementations.
-    Every operation goes through `client`: function invocation (`client.invoke`), remote
-    setup (`client.setup`) and teardown (`client.close`).
-    """
-    # Free-form metadata attached to the environment
-    metadata: dict[str, Any]
-    # Client used for every call made to the environment
-    client: Client
-    # URLs reported by the environment's "get_urls" function; filled in by get_urls()
-    url: str | None = None
-    live_url: str | None = None
-    # The task to use for the environment reset
-    task: Task | None = None
-    # Build data; reset() reads its "environment_prompt" entry when that entry is truthy
-    build_data: dict[str, Any]
-    # The task run id
-    task_run_id: str | None = None
-    # final response
-    final_response: str | None = None
-    # environment prompt information
-    environment_prompt: str | None = None
-    async def _invoke_all(self, configs: FunctionConfigs) -> list[Any]:
-        """
-        Invoke every function described by `configs` and collect the results.
-        Args:
-            configs: A single config or a list of configs; each one is expanded with
-                `expand_config` and every expanded config is invoked through the client
-        Returns:
-            list[Any]: One result per expanded config, in invocation order
-        """
-        # Execute each config and collect results
-        configs_all = [configs] if not isinstance(configs, list) else configs
-        results = []
-        for config in configs_all:
-            for expanded_config in expand_config(config):
-                result, stdout, stderr = await self.client.invoke(expanded_config)
-                results.append(result)
-                if stdout:
-                    logger.info(
-                        "%s produced stdout:\n%s",
-                        expanded_config.function,
-                        stdout.decode(),
-                    )
-                if stderr:
-                    logger.warning(
-                        "%s produced stderr:\n%s",
-                        expanded_config.function,
-                        stderr.decode(),
-                    )
-        return results
-    async def _setup(self, config: FunctionConfigs | None = None) -> None:
-        """
-        Setup the environment.
-        Remote client: refreshes the URLs, then sends a SetupRequest built from the task
-        (id, config, metadata and the first expanded `task.setup` entry) or, when there is
-        no task, from the first expanded `config` entry. The returned id, if any, is stored
-        in `task_run_id`. When a task is set, the `config` argument is not used.
-        Other clients: invokes `config` if given, otherwise `task.setup`; no-op when
-        neither is available.
-        Args:
-            config: The configuration to use for the setup
-        Raises:
-            ValueError: If the client is remote and neither a task nor a config is available
-        """
-        if isinstance(self.client, RemoteClient):
-            await self.get_urls()
-            setup_request = SetupRequest()
-            if self.task:
-                setup_request.task_id = self.task.id
-                setup_request.config = self.task.config
-                setup_request.metadata = _format_task_metadata(self.task)
-                if self.task.setup:
-                    # Only the first expanded setup entry is sent to the remote end
-                    setup_request.setup = expand_config(self.task.setup)[0]
-            elif config:
-                setup_request.setup = expand_config(config)[0]
-            else:
-                raise ValueError("No task or config provided for remote environment")
-            result = await self.client.setup(setup_request)
-            if result and result.get("id"):
-                self.task_run_id = result.get("id")
-                logger.info("View the live trace at https://app.hud.so/trace/%s", self.task_run_id)
-            else:
-                logger.warning("No task run id found in the result")
-        else:
-            if config is not None:
-                await self._invoke_all(config)
-            elif self.task and self.task.setup is not None:
-                await self._invoke_all(self.task.setup)
-    async def evaluate(
-        self,
-        config: FunctionConfigs | None = None,
-        metadata: dict[str, Any] | None = None,
-    ) -> Any:
-        """
-        Evaluate the environment.
-        Args:
-            config: The configuration to use for the evaluation
-            metadata: Passed to `create_remote_config` when the client is remote; not used
-                for other clients
-        Returns:
-            Any: The single result when exactly one was produced, otherwise the list of results
-        Raises:
-            ValueError: If the client is not remote and neither `config` nor
-                `task.evaluate` is available
-        """
-        if isinstance(self.client, RemoteClient):
-            results = await self._invoke_all(
-                create_remote_config(self, config, REMOTE_EVALUATE, metadata)
-            )
-        else:
-            if config is not None:
-                results = await self._invoke_all(config)
-            elif self.task and self.task.evaluate is not None:
-                results = await self._invoke_all(self.task.evaluate)
-            else:
-                raise ValueError("No config or task provided for local environment")
-        if len(results) == 1:
-            return results[0]
-        else:
-            return results
-    async def reset(self) -> tuple[Observation, dict[str, Any]]:
-        """
-        Reset the environment and return the first observation with the agent prompt.
-        Args:
-            None
-        Returns:
-            Observation: The first observation from the environment with the agent prompt
-            info: Dictionary of information about the environment
-        """
-        # NOTE: _setup() is not called from here; the first observation comes from an
-        # action-less step().
-        obs, _, _, info = await self.step()
-        if self.build_data.get("environment_prompt"):
-            self.environment_prompt = self.build_data["environment_prompt"]
-        # Format the agent prompt with the environment prompt and the task prompt
-        obs.text = format_agent_prompt(self.environment_prompt, self.task)
-        return obs, info
-    async def step(
-        self,
-        actions: CLA | list[CLA] | None = None,
-        verbose: bool = False,
-    ) -> tuple[Observation, float, bool, dict[str, Any]]:
-        """Execute a step in the environment.
-        Args:
-            actions: The action(s) to execute; a single action is wrapped in a list and
-                None is treated as an empty list
-            verbose: Whether to log the stdout/stderr produced by the step
-        Returns:
-            tuple: (observation, reward, terminated, info); reward is always 0 and
-                terminated is always False here
-        """
-        if not isinstance(actions, list) and actions is not None:
-            actions = [actions]
-        if actions is None or len(actions) == 0:
-            actions = []
-        # The remote "step" function receives one positional argument: the list of dumped actions
-        args = [[action.model_dump() for action in actions]]
-        # TODO: Move this into the server side
-        self._maybe_store_response(actions)
-        result, stdout, stderr = await self.client.invoke(
-            FunctionConfig(function="step", args=args)
-        )
-        if verbose:
-            if stdout:
-                logger.info("Step produced stdout: %s", stdout.decode())
-            if stderr:
-                logger.warning("Step produced stderr: %s", stderr.decode())
-        observation = Observation.model_validate(result["observation"], strict=True)
-        info = result.get("info", {})
-        return observation, 0, False, info
-    def _maybe_store_response(self, actions: list[CLA]) -> bool:
-        """Store the final response into the environment.
-        Only the last action is inspected; its text is stored when its type is "response".
-        Args:
-            actions: The action(s) to check
-        Returns:
-            bool: True if the response was submitted, False otherwise
-        """
-        if len(actions) > 0 and actions[-1].type == "response":
-            self.final_response = actions[-1].text
-            return True
-        return False
-    async def get_urls(self) -> dict[str, Any]:
-        """Get URLs for the environment.
-        Invokes the environment's "get_urls" function and caches the answer in
-        `self.url` and `self.live_url`.
-        Returns:
-            dict: Dictionary of URLs for accessing the environment
-        """
-        data, _, _ = await self.client.invoke(FunctionConfig(function="get_urls", args=[]))
-        self.url = data.get("url")
-        self.live_url = data.get("live_url")
-        return {
-            "url": self.url,
-            "live_url": self.live_url,
-        }
-    async def close(self) -> None:
-        """Close the environment.
-        This should release any resources and clean up the environment.
-        """
-        await self.client.close()
-    async def stream(self) -> str | None:
-        """Stream the environment's live URL.
-        Fetches the URLs first when no live URL is known yet.
-        Returns:
-            str | None: The result of the module-level `stream` helper for the live URL,
-                or None (after a warning) when no live URL is available
-        """
-        if not self.live_url:
-            await self.get_urls()
-        if self.live_url is None:
-            logger.warning("No live URL found")
-            return None
-        # Inside the method body `stream` resolves to the imported helper, not this method
-        return stream(self.live_url)
-    async def run(self, agent: Agent, max_steps: int = 27, verbose: bool = True) -> Any:
-        """Run an agent in the environment.
-        Args:
-            agent: The agent to run
-            max_steps: Maximum number of predict/step iterations before evaluating
-            verbose: Whether to log each action, each observation and the evaluation result
-        Returns:
-            Any: Result of `evaluate()` once the loop has ended
-        """
-        if verbose:
-            logger.info("Running agent in environment...")
-        obs, _ = await self.reset()
-        for i in range(max_steps):
-            action, done = await agent.predict(obs, verbose=verbose)
-            if verbose:
-                # A single action is logged bare; anything else, including an empty
-                # prediction, is logged as a list so that logging cannot raise IndexError.
-                if action and len(action) == 1:
-                    shown: Any = str(action[0])
-                else:
-                    shown = [str(a) for a in action or []]
-                logger.info(
-                    "Step %d: Action: %s",
-                    i,
-                    shown,
-                )
-            obs, _, terminated, _ = await self.step(action)
-            if verbose:
-                logger.info("Step %d: Observation: %s", i, obs)
-            if done or terminated:
-                break
-        result = await self.evaluate()
-        if verbose:
-            logger.info("Evaluation result: %s", result)
-        return result
-def _format_task_metadata(task: Task) -> dict[str, Any]:
-    """Build the metadata dictionary sent along with a task.
-    Keys of `task.metadata` are converted to strings; when `task.sensitive_data` is
-    truthy it is stored under the "sensitive_data" key.
-    """
-    source = task.metadata or {}
-    formatted = {str(name): entry for name, entry in source.items()}
-    if task.sensitive_data:
-        formatted["sensitive_data"] = task.sensitive_data
-    return formatted
-def create_remote_config(
-    env: Environment | None = None,
-    config: FunctionConfigs | None = None,
-    function: str | None = None,
-    metadata: dict[str, Any] | None = None,
-) -> list[FunctionConfig]:
-    """
-    Create a remote configuration for setup or evaluate, determining the final
-    function call structure based on the provided task or explicit config.
-    This function orchestrates how setup and evaluate steps defined in a Task
-    or passed directly are prepared for remote execution via `env._invoke_all`.
-    Args:
-        env: Environment object, potentially containing a task definition.
-             Used to access `env.task` and `env.final_response`.
-        config: Direct configuration override (e.g., passed to `env.evaluate(config=...)`).
-                Can be in various FunctionConfigs formats.
-        function: The top-level function context, typically "setup" or "evaluate".
-                  When None, `config` is expanded and returned as-is.
-        metadata: Metadata attached to the returned FunctionConfig when an explicit
-                  `config` is used (defaults to an empty dict). When the task is used
-                  instead, the metadata built from the task replaces it.
-    Returns:
-        list[FunctionConfig]: A list containing a single FunctionConfig object
-                              ready for remote invocation via `client.invoke`.
-                              The specific function/arguments are chosen based on this priority:
-                              1. Explicit `config` parameter (if provided).
-                              2. Specific `task` attribute (e.g., `task.evaluate`).
-                              3. General `task.config` dictionary.
-                              4. Default private function using `task.id`
-                              (e.g., `private_evaluate(task.id)`).
-                              5. Base `function` name with minimal/default arguments.
-    Raises:
-        ValueError: If both `function` and `config` are missing, or if no `config` is
-                    given and `env` carries no task.
-    Logic & Examples (Assuming `function="evaluate"` for examples):
-        1) Explicit `config` provided: The `config` is expanded and becomes the `args`
-           for the top-level `function` call. If the environment has a final_response,
-           it's appended to these args.
-           - Example Input:
-             `env` (with `final_response="Paris"`)
-             `config=("contains_text", "Paris")`
-             `function="evaluate"`
-           - Example Output:
-             `[FunctionConfig(function='evaluate', args=[
-                FunctionConfig(function='contains_text', args=['Paris', 'Paris'])
-             ])]`
-        2) No explicit `config`, Task has the attribute (e.g., `task.evaluate`):
-           The Task's attribute value (e.g., `task.evaluate`) is expanded and becomes the `args`
-           for the top-level `function` call. Task ID is added if present. `final_response` is
-           appended if present.
-           - Example Input:
-             `env` (`task=Task(id="t1", evaluate=("check_answer",), ...)`, `final_response="42"`)
-             `config=None`
-             `function="evaluate"`
-           - Example Output:
-             `[FunctionConfig(function='evaluate', args=[FunctionConfig(function='check_answer',
-                args=['42'], id='t1')])]`
-        3) No explicit `config`, no specific Task attribute, Task has `task.config`:
-           The `task.config` dictionary becomes the single argument for the top-level
-           `function` call. Task ID is added to the config dict if present. `final_response` is
-           appended if present.
-           - Example Input:
-             `env` (with `task=Task(id="t2", config={"expected": "val"}, ...)`)
-             `config=None`
-             `function="evaluate"`
-           - Example Output:
-             `[FunctionConfig(function='evaluate', args=[{"expected": "val", "id": "t2"}])]`
-        4) No explicit `config`, no specific Task attribute, no `task.config`, Task has `task.id`:
-           Calls a private function (`private_<function>`) on the remote end, passing
-           the `task.id` as the only argument.
-           - Example Input:
-             `env` (with `task=Task(id="t3", ...)`)
-             `config=None`
-             `function="evaluate"`
-           - Example Output:
-             `[FunctionConfig(function='private_evaluate', args=['t3'])]`
-        5) No explicit `config` and no relevant Task info:
-           Calls the top-level `function` with empty args.
-           - Example Input:
-             `env` (with `task=Task(...)`)
-             `config=None`
-             `function="evaluate"`
-           - Example Output:
-             `[FunctionConfig(function='evaluate', args=[])]`
-    """
-    if metadata is None:
-        metadata = {}
-    # If no function provided, just expand the config and return it directly
-    if function is None:
-        if config:
-            return expand_config(config)
-        raise ValueError("Either function or config must be provided")
-    # Case 1: Explicit config provided
-    if config:
-        if not isinstance(config, dict):
-            expanded_configs = expand_config(config)
-            first = expanded_configs[0]
-            # `first.args` is checked before indexing so a config that expands to no
-            # arguments is skipped instead of raising IndexError.
-            # NOTE(review): Case 2 below tests `.function` against LOCAL_EVALUATORS while
-            # this branch tests the first argument - confirm which one is intended.
-            if (
-                env
-                and env.final_response
-                and first.args
-                and first.args[0] in LOCAL_EVALUATORS
-            ):
-                # Ensure args is a list before appending
-                if not isinstance(first.args, list):
-                    first.args = [first.args]
-                first.args.append(env.final_response)  # for remote responses
-            return [FunctionConfig(function=function, args=expanded_configs, metadata=metadata)]
-        else:
-            return [FunctionConfig(function=function, args=[config], metadata=metadata)]
-    # Otherwise, use the environment's task
-    task = env.task if env else None
-    # Must have a task for the remaining cases
-    if task is None:
-        raise ValueError("Either task or config must be provided")
-    # From here on the caller-supplied metadata is replaced by the task's metadata
-    metadata = _format_task_metadata(task)
-    # Case 2: Task has the specified function attribute
-    task_config = getattr(task, function, None)
-    if task_config:
-        expanded_configs = expand_config(task_config)
-        if task.id:
-            expanded_configs[0].id = task.id  # for remote IDs
-        if env and env.final_response and expanded_configs[0].function in LOCAL_EVALUATORS:
-            # Ensure args is a list before appending
-            if not isinstance(expanded_configs[0].args, list):
-                expanded_configs[0].args = [expanded_configs[0].args]
-            expanded_configs[0].args.append(env.final_response)  # for remote responses
-        return [FunctionConfig(function=function, args=expanded_configs, metadata=metadata)]
-    # Case 3: Check for task.config
-    if hasattr(task, "config") and task.config:
-        # Ensure task.config is a dictionary before adding id
-        final_args = task.config.copy() if isinstance(task.config, dict) else {}
-        if task.id:
-            final_args["id"] = task.id  # for remote IDs
-        if env and env.final_response:
-            # Append response, ensuring args exists and is a list
-            if "args" not in final_args:
-                final_args["args"] = []
-            if not isinstance(final_args["args"], list):
-                final_args["args"] = [final_args["args"]]
-            final_args["args"].append(env.final_response)
-        return [FunctionConfig(function=function, args=[final_args], metadata=metadata)]
-    # Case 4: Use task.id
-    if task.id:
-        args_list = [task.id]
-        if env and env.final_response:
-            args_list.append(env.final_response)  # Append final response
-        return [
-            FunctionConfig(
-                function=f"{REMOTE_FUNCTION_PREFIX}{function}",
-                args=args_list,
-                metadata=metadata,
-            )
-        ]
-    # Case 5: No valid configuration found
-    args_list = []
-    if env and env.final_response:
-        args_list.append(env.final_response)
-    return [FunctionConfig(function=function, args=args_list, metadata=metadata)]

hud-python 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

Potentially problematic release.

hud-python 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl