hud-python 0.4.28__py3-none-any.whl → 0.4.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic.

Files changed (77)
  1. hud/__init__.py +2 -1
  2. hud/agents/base.py +81 -45
  3. hud/agents/claude.py +8 -4
  4. hud/agents/openai_chat_generic.py +66 -40
  5. hud/agents/tests/test_base.py +0 -4
  6. hud/agents/tests/test_openai.py +1 -1
  7. hud/cli/__init__.py +182 -52
  8. hud/cli/dev.py +8 -9
  9. hud/cli/eval.py +317 -119
  10. hud/cli/flows/__init__.py +0 -0
  11. hud/cli/flows/tasks.py +0 -0
  12. hud/cli/get.py +160 -0
  13. hud/cli/rl/__init__.py +567 -71
  14. hud/cli/rl/config.py +94 -0
  15. hud/cli/rl/display.py +133 -0
  16. hud/cli/rl/gpu.py +63 -0
  17. hud/cli/rl/gpu_utils.py +318 -0
  18. hud/cli/rl/presets.py +96 -0
  19. hud/cli/rl/remote_runner.py +347 -0
  20. hud/cli/rl/rl_api.py +150 -0
  21. hud/cli/rl/vllm.py +177 -0
  22. hud/cli/tests/test_analyze_metadata.py +0 -1
  23. hud/cli/utils/tasks.py +26 -0
  24. hud/clients/base.py +21 -23
  25. hud/clients/mcp_use.py +36 -44
  26. hud/clients/tests/test_mcp_use_retry.py +10 -10
  27. hud/datasets/__init__.py +4 -3
  28. hud/datasets/{execution/parallel.py → parallel.py} +1 -1
  29. hud/datasets/{execution/runner.py → runner.py} +1 -1
  30. hud/datasets/utils.py +1 -1
  31. hud/native/comparator.py +6 -6
  32. hud/native/tests/test_comparator.py +8 -8
  33. hud/native/tests/test_native_init.py +13 -11
  34. hud/otel/config.py +1 -1
  35. hud/otel/instrumentation.py +35 -0
  36. hud/rl/README.md +30 -0
  37. hud/rl/__init__.py +1 -0
  38. hud/rl/actor.py +174 -0
  39. hud/rl/buffer.py +371 -0
  40. hud/rl/chat_template.jinja +101 -0
  41. hud/rl/config.py +184 -0
  42. hud/rl/distributed.py +95 -0
  43. hud/rl/learner.py +589 -0
  44. hud/rl/tests/__init__.py +1 -0
  45. hud/rl/tests/test_learner.py +171 -0
  46. hud/rl/train.py +354 -0
  47. hud/rl/types.py +101 -0
  48. hud/rl/utils/start_vllm_server.sh +30 -0
  49. hud/rl/utils.py +524 -0
  50. hud/rl/vllm_adapter.py +125 -0
  51. hud/settings.py +6 -0
  52. hud/telemetry/__init__.py +2 -1
  53. hud/telemetry/job.py +46 -3
  54. hud/telemetry/tests/test_trace.py +3 -3
  55. hud/telemetry/trace.py +85 -13
  56. hud/tools/tests/test_computer.py +3 -3
  57. hud/tools/tests/test_computer_actions.py +1 -1
  58. hud/types.py +123 -2
  59. hud/utils/group_eval.py +223 -0
  60. hud/utils/hud_console.py +113 -13
  61. hud/utils/tasks.py +119 -0
  62. hud/utils/tests/test_version.py +1 -1
  63. hud/version.py +1 -1
  64. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/METADATA +20 -2
  65. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/RECORD +68 -48
  66. hud/cli/hf.py +0 -406
  67. hud/cli/rl/README.md +0 -243
  68. hud/cli/rl/init.py +0 -370
  69. hud/cli/rl/pod.py +0 -501
  70. hud/cli/rl/ssh.py +0 -322
  71. hud/cli/rl/train.py +0 -562
  72. hud/cli/rl/utils.py +0 -165
  73. hud/datasets/execution/__init__.py +0 -13
  74. hud/datasets/task.py +0 -116
  75. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/WHEEL +0 -0
  76. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/entry_points.txt +0 -0
  77. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/licenses/LICENSE +0 -0
hud/cli/rl/vllm.py ADDED
@@ -0,0 +1,177 @@
+"""vLLM server management utilities."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+import subprocess
+import time
+from pathlib import Path
+
+import httpx
+from rich.console import Console
+
+from hud.utils.hud_console import HUDConsole
+
+logger = logging.getLogger(__name__)
+hud_console = HUDConsole(logger)
+
+console = Console()
+
+
+def get_vllm_args(model_name: str, chat_template_path: Path | None = None) -> list[str]:
+    """Get common vLLM server arguments for both local and remote deployments."""
+    args = [
+        "serve",
+        model_name,
+        "--api-key",
+        "token-abc123",
+        "--host",
+        "0.0.0.0",  # noqa: S104
+        "--port",
+        "8000",
+        "--tensor-parallel-size",
+        "1",
+        "--trust-remote-code",
+        "--max-model-len",
+        "16384",
+        "--enable-lora",
+        "--max-lora-rank",
+        "64",
+        "--max-cpu-loras",
+        "4",
+        "--enable-auto-tool-choice",
+        "--tool-call-parser",
+        "hermes",
+        "--disable-log-requests",
+        "--dtype",
+        "auto",
+    ]
+
+    # Add chat template if provided
+    if chat_template_path and chat_template_path.exists():
+        args.extend(["--chat-template", str(chat_template_path.absolute())])
+
+    return args
+
+
+def check_vllm_server() -> bool:
+    """Check if vLLM server is running."""
+    try:
+        response = httpx.get("http://localhost:8000/health", timeout=2.0)
+        return response.status_code == 200
+    except Exception:
+        return False
+
+
+def kill_vllm_server() -> None:
+    """Kill any running vLLM server processes."""
+    try:
+        # Check for PID file first
+        pid_file = Path("/tmp/vllm_server.pid")  # noqa: S108
+        if pid_file.exists():
+            try:
+                pid = int(pid_file.read_text().strip())
+                subprocess.run(["kill", "-TERM", str(pid)], check=False)  # noqa: S603, S607
+                time.sleep(2)
+                # Force kill if still running
+                subprocess.run(["kill", "-9", str(pid)], check=False)  # noqa: S603, S607
+                pid_file.unlink()
+            except Exception as e:
+                hud_console.error(f"Failed to kill vLLM server: {e}")
+
+        # Also try to kill by process name
+        subprocess.run(["pkill", "-f", "vllm serve"], check=False)  # noqa: S607
+        subprocess.run(["pkill", "-f", "vllm.entrypoints.openai.api_server"], check=False)  # noqa: S607
+        time.sleep(2)
+
+        # Check for any process using port 8000
+        result = subprocess.run(["lsof", "-ti:8000"], capture_output=True, text=True, check=False)  # noqa: S607
+
+        if result.stdout.strip():
+            for pid in result.stdout.strip().split("\n"):
+                try:
+                    subprocess.run(["kill", "-9", pid], check=False)  # noqa: S603, S607
+                except Exception as e:
+                    hud_console.error(f"Failed to kill vLLM server: {e}")
+
+        console.print("[yellow]Killed existing vLLM server processes[/yellow]")
+    except Exception as e:
+        hud_console.error(f"Error killing vLLM server: {e}")
+
+
+def start_vllm_server(model_name: str, gpu_index: int = 1, restart: bool = False) -> None:
+    """Start vLLM server in the background with dynamic GPU selection."""
+    if restart:
+        kill_vllm_server()
+        time.sleep(3)
+
+    # Check if already running
+    if check_vllm_server():
+        console.print("[green]vLLM server is already running[/green]")
+        return
+
+    console.print(f"[cyan]Starting vLLM server with {model_name} on GPU {gpu_index}...[/cyan]")
+
+    # Set up environment variables
+    env = os.environ.copy()
+    env.update(
+        {
+            "CUDA_VISIBLE_DEVICES": str(gpu_index),
+            "VLLM_ALLOW_RUNTIME_LORA_UPDATING": "True",
+            "TOKENIZERS_PARALLELISM": "false",
+            "VLLM_LOGGING_LEVEL": "INFO",  # Changed from DEBUG to reduce noise
+            "CUDA_LAUNCH_BLOCKING": "1",  # Better error messages
+        }
+    )
+
+    # Get the path to chat template
+    chat_template_path = Path(__file__).parent.parent.parent / "rl" / "chat_template.jinja"
+
+    # Build the vLLM command
+    vllm_args = get_vllm_args(model_name, chat_template_path)
+    cmd = ["uv", "run", "vllm", *vllm_args]
+
+    # Start the server in the background
+    with open("/tmp/vllm_server.log", "w") as log_file:  # noqa: S108
+        process = subprocess.Popen(  # noqa: S603
+            cmd,
+            env=env,
+            stdout=log_file,
+            stderr=subprocess.STDOUT,
+            preexec_fn=os.setpgrp,  # type: ignore
+            cwd=Path.cwd(),  # Use current working directory
+        )
+
+    console.print("[yellow]vLLM server starting in background...[/yellow]")
+    console.print(f"[yellow]Process ID: {process.pid}[/yellow]")
+    console.print("[yellow]Check logs at: /tmp/vllm_server.log[/yellow]")
+
+    # Save PID for later management
+    pid_file = Path("/tmp/vllm_server.pid")  # noqa: S108
+    pid_file.write_text(str(process.pid))
+
+
+async def wait_for_vllm_server(timeout: int = 360) -> bool:  # noqa: ASYNC109
+    """Wait for vLLM server to be ready."""
+    start_time = time.time()
+    console.print("[yellow]Waiting for vLLM server to be ready (up to 6 minutes)...[/yellow]")
+
+    async with httpx.AsyncClient() as client:
+        while time.time() - start_time < timeout:
+            try:
+                response = await client.get("http://localhost:8000/health", timeout=2.0)
+                if response.status_code == 200:
+                    console.print("[green]✅ vLLM server is ready![/green]")
+                    return True
+            except Exception as e:
+                hud_console.error(f"Failed to connect to vLLM server: {e}")
+
+            await asyncio.sleep(2)
+            elapsed = int(time.time() - start_time)
+            console.print(f"[yellow]Waiting... ({elapsed}s / {timeout}s)[/yellow]", end="\r")
+
+    console.print("\n[red]❌ vLLM server failed to start within timeout[/red]")
+    console.print("[yellow]Check /tmp/vllm_server.log for details[/yellow]")
+    return False
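The helpers above are designed to be composed: start the server in the background, poll /health until it responds, and tear it down afterwards. A rough usage sketch follows; the import path is inferred from the file location above, and the model name is only an illustrative assumption.

# Hypothetical driver for the new helpers in hud/cli/rl/vllm.py (not part of the diff).
import asyncio

from hud.cli.rl.vllm import (
    check_vllm_server,
    kill_vllm_server,
    start_vllm_server,
    wait_for_vllm_server,
)


async def main() -> None:
    # Background-launch vLLM on GPU 0; the model name here is just an example.
    start_vllm_server("Qwen/Qwen2.5-3B-Instruct", gpu_index=0, restart=True)

    # Poll http://localhost:8000/health for up to 6 minutes.
    if await wait_for_vllm_server(timeout=360):
        print("healthy:", check_vllm_server())
    else:
        # Give up and clean up any stray vLLM processes.
        kill_vllm_server()


if __name__ == "__main__":
    asyncio.run(main())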
hud/cli/tests/test_analyze_metadata.py CHANGED
@@ -214,7 +214,6 @@ class TestAnalyzeFromMetadata:
 
     @mock.patch("hud.cli.utils.metadata.check_local_cache")
    @mock.patch("hud.cli.utils.metadata.fetch_lock_from_registry")
-    @mock.patch("hud.cli.utils.metadata.design")
     @mock.patch("hud.cli.utils.metadata.console")
     async def test_analyze_not_found(self, mock_console, mock_hud_console, mock_fetch, mock_check):
         """Test when environment not found anywhere."""
hud/cli/utils/tasks.py ADDED
@@ -0,0 +1,26 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from hud.utils.hud_console import hud_console
+
+
+def find_tasks_file(tasks_file: str | None, msg: str = "Select a tasks file") -> str:
+    """Find tasks file."""
+    if tasks_file:
+        return tasks_file
+
+    # Get current directory and find all .json and .jsonl files
+    current_dir = Path.cwd()
+    all_files = list(current_dir.glob("*.json")) + list(current_dir.glob("*.jsonl"))
+    all_files = [
+        str(file).replace(str(current_dir), "").lstrip("/").lstrip("\\") for file in all_files
+    ]
+    all_files = [file for file in all_files if file[0] != "."]  # Remove all config files
+
+    if len(all_files) == 1:
+        return str(all_files[0])
+
+    else:
+        # Prompt user to select a file
+        return hud_console.select(msg, choices=all_files)
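A short sketch of how this helper is meant to be called from the CLI flows; only find_tasks_file comes from the diff, the surrounding wiring is assumed.

# Hypothetical call site: resolve a tasks file before running an evaluation.
from hud.cli.utils.tasks import find_tasks_file

# An explicit path is returned unchanged; otherwise the current directory is
# scanned for *.json / *.jsonl files and hud_console.select() prompts when
# more than one candidate is found.
tasks_path = find_tasks_file(None, msg="Select a tasks file to evaluate")
print(f"Using tasks from {tasks_path}")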
hud/clients/base.py CHANGED
@@ -11,18 +11,16 @@ from mcp.types import Implementation
 
 from hud.shared.exceptions import HudAuthenticationError, HudException
 from hud.types import MCPToolCall, MCPToolResult
+from hud.utils.hud_console import HUDConsole
 from hud.utils.mcp import setup_hud_telemetry
 from hud.version import __version__ as hud_version
 
 if TYPE_CHECKING:
     import mcp.types as types
-
-else:
-    pass
-
-
 logger = logging.getLogger(__name__)
 
+hud_console = HUDConsole(logger=logger)
+
 
 @runtime_checkable
 class AgentMCPClient(Protocol):
@@ -113,7 +111,7 @@ class BaseHUDClient(AgentMCPClient):
     async def initialize(self, mcp_config: dict[str, dict[str, Any]] | None = None) -> None:
         """Initialize connection and fetch tools."""
         if self._initialized:
-            logger.warning(
+            hud_console.warning(
                 "Client already connected, if you want to reconnect or change the configuration, "
                 "call shutdown() first. This is especially important if you are using an agent."
             )
@@ -130,7 +128,7 @@ class BaseHUDClient(AgentMCPClient):
 
         self._auto_trace_cm = setup_hud_telemetry(self._mcp_config, auto_trace=self._auto_trace)
 
-        logger.debug("Initializing MCP client...")
+        hud_console.debug("Initializing MCP client...")
 
         try:
             # Check if API key is set for HUD API
@@ -155,7 +153,6 @@ class BaseHUDClient(AgentMCPClient):
         await self._fetch_telemetry()
 
         self._initialized = True
-        logger.info("Client initialized")
     async def shutdown(self) -> None:
         """Disconnect from the MCP server."""
  """Disconnect from the MCP server."""
@@ -163,9 +160,9 @@ class BaseHUDClient(AgentMCPClient):
         if self._auto_trace_cm:
             try:
                 self._auto_trace_cm.__exit__(None, None, None)
-                logger.info("Closed auto-created trace")
+                hud_console.info("Closed auto-created trace")
             except Exception as e:
-                logger.warning("Failed to close auto-created trace: %s", e)
+                hud_console.warning(f"Failed to close auto-created trace: {e}")
             finally:
                 self._auto_trace_cm = None
 
@@ -173,9 +170,9 @@ class BaseHUDClient(AgentMCPClient):
         if self._initialized:
             await self._disconnect()
             self._initialized = False
-            logger.info("Client disconnected")
+            hud_console.info("Shutdown completed")
         else:
-            logger.debug("Client was not initialized, skipping disconnect")
+            hud_console.debug("Client was not initialized, skipping disconnect")
 
     @overload
     async def call_tool(self, tool_call: MCPToolCall, /) -> MCPToolResult: ...
@@ -280,27 +277,28 @@ class BaseHUDClient(AgentMCPClient):
                 telemetry_data = json.loads(result.contents[0].text)  # type: ignore
                 self._telemetry_data = telemetry_data
 
-                logger.info("📡 Telemetry data fetched:")
                 if "live_url" in telemetry_data:
-                    logger.info(" 🖥️ Live URL: %s", telemetry_data["live_url"])
+                    hud_console.info(f" 🖥️ Live URL: {telemetry_data['live_url']}")
                 if "vnc_url" in telemetry_data:
-                    logger.info(" 🖥️ VNC URL: %s", telemetry_data["vnc_url"])
+                    hud_console.info(f" 🖥️ VNC URL: {telemetry_data['vnc_url']}")
                 if "cdp_url" in telemetry_data:
-                    logger.info(" 🦾 CDP URL: %s", telemetry_data["cdp_url"])
+                    hud_console.info(f" 🦾 CDP URL: {telemetry_data['cdp_url']}")
                 if "status" in telemetry_data:
-                    logger.info(" 📊 Status: %s", telemetry_data["status"])
+                    hud_console.debug(f" 📊 Status: {telemetry_data['status']}")
                 if "services" in telemetry_data:
-                    logger.debug(" 📋 Services:")
+                    hud_console.debug(" 📋 Services:")
                     for service, status in telemetry_data["services"].items():
                         status_icon = "✅" if status == "running" else "❌"
-                        logger.debug(" %s %s: %s", status_icon, service, status)
+                        hud_console.debug(f" {status_icon} {service}: {status}")
 
                 if self.verbose:
-                    logger.debug("Full telemetry data:\n%s", json.dumps(telemetry_data, indent=2))
+                    hud_console.debug(
+                        f"Full telemetry data:\n{json.dumps(telemetry_data, indent=2)}"
+                    )
         except Exception as e:
             # Telemetry is optional
             if self.verbose:
-                logger.debug("No telemetry available: %s", e)
+                hud_console.debug(f"No telemetry available: {e}")
 
     async def analyze_environment(self) -> dict[str, Any]:
         """Complete analysis of the MCP environment.
@@ -363,7 +361,7 @@ class BaseHUDClient(AgentMCPClient):
                 analysis["resources"].append(resource_info)
         except Exception as e:
             if self.verbose:
-                logger.debug("Could not list resources: %s", e)
+                hud_console.debug(f"Could not list resources: {e}")
 
         return analysis
 
@@ -387,5 +385,5 @@ class BaseHUDClient(AgentMCPClient):
             return functions
         except Exception as e:
             if self.verbose:
-                logger.debug("Could not read hub functions for '%s': %s", hub_name, e)
+                hud_console.debug(f"Could not read hub functions for '{hub_name}': {e}")
         return []
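Most of the base-client changes are a mechanical swap from the module logger to a HUDConsole wrapper. A minimal sketch of the idiom, using only the constructor and methods exercised in this diff (debug, info, warning, error):

# Sketch of the logging idiom adopted in hud/clients/base.py.
import logging

from hud.utils.hud_console import HUDConsole

logger = logging.getLogger(__name__)
hud_console = HUDConsole(logger=logger)

try:
    raise RuntimeError("example failure")
except Exception as e:
    # %-style logger calls become f-string calls on the wrapper, e.g.
    # logger.warning("Failed to close auto-created trace: %s", e) becomes:
    hud_console.warning(f"Failed to close auto-created trace: {e}")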
hud/clients/mcp_use.py CHANGED
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import logging
+import traceback
 from typing import Any
 
 from mcp import Implementation, types
@@ -12,12 +13,14 @@ from mcp_use.session import MCPSession as MCPUseSession
 from pydantic import AnyUrl
 
 from hud.types import MCPToolCall, MCPToolResult
+from hud.utils.hud_console import HUDConsole
 from hud.version import __version__ as hud_version
 
 from .base import BaseHUDClient
 from .utils.mcp_use_retry import patch_all_sessions
 
 logger = logging.getLogger(__name__)
+hud_console = HUDConsole(logger=logger)
 
 
 class MCPUseHUDClient(BaseHUDClient):
@@ -62,11 +65,11 @@ class MCPUseHUDClient(BaseHUDClient):
         try:
             assert self._client is not None  # noqa: S101
             self._sessions = await self._client.create_all_sessions()
-            logger.info("Created %d MCP sessions", len(self._sessions))
+            hud_console.info(f"Created {len(self._sessions)} MCP sessions")
 
             # Patch all sessions with retry logic
             patch_all_sessions(self._sessions)
-            logger.debug("Applied retry logic to all MCP sessions")
+            hud_console.debug("Applied retry logic to all MCP sessions")
 
             # Configure validation for all sessions based on client setting
             try:
@@ -86,21 +89,21 @@ class MCPUseHUDClient(BaseHUDClient):
             # Log session details in verbose mode
             if self.verbose and self._sessions:
                 for name, session in self._sessions.items():
-                    logger.debug(" - %s: %s", name, type(session).__name__)
+                    hud_console.debug(f" - {name}: {type(session).__name__}")
 
         except McpError as e:
             # Protocol error - the server is reachable but rejecting our request
-            logger.error("MCP protocol error: %s", e)
-            logger.error("This typically means:")
-            logger.error("- Invalid or missing initialization parameters")
-            logger.error("- Incompatible protocol version")
-            logger.error("- Server-side configuration issues")
+            hud_console.warning(f"MCP protocol error: {e}")
+            hud_console.warning("This typically means:")
+            hud_console.warning("- Invalid or missing initialization parameters")
+            hud_console.warning("- Incompatible protocol version")
+            hud_console.warning("- Server-side configuration issues")
             raise
         except Exception as e:
             # Transport or other errors
-            logger.error("Failed to create sessions: %s", e)
+            hud_console.error(f"Failed to create sessions: {e}")
             if self.verbose:
-                logger.info("Check that the MCP server is running and accessible")
+                hud_console.info("Check that the MCP server is running and accessible")
             raise
 
         # Populate tool map during initialization
@@ -129,17 +132,14 @@ class MCPUseHUDClient(BaseHUDClient):
                 await session.initialize()
 
                 if session.connector.client_session is None:
-                    logger.warning("Client session not initialized for %s", server_name)
+                    hud_console.warning(f"Client session not initialized for {server_name}")
                     continue
 
                 # List tools (retry logic is handled at transport level)
                 tools_result = await session.connector.client_session.list_tools()
 
-                logger.info(
-                    "Discovered %d tools from '%s': %s",
-                    len(tools_result.tools),
-                    server_name,
-                    [tool.name for tool in tools_result.tools],
+                hud_console.info(
+                    f"Discovered {len(tools_result.tools)} tools from '{server_name}': {', '.join([tool.name for tool in tools_result.tools])}",  # noqa: E501
                 )
 
                 # Add to collections with optional prefix
@@ -167,16 +167,15 @@ class MCPUseHUDClient(BaseHUDClient):
                 if self.verbose:
                     for tool in tools_result.tools:
                         description = tool.description or ""
-                        logger.debug(
-                            " Tool '%s': %s",
-                            tool.name,
-                            description[:100] + "..." if len(description) > 100 else description,
+                        hud_console.debug(
+                            f" Tool '{tool.name}': {description[:100] + '...' if len(description) > 100 else description}",  # noqa: E501
                         )
 
             except Exception as e:
-                logger.error("Error discovering tools from '%s': %s", server_name, e)
+                hud_console.error(f"Error discovering tools from '{server_name}': {e}")
                 if self.verbose:
-                    logger.exception("Full error details:")
+                    hud_console.error("Full error details:")
+                    traceback.print_exc()
 
         return all_tools
 
@@ -196,12 +195,8 @@ class MCPUseHUDClient(BaseHUDClient):
         session = self._sessions[server_name]
 
         if self.verbose:
-            logger.debug(
-                "Calling tool '%s' (original: '%s') on server '%s' with arguments: %s",
-                tool_call.name,
-                original_tool.name,
-                server_name,
-                tool_call.arguments,
+            hud_console.debug(
+                f"Calling tool '{tool_call.name}' (original: '{original_tool.name}') on server '{server_name}' with arguments: {tool_call.arguments}"  # noqa: E501
             )
 
         if session.connector.client_session is None:
@@ -214,7 +209,7 @@ class MCPUseHUDClient(BaseHUDClient):
         )
 
         if self.verbose:
-            logger.debug("Tool '%s' result: %s", tool_call.name, result)
+            hud_console.debug(f"Tool '{tool_call.name}' result: {result}")
 
         # MCP-use already returns the correct type, but we need to ensure it's MCPToolResult
         return MCPToolResult(
@@ -246,7 +241,7 @@ class MCPUseHUDClient(BaseHUDClient):
                 return resources.resources
             except Exception as e:
                 if self.verbose:
-                    logger.debug("Could not list resources from server '%s': %s", server_name, e)
+                    hud_console.debug(f"Could not list resources from server '{server_name}': {e}")
                 continue
         return []
 
@@ -276,8 +271,8 @@ class MCPUseHUDClient(BaseHUDClient):
                     raise AttributeError("read_resource not available")
 
                 if self.verbose:
-                    logger.debug(
-                        "Successfully read resource '%s' from server '%s'", uri, server_name
+                    hud_console.debug(
+                        f"Successfully read resource '{uri}' from server '{server_name}'"
                     )
 
                 return result
@@ -285,24 +280,21 @@ class MCPUseHUDClient(BaseHUDClient):
             except McpError as e:
                 # McpError is expected for unsupported resources
                 if "telemetry://" in str(uri):
-                    logger.debug(
-                        "Telemetry resource not supported by server '%s': %s", server_name, e
+                    hud_console.debug(
+                        f"Telemetry resource not supported by server '{server_name}': {e}"
                     )
                 elif self.verbose:
-                    logger.debug(
-                        "MCP resource error for '%s' from server '%s': %s", uri, server_name, e
+                    hud_console.debug(
+                        f"MCP resource error for '{uri}' from server '{server_name}': {e}"
                     )
                 continue
             except Exception as e:
                 # Other errors might be more serious
                 if "telemetry://" in str(uri):
-                    logger.debug("Failed to fetch telemetry from server '%s': %s", server_name, e)
+                    hud_console.debug(f"Failed to fetch telemetry from server '{server_name}': {e}")
                 else:
-                    logger.warning(
-                        "Unexpected error reading resource '%s' from server '%s': %s",
-                        uri,
-                        server_name,
-                        e,
+                    hud_console.warning(
+                        f"Unexpected error reading resource '{uri}' from server '{server_name}': {e}"  # noqa: E501
                     )
                 continue
 
@@ -311,14 +303,14 @@ class MCPUseHUDClient(BaseHUDClient):
     async def _disconnect(self) -> None:
         """Close all active sessions."""
         if self._client is None:
-            logger.warning("Client is not connected, cannot close")
+            hud_console.warning("Client is not connected, cannot close")
             return
 
         await self._client.close_all_sessions()
         self._sessions = {}
         self._tool_map = {}
         self._initialized = False
-        logger.debug("MCP-use client disconnected")
+        hud_console.debug("MCP-use client disconnected")
 
     # Legacy compatibility methods (limited; tests should not rely on these)
     def get_sessions(self) -> dict[str, Any]:
hud/clients/tests/test_mcp_use_retry.py CHANGED
@@ -36,20 +36,20 @@ class TestRetrySession:
 
         # Check adapter configuration
         adapter = session.adapters["http://"]
-        assert adapter.max_retries.total == 5
-        assert 500 in adapter.max_retries.status_forcelist
-        assert 502 in adapter.max_retries.status_forcelist
-        assert adapter.max_retries.backoff_factor == 2.0
+        assert hasattr(adapter, "max_retries") and adapter.max_retries.total == 5  # type: ignore
+        assert 500 in adapter.max_retries.status_forcelist  # type: ignore
+        assert 502 in adapter.max_retries.status_forcelist  # type: ignore
+        assert adapter.max_retries.backoff_factor == 2.0  # type: ignore
 
     def test_retry_session_default_values(self):
         """Test retry session with default values."""
         session = create_retry_session()
 
         adapter = session.adapters["https://"]
-        assert adapter.max_retries.total == 3
-        assert 502 in adapter.max_retries.status_forcelist
-        assert 503 in adapter.max_retries.status_forcelist
-        assert 504 in adapter.max_retries.status_forcelist
+        assert adapter.max_retries.total == 3  # type: ignore
+        assert 502 in adapter.max_retries.status_forcelist  # type: ignore
+        assert 503 in adapter.max_retries.status_forcelist  # type: ignore
+        assert 504 in adapter.max_retries.status_forcelist  # type: ignore
 
 
 class TestAsyncRetryWrapper:
@@ -316,7 +316,7 @@ class TestMCPUseClientRetry:
         # Verify retry worked
         assert call_count == 2  # Failed once, then succeeded
         assert not result.isError
-        assert result.content[0].text == "Success"
+        assert result.content[0].text == "Success"  # type: ignore
 
     @pytest.mark.asyncio
     async def test_resource_read_with_retry(self):
@@ -371,7 +371,7 @@ class TestMCPUseClientRetry:
         # Verify retry worked
         assert call_count == 2  # Failed once, then succeeded
         assert result is not None
-        assert result.contents[0].text == '{"status": "ok"}'
+        assert result.contents[0].text == '{"status": "ok"}'  # type: ignore
 
 
 if __name__ == "__main__":
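The test updates above only add type-ignore hints, but they show the shape of the retry helper: create_retry_session() returns a requests-style session whose mounted adapters carry a urllib3 Retry policy. A hedged sketch of what the tests exercise; the import path is an assumption based on the .utils.mcp_use_retry import in hud/clients/mcp_use.py.

# Illustrative only; import path assumed, expected defaults taken from the tests.
from hud.clients.utils.mcp_use_retry import create_retry_session

session = create_retry_session()
adapter = session.adapters["https://"]

# The tests assert on the mounted adapter's urllib3 Retry configuration.
assert adapter.max_retries.total == 3
assert 503 in adapter.max_retries.status_forcelist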
hud/datasets/__init__.py CHANGED
@@ -7,13 +7,14 @@ Provides data models, utilities, and execution functions for working with HUD da
 # Execution functions
 from __future__ import annotations
 
-from .execution import (
+from hud.types import Task
+
+from .parallel import (
     calculate_optimal_workers,
-    run_dataset,
     run_dataset_parallel,
     run_dataset_parallel_manual,
 )
-from .task import Task
+from .runner import run_dataset
 
 # Utilities
 from .utils import fetch_system_prompt_from_dataset, save_tasks
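For downstream code the practical effect is that Task now lives in hud.types and the execution helpers are re-exported from the flattened modules. A quick sketch of the new import surface, based only on the re-exports visible in this hunk:

# Import locations after 0.4.30; the old paths were hud.datasets.task.Task and
# hud.datasets.execution.{parallel,runner}.
from hud.types import Task
from hud.datasets import (
    calculate_optimal_workers,
    run_dataset,                  # now re-exported from .runner
    run_dataset_parallel,
    run_dataset_parallel_manual,
    save_tasks,
)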
hud/datasets/{execution/parallel.py → parallel.py} RENAMED
@@ -65,8 +65,8 @@ def _process_worker(
 
     import hud
     from hud.agents.misc.response_agent import ResponseAgent
-    from hud.datasets.task import Task
     from hud.otel import configure_telemetry
+    from hud.types import Task
 
     # Ensure stdout is not buffered for immediate output
     try:
hud/datasets/{execution/runner.py → runner.py} RENAMED
@@ -9,7 +9,7 @@ from typing import TYPE_CHECKING, Any, cast
 from datasets import Dataset, load_dataset
 
 from hud.agents.misc import ResponseAgent
-from hud.datasets.task import Task
+from hud.types import Task
 
 if TYPE_CHECKING:
     from hud.agents import MCPAgent
hud/datasets/utils.py CHANGED
@@ -8,7 +8,7 @@ from typing import Any
 
 from datasets import Dataset
 
-from .task import Task
+from hud.types import Task
 
 logger = logging.getLogger("hud.datasets")
 
hud/native/comparator.py CHANGED
@@ -513,11 +513,11 @@ def make_alias_tool(name: str, preset_mode: ComparisonMode, description: str) ->
 
 
 # Create MCP server
-comparator_server = MCPServer(name="comparator")
+comparator = MCPServer(name="comparator")
 
 # Register main tool
-comparator_server.add_tool(SubmitTool())
-comparator_server.add_tool(CompareTool())
+comparator.add_tool(SubmitTool())
+comparator.add_tool(CompareTool())
 
 # Register aliases - these are just thin wrappers
 ALIASES = [
@@ -534,13 +534,13 @@ ALIASES = [
 
 for name, mode, desc in ALIASES:
     AliasTool = make_alias_tool(name, mode, desc)
-    comparator_server.add_tool(AliasTool())
+    comparator.add_tool(AliasTool())
 
 # Export for mounting
-__all__ = ["comparator_server"]
+__all__ = ["comparator"]
 
 
 if __name__ == "__main__":
     # Run as standalone server
     logger.info("Starting Comparator MCP Server...")
-    comparator_server.run()
+    comparator.run()
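Because the exported symbol is renamed, callers that previously imported comparator_server need the new name. A minimal sketch; only the rename and the run() call come from the diff.

# Before 0.4.30:
#   from hud.native.comparator import comparator_server
# After the rename:
from hud.native.comparator import comparator

# Run it standalone, mirroring the __main__ block above.
comparator.run()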