PyPI - hud-python - Versions diffs - 0.4.53__py3-none-any.whl → 0.4.55__py3-none-any.whl - Mend

hud-python 0.4.53__py3-none-any.whl → 0.4.55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (31)

hud/agents/base.py +8 -0
hud/agents/claude.py +4 -3
hud/agents/openai.py +2 -1
hud/agents/openai_chat_generic.py +3 -2
hud/agents/tests/test_claude.py +2 -2
hud/agents/tests/test_openai.py +1 -1
hud/agents/utils.py +50 -0
hud/cli/__init__.py +65 -9
hud/cli/build.py +185 -25
hud/cli/dev.py +130 -40
hud/cli/eval.py +123 -24
hud/cli/flows/dev.py +155 -0
hud/cli/flows/tasks.py +29 -9
hud/cli/tests/test_eval.py +6 -6
hud/cli/utils/docker.py +6 -3
hud/clients/base.py +2 -2
hud/otel/context.py +42 -1
hud/server/server.py +29 -3
hud/settings.py +6 -0
hud/telemetry/async_context.py +16 -2
hud/telemetry/trace.py +6 -1
hud/types.py +10 -0
hud/utils/group_eval.py +14 -2
hud/utils/tests/test_agent_factories.py +2 -1
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.4.53.dist-info → hud_python-0.4.55.dist-info}/METADATA +8 -7
{hud_python-0.4.53.dist-info → hud_python-0.4.55.dist-info}/RECORD +31 -29
{hud_python-0.4.53.dist-info → hud_python-0.4.55.dist-info}/WHEEL +0 -0
{hud_python-0.4.53.dist-info → hud_python-0.4.55.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.53.dist-info → hud_python-0.4.55.dist-info}/licenses/LICENSE +0 -0

hud/cli/dev.py CHANGED Viewed

@@ -25,6 +25,7 @@ def show_dev_server_info(
     inspector: bool,
     interactive: bool,
     env_dir: Path | None = None,
+    new: bool = False,
 ) -> str:
     """Show consistent server info for both Python and Docker modes.
@@ -125,6 +126,7 @@ async def run_mcp_module(
     verbose: bool,
     inspector: bool,
     interactive: bool,
+    new: bool = False,
 ) -> None:
     """Run an MCP module directly."""
     # Check if this is a reload (not first run)
@@ -222,14 +224,53 @@ async def run_mcp_module(
     # Show server info only on first run
     if not is_reload:
-        show_dev_server_info(
-            server_name=mcp_server.name or "mcp-server",
-            port=port,
-            transport=transport,
-            inspector=inspector,
-            interactive=interactive,
-            env_dir=Path.cwd().parent if (Path.cwd().parent / "environment").exists() else None,
-        )
+        # Try dynamic trace first for HTTP mode (only if --new)
+        live_trace_url: str | None = None
+        if transport == "http" and new:
+            try:
+                local_mcp_config: dict[str, dict[str, Any]] = {
+                    "hud": {
+                        "url": f"http://localhost:{port}/mcp",
+                        "headers": {},
+                    }
+                }
+                from hud.cli.flows.dev import create_dynamic_trace
+                live_trace_url = await create_dynamic_trace(
+                    mcp_config=local_mcp_config,
+                    build_status=False,
+                    environment_name=mcp_server.name or "mcp-server",
+                )
+            except Exception:  # noqa: S110
+                pass
+        # Show UI using shared flow logic
+        if transport == "http" and live_trace_url and new:
+            # Minimal UI with live trace
+            from hud.cli.flows.dev import generate_cursor_deeplink, show_dev_ui
+            server_name = mcp_server.name or "mcp-server"
+            cursor_deeplink = generate_cursor_deeplink(server_name, port)
+            show_dev_ui(
+                live_trace_url=live_trace_url,
+                server_name=server_name,
+                port=port,
+                cursor_deeplink=cursor_deeplink,
+                is_docker=False,
+            )
+        else:
+            # Full UI for HTTP without trace, or stdio mode
+            show_dev_server_info(
+                server_name=mcp_server.name or "mcp-server",
+                port=port,
+                transport=transport,
+                inspector=inspector,
+                interactive=interactive,
+                env_dir=Path.cwd().parent if (Path.cwd().parent / "environment").exists() else None,
+                new=new,
+            )
     # Check if there's an environment backend and remind user to start it (first run only)
     if not is_reload:
@@ -238,9 +279,10 @@ async def run_mcp_module(
         if env_dir.exists() and (env_dir / "server.py").exists():
             hud_console.info("")
             hud_console.info(
-                f"{hud_console.sym.FLOW} Don't forget to start the environment backend:"
+                f"{hud_console.sym.FLOW} Don't forget to start the environment backend in another "
+                "terminal:"
             )
-            hud_console.info("   cd ../environment && uvicorn server:app --reload")
+            hud_console.info("   cd environment && uv run python uvicorn server:app --reload")
         # Launch inspector if requested (first run only)
         if inspector and transport == "http":
@@ -347,6 +389,7 @@ def run_with_reload(
     verbose: bool,
     inspector: bool,
     interactive: bool,
+    new: bool = False,
 ) -> None:
     """Run module with file watching and auto-reload."""
     try:
@@ -389,6 +432,11 @@ def run_with_reload(
         if verbose:
             cmd.append("--verbose")
+        if new:
+            cmd.append("--new")
+        if verbose:
             hud_console.info(f"Starting: {' '.join(cmd)}")
         # Mark as reload after first run to suppress logs
@@ -454,7 +502,12 @@ def run_with_reload(
 def run_docker_dev_server(
-    port: int, verbose: bool, inspector: bool, interactive: bool, docker_args: list[str]
+    port: int,
+    verbose: bool,
+    inspector: bool,
+    interactive: bool,
+    docker_args: list[str],
+    new: bool = False,
 ) -> None:
     """Run MCP server in Docker with volume mounts, expose via local HTTP proxy."""
     import typer
@@ -462,6 +515,11 @@ def run_docker_dev_server(
     from hud.server import MCPServer
+    # Ensure Docker CLI and daemon are available before proceeding
+    from .utils.docker import require_docker_running
+    require_docker_running()
     cwd = Path.cwd()
     # Find environment directory (current or parent with hud.lock.yaml)
@@ -528,15 +586,6 @@ def run_docker_dev_server(
         env_dir=env_dir,
     )
-    # Env flags already injected by create_docker_run_command
-    # Print startup info
-    hud_console.header("HUD Development Mode (Docker)")
-    if verbose:
-        hud_console.section_title("Docker Command")
-        hud_console.info(" ".join(docker_cmd))
     # Create MCP config pointing to the Docker container's stdio
     mcp_config = {
         "docker": {
@@ -545,15 +594,62 @@ def run_docker_dev_server(
         }
     }
-    # Show consistent server info
-    show_dev_server_info(
-        server_name=image_name,
-        port=port,
-        transport="http",  # Docker mode always uses HTTP proxy
-        inspector=inspector,
-        interactive=interactive,
-        env_dir=env_dir,
-    )
+    # Attempt to create dynamic trace early (before any UI)
+    import asyncio as _asy
+    from hud.cli.flows.dev import create_dynamic_trace, generate_cursor_deeplink, show_dev_ui
+    live_trace_url: str | None = None
+    if new:
+        try:
+            local_mcp_config: dict[str, dict[str, Any]] = {
+                "hud": {
+                    "url": f"http://localhost:{port}/mcp",
+                    "headers": {},
+                }
+            }
+            live_trace_url = _asy.run(
+                create_dynamic_trace(
+                    mcp_config=local_mcp_config,
+                    build_status=True,
+                    environment_name=image_name,
+                )
+            )
+        except Exception:  # noqa: S110
+            pass
+    # Show appropriate UI
+    if live_trace_url and new:
+        # Minimal UI with live trace
+        cursor_deeplink = generate_cursor_deeplink(image_name, port)
+        show_dev_ui(
+            live_trace_url=live_trace_url,
+            server_name=image_name,
+            port=port,
+            cursor_deeplink=cursor_deeplink,
+            is_docker=True,
+        )
+    else:
+        # Full UI
+        hud_console.header("HUD Development Mode (Docker)")
+        if verbose:
+            hud_console.section_title("Docker Command")
+            hud_console.info(" ".join(docker_cmd))
+        show_dev_server_info(
+            server_name=image_name,
+            port=port,
+            transport="http",
+            inspector=inspector,
+            interactive=interactive,
+            env_dir=env_dir,
+            new=new,
+        )
+        hud_console.dim_info(
+            "",
+            "Container restarts on file changes (mounted volumes), "
+            "if changing tools run hud dev again",
+        )
+        hud_console.info("")
     # Suppress logs unless verbose
     if not verbose:
@@ -562,13 +658,6 @@ def run_docker_dev_server(
         logging.getLogger("uvicorn").setLevel(logging.ERROR)
         os.environ["FASTMCP_DISABLE_BANNER"] = "1"
-    # Note about hot-reload behavior
-    hud_console.dim_info(
-        "",
-        "Container restarts on file changes (mounted volumes), if changing tools run hud dev again",
-    )
-    hud_console.info("")
     # Create and run proxy with HUD helpers
     async def run_proxy() -> None:
         from fastmcp import FastMCP
@@ -617,6 +706,7 @@ def run_mcp_dev_server(
     watch: list[str] | None,
     docker: bool = False,
     docker_args: list[str] | None = None,
+    new: bool = False,
 ) -> None:
     """Run MCP development server with hot-reload."""
     docker_args = docker_args or []
@@ -627,12 +717,12 @@ def run_mcp_dev_server(
         hud_console.note("Detected Dockerfile - using Docker mode with volume mounts")
         hud_console.dim_info("Tip", "Use 'hud dev --help' to see all options")
         hud_console.info("")
-        run_docker_dev_server(port, verbose, inspector, interactive, docker_args)
+        run_docker_dev_server(port, verbose, inspector, interactive, docker_args, new)
         return
     # Route to Docker mode if explicitly requested
     if docker:
-        run_docker_dev_server(port, verbose, inspector, interactive, docker_args)
+        run_docker_dev_server(port, verbose, inspector, interactive, docker_args, new)
         return
     transport = "stdio" if stdio else "http"
@@ -676,6 +766,6 @@ def run_mcp_dev_server(
     is_child = os.environ.get("_HUD_DEV_CHILD") == "1"
     if is_child:
-        asyncio.run(run_mcp_module(module, transport, port, verbose, False, False))
+        asyncio.run(run_mcp_module(module, transport, port, verbose, False, False, new))
     else:
-        run_with_reload(module, watch_paths, transport, port, verbose, inspector, interactive)
+        run_with_reload(module, watch_paths, transport, port, verbose, inspector, interactive, new)

hud/cli/eval.py CHANGED Viewed

@@ -5,13 +5,14 @@ from __future__ import annotations
 import asyncio
 import logging
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Literal
+from typing import TYPE_CHECKING, Any
 import typer
 import hud
 from hud.cli.utils.env_check import ensure_built, find_environment_dir
 from hud.settings import settings
+from hud.types import AgentType
 from hud.utils.group_eval import display_group_statistics, run_tasks_grouped
 from hud.utils.hud_console import HUDConsole
@@ -21,6 +22,28 @@ logger = logging.getLogger(__name__)
 hud_console = HUDConsole()
+def _tasks_use_local_mcp(tasks: list[Task]) -> bool:
+    """Return True if any task's MCP config uses a local command instead of a URL.
+    A config is considered local when a server entry contains a 'command' key and
+    does not provide a 'url'.
+    """
+    try:
+        for t in tasks:
+            cfg = getattr(t, "mcp_config", {}) or {}
+            if not isinstance(cfg, dict):
+                continue
+            for server_cfg in cfg.values():
+                if isinstance(server_cfg, dict) and (
+                    "command" in server_cfg and not server_cfg.get("url")
+                ):
+                    return True
+        return False
+    except Exception:
+        # Be conservative: if detection fails, do not block
+        return False
 def get_available_models() -> list[dict[str, str | None]]:
     """Fetch available models from the HUD API (only ready models).
@@ -113,7 +136,7 @@ def _build_vllm_config(
 def build_agent(
-    agent_type: Literal["claude", "openai", "vllm", "litellm", "integration_test"],
+    agent_type: AgentType,
     *,
     model: str | None = None,
     allowed_tools: list[str] | None = None,
@@ -123,11 +146,11 @@ def build_agent(
     """Create and return the requested agent type."""
     # Import agents lazily to avoid dependency issues
-    if agent_type == "integration_test":
+    if agent_type == AgentType.INTEGRATION_TEST:
         from hud.agents.misc.integration_test_agent import IntegrationTestRunner
         return IntegrationTestRunner(verbose=verbose)
-    elif agent_type == "vllm":
+    elif agent_type == AgentType.VLLM:
         # Create a generic OpenAI agent for vLLM server
         try:
             from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
@@ -147,7 +170,7 @@ def build_agent(
         )
         return GenericOpenAIChatAgent(**config)
-    elif agent_type == "openai":
+    elif agent_type == AgentType.OPENAI:
         try:
             from hud.agents import OperatorAgent
         except ImportError as e:
@@ -165,7 +188,7 @@ def build_agent(
         else:
             return OperatorAgent(verbose=verbose)
-    elif agent_type == "litellm":
+    elif agent_type == AgentType.LITELLM:
         try:
             from hud.agents.lite_llm import LiteAgent
         except ImportError as e:
@@ -209,7 +232,7 @@ def build_agent(
 async def run_single_task(
     source: str,
     *,
-    agent_type: Literal["claude", "openai", "vllm", "litellm", "integration_test"] = "claude",
+    agent_type: AgentType = AgentType.CLAUDE,
     model: str | None = None,
     allowed_tools: list[str] | None = None,
     max_steps: int = 10,
@@ -264,18 +287,44 @@ async def run_single_task(
             "Using first task from dataset (run with --full to run the entire dataset)..."
         )
-    task_prompt = task.prompt[:50] + "..." if len(task.prompt) > 50 else task.prompt
+    # Warn/confirm if the task uses local MCP config
+    try:
+        if group_size > 1 and _tasks_use_local_mcp([task]):
+            hud_console.warning(
+                "Detected a local MCP configuration (uses 'command' instead of a 'url')."
+            )
+            hud_console.info(
+                "Ensure there are no exposed port conflicts during Docker runs/builds in eval."
+            )
+            proceed = hud_console.confirm(
+                "Proceed with running local MCP servers for this evaluation?",
+                default=True,
+            )
+            if not proceed:
+                # Provide a helpful next step
+                hud_console.hint("You can convert tasks to remote with: hud convert <tasks_file>")
+                raise typer.Exit(1)
+            # Always show the convert hint for awareness
+            hud_console.hint(
+                "Avoid local port conflicts by converting to remote: hud convert <tasks_file>"
+            )
+    except typer.Exit:
+        raise
+    except Exception as e:
+        hud_console.debug(f"Local MCP confirmation skipped due to error: {e}")
+    task_prompt = task.prompt
     # Use grouped evaluation if group_size > 1
     agent_config: dict[str, Any] = {}
-    if agent_type == "integration_test":
+    if agent_type == AgentType.INTEGRATION_TEST:
         from hud.agents.misc.integration_test_agent import IntegrationTestRunner
         agent_class = IntegrationTestRunner
         agent_config = {"verbose": verbose}
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
-    elif agent_type == "vllm":
+    elif agent_type == AgentType.VLLM:
         # Special handling for vLLM
         from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
@@ -288,14 +337,14 @@ async def run_single_task(
             allowed_tools=allowed_tools,
             verbose=verbose,
         )
-    elif agent_type == "openai":
+    elif agent_type == AgentType.OPENAI:
         from hud.agents import OperatorAgent
         agent_class = OperatorAgent
         agent_config = {"verbose": verbose}
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
-    elif agent_type == "litellm":
+    elif agent_type == AgentType.LITELLM:
         from hud.agents.lite_llm import LiteAgent
         agent_class = LiteAgent
@@ -305,7 +354,7 @@ async def run_single_task(
         }
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
-    elif agent_type == "claude":
+    elif agent_type == AgentType.CLAUDE:
         from hud.agents import ClaudeAgent
         agent_class = ClaudeAgent
@@ -353,7 +402,7 @@ async def run_single_task(
 async def run_full_dataset(
     source: str,
     *,
-    agent_type: Literal["claude", "openai", "vllm", "litellm", "integration_test"] = "claude",
+    agent_type: AgentType = AgentType.CLAUDE,
     model: str | None = None,
     allowed_tools: list[str] | None = None,
     max_concurrent: int = 30,
@@ -386,6 +435,56 @@ async def run_full_dataset(
         hud_console.error(f"No tasks found in: {source}")
         raise typer.Exit(1)
+    # Warn/confirm once if any task uses local MCP config
+    try:
+        if _tasks_use_local_mcp(tasks):
+            hud_console.warning(
+                "Detected local MCP configurations (use 'command' instead of a 'url')."
+            )
+            hud_console.info(
+                "When running many tasks concurrently, exposed host ports from Docker may conflict."
+            )
+            proceed = hud_console.confirm(
+                "Proceed with running local MCP servers for this evaluation?",
+                default=True,
+            )
+            if not proceed:
+                # Helpful hint when source is a file path
+                try:
+                    path = Path(source)
+                    if path.exists():
+                        hud_console.hint(
+                            f"You can convert tasks to remote with: hud convert {path.name}"
+                        )
+                    else:
+                        hud_console.hint(
+                            "You can convert tasks to remote with: hud convert <tasks_file>"
+                        )
+                except Exception:
+                    hud_console.hint(
+                        "You can convert tasks to remote with: hud convert <tasks_file>"
+                    )
+                raise typer.Exit(1)
+            # Always show the convert hint for awareness
+            try:
+                path = Path(source)
+                if path.exists():
+                    hud_console.hint(
+                        f"Convert to remote to avoid port conflicts: hud convert {path.name}"
+                    )
+                else:
+                    hud_console.hint(
+                        "Convert to remote to avoid port conflicts: hud convert <tasks_file>"
+                    )
+            except Exception:
+                hud_console.hint(
+                    "Convert to remote to avoid port conflicts: hud convert <tasks_file>"
+                )
+    except typer.Exit:
+        raise
+    except Exception as e:
+        hud_console.debug(f"Local MCP confirmation skipped due to error: {e}")
     # Convert Task objects to dicts for dataset runners
     dataset_or_tasks = [task.model_dump() for task in tasks]
@@ -395,12 +494,12 @@ async def run_full_dataset(
     # Build agent class + config for run_dataset
     agent_config: dict[str, Any]
-    if agent_type == "integration_test":  # --integration-test mode
+    if agent_type == AgentType.INTEGRATION_TEST:  # --integration-test mode
         from hud.agents.misc.integration_test_agent import IntegrationTestRunner
         agent_class = IntegrationTestRunner
         agent_config = {"verbose": verbose}
-    elif agent_type == "vllm":
+    elif agent_type == AgentType.VLLM:
         try:
             from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
@@ -419,7 +518,7 @@ async def run_full_dataset(
             allowed_tools=allowed_tools,
             verbose=verbose,
         )
-    elif agent_type == "openai":
+    elif agent_type == AgentType.OPENAI:
         try:
             from hud.agents import OperatorAgent
@@ -435,7 +534,7 @@ async def run_full_dataset(
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
-    elif agent_type == "litellm":
+    elif agent_type == AgentType.LITELLM:
         try:
             from hud.agents.lite_llm import LiteAgent
@@ -539,8 +638,8 @@ def eval_command(
         "--full",
         help="Run the entire dataset (omit for single-task debug mode)",
     ),
-    agent: Literal["claude", "openai", "vllm", "litellm", "integration_test"] = typer.Option(
-        "claude",
+    agent: AgentType = typer.Option(  # noqa: B008
+        AgentType.CLAUDE,
         "--agent",
         help="Agent backend to use (claude, openai, vllm for local server, or litellm)",
     ),
@@ -648,21 +747,21 @@ def eval_command(
     # We pass integration_test as the agent_type
     if integration_test:
-        agent = "integration_test"
+        agent = AgentType.INTEGRATION_TEST
     # Check for required API keys
-    if agent == "claude":
+    if agent == AgentType.CLAUDE:
         if not settings.anthropic_api_key:
             hud_console.error("ANTHROPIC_API_KEY is required for Claude agent")
             hud_console.info(
                 "Set it in your environment or run: hud set ANTHROPIC_API_KEY=your-key-here"
             )
             raise typer.Exit(1)
-    elif agent == "openai" and not settings.openai_api_key:
+    elif agent == AgentType.OPENAI and not settings.openai_api_key:
         hud_console.error("OPENAI_API_KEY is required for OpenAI agent")
         hud_console.info("Set it in your environment or run: hud set OPENAI_API_KEY=your-key-here")
         raise typer.Exit(1)
-    elif agent == "vllm":
+    elif agent == AgentType.VLLM:
         if model:
             hud_console.info(f"Using vLLM with model: {model}")
         else:

hud-python 0.4.53__py3-none-any.whl → 0.4.55__py3-none-any.whl

Potentially problematic release.

hud-python 0.4.53__py3-none-any.whl → 0.4.55__py3-none-any.whl