hud-python 0.4.36__py3-none-any.whl → 0.4.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of hud-python has been flagged as potentially problematic on the registry page.
- hud/agents/__init__.py +2 -0
- hud/agents/lite_llm.py +72 -0
- hud/agents/openai_chat_generic.py +21 -7
- hud/cli/__init__.py +19 -4
- hud/cli/build.py +17 -2
- hud/cli/dev.py +1 -1
- hud/cli/eval.py +93 -13
- hud/cli/flows/tasks.py +197 -65
- hud/cli/push.py +9 -0
- hud/cli/rl/__init__.py +14 -4
- hud/cli/rl/celebrate.py +187 -0
- hud/cli/rl/config.py +15 -8
- hud/cli/rl/local_runner.py +44 -20
- hud/cli/rl/remote_runner.py +163 -86
- hud/cli/rl/viewer.py +141 -0
- hud/cli/rl/wait_utils.py +89 -0
- hud/cli/utils/env_check.py +196 -0
- hud/cli/utils/source_hash.py +108 -0
- hud/clients/base.py +1 -1
- hud/clients/fastmcp.py +1 -1
- hud/otel/config.py +1 -1
- hud/otel/context.py +2 -2
- hud/rl/vllm_adapter.py +1 -1
- hud/server/server.py +84 -13
- hud/server/tests/test_add_tool.py +60 -0
- hud/server/tests/test_context.py +128 -0
- hud/server/tests/test_mcp_server_handlers.py +44 -0
- hud/server/tests/test_mcp_server_integration.py +405 -0
- hud/server/tests/test_mcp_server_more.py +247 -0
- hud/server/tests/test_run_wrapper.py +53 -0
- hud/server/tests/test_server_extra.py +166 -0
- hud/server/tests/test_sigterm_runner.py +78 -0
- hud/shared/hints.py +1 -1
- hud/telemetry/job.py +2 -2
- hud/types.py +9 -2
- hud/utils/tasks.py +32 -24
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.36.dist-info → hud_python-0.4.37.dist-info}/METADATA +14 -12
- {hud_python-0.4.36.dist-info → hud_python-0.4.37.dist-info}/RECORD +43 -29
- {hud_python-0.4.36.dist-info → hud_python-0.4.37.dist-info}/WHEEL +0 -0
- {hud_python-0.4.36.dist-info → hud_python-0.4.37.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.36.dist-info → hud_python-0.4.37.dist-info}/licenses/LICENSE +0 -0
hud/agents/__init__.py
CHANGED
@@ -2,12 +2,14 @@ from __future__ import annotations
 
 from .base import MCPAgent
 from .claude import ClaudeAgent
+from .lite_llm import LiteAgent
 from .openai import OperatorAgent
 from .openai_chat_generic import GenericOpenAIChatAgent
 
 __all__ = [
     "ClaudeAgent",
     "GenericOpenAIChatAgent",
+    "LiteAgent",
     "MCPAgent",
     "OperatorAgent",
 ]
hud/agents/lite_llm.py
ADDED
@@ -0,0 +1,72 @@
+"""LiteLLM MCP Agent implementation.
+
+Same OpenAI chat-completions shape + MCP tool plumbing,
+but transport is LiteLLM and (optionally) tools are shaped by LiteLLM's MCP transformer.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, ClassVar
+
+import litellm
+
+from .openai_chat_generic import GenericOpenAIChatAgent
+
+logger = logging.getLogger(__name__)
+
+# Prefer LiteLLM's built-in MCP -> OpenAI tool transformer (handles Bedrock nuances)
+try:
+    from litellm.experimental_mcp_client.tools import (
+        transform_mcp_tool_to_openai_tool,
+    )
+except Exception:  # pragma: no cover - optional dependency
+    transform_mcp_tool_to_openai_tool = None  # type: ignore
+
+
+class LiteAgent(GenericOpenAIChatAgent):
+    """
+    Same OpenAI chat-completions shape + MCP tool plumbing,
+    but transport is LiteLLM and (optionally) tools are shaped by LiteLLM's MCP transformer.
+    """
+
+    metadata: ClassVar[dict[str, Any]] = {}
+
+    def __init__(
+        self,
+        *,
+        model_name: str = "gpt-4o-mini",
+        completion_kwargs: dict[str, Any] | None = None,
+        **agent_kwargs: Any,
+    ) -> None:
+        # We don't need an OpenAI client; pass None
+        super().__init__(
+            openai_client=None,
+            model_name=model_name,
+            completion_kwargs=completion_kwargs,
+            **agent_kwargs,
+        )
+
+    def get_tool_schemas(self) -> list[dict]:
+        # Prefer LiteLLM's stricter transformer (handles Bedrock & friends)
+        if transform_mcp_tool_to_openai_tool is not None:
+            return [
+                transform_mcp_tool_to_openai_tool(t)  # returns ChatCompletionToolParam-like dict
+                for t in self.get_available_tools()
+            ]
+        # Fallback to the generic OpenAI sanitizer
+        return GenericOpenAIChatAgent.get_tool_schemas(self)
+
+    async def _invoke_chat_completion(
+        self,
+        *,
+        messages: list[Any],
+        tools: list[dict] | None,
+        extra: dict[str, Any],
+    ):
+        return await litellm.acompletion(
+            model=self.model_name,
+            messages=messages,
+            tools=tools or None,  # LiteLLM tolerates None better than []
+            **extra,
+        )
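
For context, a minimal usage sketch of the new agent (the model string and completion kwargs below are illustrative, and LiteLLM reads provider credentials such as OPENAI_API_KEY from the environment):

    from hud.agents import LiteAgent

    # LiteAgent needs no OpenAI SDK client; it routes through litellm.acompletion.
    agent = LiteAgent(
        model_name="gpt-4o-mini",                # any LiteLLM-routable model id
        completion_kwargs={"temperature": 0.0},  # forwarded (minus protected keys) to the completion call
    )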
hud/agents/openai_chat_generic.py
CHANGED

@@ -42,7 +42,7 @@ class GenericOpenAIChatAgent(MCPAgent):
     def __init__(
         self,
         *,
-        openai_client: AsyncOpenAI,
+        openai_client: AsyncOpenAI | None,
         model_name: str = "gpt-4o-mini",
         completion_kwargs: dict[str, Any] | None = None,
         **agent_kwargs: Any,

@@ -171,6 +171,23 @@ class GenericOpenAIChatAgent(MCPAgent):
             openai_tools.append(openai_tool)
         return openai_tools
 
+    async def _invoke_chat_completion(
+        self,
+        *,
+        messages: list[Any],
+        tools: list[dict] | None,
+        extra: dict[str, Any],
+    ):
+        if self.oai is None:
+            raise ValueError("openai_client is required for GenericOpenAIChatAgent")
+        # default transport = OpenAI SDK
+        return await self.oai.chat.completions.create(
+            model=self.model_name,
+            messages=messages,
+            tools=tools,  # already ChatCompletionToolParam-shaped
+            **extra,
+        )
+
     @instrument(
         span_type="agent",
         record_args=False,

@@ -180,17 +197,14 @@ class GenericOpenAIChatAgent(MCPAgent):
         """Send chat request to OpenAI and convert the response."""
 
         # Convert MCP tool schemas to OpenAI format
-        mcp_schemas = self.get_tool_schemas()
+        tools = cast("list[ChatCompletionToolParam]", self.get_tool_schemas())
 
         protected_keys = {"model", "messages", "tools"}
         extra = {k: v for k, v in (self.completion_kwargs or {}).items() if k not in protected_keys}
 
         try:
-            response = await self.oai.chat.completions.create(
-                model=self.model_name,
-                messages=messages,
-                tools=cast("list[ChatCompletionToolParam]", mcp_schemas),
-                **extra,
+            response = await self._invoke_chat_completion(
+                messages=messages, tools=tools, extra=extra
             )
         except Exception as e:
             error_content = f"Error getting response {e}"
hud/cli/__init__.py
CHANGED

@@ -912,7 +912,7 @@ def eval(
     agent: str | None = typer.Argument(
         None,
         help=(
-            "Agent backend to use (claude, openai, or
+            "Agent backend to use (claude, openai, vllm, or litellm). If not provided, will prompt interactively."  # noqa: E501
         ),
     ),
     full: bool = typer.Option(

@@ -960,6 +960,12 @@
         "--verbose",
         help="Enable verbose output from the agent",
     ),
+    very_verbose: bool = typer.Option(
+        False,
+        "--very-verbose",
+        "-vv",
+        help="Enable debug-level logs for maximum visibility",
+    ),
     vllm_base_url: str | None = typer.Option(
         None,
         "--vllm-base-url",

@@ -1025,13 +1031,14 @@
             {"name": "Claude 4 Sonnet", "value": "claude"},
             {"name": "OpenAI Computer Use", "value": "openai"},
             {"name": "vLLM (Local Server)", "value": "vllm"},
+            {"name": "LiteLLM (Multi-provider)", "value": "litellm"},
         ]
     )
 
     agent = hud_console.select("Select an agent to use:", choices=choices, default=0)
 
     # Handle HUD model selection
-    if agent and agent not in ["claude", "openai", "vllm"]:
+    if agent and agent not in ["claude", "openai", "vllm", "litellm"]:
         # Find remote model name
         model = agent
         if not vllm_base_url:

@@ -1052,7 +1059,7 @@
         hud_console.info(f"Using HUD model: {model} (trained on {base_model})")
 
     # Validate agent choice
-    valid_agents = ["claude", "openai", "vllm"]
+    valid_agents = ["claude", "openai", "vllm", "litellm"]
     if agent not in valid_agents:
         hud_console.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
         raise typer.Exit(1)

@@ -1070,6 +1077,7 @@
         max_workers=max_workers,
         max_concurrent_per_worker=max_concurrent_per_worker,
         verbose=verbose,
+        very_verbose=very_verbose,
         vllm_base_url=vllm_base_url,
         group_size=group_size,
     )

@@ -1119,7 +1127,7 @@ def rl(
     ),
     model: str | None = typer.Argument(
         None,
-        help="Model to train (default: interactive selection)",
+        help="Model to train from https://hud.so/models (default: interactive selection)",
     ),
     config_file: Path | None = typer.Option(  # noqa: B008
         None,

@@ -1159,6 +1167,12 @@
         "--ddp-gpus",
         help="Specific GPUs for DDP (e.g., '0,1,2,3')",
     ),
+    yes: bool = typer.Option(
+        False,
+        "--yes",
+        "-y",
+        help="Auto-accept all prompts and use defaults (lazy mode)",
+    ),
     vllm_gpu: int | None = typer.Option(
         None,
         "--vllm-gpu",

@@ -1180,6 +1194,7 @@
         no_ddp=no_ddp,
         ddp_gpus=ddp_gpus,
         vllm_gpu=vllm_gpu,
+        yes=yes,
     )
 
hud/cli/build.py
CHANGED

@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import asyncio
+import contextlib
 import hashlib
 import subprocess
 import time

@@ -13,6 +14,7 @@ from typing import Any
 import typer
 import yaml
 
+from hud.cli.utils.source_hash import compute_source_hash, list_source_files
 from hud.clients import MCPClient
 from hud.utils.hud_console import HUDConsole
 from hud.version import __version__ as hud_version

@@ -341,10 +343,11 @@
     required_env, optional_env = extract_env_vars_from_dockerfile(dockerfile_path)
 
     # Merge user-provided env vars with detected ones
-    provided_env_vars = {}
+    provided_env_vars: dict[str, str] = {}
     missing_required = []
     if env_vars:
-
+        # Use placeholders in lock file for any provided values to avoid storing secrets
+        provided_env_vars = {k: f"${{{k}}}" for k in env_vars}
         # Track which required vars are still missing
         missing_required = [e for e in required_env if e not in env_vars]
 

@@ -384,6 +387,8 @@
         "hudVersion": hud_version,
         "directory": str(env_dir.name),
         "version": new_version,  # Internal environment version
+        # Fast source fingerprint for change detection
+        "sourceHash": compute_source_hash(env_dir),
     },
     "environment": {
         "initializeMs": analysis["initializeMs"],

@@ -424,6 +429,16 @@
     with open(lock_path, "w") as f:
         yaml.dump(lock_content, f, default_flow_style=False, sort_keys=False)
 
+    # Also write the file list we hashed for transparency (non-essential)
+    with contextlib.suppress(Exception):
+        files = [
+            str(p.resolve().relative_to(env_dir)).replace("\\", "/")
+            for p in list_source_files(env_dir)
+        ]
+        lock_content["build"]["sourceFiles"] = files
+        with open(lock_path, "w") as f:
+            yaml.dump(lock_content, f, default_flow_style=False, sort_keys=False)
+
     hud_console.success("Created lock file: hud.lock.yaml")
 
     # Calculate lock file hash
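
The new hud/cli/utils/source_hash.py module itself is not shown in this diff, but the intent is a deterministic fingerprint over the environment's source tree so the CLI can detect stale builds. A rough sketch of the idea (not the actual implementation; the file-filtering rules are assumptions):

    import hashlib
    from pathlib import Path


    def source_hash_sketch(env_dir: Path) -> str:
        """Hash relative paths + contents of source files, in sorted order,
        so the digest is stable across machines (illustrative only)."""
        digest = hashlib.sha256()
        for path in sorted(env_dir.rglob("*")):
            if path.is_file() and ".git" not in path.parts:
                digest.update(str(path.relative_to(env_dir)).encode())
                digest.update(path.read_bytes())
        return digest.hexdigest()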
hud/cli/dev.py
CHANGED

@@ -530,7 +530,7 @@ async def start_mcp_proxy(
                 stderr=asyncio.subprocess.DEVNULL,
             )
             await stop_result.communicate()
-            hud_console.success("
+            hud_console.success("Container stopped successfully")
             container_stopped = True
         except Exception as e:
             hud_console.warning(f"Failed to stop container: {e}")
hud/cli/eval.py
CHANGED

@@ -5,15 +5,18 @@ from __future__ import annotations
 import asyncio
 import logging
 from pathlib import Path
-from typing import Any, Literal
+from typing import TYPE_CHECKING, Any, Literal
 
 import typer
 
 import hud
+from hud.cli.utils.env_check import ensure_built, find_environment_dir
 from hud.settings import settings
 from hud.utils.group_eval import display_group_statistics, run_tasks_grouped
 from hud.utils.hud_console import HUDConsole
 
+if TYPE_CHECKING:
+    from hud.types import Task
 logger = logging.getLogger(__name__)
 hud_console = HUDConsole()
 

@@ -27,7 +30,7 @@ def get_available_models() -> list[dict[str, str | None]]:
     try:
         from hud.cli.rl import rl_api
 
-        hud_console.info("Fetching your models from https://
+        hud_console.info("Fetching your models from https://hud.so/models")
         models = rl_api.list_models()
 
         # Filter for ready models only and sort by recency

@@ -66,7 +69,7 @@ def get_available_models() -> list[dict[str, str | None]]:
 
 
 def build_agent(
-    agent_type: Literal["claude", "openai", "vllm"],
+    agent_type: Literal["claude", "openai", "vllm", "litellm"],
     *,
     model: str | None = None,
     allowed_tools: list[str] | None = None,

@@ -138,6 +141,22 @@
         else:
             return OperatorAgent(verbose=verbose)
 
+    elif agent_type == "litellm":
+        try:
+            from hud.agents.lite_llm import LiteAgent
+        except ImportError as e:
+            hud_console.error(
+                "LiteLLM agent dependencies are not installed. "
+                "Please install with: pip install 'hud-python[agent]'"
+            )
+            raise typer.Exit(1) from e
+
+        return LiteAgent(
+            model_name=model or "gpt-4o-mini",
+            allowed_tools=allowed_tools,
+            verbose=verbose,
+        )
+
     # Fallback Claude agent (Anthropic)
     try:
         from hud.agents import ClaudeAgent

@@ -166,7 +185,7 @@
 async def run_single_task(
     source: str,
     *,
-    agent_type: Literal["claude", "openai", "vllm"] = "claude",
+    agent_type: Literal["claude", "openai", "vllm", "litellm"] = "claude",
     model: str | None = None,
     allowed_tools: list[str] | None = None,
     max_steps: int = 10,

@@ -192,7 +211,16 @@
         hud_console.info("📊 Loading task file…")
 
         # Use unified loader for both JSON and JSONL
-        tasks = load_tasks(str(path))
+        tasks: list[Task] = load_tasks(str(path))  # type: ignore[assignment]
+
+        # If tasks reference a local environment (nearby), ensure it's built/up-to-date.
+        try:
+            env_dir = find_environment_dir(path)
+            if env_dir is not None:
+                # Non-interactive for eval; warn but don't block
+                ensure_built(env_dir, interactive=True)
+        except Exception as e:
+            hud_console.debug(f"Eval preflight env check skipped: {e}")
 
         # Single task - use the first (and only) task
         task = tasks[0]

@@ -200,7 +228,7 @@
     else:
         # Load from HuggingFace dataset or non-file source
         hud_console.info(f"📊 Loading tasks from: {source}…")
-        tasks = load_tasks(source)
+        tasks: list[Task] = load_tasks(source)  # type: ignore[assignment]
 
     if not tasks:
         hud_console.error(f"No tasks found in: {source}")

@@ -248,6 +276,16 @@
         agent_config = {"verbose": verbose}
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
+    elif agent_type == "litellm":
+        from hud.agents.lite_llm import LiteAgent
+
+        agent_class = LiteAgent
+        agent_config = {
+            "model_name": model or "gpt-4o-mini",
+            "verbose": verbose,
+        }
+        if allowed_tools:
+            agent_config["allowed_tools"] = allowed_tools
     else:
         from hud.agents import ClaudeAgent
 

@@ -292,7 +330,7 @@
 async def run_full_dataset(
     source: str,
     *,
-    agent_type: Literal["claude", "openai", "vllm"] = "claude",
+    agent_type: Literal["claude", "openai", "vllm", "litellm"] = "claude",
     model: str | None = None,
     allowed_tools: list[str] | None = None,
     max_concurrent: int = 30,

@@ -322,7 +360,7 @@
 
     # Load tasks using unified loader
     hud_console.info(f"📊 Loading tasks from: {source}…")
-    tasks = load_tasks(source)
+    tasks: list[Task] = load_tasks(source)  # type: ignore[assignment]
 
     if not tasks:
         hud_console.error(f"No tasks found in: {source}")

@@ -385,6 +423,25 @@
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
 
+    elif agent_type == "litellm":
+        try:
+            from hud.agents.lite_llm import LiteAgent
+
+            agent_class = LiteAgent
+        except ImportError as e:
+            hud_console.error(
+                "LiteLLM agent dependencies are not installed. "
+                "Please install with: pip install 'hud-python[agent]'"
+            )
+            raise typer.Exit(1) from e
+
+        agent_config = {
+            "model_name": model or "gpt-4o-mini",
+            "verbose": verbose,
+        }
+        if allowed_tools:
+            agent_config["allowed_tools"] = allowed_tools
+
     else:
         try:
             from hud.agents import ClaudeAgent

@@ -501,10 +558,10 @@ def eval_command(
         "--full",
         help="Run the entire dataset (omit for single-task debug mode)",
     ),
-    agent: Literal["claude", "openai", "vllm"] = typer.Option(
+    agent: Literal["claude", "openai", "vllm", "litellm"] = typer.Option(
         "claude",
         "--agent",
-        help="Agent backend to use (claude, openai,
+        help="Agent backend to use (claude, openai, vllm for local server, or litellm)",
     ),
     model: str | None = typer.Option(
         None,

@@ -546,6 +603,12 @@
         "--verbose",
         help="Enable verbose output from the agent",
     ),
+    very_verbose: bool = typer.Option(
+        False,
+        "--very-verbose",
+        "-vv",
+        help="Enable debug-level logs for maximum visibility",
+    ),
     vllm_base_url: str | None = typer.Option(
         None,
         "--vllm-base-url",

@@ -595,6 +658,23 @@
     """
     from hud.settings import settings
 
+    if very_verbose:
+        logging.basicConfig(
+            level=logging.DEBUG,
+            format="%(asctime)s - %(name)s - %(message)s",
+            datefmt="%H:%M:%S",
+        )
+        logging.getLogger("hud.agents").setLevel(logging.DEBUG)
+        logging.getLogger("hud.agents.base").setLevel(logging.DEBUG)
+    elif verbose:
+        logging.basicConfig(
+            level=logging.INFO,
+            format="%(asctime)s - %(name)s - %(message)s",
+            datefmt="%H:%M:%S",
+        )
+        logging.getLogger("hud.agents").setLevel(logging.INFO)
+        logging.getLogger("hud.agents.base").setLevel(logging.INFO)
+
     # Check for required API keys
     if agent == "claude":
         if not settings.anthropic_api_key:

@@ -617,7 +697,7 @@
     # Check for HUD_API_KEY if using HUD services
     if not settings.api_key:
         hud_console.warning("HUD_API_KEY not set. Some features may be limited.")
-        hud_console.info("Get your API key at: https://
+        hud_console.info("Get your API key at: https://hud.so")
         hud_console.info("Set it in your environment or run: hud set HUD_API_KEY=your-key-here")
 
     # Parse allowed tools

@@ -642,7 +722,7 @@
             parallel=parallel,
             max_workers=max_workers,
             max_concurrent_per_worker=max_concurrent_per_worker,
-            verbose=verbose,
+            verbose=very_verbose or verbose,
             vllm_base_url=vllm_base_url,
             group_size=group_size,
         )

@@ -655,7 +735,7 @@
             model=model,
             allowed_tools=allowed_tools_list,
             max_steps=max_steps,
-            verbose=verbose,
+            verbose=very_verbose or verbose,
             vllm_base_url=vllm_base_url,
             group_size=group_size,
         )
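
Taken together, the eval changes mean the litellm backend can also be driven programmatically. A hedged sketch (the task file path is a placeholder; the other keyword arguments shown in the signature above keep their defaults):

    import asyncio

    from hud.cli.eval import run_single_task

    asyncio.run(
        run_single_task(
            "tasks.json",          # placeholder task file
            agent_type="litellm",  # new alongside claude/openai/vllm
            model="gpt-4o-mini",   # becomes LiteAgent(model_name=...)
            verbose=True,
        )
    )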