PyPI - hud-python - Versions diffs - 0.4.53__py3-none-any.whl → 0.4.55__py3-none-any.whl - Mend

hud-python 0.4.53__py3-none-any.whl → 0.4.55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (31)

hud/agents/base.py +8 -0
hud/agents/claude.py +4 -3
hud/agents/openai.py +2 -1
hud/agents/openai_chat_generic.py +3 -2
hud/agents/tests/test_claude.py +2 -2
hud/agents/tests/test_openai.py +1 -1
hud/agents/utils.py +50 -0
hud/cli/__init__.py +65 -9
hud/cli/build.py +185 -25
hud/cli/dev.py +130 -40
hud/cli/eval.py +123 -24
hud/cli/flows/dev.py +155 -0
hud/cli/flows/tasks.py +29 -9
hud/cli/tests/test_eval.py +6 -6
hud/cli/utils/docker.py +6 -3
hud/clients/base.py +2 -2
hud/otel/context.py +42 -1
hud/server/server.py +29 -3
hud/settings.py +6 -0
hud/telemetry/async_context.py +16 -2
hud/telemetry/trace.py +6 -1
hud/types.py +10 -0
hud/utils/group_eval.py +14 -2
hud/utils/tests/test_agent_factories.py +2 -1
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.4.53.dist-info → hud_python-0.4.55.dist-info}/METADATA +8 -7
{hud_python-0.4.53.dist-info → hud_python-0.4.55.dist-info}/RECORD +31 -29
{hud_python-0.4.53.dist-info → hud_python-0.4.55.dist-info}/WHEEL +0 -0
{hud_python-0.4.53.dist-info → hud_python-0.4.55.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.53.dist-info → hud_python-0.4.55.dist-info}/licenses/LICENSE +0 -0

hud/agents/base.py CHANGED Viewed

@@ -11,6 +11,7 @@ from typing import TYPE_CHECKING, Any, ClassVar, Literal
 import mcp.types as types
+from hud.agents.utils import log_agent_metadata_to_status, log_task_config_to_current_trace
 from hud.types import AgentResponse, MCPToolCall, MCPToolResult, Trace
 from hud.utils.hud_console import HUDConsole
 from hud.utils.mcp import MCPConfigPatch, patch_mcp_config, setup_hud_telemetry
@@ -62,6 +63,7 @@ class MCPAgent(ABC):
         initial_screenshot: bool = True,
         # Misc
         model_name: str = "mcp-agent",
+        checkpoint_name: str | None = None,
         response_agent: ResponseAgent | None = None,
         auto_trace: bool = True,
         verbose: bool = False,
@@ -92,6 +94,7 @@ class MCPAgent(ABC):
         self._auto_created_client = False  # Track if we created the client
         self.model_name = model_name
+        self.checkpoint_name = checkpoint_name
         self.console = HUDConsole(logger=logger)
         # Set verbose mode if requested
@@ -198,6 +201,8 @@ class MCPAgent(ABC):
             f"Agent initialized with {len(self.get_available_tools())} tools: {', '.join([t.name for t in self.get_available_tools()])}"  # noqa: E501
         )
+        await log_agent_metadata_to_status(self.model_name, self.checkpoint_name)
     async def run(self, prompt_or_task: str | Task | dict[str, Any], max_steps: int = 10) -> Trace:
         """
         Run the agent with the given prompt or task.
@@ -223,6 +228,9 @@ class MCPAgent(ABC):
             # Handle Task objects with full lifecycle
             if isinstance(prompt_or_task, Task):
+                # Log a compact summary of task config to the current trace (async)
+                await log_task_config_to_current_trace(prompt_or_task)
                 return await self.run_task(prompt_or_task, max_steps)
             # Handle simple string prompts

hud/agents/claude.py CHANGED Viewed

@@ -89,7 +89,8 @@ class ClaudeAgent(MCPAgent):
         self.use_computer_beta = use_computer_beta
         self.hud_console = HUDConsole(logger=logger)
-        self.model_name = self.model
+        self.model_name = "Claude"
+        self.checkpoint_name = self.model
         # Track mapping from Claude tool names to MCP tool names
         self._claude_to_mcp_tool_map: dict[str, str] = {}
@@ -98,14 +99,14 @@ class ClaudeAgent(MCPAgent):
         # Append Claude-specific instructions to the base system prompt
         claude_instructions = """
         You are Claude, an AI assistant created by Anthropic. You are helpful, harmless, and honest.
         When working on tasks:
         1. Be thorough and systematic in your approach
         2. Complete tasks autonomously without asking for confirmation
         3. Use available tools efficiently to accomplish your goals
         4. Verify your actions and ensure task completion
         5. Be precise and accurate in all operations
         Remember: You are expected to complete tasks autonomously. The user trusts you to accomplish what they asked.
         """.strip()  # noqa: E501

hud/agents/openai.py CHANGED Viewed

@@ -70,6 +70,7 @@ class OperatorAgent(MCPAgent):
         self.openai_client = model_client
         self.model = model
+        self.checkpoint_name = self.model
         self.environment = environment
         # State tracking for OpenAI's stateful API
@@ -84,7 +85,7 @@ class OperatorAgent(MCPAgent):
             except Exception as e:
                 raise ValueError(f"OpenAI API key is invalid: {e}") from e
-        self.model_name = "openai-" + self.model
+        self.model_name = "Operator"
         # Append OpenAI-specific instructions to the base system prompt
         openai_instructions = """

hud/agents/openai_chat_generic.py CHANGED Viewed

@@ -62,7 +62,8 @@ class GenericOpenAIChatAgent(MCPAgent):
         else:
             raise ValueError("Either openai_client or (api_key and base_url) must be provided")
-        self.model_name = model_name
+        self.model_name = "GenericOpenAI"
+        self.checkpoint_name = model_name
         self.completion_kwargs: dict[str, Any] = completion_kwargs or {}
         self.mcp_schemas = []
         self.hud_console = HUDConsole(logger=logger)
@@ -194,7 +195,7 @@ class GenericOpenAIChatAgent(MCPAgent):
             raise ValueError("openai_client is required for GenericOpenAIChatAgent")
         # default transport = OpenAI SDK
         return await self.oai.chat.completions.create(
-            model=self.model_name,
+            model=self.checkpoint_name,
             messages=messages,
             tools=tools,  # type: ignore ready ChatCompletionToolParam-shaped
             **extra,

hud/agents/tests/test_claude.py CHANGED Viewed

@@ -89,7 +89,7 @@ class TestClaudeAgent:
             validate_api_key=False,  # Skip validation in tests
         )
-        assert agent.model_name == "claude-3-opus-20240229"
+        assert agent.model_name == "Claude"
         assert agent.max_tokens == 1000
         assert agent.anthropic_client == mock_model_client
@@ -103,7 +103,7 @@ class TestClaudeAgent:
                 validate_api_key=False,  # Skip validation in tests
             )
-            assert agent.model_name == "claude-3-opus-20240229"
+            assert agent.model_name == "Claude"
             assert agent.anthropic_client is not None
     @pytest.mark.asyncio

hud/agents/tests/test_openai.py CHANGED Viewed

@@ -50,7 +50,7 @@ class TestOperatorAgent:
             validate_api_key=False,  # Skip validation in tests
         )
-        assert agent.model_name == "openai-gpt-4"
+        assert agent.model_name == "Operator"
         assert agent.model == "gpt-4"
         assert agent.openai_client == mock_model_client

hud/agents/utils.py ADDED Viewed

@@ -0,0 +1,50 @@
+from __future__ import annotations
+import contextlib
+from typing import TYPE_CHECKING
+from hud.otel.context import (
+    _update_task_status_async,
+    get_current_task_run_id,
+)
+if TYPE_CHECKING:
+    from hud.datasets import Task
+async def log_task_config_to_current_trace(task: Task) -> None:
+    with contextlib.suppress(Exception):
+        task_run_id = get_current_task_run_id()
+        if not task_run_id:
+            return
+        raw_config = task.model_dump()
+        await _update_task_status_async(
+            task_run_id,
+            "running",
+            task_id=task.id,
+            extra_metadata={"task_config": raw_config},
+        )
+async def log_agent_metadata_to_status(
+    model_name: str | None = None, checkpoint_name: str | None = None
+) -> None:
+    """Attach agent metadata (model/checkpoint) to current trace status metadata."""
+    with contextlib.suppress(Exception):
+        task_run_id = get_current_task_run_id()
+        if not task_run_id or (not model_name and not checkpoint_name):
+            return
+        agent_meta = {}
+        if model_name is not None:
+            agent_meta["model_name"] = model_name
+        if checkpoint_name is not None:
+            agent_meta["checkpoint_name"] = checkpoint_name
+        await _update_task_status_async(
+            task_run_id,
+            "running",
+            extra_metadata={"agent": agent_meta},
+        )

hud/cli/__init__.py CHANGED Viewed

@@ -12,6 +12,8 @@ from rich.console import Console
 from rich.panel import Panel
 from rich.table import Table
+from hud.types import AgentType
 from . import list_func as list_module
 from .analyze import (
     analyze_environment,
@@ -380,6 +382,11 @@ def dev(
         "--watch",
         help="Additional directories to watch for changes (default: current directory)",
     ),
+    new: bool = typer.Option(
+        False,
+        "--new",
+        help="Show Cursor installation link for new server setup",
+    ),
 ) -> None:
     """🔥 Development mode - run MCP server with hot-reload.
@@ -420,6 +427,7 @@ def dev(
         watch,
         docker=docker,
         docker_args=docker_args,
+        new=new,
     )
@@ -847,7 +855,7 @@ def eval(
     hud_console = HUDConsole()
     if integration_test:
-        agent = "integration_test"
+        agent = AgentType.INTEGRATION_TEST
     # If no source provided, reuse RL helper to find a tasks file interactively
     if source is None:
@@ -894,17 +902,17 @@ def eval(
         # Add standard agent choices
         choices.extend(
             [
-                {"name": "Claude 4 Sonnet", "value": "claude"},
-                {"name": "OpenAI Computer Use", "value": "openai"},
-                {"name": "vLLM (Local Server)", "value": "vllm"},
-                {"name": "LiteLLM (Multi-provider)", "value": "litellm"},
+                {"name": "Claude 4 Sonnet", "value": AgentType.CLAUDE},
+                {"name": "OpenAI Computer Use", "value": AgentType.OPENAI},
+                {"name": "vLLM (Local Server)", "value": AgentType.VLLM},
+                {"name": "LiteLLM (Multi-provider)", "value": AgentType.LITELLM},
             ]
         )
         agent = hud_console.select("Select an agent to use:", choices=choices, default=0)
     # Handle HUD model selection
-    if agent and agent not in ["claude", "openai", "vllm", "litellm", "integration_test"]:
+    if agent and agent not in [e.value for e in AgentType]:
         # Find remote model name
         model = agent
         if not vllm_base_url:
@@ -921,20 +929,23 @@ def eval(
             hud_console.error(f"Model {model} not found")
             raise typer.Exit(1)
         model = base_model
-        agent = "vllm"  # Use vLLM backend for HUD models
+        agent = AgentType.VLLM  # Use vLLM backend for HUD models
         hud_console.info(f"Using HUD model: {model} (trained on {base_model})")
     # Validate agent choice
-    valid_agents = ["claude", "openai", "vllm", "litellm", "integration_test"]
+    valid_agents = [e.value for e in AgentType]
     if agent not in valid_agents:
         hud_console.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
         raise typer.Exit(1)
+    # Type narrowing: agent is now guaranteed to be an AgentType value after validation
+    agent = AgentType(agent)
     # Run the command
     eval_command(
         source=source,
         full=full,
-        agent=agent,  # type: ignore
+        agent=agent,
         model=model,
         allowed_tools=allowed_tools,
         max_concurrent=max_concurrent,
@@ -1074,6 +1085,51 @@ def rl(
     )
+@app.command()
+def convert(
+    tasks_file: str = typer.Argument(
+        ..., help="Path to tasks file (JSON/JSONL) to convert to remote MCP configuration"
+    ),
+) -> None:
+    """Convert local MCP task configs to remote (mcp.hud.so) format.
+    This mirrors the implicit conversion flow used by 'hud rl' and writes a new
+    remote_<name>.json next to the source file when needed.
+    """
+    from pathlib import Path
+    from hud.utils.hud_console import HUDConsole
+    hud_console = HUDConsole()
+    try:
+        from .flows.tasks import convert_tasks_to_remote
+        result_path = convert_tasks_to_remote(tasks_file)
+        # If nothing changed, inform the user
+        try:
+            if Path(result_path).resolve() == Path(tasks_file).resolve():
+                hud_console.success(
+                    "Tasks already reference remote MCP URLs. No conversion needed."
+                )
+                hud_console.hint("You can run them directly with: hud eval <tasks_file> --full")
+                return
+        except Exception as e:
+            # Best effort; continue with success message
+            hud_console.debug(f"Path comparison failed, continuing: {e}")
+        hud_console.success(f"Converted tasks written to: {result_path}")
+        hud_console.hint(
+            "You can now run remote flows: hud rl <converted_file> or hud eval <converted_file>"
+        )
+    except typer.Exit:
+        raise
+    except Exception as e:
+        hud_console.error(f"Failed to convert tasks: {e}")
+        raise typer.Exit(1) from e
 @app.command()
 def set(
     assignments: list[str] = typer.Argument(  # type: ignore[arg-type]  # noqa: B008

hud/cli/build.py CHANGED Viewed

@@ -5,6 +5,8 @@ from __future__ import annotations
 import asyncio
 import contextlib
 import hashlib
+import json
+import re
 import subprocess
 import time
 from datetime import UTC, datetime
@@ -50,6 +52,140 @@ def increment_version(version_str: str, increment_type: str = "patch") -> str:
         return f"{major}.{minor}.{patch + 1}"
+def find_task_files_in_env(env_dir: Path) -> list[Path]:
+    """Find all task files in an environment directory.
+    This looks for .json and .jsonl files that contain task definitions,
+    excluding config files and lock files.
+    Args:
+        env_dir: Environment directory to search
+    Returns:
+        List of task file paths
+    """
+    task_files: list[Path] = []
+    # Find all .json and .jsonl files
+    json_files = list(env_dir.glob("*.json")) + list(env_dir.glob("*.jsonl"))
+    # Filter out config files and lock files
+    for file in json_files:
+        # Skip hidden files, config files, and lock files
+        if (
+            file.name.startswith(".")
+            or file.name == "package.json"
+            or file.name == "tsconfig.json"
+            or file.name == "gcp.json"
+            or file.name.endswith(".lock.json")
+        ):
+            continue
+        # Check if it's a task file by looking for mcp_config
+        try:
+            with open(file, encoding="utf-8") as f:
+                content = json.load(f)
+            # It's a task file if it's a list with mcp_config entries
+            if (
+                isinstance(content, list)
+                and len(content) > 0
+                and any(isinstance(item, dict) and "mcp_config" in item for item in content)
+            ):
+                task_files.append(file)
+        except (json.JSONDecodeError, Exception):  # noqa: S112
+            continue
+    return task_files
+def update_tasks_json_versions(
+    env_dir: Path, base_name: str, old_version: str | None, new_version: str
+) -> list[Path]:
+    """Update image references in tasks.json files to use the new version.
+    Args:
+        env_dir: Environment directory
+        base_name: Base image name (without version)
+        old_version: Previous version (if any)
+        new_version: New version to use
+    Returns:
+        List of updated task files
+    """
+    hud_console = HUDConsole()
+    updated_files: list[Path] = []
+    for task_file in find_task_files_in_env(env_dir):
+        try:
+            with open(task_file, encoding="utf-8") as f:
+                tasks = json.load(f)
+            if not isinstance(tasks, list):
+                continue
+            modified = False
+            # Process each task
+            for task in tasks:
+                if not isinstance(task, dict) or "mcp_config" not in task:
+                    continue
+                mcp_config = task["mcp_config"]
+                # Handle local Docker format
+                if "local" in mcp_config and isinstance(mcp_config["local"], dict):
+                    local_config = mcp_config["local"]
+                    # Check for docker run args
+                    if "args" in local_config and isinstance(local_config["args"], list):
+                        for i, arg in enumerate(local_config["args"]):
+                            # Match image references
+                            if isinstance(arg, str) and (
+                                arg == f"{base_name}:latest"
+                                or (old_version and arg == f"{base_name}:{old_version}")
+                                or re.match(rf"^{re.escape(base_name)}:\d+\.\d+\.\d+$", arg)
+                            ):
+                                # Update to new version
+                                local_config["args"][i] = f"{base_name}:{new_version}"
+                                modified = True
+                # Handle HUD API format (remote MCP)
+                elif "hud" in mcp_config and isinstance(mcp_config["hud"], dict):
+                    hud_config = mcp_config["hud"]
+                    # Check headers for Mcp-Image
+                    if "headers" in hud_config and isinstance(hud_config["headers"], dict):
+                        headers = hud_config["headers"]
+                        if "Mcp-Image" in headers:
+                            image_ref = headers["Mcp-Image"]
+                            # Match various image formats
+                            if isinstance(image_ref, str) and ":" in image_ref:
+                                # Split into image name and tag
+                                image_name, _ = image_ref.rsplit(":", 1)
+                                if (
+                                    image_name == base_name  # Exact match
+                                    or image_name.endswith(f"/{base_name}")  # With prefix
+                                ):
+                                    # Update to new version, preserving the full image path
+                                    headers["Mcp-Image"] = f"{image_name}:{new_version}"
+                                    modified = True
+            # Save the file if modified
+            if modified:
+                with open(task_file, "w") as f:
+                    json.dump(tasks, f, indent=2)
+                updated_files.append(task_file)
+                hud_console.success(f"Updated {task_file.name} with version {new_version}")
+        except Exception as e:
+            hud_console.warning(f"Could not update {task_file.name}: {e}")
+    return updated_files
 def get_existing_version(lock_path: Path) -> str | None:
     """Get the internal version from existing lock file if it exists."""
     if not lock_path.exists():
@@ -386,28 +522,24 @@ def build_environment(
     dockerfile_path = env_dir / "Dockerfile"
     required_env, optional_env = extract_env_vars_from_dockerfile(dockerfile_path)
-    # Merge user-provided env vars with detected ones
-    provided_env_vars: dict[str, str] = {}
-    missing_required = []
-    if env_vars:
-        # Use placeholders in lock file for any provided values to avoid storing secrets
-        provided_env_vars = {k: f"${{{k}}}" for k in env_vars}
-        # Track which required vars are still missing
-        missing_required = [e for e in required_env if e not in env_vars]
-        # Show what env vars were provided
-        hud_console.success(f"Using provided environment variables: {', '.join(env_vars.keys())}")
-    else:
-        missing_required = required_env[:]
+    # Show env vars detected from .env file
+    if env_from_file:
+        hud_console.info(
+            f"Detected environment variables from .env file: {', '.join(sorted(env_from_file.keys()))}"  # noqa: E501
+        )
+    # Create a complete set of all required variables for warning
+    all_required_for_warning = set(required_env)
+    all_required_for_warning.update(env_from_file.keys())
+    # Find which ones are missing (not provided via -e flags)
+    all_missing = all_required_for_warning - set(env_vars.keys() if env_vars else [])
-    # Warn about missing required variables
-    if missing_required:
+    if all_missing:
         hud_console.warning(
-            f"Missing required environment variables: {', '.join(missing_required)}"
-        )
-        hud_console.info(
-            "These can be added to the lock file after build or provided with -e flags"
+            f"Environment variables not provided via -e flags: {', '.join(sorted(all_missing))}"
         )
+        hud_console.info("These will be added to the required list in the lock file")
     # Check for existing version and increment
     lock_path = env_dir / "hud.lock.yaml"
@@ -449,7 +581,13 @@ def build_environment(
     }
     # Add environment variables section if any exist
-    if missing_required or optional_env or provided_env_vars:
+    # Include env vars from .env file as well
+    env_vars_from_file = set(env_from_file.keys()) if env_from_file else set()
+    # Check if we have any env vars to document
+    has_env_vars = bool(required_env or optional_env or env_vars or env_vars_from_file)
+    if has_env_vars:
         lock_content["environment"]["variables"] = {}
         # Add note about editing environment variables
@@ -458,10 +596,21 @@ def build_environment(
             "Provided variables will be used when running the environment."
         )
-        if provided_env_vars:
-            lock_content["environment"]["variables"]["provided"] = provided_env_vars
-        if missing_required:
-            lock_content["environment"]["variables"]["required"] = missing_required
+        # Combine all required variables: from Dockerfile, .env file, and provided vars
+        all_required = set(required_env)
+        # Add all env vars from .env file to required
+        all_required.update(env_vars_from_file)
+        # Add all provided env vars to required
+        if env_vars:
+            all_required.update(env_vars.keys())
+        # Remove any that are optional - they stay in optional
+        all_required = all_required - set(optional_env)
+        if all_required:
+            lock_content["environment"]["variables"]["required"] = sorted(list(all_required))
         if optional_env:
             lock_content["environment"]["variables"]["optional"] = optional_env
@@ -579,6 +728,17 @@ def build_environment(
         local_ref = lock_content.get("images", {}).get("local", version_tag)
         save_to_registry(lock_content, local_ref, verbose)
+    # Update tasks.json files with new version
+    hud_console.progress_message("Updating task files with new version...")
+    updated_task_files = update_tasks_json_versions(
+        env_dir, base_name, existing_version, new_version
+    )
+    if updated_task_files:
+        hud_console.success(f"Updated {len(updated_task_files)} task file(s)")
+    else:
+        hud_console.dim_info("No task files found or updated", value="")
     # Print summary
     hud_console.section_title("Build Complete")
@@ -602,7 +762,7 @@ def build_environment(
     hud_console.section_title("Next Steps")
     hud_console.info("Test locally:")
     hud_console.command_example("hud dev", "Hot-reload development")
-    hud_console.command_example(f"hud run {latest_tag}", "Run the built image")
+    hud_console.command_example(f"hud run {version_tag}", "Run the built image")
     hud_console.info("")
     hud_console.info("Publish to registry:")
     hud_console.command_example("hud push", f"Push as {version_tag}")

hud-python 0.4.53__py3-none-any.whl → 0.4.55__py3-none-any.whl

Potentially problematic release.

hud-python 0.4.53__py3-none-any.whl → 0.4.55__py3-none-any.whl