hud-python 0.4.51__py3-none-any.whl → 0.4.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +13 -1
- hud/agents/base.py +14 -3
- hud/agents/lite_llm.py +1 -1
- hud/agents/openai_chat_generic.py +15 -3
- hud/agents/tests/test_base.py +9 -2
- hud/agents/tests/test_base_runtime.py +164 -0
- hud/cli/__init__.py +18 -25
- hud/cli/build.py +35 -27
- hud/cli/dev.py +11 -29
- hud/cli/eval.py +114 -145
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +26 -3
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +134 -0
- hud/cli/tests/test_eval.py +4 -0
- hud/cli/tests/test_mcp_server.py +8 -7
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/utils/docker.py +120 -1
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +257 -0
- hud/clients/base.py +1 -1
- hud/clients/mcp_use.py +3 -1
- hud/datasets/parallel.py +2 -2
- hud/datasets/runner.py +85 -24
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_runner.py +106 -0
- hud/datasets/tests/test_utils.py +228 -0
- hud/otel/config.py +8 -6
- hud/otel/context.py +4 -4
- hud/otel/exporters.py +231 -57
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_instrumentation.py +207 -0
- hud/rl/learner.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/shared/exceptions.py +35 -9
- hud/shared/hints.py +25 -0
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +39 -30
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +30 -6
- hud/telemetry/async_context.py +331 -0
- hud/telemetry/job.py +51 -12
- hud/telemetry/tests/test_async_context.py +242 -0
- hud/telemetry/tests/test_instrument.py +414 -0
- hud/telemetry/tests/test_job.py +609 -0
- hud/telemetry/tests/test_trace.py +184 -6
- hud/telemetry/trace.py +16 -17
- hud/tools/computer/qwen.py +4 -1
- hud/tools/computer/settings.py +2 -2
- hud/tools/executors/base.py +4 -2
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/types.py +7 -1
- hud/utils/agent_factories.py +1 -3
- hud/utils/mcp.py +1 -1
- hud/utils/task_tracking.py +223 -0
- hud/utils/tests/test_agent_factories.py +60 -0
- hud/utils/tests/test_mcp.py +4 -6
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tasks.py +187 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/METADATA +48 -48
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/RECORD +88 -47
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/WHEEL +0 -0
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/licenses/LICENSE +0 -0
hud/cli/eval.py
CHANGED
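The bulk of this change consolidates the duplicated vLLM agent setup into a single _build_vllm_config helper, adds "validate_api_key": False to the Claude/OpenAI configs, and drops the process-based --parallel path in favor of plain asyncio concurrency. As a rough sketch (not part of the diff itself), the helper returns a plain dict that is splatted into GenericOpenAIChatAgent; the values below mirror the defaults visible in the diff, assuming no --vllm-base-url is passed:

    # Illustrative only: config produced by _build_vllm_config(None, None, None, False)
    config = {
        "api_key": "token-abc123",               # placeholder key for a local vLLM server
        "base_url": "http://localhost:8000/v1",  # default when no --vllm-base-url is given
        "model_name": "served-model",            # fallback model name
        "verbose": False,
        "completion_kwargs": {"temperature": 0.7, "max_tokens": 2048, "tool_choice": "auto"},
    }
    agent = GenericOpenAIChatAgent(**config)     # how build_agent consumes the dict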
@@ -68,6 +68,50 @@ def get_available_models() -> list[dict[str, str | None]]:
     return []
 
 
+def _build_vllm_config(
+    vllm_base_url: str | None,
+    model: str | None,
+    allowed_tools: list[str] | None,
+    verbose: bool,
+) -> dict[str, Any]:
+    """Build configuration for vLLM agent.
+
+    Args:
+        vllm_base_url: Optional base URL for vLLM server
+        model: Model name to use
+        allowed_tools: Optional list of allowed tools
+        verbose: Enable verbose output
+
+    Returns:
+        Dictionary with agent configuration
+    """
+    # Determine base URL and API key
+    if vllm_base_url is not None:
+        base_url = vllm_base_url
+        api_key = settings.api_key if base_url.startswith(settings.hud_rl_url) else "token-abc123"
+        hud_console.info(f"Using vLLM server at {base_url}")
+    else:
+        base_url = "http://localhost:8000/v1"
+        api_key = "token-abc123"
+
+    config: dict[str, Any] = {
+        "api_key": api_key,
+        "base_url": base_url,
+        "model_name": model or "served-model",
+        "verbose": verbose,
+        "completion_kwargs": {
+            "temperature": 0.7,
+            "max_tokens": 2048,
+            "tool_choice": "auto",
+        },
+    }
+
+    if allowed_tools:
+        config["allowed_tools"] = allowed_tools
+
+    return config
+
+
 def build_agent(
     agent_type: Literal["claude", "openai", "vllm", "litellm", "integration_test"],
     *,
@@ -86,8 +130,6 @@ def build_agent(
     elif agent_type == "vllm":
         # Create a generic OpenAI agent for vLLM server
         try:
-            from openai import AsyncOpenAI
-
             from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
         except ImportError as e:
             hud_console.error(
@@ -96,36 +138,14 @@
             )
             raise typer.Exit(1) from e
 
-        #
-
-
-
-
-            api_key = (
-                settings.api_key if base_url.startswith(settings.hud_rl_url) else "token-abc123"
-            )
-        else:
-            # Default to localhost
-            base_url = "http://localhost:8000/v1"
-            api_key = "token-abc123"
-
-        # Create OpenAI client for vLLM
-        openai_client = AsyncOpenAI(
-            base_url=base_url,
-            api_key=api_key,
-            timeout=30.0,
-        )
-
-        return GenericOpenAIChatAgent(
-            openai_client=openai_client,
-            model_name=model or "served-model",  # Default model name
+        # Use the shared config builder
+        config = _build_vllm_config(
+            vllm_base_url=vllm_base_url,
+            model=model,
+            allowed_tools=allowed_tools,
             verbose=verbose,
-            completion_kwargs={
-                "temperature": 0.7,
-                "max_tokens": 2048,
-                "tool_choice": "required",  # if self.actor_config.force_tool_choice else "auto",
-            },
         )
+        return GenericOpenAIChatAgent(**config)
 
     elif agent_type == "openai":
         try:
@@ -257,25 +277,17 @@ async def run_single_task(
         agent_config["allowed_tools"] = allowed_tools
     elif agent_type == "vllm":
         # Special handling for vLLM
-
-
+        from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
+
+        agent_class = GenericOpenAIChatAgent
+
+        # Use the shared config builder
+        agent_config = _build_vllm_config(
+            vllm_base_url=vllm_base_url,
            model=model,
            allowed_tools=allowed_tools,
            verbose=verbose,
-            vllm_base_url=vllm_base_url,
         )
-        agent_config = {
-            "openai_client": sample_agent.oai,
-            "model_name": sample_agent.model_name,
-            "verbose": verbose,
-            "completion_kwargs": sample_agent.completion_kwargs,
-        }
-        if allowed_tools:
-            agent_config["allowed_tools"] = allowed_tools
-
-        from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
-
-        agent_class = GenericOpenAIChatAgent
     elif agent_type == "openai":
         from hud.agents import OperatorAgent
 
@@ -300,6 +312,7 @@ async def run_single_task(
         agent_config = {
             "model": model or "claude-sonnet-4-20250514",
             "verbose": verbose,
+            "validate_api_key": False,
         }
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
@@ -345,24 +358,18 @@ async def run_full_dataset(
     allowed_tools: list[str] | None = None,
     max_concurrent: int = 30,
     max_steps: int = 10,
-    parallel: bool = False,
-    max_workers: int | None = None,
-    max_concurrent_per_worker: int = 25,
     verbose: bool = False,
     vllm_base_url: str | None = None,
     group_size: int = 1,
 ) -> list[Any]:
-    """Run evaluation across the entire dataset.
-
-    Uses either asyncio-based run_dataset or process-based parallel execution
-    depending on the parallel flag."""
+    """Run evaluation across the entire dataset using asyncio-based concurrency."""
 
     # Provide early feedback to user
     hud_console.info("🔧 Initializing evaluation...")
 
     # Import run_dataset lazily
     try:
-        from hud.datasets import run_dataset
+        from hud.datasets import run_dataset
         from hud.utils.tasks import load_tasks
     except ImportError as e:
         hud_console.error(
@@ -387,6 +394,7 @@ async def run_full_dataset(
     dataset_name = f"Dataset: {path.name}" if path.exists() else source.split("/")[-1]
 
     # Build agent class + config for run_dataset
+    agent_config: dict[str, Any]
     if agent_type == "integration_test":  # --integration-test mode
         from hud.agents.misc.integration_test_agent import IntegrationTestRunner
 
@@ -404,24 +412,13 @@
         )
         raise typer.Exit(1) from e
 
-        # Use
-
-
+        # Use the shared config builder
+        agent_config = _build_vllm_config(
+            vllm_base_url=vllm_base_url,
            model=model,
            allowed_tools=allowed_tools,
            verbose=verbose,
-            vllm_base_url=vllm_base_url,
         )
-
-        # Extract the config from the sample agent
-        agent_config: dict[str, Any] = {
-            "openai_client": sample_agent.oai,
-            "model_name": sample_agent.model_name,
-            "verbose": verbose,
-            "completion_kwargs": sample_agent.completion_kwargs,
-        }
-        if allowed_tools:
-            agent_config["allowed_tools"] = allowed_tools
     elif agent_type == "openai":
         try:
             from hud.agents import OperatorAgent
@@ -434,7 +431,7 @@
             )
             raise typer.Exit(1) from e
 
-        agent_config = {"verbose": verbose}
+        agent_config = {"verbose": verbose, "validate_api_key": False}
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
 
@@ -472,6 +469,7 @@
         agent_config = {
             "model": model or "claude-sonnet-4-20250514",
             "verbose": verbose,
+            "validate_api_key": False,
         }
         if allowed_tools:
             agent_config["allowed_tools"] = allowed_tools
@@ -505,9 +503,7 @@
         agent_class=agent_class,
         agent_config=agent_config,
         group_size=group_size,
-        max_parallel_episodes=max_concurrent
-        if not parallel
-        else max_concurrent_per_worker * (max_workers or 4),
+        max_parallel_episodes=max_concurrent,
         max_steps=max_steps,
         verbose=verbose,
         job_id=job.id,
@@ -519,48 +515,18 @@
         # Return stats for consistency with other modes
         return stats
 
-    #
-
-
-
-
-
-
-
-
-
-
-            max_concurrent=max_concurrent,
-            metadata={"dataset": source, "parallel": True},
-            max_steps=max_steps,
-            auto_respond=True,
-        )
-    else:
-        # Use manual configuration
-        return await run_dataset_parallel_manual(
-            name=f"Evaluation {dataset_name}",
-            dataset=dataset_or_tasks,
-            agent_class=agent_class,
-            agent_config=agent_config,
-            max_workers=max_workers,
-            max_concurrent_per_worker=max_concurrent_per_worker,
-            max_concurrent=max_concurrent,
-            metadata={"dataset": source, "parallel": True},
-            max_steps=max_steps,
-            auto_respond=True,
-        )
-    else:
-        hud_console.info(f"🚀 Running evaluation (max_concurrent: {max_concurrent})…")
-        return await run_dataset(
-            name=f"Evaluation {dataset_name}",
-            dataset=dataset_or_tasks,
-            agent_class=agent_class,
-            agent_config=agent_config,
-            max_concurrent=max_concurrent,
-            metadata={"dataset": source},
-            max_steps=max_steps,
-        )
+    # Run evaluation with asyncio-based concurrency
+    hud_console.info(f"🚀 Running evaluation (max_concurrent: {max_concurrent})…")
+    return await run_dataset(
+        name=f"Evaluation {dataset_name}",
+        dataset=dataset_or_tasks,
+        agent_class=agent_class,
+        agent_config=agent_config,
+        max_concurrent=max_concurrent,
+        metadata={"dataset": source},
+        max_steps=max_steps,
+        auto_respond=True,
+    )
 
 
 def eval_command(
@@ -591,31 +557,20 @@ def eval_command(
     max_concurrent: int = typer.Option(
         30,
         "--max-concurrent",
-        help=
+        help=(
+            "Maximum concurrent tasks (1-200 recommended, prevents rate limits "
+            "and resource exhaustion)"
+        ),
     ),
     max_steps: int | None = typer.Option(
         None,
         "--max-steps",
         help="Maximum steps per task (default: 10 for single, 50 for full)",
     ),
-    parallel: bool = typer.Option(
-        False,
-        "--parallel",
-        help="Use process-based parallel execution for large datasets (100+ tasks)",
-    ),
-    max_workers: int | None = typer.Option(
-        None,
-        "--max-workers",
-        help="Number of worker processes for parallel mode (auto-optimized if not set)",
-    ),
-    max_concurrent_per_worker: int = typer.Option(
-        20,
-        "--max-concurrent-per-worker",
-        help="Maximum concurrent tasks per worker in parallel mode",
-    ),
     verbose: bool = typer.Option(
         False,
         "--verbose",
+        "-v",
         help="Enable verbose output from the agent",
     ),
     very_verbose: bool = typer.Option(
@@ -650,23 +605,20 @@ def eval_command(
         # Evaluate a single task from SheetBench
         hud eval hud-evals/SheetBench-50
 
-        # Evaluate the FULL SheetBench dataset with Claude
+        # Evaluate the FULL SheetBench dataset with Claude
         hud eval hud-evals/SheetBench-50 --full --agent claude
 
-        # Run
-        hud eval hud-evals/OSWorld-Verified-Gold --full --
+        # Run with higher concurrency for faster evaluation
+        hud eval hud-evals/OSWorld-Verified-Gold --full --max-concurrent 100
 
-        #
-        hud eval hud-evals/
-
-        # Limit total concurrent tasks to prevent rate limits
-        hud eval hud-evals/SheetBench-50 --full --parallel --max-concurrent 20
+        # Limit concurrent tasks to prevent rate limits
+        hud eval hud-evals/SheetBench-50 --full --max-concurrent 20
 
         # Run a single task from a JSON file
         hud eval task.json
 
-        # Run multiple tasks from a JSON file
-        hud eval tasks.json --full
+        # Run multiple tasks from a JSON file
+        hud eval tasks.json --full
 
         # Run with OpenAI Operator agent
         hud eval hud-evals/OSWorld-Gold-Beta --agent openai
@@ -680,8 +632,6 @@ def eval_command(
         # Run with verbose output for debugging
         hud eval task.json --verbose
     """
-    from hud.settings import settings
-
     # Always configure basic logging so agent steps can be logged
     # Set to INFO by default for consistency with run_evaluation.py
     if very_verbose:
@@ -736,7 +686,11 @@ def eval_command(
 
     # Run evaluation
     if full:
-
+        import time
+
+        start_time = time.time()
+
+        results = asyncio.run(
             run_full_dataset(
                 source,
                 agent_type=agent,
@@ -744,14 +698,29 @@
                 allowed_tools=allowed_tools_list,
                 max_concurrent=max_concurrent,
                 max_steps=max_steps,
-                parallel=parallel,
-                max_workers=max_workers,
-                max_concurrent_per_worker=max_concurrent_per_worker,
                 verbose=very_verbose or verbose,
                 vllm_base_url=vllm_base_url,
                 group_size=group_size,
             )
         )
+
+        elapsed = time.time() - start_time
+
+        # Print statistics (only for non-grouped mode)
+        if group_size == 1 and results:
+            hud_console.info("\n" + "=" * 50)
+            hud_console.success("📊 Evaluation Complete!")
+            hud_console.info("=" * 50)
+            hud_console.info(f"Total tasks: {len(results)}")
+            hud_console.info(f"Time elapsed: {elapsed:.2f} seconds")
+            hud_console.info(f"Throughput: {len(results) / elapsed:.2f} tasks/second")
+            hud_console.info(f"Execution mode: ASYNCIO (max_concurrent: {max_concurrent})")
+
+            # Count successes
+            successful = sum(1 for r in results if getattr(r, "reward", 0) > 0.7)
+            success_rate = 100 * successful / len(results)
+            hud_console.info(f"Successful tasks: {successful}/{len(results)} ({success_rate:.1f}%)")
+            hud_console.info("=" * 50)
     else:
         asyncio.run(
             run_single_task(
hud/cli/tests/test_analyze_module.py
ADDED
@@ -0,0 +1,120 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from hud.cli.analyze import (
+    analyze_environment,
+    analyze_environment_from_config,
+    analyze_environment_from_mcp_config,
+    display_interactive,
+    display_markdown,
+    parse_docker_command,
+)
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+
+# Mark entire module as asyncio to ensure async tests run with pytest-asyncio
+pytestmark = pytest.mark.asyncio
+
+
+def test_parse_docker_command():
+    cmd = ["docker", "run", "--rm", "-i", "img"]
+    cfg = parse_docker_command(cmd)
+    assert cfg == {"local": {"command": "docker", "args": ["run", "--rm", "-i", "img"]}}
+
+
+@pytest.mark.asyncio
+@patch("hud.cli.analyze.MCPClient")
+@patch("hud.cli.analyze.console")
+async def test_analyze_environment_success_json(mock_console, MockClient):
+    client = AsyncMock()
+    client.initialize.return_value = None
+    client.analyze_environment.return_value = {"tools": [], "resources": []}
+    client.shutdown.return_value = None
+    MockClient.return_value = client
+
+    await analyze_environment(["docker", "run", "img"], output_format="json", verbose=False)
+    assert client.initialize.awaited
+    assert client.analyze_environment.awaited
+    assert client.shutdown.awaited
+    assert mock_console.print_json.called
+
+
+@pytest.mark.asyncio
+@patch("hud.cli.analyze.MCPClient")
+@patch("hud.cli.analyze.console")
+async def test_analyze_environment_failure(mock_console, MockClient):
+    client = AsyncMock()
+    client.initialize.side_effect = RuntimeError("boom")
+    client.shutdown.return_value = None
+    MockClient.return_value = client
+
+    # Should swallow exception and return without raising
+    await analyze_environment(["docker", "run", "img"], output_format="json", verbose=True)
+    assert client.shutdown.awaited
+    assert mock_console.print_json.called is False
+
+
+def test_display_interactive_metadata_only(monkeypatch):
+    import hud.cli.analyze as mod
+
+    monkeypatch.setattr(mod, "console", MagicMock(), raising=False)
+    monkeypatch.setattr(mod, "hud_console", MagicMock(), raising=False)
+
+    analysis = {
+        "image": "img:latest",
+        "status": "cached",
+        "tool_count": 2,
+        "tools": [
+            {"name": "t1", "description": "d1", "inputSchema": {"type": "object"}},
+            {"name": "t2", "description": "d2"},
+        ],
+        "resources": [],
+    }
+    display_interactive(analysis)
+
+
+def test_display_markdown_both_paths(capsys):
+    # metadata-only
+    md_only = {"image": "img:latest", "tool_count": 0, "tools": [], "resources": []}
+    display_markdown(md_only)
+
+    # live metadata
+    live = {"metadata": {"servers": ["s1"], "initialized": True}, "tools": [], "resources": []}
+    display_markdown(live)
+
+    # Check that output was generated
+    captured = capsys.readouterr()
+    assert "MCP Environment Analysis" in captured.out
+
+
+@patch("hud.cli.analyze.MCPClient")
+async def test_analyze_environment_from_config(MockClient, tmp_path: Path):
+    client = AsyncMock()
+    client.initialize.return_value = None
+    client.analyze_environment.return_value = {"tools": [], "resources": []}
+    client.shutdown.return_value = None
+    MockClient.return_value = client
+
+    cfg = tmp_path / "mcp.json"
+    cfg.write_text('{"local": {"command": "docker", "args": ["run", "img"]}}')
+    await analyze_environment_from_config(cfg, output_format="json", verbose=False)
+    assert client.initialize.awaited and client.shutdown.awaited
+
+
+@patch("hud.cli.analyze.MCPClient")
+async def test_analyze_environment_from_mcp_config(MockClient):
+    client = AsyncMock()
+    client.initialize.return_value = None
+    client.analyze_environment.return_value = {"tools": [], "resources": []}
+    client.shutdown.return_value = None
+    MockClient.return_value = client
+
+    mcp_config = {"local": {"command": "docker", "args": ["run", "img"]}}
+    await analyze_environment_from_mcp_config(mcp_config, output_format="json", verbose=False)
+    assert client.initialize.awaited and client.shutdown.awaited
hud/cli/tests/test_build.py
CHANGED
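The updated build tests mock the client's analyze_environment call and expect the lock file to group image references under an images key. A hedged sketch of the shape the assertions below check for (only the asserted keys; the real hud.lock.yaml contains more fields, and the tool entries here are hypothetical):

    # Shape implied by the lock-file assertions in test_build.py
    lock_data = {
        "images": {
            "full": "test-env:0.1.0@sha256:abc123",  # tag pinned to the image digest
            "local": "test-env:0.1.0",               # plain local tag
        },
        "build": {"version": "0.1.0"},
        "environment": {"toolCount": 2},
        "tools": [{"name": "tool_a"}, {"name": "tool_b"}],  # hypothetical entries
    }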
@@ -219,6 +219,17 @@ class TestAnalyzeMcpEnvironment:
         mock_tool.description = "Test tool"
         mock_tool.inputSchema = {"type": "object"}
 
+        # Prefer analyze_environment path (aligns with analyze CLI tests)
+        mock_client.analyze_environment = mock.AsyncMock(
+            return_value={
+                "metadata": {"servers": ["local"], "initialized": True},
+                "tools": [{"name": "test_tool", "description": "Test tool"}],
+                "hub_tools": {},
+                "resources": [],
+                "telemetry": {},
+            }
+        )
+        # Fallback still defined for completeness
         mock_client.list_tools.return_value = [mock_tool]
 
         result = await analyze_mcp_environment("test:latest")
@@ -237,7 +248,9 @@ class TestAnalyzeMcpEnvironment:
         mock_client_class.return_value = mock_client
         mock_client.initialize.side_effect = ConnectionError("Connection failed")
 
-
+        from hud.shared.exceptions import HudException
+
+        with pytest.raises(HudException, match="Connection failed"):
             await analyze_mcp_environment("test:latest")
 
     @mock.patch("hud.cli.build.MCPClient")
@@ -245,6 +258,15 @@ class TestAnalyzeMcpEnvironment:
         """Test analysis in verbose mode."""
         mock_client = mock.AsyncMock()
         mock_client_class.return_value = mock_client
+        mock_client.analyze_environment = mock.AsyncMock(
+            return_value={
+                "metadata": {"servers": ["local"], "initialized": True},
+                "tools": [],
+                "hub_tools": {},
+                "resources": [],
+                "telemetry": {},
+            }
+        )
         mock_client.list_tools.return_value = []
 
         # Just test that it runs without error in verbose mode
@@ -363,7 +385,7 @@ ENV API_KEY
         mock_run.return_value = mock_result
 
         # Run build
-        build_environment(str(env_dir), "test
+        build_environment(str(env_dir), "test-env:latest")
 
         # Check lock file was created
         lock_file = env_dir / "hud.lock.yaml"
@@ -373,7 +395,8 @@ ENV API_KEY
         with open(lock_file) as f:
             lock_data = yaml.safe_load(f)
 
-        assert lock_data["
+        assert lock_data["images"]["full"] == "test-env:0.1.0@sha256:abc123"
+        assert lock_data["images"]["local"] == "test-env:0.1.0"
         assert lock_data["build"]["version"] == "0.1.0"
         assert lock_data["environment"]["toolCount"] == 2
         assert len(lock_data["tools"]) == 2
hud/cli/tests/test_build_failure.py
ADDED
@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from unittest.mock import patch
+
+import pytest
+import typer
+
+from hud.cli.build import build_environment
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+
+@patch("hud.cli.build.compute_source_hash", return_value="deadbeef")
+@patch(
+    "hud.cli.build.analyze_mcp_environment",
+    return_value={"initializeMs": 10, "toolCount": 0, "tools": []},
+)
+@patch("hud.cli.build.build_docker_image", return_value=True)
+def test_build_label_rebuild_failure(_bd, _an, _hash, tmp_path: Path, monkeypatch):
+    # Minimal environment dir
+    env = tmp_path / "env"
+    env.mkdir()
+    (env / "Dockerfile").write_text("FROM python:3.11")
+
+    # Ensure subprocess.run returns non-zero for the second build (label build)
+    import types
+
+    def run_side_effect(cmd, *a, **k):
+        # Return 0 for first docker build, 1 for label build
+        if isinstance(cmd, list) and cmd[:2] == ["docker", "build"] and "--label" in cmd:
+            return types.SimpleNamespace(returncode=1, stderr="boom")
+        return types.SimpleNamespace(returncode=0, stdout="")
+
+    monkeypatch.setenv("FASTMCP_DISABLE_BANNER", "1")
+    with (
+        patch("hud.cli.build.subprocess.run", side_effect=run_side_effect),
+        pytest.raises(typer.Exit),
+    ):
+        build_environment(str(env), verbose=False)
hud/cli/tests/test_build_module.py
ADDED
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from unittest import mock
+
+from hud.cli.build import (
+    extract_env_vars_from_dockerfile,
+    get_docker_image_digest,
+    get_docker_image_id,
+)
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+
+def test_extract_env_vars_from_dockerfile_complex(tmp_path: Path):
+    dockerfile = tmp_path / "Dockerfile"
+    dockerfile.write_text(
+        """
+FROM python:3.11
+ARG BUILD_TOKEN
+ARG DEFAULTED=1
+ENV RUNTIME_KEY
+ENV FROM_ARG=$BUILD_TOKEN
+ENV WITH_DEFAULT=val
+"""
+    )
+    required, optional = extract_env_vars_from_dockerfile(dockerfile)
+    # BUILD_TOKEN required (ARG without default)
+    assert "BUILD_TOKEN" in required
+    # RUNTIME_KEY required (ENV without value)
+    assert "RUNTIME_KEY" in required
+    # FROM_ARG references BUILD_TOKEN -> required
+    assert "FROM_ARG" in required
+    # DEFAULTED and WITH_DEFAULT should not be marked required by default
+    assert "DEFAULTED" not in required
+    assert "WITH_DEFAULT" not in required
+    assert optional == []
+
+
+@mock.patch("subprocess.run")
+def test_get_docker_image_digest_none(mock_run):
+    mock_run.return_value = mock.Mock(stdout="[]", returncode=0)
+    assert get_docker_image_digest("img") is None
+
+
+@mock.patch("subprocess.run")
+def test_get_docker_image_id_ok(mock_run):
+    mock_run.return_value = mock.Mock(stdout="sha256:abc", returncode=0)
+    assert get_docker_image_id("img") == "sha256:abc"