PyPI - hud-python - Versions diffs - 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (274) hide show

hud/__init__.py +27 -7
hud/agents/__init__.py +11 -5
hud/agents/base.py +220 -500
hud/agents/claude.py +200 -240
hud/agents/gemini.py +275 -0
hud/agents/gemini_cua.py +335 -0
hud/agents/grounded_openai.py +98 -100
hud/agents/misc/integration_test_agent.py +51 -20
hud/agents/misc/response_agent.py +41 -36
hud/agents/openai.py +291 -292
hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
hud/agents/operator.py +211 -0
hud/agents/tests/conftest.py +133 -0
hud/agents/tests/test_base.py +300 -622
hud/agents/tests/test_base_runtime.py +233 -0
hud/agents/tests/test_claude.py +379 -210
hud/agents/tests/test_client.py +9 -10
hud/agents/tests/test_gemini.py +369 -0
hud/agents/tests/test_grounded_openai_agent.py +65 -50
hud/agents/tests/test_openai.py +376 -140
hud/agents/tests/test_operator.py +362 -0
hud/agents/tests/test_run_eval.py +179 -0
hud/cli/__init__.py +461 -545
hud/cli/analyze.py +43 -5
hud/cli/build.py +664 -110
hud/cli/debug.py +8 -5
hud/cli/dev.py +882 -734
hud/cli/eval.py +782 -668
hud/cli/flows/dev.py +167 -0
hud/cli/flows/init.py +191 -0
hud/cli/flows/tasks.py +153 -56
hud/cli/flows/templates.py +151 -0
hud/cli/flows/tests/__init__.py +1 -0
hud/cli/flows/tests/test_dev.py +126 -0
hud/cli/init.py +60 -58
hud/cli/push.py +29 -11
hud/cli/rft.py +311 -0
hud/cli/rft_status.py +145 -0
hud/cli/tests/test_analyze.py +5 -5
hud/cli/tests/test_analyze_metadata.py +3 -2
hud/cli/tests/test_analyze_module.py +120 -0
hud/cli/tests/test_build.py +108 -6
hud/cli/tests/test_build_failure.py +41 -0
hud/cli/tests/test_build_module.py +50 -0
hud/cli/tests/test_cli_init.py +6 -1
hud/cli/tests/test_cli_more_wrappers.py +30 -0
hud/cli/tests/test_cli_root.py +140 -0
hud/cli/tests/test_convert.py +361 -0
hud/cli/tests/test_debug.py +12 -10
hud/cli/tests/test_dev.py +197 -0
hud/cli/tests/test_eval.py +251 -0
hud/cli/tests/test_eval_bedrock.py +51 -0
hud/cli/tests/test_init.py +124 -0
hud/cli/tests/test_main_module.py +11 -5
hud/cli/tests/test_mcp_server.py +12 -100
hud/cli/tests/test_push_happy.py +74 -0
hud/cli/tests/test_push_wrapper.py +23 -0
hud/cli/tests/test_registry.py +1 -1
hud/cli/tests/test_utils.py +1 -1
hud/cli/{rl → utils}/celebrate.py +14 -12
hud/cli/utils/config.py +18 -1
hud/cli/utils/docker.py +130 -4
hud/cli/utils/env_check.py +9 -9
hud/cli/utils/git.py +136 -0
hud/cli/utils/interactive.py +39 -5
hud/cli/utils/metadata.py +69 -0
hud/cli/utils/runner.py +1 -1
hud/cli/utils/server.py +2 -2
hud/cli/utils/source_hash.py +3 -3
hud/cli/utils/tasks.py +4 -1
hud/cli/utils/tests/__init__.py +0 -0
hud/cli/utils/tests/test_config.py +58 -0
hud/cli/utils/tests/test_docker.py +93 -0
hud/cli/utils/tests/test_docker_hints.py +71 -0
hud/cli/utils/tests/test_env_check.py +74 -0
hud/cli/utils/tests/test_environment.py +42 -0
hud/cli/utils/tests/test_git.py +142 -0
hud/cli/utils/tests/test_interactive_module.py +60 -0
hud/cli/utils/tests/test_local_runner.py +50 -0
hud/cli/utils/tests/test_logging_utils.py +23 -0
hud/cli/utils/tests/test_metadata.py +49 -0
hud/cli/utils/tests/test_package_runner.py +35 -0
hud/cli/utils/tests/test_registry_utils.py +49 -0
hud/cli/utils/tests/test_remote_runner.py +25 -0
hud/cli/utils/tests/test_runner_modules.py +52 -0
hud/cli/utils/tests/test_source_hash.py +36 -0
hud/cli/utils/tests/test_tasks.py +80 -0
hud/cli/utils/version_check.py +258 -0
hud/cli/{rl → utils}/viewer.py +2 -2
hud/clients/README.md +12 -11
hud/clients/__init__.py +4 -3
hud/clients/base.py +166 -26
hud/clients/environment.py +51 -0
hud/clients/fastmcp.py +13 -6
hud/clients/mcp_use.py +40 -15
hud/clients/tests/test_analyze_scenarios.py +206 -0
hud/clients/tests/test_protocol.py +9 -3
hud/datasets/__init__.py +23 -20
hud/datasets/loader.py +327 -0
hud/datasets/runner.py +192 -105
hud/datasets/tests/__init__.py +0 -0
hud/datasets/tests/test_loader.py +221 -0
hud/datasets/tests/test_utils.py +315 -0
hud/datasets/utils.py +270 -90
hud/environment/__init__.py +50 -0
hud/environment/connection.py +206 -0
hud/environment/connectors/__init__.py +33 -0
hud/environment/connectors/base.py +68 -0
hud/environment/connectors/local.py +177 -0
hud/environment/connectors/mcp_config.py +109 -0
hud/environment/connectors/openai.py +101 -0
hud/environment/connectors/remote.py +172 -0
hud/environment/environment.py +694 -0
hud/environment/integrations/__init__.py +45 -0
hud/environment/integrations/adk.py +67 -0
hud/environment/integrations/anthropic.py +196 -0
hud/environment/integrations/gemini.py +92 -0
hud/environment/integrations/langchain.py +82 -0
hud/environment/integrations/llamaindex.py +68 -0
hud/environment/integrations/openai.py +238 -0
hud/environment/mock.py +306 -0
hud/environment/router.py +112 -0
hud/environment/scenarios.py +493 -0
hud/environment/tests/__init__.py +1 -0
hud/environment/tests/test_connection.py +317 -0
hud/environment/tests/test_connectors.py +218 -0
hud/environment/tests/test_environment.py +161 -0
hud/environment/tests/test_integrations.py +257 -0
hud/environment/tests/test_local_connectors.py +201 -0
hud/environment/tests/test_scenarios.py +280 -0
hud/environment/tests/test_tools.py +208 -0
hud/environment/types.py +23 -0
hud/environment/utils/__init__.py +35 -0
hud/environment/utils/formats.py +215 -0
hud/environment/utils/schema.py +171 -0
hud/environment/utils/tool_wrappers.py +113 -0
hud/eval/__init__.py +67 -0
hud/eval/context.py +674 -0
hud/eval/display.py +299 -0
hud/eval/instrument.py +185 -0
hud/eval/manager.py +466 -0
hud/eval/parallel.py +268 -0
hud/eval/task.py +340 -0
hud/eval/tests/__init__.py +1 -0
hud/eval/tests/test_context.py +178 -0
hud/eval/tests/test_eval.py +210 -0
hud/eval/tests/test_manager.py +152 -0
hud/eval/tests/test_parallel.py +168 -0
hud/eval/tests/test_task.py +145 -0
hud/eval/types.py +63 -0
hud/eval/utils.py +183 -0
hud/patches/__init__.py +19 -0
hud/patches/mcp_patches.py +151 -0
hud/patches/warnings.py +54 -0
hud/samples/browser.py +4 -4
hud/server/__init__.py +2 -1
hud/server/low_level.py +2 -1
hud/server/router.py +164 -0
hud/server/server.py +567 -80
hud/server/tests/test_mcp_server_integration.py +11 -11
hud/server/tests/test_mcp_server_more.py +1 -1
hud/server/tests/test_server_extra.py +2 -0
hud/settings.py +45 -3
hud/shared/exceptions.py +36 -10
hud/shared/hints.py +26 -1
hud/shared/requests.py +15 -3
hud/shared/tests/test_exceptions.py +40 -31
hud/shared/tests/test_hints.py +167 -0
hud/telemetry/__init__.py +20 -19
hud/telemetry/exporter.py +201 -0
hud/telemetry/instrument.py +158 -253
hud/telemetry/tests/test_eval_telemetry.py +356 -0
hud/telemetry/tests/test_exporter.py +258 -0
hud/telemetry/tests/test_instrument.py +401 -0
hud/tools/__init__.py +16 -2
hud/tools/apply_patch.py +639 -0
hud/tools/base.py +54 -4
hud/tools/bash.py +2 -2
hud/tools/computer/__init__.py +4 -0
hud/tools/computer/anthropic.py +2 -2
hud/tools/computer/gemini.py +385 -0
hud/tools/computer/hud.py +23 -6
hud/tools/computer/openai.py +20 -21
hud/tools/computer/qwen.py +434 -0
hud/tools/computer/settings.py +37 -0
hud/tools/edit.py +3 -7
hud/tools/executors/base.py +4 -2
hud/tools/executors/pyautogui.py +1 -1
hud/tools/grounding/grounded_tool.py +13 -18
hud/tools/grounding/grounder.py +10 -31
hud/tools/grounding/tests/test_grounded_tool.py +26 -44
hud/tools/jupyter.py +330 -0
hud/tools/playwright.py +18 -3
hud/tools/shell.py +308 -0
hud/tools/tests/test_apply_patch.py +718 -0
hud/tools/tests/test_computer.py +4 -9
hud/tools/tests/test_computer_actions.py +24 -2
hud/tools/tests/test_jupyter_tool.py +181 -0
hud/tools/tests/test_shell.py +596 -0
hud/tools/tests/test_submit.py +85 -0
hud/tools/tests/test_types.py +193 -0
hud/tools/types.py +21 -1
hud/types.py +167 -57
hud/utils/__init__.py +2 -0
hud/utils/env.py +67 -0
hud/utils/hud_console.py +61 -3
hud/utils/mcp.py +15 -58
hud/utils/strict_schema.py +162 -0
hud/utils/tests/test_init.py +1 -2
hud/utils/tests/test_mcp.py +1 -28
hud/utils/tests/test_pretty_errors.py +186 -0
hud/utils/tests/test_tool_shorthand.py +154 -0
hud/utils/tests/test_version.py +1 -1
hud/utils/types.py +20 -0
hud/version.py +1 -1
hud_python-0.5.1.dist-info/METADATA +264 -0
hud_python-0.5.1.dist-info/RECORD +299 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
hud/agents/langchain.py +0 -261
hud/agents/lite_llm.py +0 -72
hud/cli/rl/__init__.py +0 -180
hud/cli/rl/config.py +0 -101
hud/cli/rl/display.py +0 -133
hud/cli/rl/gpu.py +0 -63
hud/cli/rl/gpu_utils.py +0 -321
hud/cli/rl/local_runner.py +0 -595
hud/cli/rl/presets.py +0 -96
hud/cli/rl/remote_runner.py +0 -463
hud/cli/rl/rl_api.py +0 -150
hud/cli/rl/vllm.py +0 -177
hud/cli/rl/wait_utils.py +0 -89
hud/datasets/parallel.py +0 -687
hud/misc/__init__.py +0 -1
hud/misc/claude_plays_pokemon.py +0 -292
hud/otel/__init__.py +0 -35
hud/otel/collector.py +0 -142
hud/otel/config.py +0 -181
hud/otel/context.py +0 -570
hud/otel/exporters.py +0 -369
hud/otel/instrumentation.py +0 -135
hud/otel/processors.py +0 -121
hud/otel/tests/__init__.py +0 -1
hud/otel/tests/test_processors.py +0 -197
hud/rl/README.md +0 -30
hud/rl/__init__.py +0 -1
hud/rl/actor.py +0 -176
hud/rl/buffer.py +0 -405
hud/rl/chat_template.jinja +0 -101
hud/rl/config.py +0 -192
hud/rl/distributed.py +0 -132
hud/rl/learner.py +0 -637
hud/rl/tests/__init__.py +0 -1
hud/rl/tests/test_learner.py +0 -186
hud/rl/train.py +0 -382
hud/rl/types.py +0 -101
hud/rl/utils/start_vllm_server.sh +0 -30
hud/rl/utils.py +0 -524
hud/rl/vllm_adapter.py +0 -143
hud/telemetry/job.py +0 -352
hud/telemetry/replay.py +0 -74
hud/telemetry/tests/test_replay.py +0 -40
hud/telemetry/tests/test_trace.py +0 -63
hud/telemetry/trace.py +0 -158
hud/utils/agent_factories.py +0 -86
hud/utils/async_utils.py +0 -65
hud/utils/group_eval.py +0 -223
hud/utils/progress.py +0 -149
hud/utils/tasks.py +0 -127
hud/utils/tests/test_async_utils.py +0 -173
hud/utils/tests/test_progress.py +0 -261
hud_python-0.4.45.dist-info/METADATA +0 -552
hud_python-0.4.45.dist-info/RECORD +0 -228
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0

hud/cli/flows/tasks.py CHANGED Viewed

@@ -4,21 +4,17 @@ import json
 import logging
 import re
 from pathlib import Path
-from typing import TYPE_CHECKING, Any
+from typing import Any
 import typer
 import yaml
 from hud.cli.push import push_environment
 from hud.cli.utils.docker import require_docker_running
-from hud.cli.utils.env_check import ensure_built, find_environment_dir
+from hud.cli.utils.env_check import find_environment_dir
 from hud.cli.utils.registry import extract_name_and_tag
+from hud.datasets import load_tasks
 from hud.utils.hud_console import hud_console
-from hud.utils.tasks import load_tasks
-if TYPE_CHECKING:
-    from hud.types import Task
 logger = logging.getLogger(__name__)
@@ -29,11 +25,11 @@ def _is_remote_url(url: str) -> bool:
     return bool(re.match(r"^(https?:\/\/)?(www\.)?[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,}(\/\S*)?$", url))
-def _validate_tasks(tasks: list[Task]) -> bool:
+def _validate_tasks(tasks: list[dict[str, Any]]) -> bool:
     """Validate the tasks file: return True if tasks already reference a remote MCP URL.
     A task is considered remote if any "url" field anywhere inside mcp_config
-    is a valid remote URL (e.g., https://mcp.hud.so/v3/mcp).
+    is a valid remote URL (e.g., https://mcp.hud.ai/v3/mcp).
     """
     def _has_remote_url(obj: Any) -> bool:
@@ -50,13 +46,15 @@ def _validate_tasks(tasks: list[Task]) -> bool:
         return False
     for task in tasks:
-        cfg = task.mcp_config or {}
+        cfg = task.get("mcp_config") or {}
         if not _has_remote_url(cfg):
             return False
     return True
-def _ensure_pushed(env_dir: Path, lock_data: dict[str, Any]) -> dict[str, Any]:
+def _ensure_pushed(
+    env_dir: Path, lock_data: dict[str, Any], check_docker: bool = True
+) -> dict[str, Any]:
     """Ensure the environment is pushed to a registry; return updated lock data."""
     pushed = bool(lock_data.get("push"))
     if not pushed:
@@ -64,7 +62,8 @@ def _ensure_pushed(env_dir: Path, lock_data: dict[str, Any]) -> dict[str, Any]:
         if not hud_console.confirm("Push to a registry now (runs 'hud push')?", default=True):
             raise typer.Exit(1)
         # Check Docker availability before attempting a push
-        require_docker_running()
+        if check_docker:
+            require_docker_running()
         # If Docker or login is not configured, the push function will fail and halt.
         push_environment(str(env_dir), yes=True)
@@ -78,29 +77,41 @@ def _ensure_pushed(env_dir: Path, lock_data: dict[str, Any]) -> dict[str, Any]:
 def _derive_remote_image(lock_data: dict[str, Any]) -> str:
-    """Derive org/name:tag from lock file for MCP header.
+    """Derive org/name:tag from lock file for remote MCP header.
-    Preference order:
-    1) lock_data["push"]["image_with_tag"] if present
-    2) Derive from lock_data["image"] (may be a digest; falls back to latest)
+    Preference order (new lock first, then legacy):
+    1) lock_data["push"]["image_with_tag"] (exact org/name:tag that was pushed)
+    2) lock_data["images"]["local"] (base name with internal version)
+    3) lock_data["image"] (legacy field; may contain tag or digest)
     """
-    push_info = lock_data.get("push", {}) if isinstance(lock_data, dict) else {}
+    if not isinstance(lock_data, dict):  # Defensive
+        raise typer.Exit(1)
-    # 1) Exact image_with_tag if present
-    pushed_with_tag = str(push_info.get("image_with_tag", "")).strip()
+    # 1) Prefer the exact image that was pushed (org/name:tag)
+    push_info = lock_data.get("push") or {}
+    pushed_with_tag = str(push_info.get("image_with_tag") or "").strip()
     if pushed_with_tag:
         name, tag = extract_name_and_tag(pushed_with_tag)
         return f"{name}:{tag}"
-    # Base name always comes from lock_data.image to preserve org/repo
-    image_ref = str(lock_data.get("image", "")).strip()
-    if not image_ref:
-        raise typer.Exit(1)
-    name, tag = extract_name_and_tag(image_ref)
-    return f"{name}:{tag}"
+    # 2) Fall back to the local tag recorded in the new lock schema
+    images = lock_data.get("images") or {}
+    local_image = str(images.get("local") or "").strip()
+    if local_image:
+        name, tag = extract_name_and_tag(local_image)
+        return f"{name}:{tag}"
+    # 3) Legacy top-level image field
+    legacy_image = str(lock_data.get("image") or "").strip()
+    if legacy_image:
+        name, tag = extract_name_and_tag(legacy_image)
+        return f"{name}:{tag}"
+    # If none of the above exist, we cannot derive an image
+    raise typer.Exit(1)
-def _extract_existing_images(tasks: list[Task]) -> set[str]:
+def _extract_existing_images(tasks: list[dict[str, Any]]) -> set[str]:
     """Extract all Mcp-Image references from tasks."""
     images = set()
@@ -119,8 +130,9 @@ def _extract_existing_images(tasks: list[Task]) -> set[str]:
                 _extract_from_obj(item)
     for task in tasks:
-        if task.mcp_config:
-            _extract_from_obj(task.mcp_config)
+        mcp_config = task.get("mcp_config")
+        if mcp_config:
+            _extract_from_obj(mcp_config)
     return images
@@ -183,6 +195,63 @@ def _extract_dotenv_api_key_vars(env_dir: Path) -> set[str]:
     return detected
+def _extract_env_vars_from_docker_args(args: list[str]) -> set[str]:
+    """Extract environment variable names from docker run arguments.
+    Parses args like: ["run", "--rm", "-i", "-e", "API_KEY=value", "-e", "TOKEN", "image:tag"]
+    Returns set of env var names (not values).
+    """
+    env_vars: set[str] = set()
+    i = 0
+    while i < len(args):
+        arg = args[i]
+        # Check for -e or --env flags
+        if arg in ("-e", "--env"):
+            if i + 1 < len(args):
+                env_spec = args[i + 1]
+                # Could be "KEY=value" or just "KEY"
+                var_name = env_spec.split("=", 1)[0].strip()
+                if var_name:
+                    env_vars.add(var_name)
+                i += 2
+                continue
+        # Check for --env=KEY=value format
+        elif arg.startswith("--env="):
+            env_spec = arg[6:]  # Remove "--env=" prefix
+            var_name = env_spec.split("=", 1)[0].strip()
+            if var_name:
+                env_vars.add(var_name)
+        i += 1
+    env_vars.discard("HUD_API_KEY")
+    return env_vars
+def _extract_vars_from_task_configs(raw_tasks: list[dict[str, Any]]) -> set[str]:
+    """Extract environment variable names from docker run commands in task mcp_configs."""
+    all_env_vars: set[str] = set()
+    for task in raw_tasks:
+        mcp_config = task.get("mcp_config", {})
+        # Iterate through all server configs
+        for server_config in mcp_config.values():
+            if not isinstance(server_config, dict):
+                continue
+            command = server_config.get("command", "")
+            args = server_config.get("args", [])
+            # Only process docker run commands
+            if command == "docker" and "run" in args:
+                env_vars = _extract_env_vars_from_docker_args(args)
+                all_env_vars.update(env_vars)
+    return all_env_vars
 def convert_tasks_to_remote(tasks_file: str) -> str:
     """Convert a local tasks file to remote MCP tasks and return new filename.
@@ -190,17 +259,18 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
     1) Find env dir; ensure built (hud.lock.yaml), otherwise build
     2) Ensure pushed to registry, otherwise push
     3) Check for outdated images in existing task configurations
-    4) Create remote_[tasks].json with mcp_config pointing to mcp.hud.so and Mcp-Image
+    4) Create remote_[tasks].json with mcp_config pointing to mcp.hud.ai and Mcp-Image
     5) Return the new tasks file path
     """
     tasks_path = Path(tasks_file).resolve()
-    # Load validated tasks for decision-making (may resolve env vars)
-    tasks: list[Task] = load_tasks(str(tasks_path))  # type: ignore[assignment]
-    # Load raw tasks to preserve placeholders when writing back to disk
+    # Load raw tasks - we work with dicts directly to preserve placeholders
+    # when writing back to disk (e.g., ${HUD_API_KEY})
     raw_tasks: list[dict[str, Any]] = load_tasks(str(tasks_path), raw=True)  # type: ignore[assignment]
+    # Use the same raw tasks for validation (they have mcp_config structure)
+    tasks = raw_tasks
     # Ensure HUD_API_KEY is available: prefer process env, else load from env_dir/.env
     from hud.settings import settings
@@ -224,9 +294,24 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
         hud_console.hint("Ensure you're in or near your environment folder before running 'hud rl'")
         raise typer.Exit(1)
-    # Ensure built and pushed
-    lock_data = ensure_built(env_dir, interactive=True)
-    lock_data = _ensure_pushed(env_dir, lock_data)
+    # For convert command, we don't need Docker running - just check for lock file
+    # This avoids showing Docker-related messages during conversion
+    lock_path = env_dir / "hud.lock.yaml"
+    if not lock_path.exists():
+        hud_console.error("No hud.lock.yaml found. The environment needs to be built first.")
+        hud_console.info("Run 'hud build' in the environment directory to build it.")
+        raise typer.Exit(1)
+    # Load lock data directly
+    try:
+        with open(lock_path) as f:
+            lock_data: dict[str, Any] = yaml.safe_load(f) or {}
+    except Exception as e:
+        hud_console.error(f"Failed to read hud.lock.yaml: {e}")
+        raise typer.Exit(1) from e
+    # Check if pushed - don't check Docker for convert command
+    lock_data = _ensure_pushed(env_dir, lock_data, check_docker=False)
     # Derive remote image name org/name:tag
     remote_image = _derive_remote_image(lock_data)
@@ -297,20 +382,35 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
         hud_console.success(f"Updated {tasks_path.name} with latest image: {remote_image}")
         return str(tasks_path)
-    # Extract additional API key headers from lock and suggest from .env
+    # Extract environment variables from multiple sources:
+    # 1. Lock file (authoritative for required env vars)
     provided_keys = _extract_api_key_vars(lock_data)
+    # 2. Task configs (docker run -e flags)
+    task_env_vars = _extract_vars_from_task_configs(raw_tasks)
+    # 3. .env file (detect API-like vars)
     dotenv_keys = _extract_dotenv_api_key_vars(env_dir)
-    # If .env contains API-like vars not in lock, offer to include them
-    missing = sorted(dotenv_keys - provided_keys)
+    # Combine: lock file vars + task config vars, then check for missing from .env
+    all_detected = provided_keys | task_env_vars
+    # If .env contains API-like vars not yet included, offer to add them
+    missing = sorted(dotenv_keys - all_detected)
     if missing:
         names_preview = ", ".join(missing)
         prompt = (
             f"Detected env vars in .env that look like API keys: {names_preview}.\n"
             "Include them as remote headers (values will be ${VAR} placeholders)?"
         )
-        if hud_console.confirm(prompt, default=True):
-            provided_keys.update(missing)
+        if not hud_console.confirm(prompt, default=True):
+            # User cancelled - exit without creating the file
+            hud_console.info("Conversion cancelled by user")
+            raise typer.Exit(0)
+        all_detected.update(missing)
+    # Final set of env vars to convert to headers
+    provided_keys = all_detected
     extra_api_key_headers: dict[str, str] = {}
     for var_name in provided_keys:
@@ -344,10 +444,10 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
     tasks_payload: list[dict[str, Any]] = []
     for t in tasks:
         item: dict[str, Any] = {
-            "prompt": t.prompt,
+            "prompt": t.get("prompt"),
             "mcp_config": {
                 "hud": {
-                    "url": "https://mcp.hud.so/v3/mcp",
+                    "url": settings.hud_mcp_url,
                     "headers": {
                         "Authorization": "Bearer ${HUD_API_KEY}",
                         "Mcp-Image": remote_image,
@@ -360,18 +460,16 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
         item["mcp_config"]["hud"]["headers"].update(extra_api_key_headers)
         # Optional fields, omit Nones
-        if t.setup_tool is not None:
-            item["setup_tool"] = _simplify_tool_call(t.setup_tool)
-        if t.evaluate_tool is not None:
-            item["evaluate_tool"] = _simplify_tool_call(t.evaluate_tool)
-        if t.agent_tools is not None:
-            item["agent_tools"] = t.agent_tools
-        if t.system_prompt is not None:
-            item["system_prompt"] = t.system_prompt
-        if t.metadata:
-            item["metadata"] = t.metadata
-        if t.id is not None:
-            item["id"] = t.id
+        if t.get("setup_tool") is not None:
+            item["setup_tool"] = _simplify_tool_call(t["setup_tool"])
+        if t.get("evaluate_tool") is not None:
+            item["evaluate_tool"] = _simplify_tool_call(t["evaluate_tool"])
+        if t.get("agent_config") is not None:
+            item["agent_config"] = t["agent_config"]
+        if t.get("metadata"):
+            item["metadata"] = t["metadata"]
+        if t.get("id") is not None:
+            item["id"] = t["id"]
         tasks_payload.append(item)
@@ -382,6 +480,5 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
         f.write("\n")
     hud_console.success(f"Created remote tasks file: {remote_path.name}")
-    hud_console.hint("Proceeding with RL training on the remote environment")
     return str(remote_path)

hud/cli/flows/templates.py ADDED Viewed

@@ -0,0 +1,151 @@
+"""Templates for hud init command."""
+DOCKERFILE_HUD = """\
+FROM python:3.11-slim
+RUN apt-get update && apt-get install -y --no-install-recommends curl \\
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+COPY pyproject.toml uv.lock* ./
+RUN pip install uv && uv sync --frozen --no-dev 2>/dev/null || uv sync --no-dev
+COPY . .
+# Most of the time this command should not change, except if you change your env path
+# or launch some other service before running the environment
+CMD ["uv", "run", "python", "-m", "hud", "dev", "env:env", "--stdio"]
+"""
+# fmt: off
+ENV_PY = '''\
+"""{env_name} - HUD Environment"""
+import asyncio
+import hud
+from hud.settings import settings
+from openai import AsyncOpenAI, Omit
+from hud.environment import Environment
+env = Environment("{env_name}")
+# =============================================================================
+# 1. TOOLS - Functions the agent can call
+# =============================================================================
+@env.tool()
+def count_letter(text: str, letter: str) -> int:
+    """Count occurrences of a letter in text."""
+    return text.lower().count(letter.lower())
+# =============================================================================
+# 2. SCRIPTS - Define prompts and evaluation logic
+# =============================================================================
+@env.scenario("count")
+async def count_script(sentence: str, letter: str, fmt: str = "integer"):
+    """Agent must count a letter. We check if they got it right."""
+    # Yield the prompt, receive the agent's final answer
+    answer = yield f"How many times does '{{letter}}' appear in: '{{sentence}}'? Format: {{fmt}}."
+    # Score: 1.0 if correct, 0.0 otherwise
+    correct = str(sentence.lower().count(letter.lower()))
+    yield correct in answer
+# =============================================================================
+# 3. CONNECT EXISTING SERVERS (optional)
+# =============================================================================
+# --- FastAPI app ---
+# from my_app import app
+# env.connect_fastapi(app)
+# --- FastMCP / MCPServer ---
+# from my_server import mcp
+# env.connect_server(mcp)
+# --- OpenAPI spec (URL or file path) ---
+# env.connect_openapi("https://api.example.com/openapi.json")
+# --- MCP config (stdio or SSE) ---
+# env.connect_mcp_config({{
+#     "my-server": {{"command": "uvx", "args": ["some-mcp-server"]}}
+# }})
+# --- HUD hub (requires deployment, see below) ---
+# env.connect_hub("my-org/my-env", prefix="remote")
+# =============================================================================
+# TEST - Run with: python env.py
+# =============================================================================
+async def test():
+    client = AsyncOpenAI(
+        base_url=settings.hud_gateway_url,
+        api_key=settings.api_key,
+    )
+    # Create a task from the scenario
+    task = env("count", sentence="Strawberry world", letter="r")
+    # Test with and without tools
+    async with hud.eval(task, variants={{"tools": [True, False]}}) as ctx:
+        response = await client.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[{{"role": "user", "content": ctx.prompt}}],
+            tools=ctx.as_openai_chat_tools() if ctx.variants["tools"] else Omit(),
+        )
+        # Handle tool calls if present
+        message = response.choices[0].message
+        if message.tool_calls:
+            result = await ctx.call_tool(message.tool_calls[0])
+            answer = str(result["content"])
+        else:
+            answer = message.content
+        await ctx.submit(answer or "")
+if __name__ == "__main__":
+    asyncio.run(test())
+# =============================================================================
+# DEPLOYMENT
+# =============================================================================
+# To deploy this environment on HUD:
+#
+# 1. Push this repo to GitHub
+# 2. Go to hud.ai -> New -> Environment
+# 3. Choose "From GitHub URL" and paste your repo URL
+# 4. This deploys the environment for remote connection
+#
+# Once deployed, connect to it from other environments:
+#   env.connect_hub("{env_name}")
+#
+# Remote deployment enables:
+# - Parallelized evaluations (run many agents simultaneously)
+# - Training data collection at scale
+# - Shared environments across team members
+#
+# Note: The test() function above is just for local testing.
+# It's not required for the deployed environment.
+'''
+# fmt: on
+PYPROJECT_TOML = """\
+[project]
+name = "{name}"
+version = "0.1.0"
+requires-python = ">=3.10"
+dependencies = ["hud-python", "openai"]
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+"""

hud/cli/flows/tests/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""Tests for CLI flows."""

hud/cli/flows/tests/test_dev.py ADDED Viewed

@@ -0,0 +1,126 @@
+"""Tests for CLI flows dev module."""
+from __future__ import annotations
+import base64
+import json
+from unittest import mock
+import pytest
+from hud.cli.flows.dev import generate_cursor_deeplink
+class TestGenerateCursorDeeplink:
+    """Test Cursor deeplink generation."""
+    def test_generate_deeplink_basic(self):
+        """Test basic deeplink generation."""
+        result = generate_cursor_deeplink("my-server", 8000)
+        assert result.startswith("cursor://anysphere.cursor-deeplink/mcp/install?")
+        assert "name=my-server" in result
+        assert "config=" in result
+    def test_generate_deeplink_config_content(self):
+        """Test that config contains correct URL."""
+        result = generate_cursor_deeplink("test-server", 9999)
+        # Extract and decode the config
+        config_part = result.split("config=")[1]
+        decoded = base64.b64decode(config_part).decode()
+        config = json.loads(decoded)
+        assert config["url"] == "http://localhost:9999/mcp"
+    def test_generate_deeplink_different_ports(self):
+        """Test deeplink generation with different ports."""
+        result_8000 = generate_cursor_deeplink("server", 8000)
+        result_3000 = generate_cursor_deeplink("server", 3000)
+        # Decode configs
+        config_8000 = json.loads(base64.b64decode(result_8000.split("config=")[1]))
+        config_3000 = json.loads(base64.b64decode(result_3000.split("config=")[1]))
+        assert "8000" in config_8000["url"]
+        assert "3000" in config_3000["url"]
+    def test_generate_deeplink_special_characters_in_name(self):
+        """Test deeplink with special characters in server name."""
+        # Server name with special characters should still work
+        result = generate_cursor_deeplink("my-cool_server.v2", 8000)
+        assert "name=my-cool_server.v2" in result
+class TestCreateDynamicTrace:
+    """Test dynamic trace creation."""
+    @pytest.mark.asyncio
+    @mock.patch("hud.cli.flows.dev.make_request")
+    @mock.patch("hud.cli.utils.git.get_git_info")
+    @mock.patch("hud.cli.flows.dev.settings")
+    async def test_create_dynamic_trace_success(self, mock_settings, mock_git, mock_request):
+        """Test successful trace creation."""
+        from hud.cli.flows.dev import create_dynamic_trace
+        mock_settings.hud_api_url = "https://api.hud.ai"
+        mock_settings.api_key = "test-key"
+        mock_git.return_value = {"remote_url": "https://github.com/user/repo"}
+        mock_request.return_value = {"id": "trace-123"}
+        trace_id, url = await create_dynamic_trace(
+            mcp_config={"server": {"url": "http://localhost:8000"}},
+            build_status=True,
+            environment_name="test-env",
+        )
+        assert trace_id == "trace-123"
+        assert url == "https://hud.ai/trace/trace-123"
+        mock_request.assert_called_once()
+    @pytest.mark.asyncio
+    @mock.patch("hud.cli.flows.dev.make_request")
+    @mock.patch("hud.cli.utils.git.get_git_info")
+    @mock.patch("hud.cli.flows.dev.settings")
+    async def test_create_dynamic_trace_no_git(self, mock_settings, mock_git, mock_request):
+        """Test trace creation without git info."""
+        from hud.cli.flows.dev import create_dynamic_trace
+        mock_settings.hud_api_url = "https://api.hud.ai"
+        mock_settings.api_key = "test-key"
+        mock_git.return_value = {}  # No remote_url
+        mock_request.return_value = {"id": "trace-456"}
+        trace_id, _ = await create_dynamic_trace(
+            mcp_config={"server": {"url": "http://localhost:8000"}},
+            build_status=False,
+            environment_name="test-env",
+        )
+        assert trace_id == "trace-456"
+        # Verify git_info was not included in payload
+        call_args = mock_request.call_args
+        assert "git_info" not in call_args.kwargs.get("json", {})
+    @pytest.mark.asyncio
+    @mock.patch("hud.cli.flows.dev.make_request")
+    @mock.patch("hud.cli.utils.git.get_git_info")
+    @mock.patch("hud.cli.flows.dev.settings")
+    async def test_create_dynamic_trace_api_error(self, mock_settings, mock_git, mock_request):
+        """Test trace creation when API fails."""
+        from hud.cli.flows.dev import create_dynamic_trace
+        mock_settings.hud_api_url = "https://api.hud.ai"
+        mock_settings.api_key = "test-key"
+        mock_git.return_value = {}
+        mock_request.side_effect = Exception("API Error")
+        trace_id, url = await create_dynamic_trace(
+            mcp_config={"server": {}},
+            build_status=True,
+            environment_name="test-env",
+        )
+        assert trace_id is None
+        assert url is None

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl