PyPI - hud-python - Versions diffs - 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (274) hide show

hud/__init__.py +27 -7
hud/agents/__init__.py +11 -5
hud/agents/base.py +220 -500
hud/agents/claude.py +200 -240
hud/agents/gemini.py +275 -0
hud/agents/gemini_cua.py +335 -0
hud/agents/grounded_openai.py +98 -100
hud/agents/misc/integration_test_agent.py +51 -20
hud/agents/misc/response_agent.py +41 -36
hud/agents/openai.py +291 -292
hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
hud/agents/operator.py +211 -0
hud/agents/tests/conftest.py +133 -0
hud/agents/tests/test_base.py +300 -622
hud/agents/tests/test_base_runtime.py +233 -0
hud/agents/tests/test_claude.py +379 -210
hud/agents/tests/test_client.py +9 -10
hud/agents/tests/test_gemini.py +369 -0
hud/agents/tests/test_grounded_openai_agent.py +65 -50
hud/agents/tests/test_openai.py +376 -140
hud/agents/tests/test_operator.py +362 -0
hud/agents/tests/test_run_eval.py +179 -0
hud/cli/__init__.py +461 -545
hud/cli/analyze.py +43 -5
hud/cli/build.py +664 -110
hud/cli/debug.py +8 -5
hud/cli/dev.py +882 -734
hud/cli/eval.py +782 -668
hud/cli/flows/dev.py +167 -0
hud/cli/flows/init.py +191 -0
hud/cli/flows/tasks.py +153 -56
hud/cli/flows/templates.py +151 -0
hud/cli/flows/tests/__init__.py +1 -0
hud/cli/flows/tests/test_dev.py +126 -0
hud/cli/init.py +60 -58
hud/cli/push.py +29 -11
hud/cli/rft.py +311 -0
hud/cli/rft_status.py +145 -0
hud/cli/tests/test_analyze.py +5 -5
hud/cli/tests/test_analyze_metadata.py +3 -2
hud/cli/tests/test_analyze_module.py +120 -0
hud/cli/tests/test_build.py +108 -6
hud/cli/tests/test_build_failure.py +41 -0
hud/cli/tests/test_build_module.py +50 -0
hud/cli/tests/test_cli_init.py +6 -1
hud/cli/tests/test_cli_more_wrappers.py +30 -0
hud/cli/tests/test_cli_root.py +140 -0
hud/cli/tests/test_convert.py +361 -0
hud/cli/tests/test_debug.py +12 -10
hud/cli/tests/test_dev.py +197 -0
hud/cli/tests/test_eval.py +251 -0
hud/cli/tests/test_eval_bedrock.py +51 -0
hud/cli/tests/test_init.py +124 -0
hud/cli/tests/test_main_module.py +11 -5
hud/cli/tests/test_mcp_server.py +12 -100
hud/cli/tests/test_push_happy.py +74 -0
hud/cli/tests/test_push_wrapper.py +23 -0
hud/cli/tests/test_registry.py +1 -1
hud/cli/tests/test_utils.py +1 -1
hud/cli/{rl → utils}/celebrate.py +14 -12
hud/cli/utils/config.py +18 -1
hud/cli/utils/docker.py +130 -4
hud/cli/utils/env_check.py +9 -9
hud/cli/utils/git.py +136 -0
hud/cli/utils/interactive.py +39 -5
hud/cli/utils/metadata.py +69 -0
hud/cli/utils/runner.py +1 -1
hud/cli/utils/server.py +2 -2
hud/cli/utils/source_hash.py +3 -3
hud/cli/utils/tasks.py +4 -1
hud/cli/utils/tests/__init__.py +0 -0
hud/cli/utils/tests/test_config.py +58 -0
hud/cli/utils/tests/test_docker.py +93 -0
hud/cli/utils/tests/test_docker_hints.py +71 -0
hud/cli/utils/tests/test_env_check.py +74 -0
hud/cli/utils/tests/test_environment.py +42 -0
hud/cli/utils/tests/test_git.py +142 -0
hud/cli/utils/tests/test_interactive_module.py +60 -0
hud/cli/utils/tests/test_local_runner.py +50 -0
hud/cli/utils/tests/test_logging_utils.py +23 -0
hud/cli/utils/tests/test_metadata.py +49 -0
hud/cli/utils/tests/test_package_runner.py +35 -0
hud/cli/utils/tests/test_registry_utils.py +49 -0
hud/cli/utils/tests/test_remote_runner.py +25 -0
hud/cli/utils/tests/test_runner_modules.py +52 -0
hud/cli/utils/tests/test_source_hash.py +36 -0
hud/cli/utils/tests/test_tasks.py +80 -0
hud/cli/utils/version_check.py +258 -0
hud/cli/{rl → utils}/viewer.py +2 -2
hud/clients/README.md +12 -11
hud/clients/__init__.py +4 -3
hud/clients/base.py +166 -26
hud/clients/environment.py +51 -0
hud/clients/fastmcp.py +13 -6
hud/clients/mcp_use.py +40 -15
hud/clients/tests/test_analyze_scenarios.py +206 -0
hud/clients/tests/test_protocol.py +9 -3
hud/datasets/__init__.py +23 -20
hud/datasets/loader.py +327 -0
hud/datasets/runner.py +192 -105
hud/datasets/tests/__init__.py +0 -0
hud/datasets/tests/test_loader.py +221 -0
hud/datasets/tests/test_utils.py +315 -0
hud/datasets/utils.py +270 -90
hud/environment/__init__.py +50 -0
hud/environment/connection.py +206 -0
hud/environment/connectors/__init__.py +33 -0
hud/environment/connectors/base.py +68 -0
hud/environment/connectors/local.py +177 -0
hud/environment/connectors/mcp_config.py +109 -0
hud/environment/connectors/openai.py +101 -0
hud/environment/connectors/remote.py +172 -0
hud/environment/environment.py +694 -0
hud/environment/integrations/__init__.py +45 -0
hud/environment/integrations/adk.py +67 -0
hud/environment/integrations/anthropic.py +196 -0
hud/environment/integrations/gemini.py +92 -0
hud/environment/integrations/langchain.py +82 -0
hud/environment/integrations/llamaindex.py +68 -0
hud/environment/integrations/openai.py +238 -0
hud/environment/mock.py +306 -0
hud/environment/router.py +112 -0
hud/environment/scenarios.py +493 -0
hud/environment/tests/__init__.py +1 -0
hud/environment/tests/test_connection.py +317 -0
hud/environment/tests/test_connectors.py +218 -0
hud/environment/tests/test_environment.py +161 -0
hud/environment/tests/test_integrations.py +257 -0
hud/environment/tests/test_local_connectors.py +201 -0
hud/environment/tests/test_scenarios.py +280 -0
hud/environment/tests/test_tools.py +208 -0
hud/environment/types.py +23 -0
hud/environment/utils/__init__.py +35 -0
hud/environment/utils/formats.py +215 -0
hud/environment/utils/schema.py +171 -0
hud/environment/utils/tool_wrappers.py +113 -0
hud/eval/__init__.py +67 -0
hud/eval/context.py +674 -0
hud/eval/display.py +299 -0
hud/eval/instrument.py +185 -0
hud/eval/manager.py +466 -0
hud/eval/parallel.py +268 -0
hud/eval/task.py +340 -0
hud/eval/tests/__init__.py +1 -0
hud/eval/tests/test_context.py +178 -0
hud/eval/tests/test_eval.py +210 -0
hud/eval/tests/test_manager.py +152 -0
hud/eval/tests/test_parallel.py +168 -0
hud/eval/tests/test_task.py +145 -0
hud/eval/types.py +63 -0
hud/eval/utils.py +183 -0
hud/patches/__init__.py +19 -0
hud/patches/mcp_patches.py +151 -0
hud/patches/warnings.py +54 -0
hud/samples/browser.py +4 -4
hud/server/__init__.py +2 -1
hud/server/low_level.py +2 -1
hud/server/router.py +164 -0
hud/server/server.py +567 -80
hud/server/tests/test_mcp_server_integration.py +11 -11
hud/server/tests/test_mcp_server_more.py +1 -1
hud/server/tests/test_server_extra.py +2 -0
hud/settings.py +45 -3
hud/shared/exceptions.py +36 -10
hud/shared/hints.py +26 -1
hud/shared/requests.py +15 -3
hud/shared/tests/test_exceptions.py +40 -31
hud/shared/tests/test_hints.py +167 -0
hud/telemetry/__init__.py +20 -19
hud/telemetry/exporter.py +201 -0
hud/telemetry/instrument.py +158 -253
hud/telemetry/tests/test_eval_telemetry.py +356 -0
hud/telemetry/tests/test_exporter.py +258 -0
hud/telemetry/tests/test_instrument.py +401 -0
hud/tools/__init__.py +16 -2
hud/tools/apply_patch.py +639 -0
hud/tools/base.py +54 -4
hud/tools/bash.py +2 -2
hud/tools/computer/__init__.py +4 -0
hud/tools/computer/anthropic.py +2 -2
hud/tools/computer/gemini.py +385 -0
hud/tools/computer/hud.py +23 -6
hud/tools/computer/openai.py +20 -21
hud/tools/computer/qwen.py +434 -0
hud/tools/computer/settings.py +37 -0
hud/tools/edit.py +3 -7
hud/tools/executors/base.py +4 -2
hud/tools/executors/pyautogui.py +1 -1
hud/tools/grounding/grounded_tool.py +13 -18
hud/tools/grounding/grounder.py +10 -31
hud/tools/grounding/tests/test_grounded_tool.py +26 -44
hud/tools/jupyter.py +330 -0
hud/tools/playwright.py +18 -3
hud/tools/shell.py +308 -0
hud/tools/tests/test_apply_patch.py +718 -0
hud/tools/tests/test_computer.py +4 -9
hud/tools/tests/test_computer_actions.py +24 -2
hud/tools/tests/test_jupyter_tool.py +181 -0
hud/tools/tests/test_shell.py +596 -0
hud/tools/tests/test_submit.py +85 -0
hud/tools/tests/test_types.py +193 -0
hud/tools/types.py +21 -1
hud/types.py +167 -57
hud/utils/__init__.py +2 -0
hud/utils/env.py +67 -0
hud/utils/hud_console.py +61 -3
hud/utils/mcp.py +15 -58
hud/utils/strict_schema.py +162 -0
hud/utils/tests/test_init.py +1 -2
hud/utils/tests/test_mcp.py +1 -28
hud/utils/tests/test_pretty_errors.py +186 -0
hud/utils/tests/test_tool_shorthand.py +154 -0
hud/utils/tests/test_version.py +1 -1
hud/utils/types.py +20 -0
hud/version.py +1 -1
hud_python-0.5.1.dist-info/METADATA +264 -0
hud_python-0.5.1.dist-info/RECORD +299 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
hud/agents/langchain.py +0 -261
hud/agents/lite_llm.py +0 -72
hud/cli/rl/__init__.py +0 -180
hud/cli/rl/config.py +0 -101
hud/cli/rl/display.py +0 -133
hud/cli/rl/gpu.py +0 -63
hud/cli/rl/gpu_utils.py +0 -321
hud/cli/rl/local_runner.py +0 -595
hud/cli/rl/presets.py +0 -96
hud/cli/rl/remote_runner.py +0 -463
hud/cli/rl/rl_api.py +0 -150
hud/cli/rl/vllm.py +0 -177
hud/cli/rl/wait_utils.py +0 -89
hud/datasets/parallel.py +0 -687
hud/misc/__init__.py +0 -1
hud/misc/claude_plays_pokemon.py +0 -292
hud/otel/__init__.py +0 -35
hud/otel/collector.py +0 -142
hud/otel/config.py +0 -181
hud/otel/context.py +0 -570
hud/otel/exporters.py +0 -369
hud/otel/instrumentation.py +0 -135
hud/otel/processors.py +0 -121
hud/otel/tests/__init__.py +0 -1
hud/otel/tests/test_processors.py +0 -197
hud/rl/README.md +0 -30
hud/rl/__init__.py +0 -1
hud/rl/actor.py +0 -176
hud/rl/buffer.py +0 -405
hud/rl/chat_template.jinja +0 -101
hud/rl/config.py +0 -192
hud/rl/distributed.py +0 -132
hud/rl/learner.py +0 -637
hud/rl/tests/__init__.py +0 -1
hud/rl/tests/test_learner.py +0 -186
hud/rl/train.py +0 -382
hud/rl/types.py +0 -101
hud/rl/utils/start_vllm_server.sh +0 -30
hud/rl/utils.py +0 -524
hud/rl/vllm_adapter.py +0 -143
hud/telemetry/job.py +0 -352
hud/telemetry/replay.py +0 -74
hud/telemetry/tests/test_replay.py +0 -40
hud/telemetry/tests/test_trace.py +0 -63
hud/telemetry/trace.py +0 -158
hud/utils/agent_factories.py +0 -86
hud/utils/async_utils.py +0 -65
hud/utils/group_eval.py +0 -223
hud/utils/progress.py +0 -149
hud/utils/tasks.py +0 -127
hud/utils/tests/test_async_utils.py +0 -173
hud/utils/tests/test_progress.py +0 -261
hud_python-0.4.45.dist-info/METADATA +0 -552
hud_python-0.4.45.dist-info/RECORD +0 -228
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0

hud/cli/init.py CHANGED Viewed

@@ -14,24 +14,29 @@ import typer
 from hud.utils.hud_console import HUDConsole
-# Presets mapping to environment folders in public SDK repo
+# Presets mapping to public GitHub repositories under hud-evals org
 GITHUB_OWNER = "hud-evals"
-GITHUB_REPO = "hud-python"
 GITHUB_BRANCH = "main"
 PRESET_MAP: dict[str, str | None] = {
-    "blank": "blank",
-    "deep-research": "deepresearch",
-    "browser": "browser",
+    "blank": "hud-blank",
+    "deep-research": "hud-deepresearch",
+    "browser": "hud-browser",
+    "rubrics": "hud-rubrics",
+    "verilog-coding-template": "verilog-coding-template",
+    "data-science-template": "data-science-template",
 }
 SKIP_DIR_NAMES = {"node_modules", "__pycache__", "dist", "build", ".next", ".git"}
 # Files that need placeholder replacement
 PLACEHOLDER_FILES = {
-    "pyproject.toml",
+    "server/pyproject.toml",
+    "environment/pyproject.toml",
+    "server/main.py",
+    "server/README.md",
+    "environment/README.md",
     "tasks.json",
-    "src/controller/server.py",
     "test_env.ipynb",
     "README.md",
 }
@@ -48,7 +53,7 @@ def _replace_placeholders(target_dir: Path, env_name: str) -> list[str]:
         List of files that were modified
     """
     modified_files = []
-    placeholder = "test_test"
+    placeholder = "blank"  # Placeholder used in blank environment template
     # Normalize environment name for use in code/configs
     # Replace spaces and special chars with underscores for Python identifiers
@@ -86,8 +91,11 @@ def _prompt_for_preset() -> str:
     try:
         choices = [
             {"name": "blank", "message": "blank"},
-            {"name": "deep-research", "message": "deep-research"},
             {"name": "browser", "message": "browser"},
+            {"name": "deep-research", "message": "deep-research"},
+            {"name": "rubrics", "message": "rubrics"},
+            {"name": "verilog-coding-template", "message": "verilog-coding-template"},
+            {"name": "data-science-template", "message": "data-science-template"},
         ]
         display_choices = [c["message"] for c in choices]
         selected = questionary.select(
@@ -103,10 +111,10 @@ def _prompt_for_preset() -> str:
         return "blank"
-def _download_tarball_subdir(
-    owner: str, repo: str, ref: str, subdir: str, dest_dir: Path, files_created: list[str]
+def _download_tarball_repo(
+    owner: str, repo: str, ref: str, dest_dir: Path, files_created: list[str]
 ) -> None:
-    """Download a GitHub tarball and extract only a subdirectory."""
+    """Download a GitHub tarball and extract the entire repository."""
     tarball_url = f"https://codeload.github.com/{owner}/{repo}/tar.gz/{ref}"
     token = os.getenv("GITHUB_TOKEN")
@@ -135,16 +143,17 @@ def _download_tarball_subdir(
             if not members:
                 return
             top = members[0].name.split("/", 1)[0]
-            target_prefix = f"{top}/environments/{subdir.strip('/')}"
             for member in members:
                 name = member.name
-                if not (name == target_prefix or name.startswith(target_prefix + "/")):
+                if name == top:
+                    continue
+                if not name.startswith(top + "/"):
                     continue
-                rel_path = name[len(target_prefix) :].lstrip("/")
+                rel_path = name[len(top) + 1 :]
                 if not rel_path:
-                    dest_dir.mkdir(parents=True, exist_ok=True)
                     continue
                 out_path = (dest_dir / rel_path).resolve()
@@ -177,21 +186,21 @@ def create_environment(
     hud_console = HUDConsole()
-    # Determine environment name/target directory
-    if name is None:
-        current_dir = Path.cwd()
-        name = current_dir.name
-        target_dir = current_dir
-        hud_console.info(f"Using current directory name: {name}")
-    else:
-        target_dir = Path(directory) / name
     # Choose preset
     preset_normalized = (preset or "").strip().lower() if preset else _prompt_for_preset()
+    # If no name is provided, use the preset name as the environment name
+    if name is None:
+        name = preset_normalized
+        hud_console.info(f"Using preset name as environment name: {name}")
+    # Always create a new directory based on the name
+    target_dir = Path.cwd() / name if directory == "." else Path(directory) / name
     if preset_normalized not in PRESET_MAP:
+        available = ", ".join(sorted(PRESET_MAP.keys()))
         hud_console.warning(
-            f"Unknown preset '{preset_normalized}', defaulting to 'blank' "
-            "(available: blank, deep-research, browser)"
+            f"Unknown preset '{preset_normalized}', defaulting to 'blank' (available: {available})"
         )
         preset_normalized = "blank"
@@ -205,17 +214,14 @@ def create_environment(
             hud_console.warning(f"Overwriting existing files in {target_dir}")
     # Download preset from GitHub
-    env_folder = PRESET_MAP[preset_normalized]
-    if env_folder is None:
-        hud_console.error("Internal error: preset mapping missing folder name")
+    repo_name = PRESET_MAP[preset_normalized]
+    if repo_name is None:
+        hud_console.error("Internal error: preset mapping missing repo name")
         raise typer.Exit(1)
     hud_console.header(f"Initializing HUD Environment: {name} (preset: {preset_normalized})")
-    hud_console.section_title("Downloading template from public SDK")
-    source_url = (
-        f"https://github.com/{GITHUB_OWNER}/{GITHUB_REPO}/tree/"
-        f"{GITHUB_BRANCH}/environments/{env_folder}"
-    )
+    hud_console.section_title("Downloading template from GitHub")
+    source_url = f"https://github.com/{GITHUB_OWNER}/{repo_name}"
     hud_console.info("Source: " + source_url)
     target_dir.mkdir(parents=True, exist_ok=True)
@@ -223,11 +229,10 @@ def create_environment(
     started = time.time()
     files_created_dl: list[str] = []
     try:
-        _download_tarball_subdir(
+        _download_tarball_repo(
             owner=GITHUB_OWNER,
-            repo=GITHUB_REPO,
+            repo=repo_name,
             ref=GITHUB_BRANCH,
-            subdir=env_folder,
             dest_dir=target_dir,
             files_created=files_created_dl,
         )
@@ -240,31 +245,28 @@ def create_environment(
         f"Downloaded {len(files_created_dl)} files in {duration_ms} ms into {target_dir}"
     )
-    # Replace placeholders in template files
-    hud_console.section_title("Customizing template files")
-    modified_files = _replace_placeholders(target_dir, name)
-    if modified_files:
-        hud_console.success(f"Replaced placeholders in {len(modified_files)} files:")
-        for file in modified_files[:5]:  # Show first 5 files
-            hud_console.status_item(file, "updated")
-        if len(modified_files) > 5:
-            hud_console.info(f"... and {len(modified_files) - 5} more files")
-    else:
-        hud_console.info("No placeholder replacements needed")
+    # Replace placeholders in template files (only for blank preset)
+    if preset_normalized == "blank":
+        hud_console.section_title("Customizing template files")
+        modified_files = _replace_placeholders(target_dir, name)
+        if modified_files:
+            hud_console.success(f"Replaced placeholders in {len(modified_files)} files:")
+            for file in modified_files[:5]:  # Show first 5 files
+                hud_console.status_item(file, "updated")
+            if len(modified_files) > 5:
+                hud_console.info(f"... and {len(modified_files) - 5} more files")
+        else:
+            hud_console.info("No placeholder replacements needed")
     hud_console.section_title("Top-level files and folders")
     for entry in sorted(os.listdir(target_dir)):
         hud_console.status_item(entry, "added")
     hud_console.section_title("Next steps")
-    if target_dir == Path.cwd():
-        hud_console.info("1. Start development server (with MCP inspector):")
-        hud_console.command_example("hud dev --inspector")
-    else:
-        hud_console.info("1. Enter the directory:")
-        hud_console.command_example(f"cd {target_dir}")
-        hud_console.info("\n2. Start development server (with MCP inspector):")
-        hud_console.command_example("hud dev --inspector")
+    # Since we now almost always create a new directory, show cd command
+    hud_console.info("1. Enter the directory:")
+    hud_console.command_example(f"cd {target_dir.name}")
+    hud_console.info("\n2. Start development server (with MCP inspector):")
+    hud_console.command_example("hud dev --inspector")
     hud_console.info("\n3. Review the README in this preset for specific instructions.")
     hud_console.info("\n4. Customize as needed.")

hud/cli/push.py CHANGED Viewed

@@ -152,7 +152,7 @@ def push_environment(
         hud_console.error("No HUD API key found")
         hud_console.warning("A HUD API key is required to push environments.")
         hud_console.info("\nTo get started:")
-        hud_console.info("1. Get your API key at: https://hud.so/settings")
+        hud_console.info("1. Get your API key at: https://hud.ai/settings")
         hud_console.info("Set it in your environment or run: hud set HUD_API_KEY=your-key-here")
         hud_console.command_example("hud push", "Try again")
         hud_console.info("")
@@ -163,10 +163,7 @@ def push_environment(
         lock_data = yaml.safe_load(f)
     # Handle both old and new lock file formats
-    local_image = lock_data.get("image", "")
-    if not local_image and "build" in lock_data:
-        # New format might have image elsewhere
-        local_image = lock_data.get("image", "")
+    local_image = lock_data.get("images", {}).get("local") or lock_data.get("image", "")
     # Get internal version from lock file
     internal_version = lock_data.get("build", {}).get("version", None)
@@ -293,7 +290,7 @@ def push_environment(
     # Push the image
     hud_console.progress_message(f"Pushing {image} to registry...")
-    # Show push output
+    # Show push output (filtered for cleaner display)
     process = subprocess.Popen(  # noqa: S603
         ["docker", "push", image],  # noqa: S607
         stdout=subprocess.PIPE,
@@ -303,8 +300,27 @@ def push_environment(
         errors="replace",
     )
+    # Filter output to only show meaningful progress
+    layers_pushed = 0
     for line in process.stdout or []:
-        hud_console.info(line.rstrip())
+        line = line.rstrip()
+        # Only show: digest, pushed, mounted, or error lines
+        if any(
+            keyword in line.lower()
+            for keyword in ["digest:", "pushed", "mounted", "error", "denied"]
+        ):
+            if "pushed" in line.lower():
+                layers_pushed += 1
+            if (
+                verbose
+                or "error" in line.lower()
+                or "denied" in line.lower()
+                or "digest:" in line.lower()
+            ):
+                hud_console.info(line)
+    if layers_pushed > 0 and not verbose:
+        hud_console.info(f"Pushed {layers_pushed} layer(s)")
     process.wait()
@@ -331,8 +347,10 @@ def push_environment(
     hud_console.section_title("Pushed Image")
     hud_console.status_item("Registry", pushed_digest, primary=True)
-    # Update the lock file with registry information
-    lock_data["image"] = pushed_digest
+    # Update the lock file with pushed image reference
+    if "images" not in lock_data:
+        lock_data["images"] = {}
+    lock_data["images"]["pushed"] = image
     # Add push information
     from datetime import UTC, datetime
@@ -348,7 +366,7 @@ def push_environment(
     with open(lock_path, "w") as f:
         yaml.dump(lock_data, f, default_flow_style=False, sort_keys=False)
-    hud_console.success("Updated lock file with registry image")
+    hud_console.success("Updated lock file with pushed image reference")
     # Upload lock file to HUD registry
     try:
@@ -422,7 +440,7 @@ def push_environment(
         elif response.status_code == 401:
             hud_console.error("Authentication failed")
             hud_console.info("Check your HUD_API_KEY is valid")
-            hud_console.info("Get a new key at: https://hud.so/settings")
+            hud_console.info("Get a new key at: https://hud.ai/settings")
             hud_console.info("Set it in your environment or run: hud set HUD_API_KEY=your-key-here")
         elif response.status_code == 403:
             hud_console.error("Permission denied")

hud/cli/rft.py ADDED Viewed

@@ -0,0 +1,311 @@
+from __future__ import annotations
+import logging
+from typing import Any
+import httpx
+import typer
+from rich.console import Console
+from rich.table import Table
+from hud.datasets import load_tasks
+from hud.settings import settings
+from hud.utils.hud_console import HUDConsole
+logger = logging.getLogger(__name__)
+console = Console()
+hud_console = HUDConsole()
+def _patch_mcp_urls_to_staging(tasks: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Recursively patch all mcp.hud.so URLs to https://orcstaging.hud.so in task configs."""
+    def patch_value(obj: Any) -> Any:
+        if isinstance(obj, dict):
+            return {k: patch_value(v) for k, v in obj.items()}
+        elif isinstance(obj, list):
+            return [patch_value(item) for item in obj]
+        elif isinstance(obj, str):
+            # Replace any occurrence of mcp.hud.so with orcstaging.hud.so
+            # Handle various URL formats
+            if "mcp.hud.so" in obj:
+                # Replace the domain while preserving the protocol and path
+                return obj.replace("mcp.hud.so", "orcstaging.hud.so")
+            elif "mcp.hud.ai" in obj:
+                # Also handle mcp.hud.ai URLs
+                return obj.replace("mcp.hud.ai", "orcstaging.hud.so")
+            return obj
+        else:
+            return obj
+    return [patch_value(task) for task in tasks]
+def _fetch_models() -> list[dict[str, Any]]:
+    """Fetch trainable models from the HUD API for the user's team."""
+    url = f"{settings.hud_api_url}/models/"
+    headers = {
+        "Authorization": f"Bearer {settings.api_key}",
+        "x-api-key": settings.api_key or "",
+    }
+    params = {"team_only": "true", "limit": 200}
+    try:
+        with httpx.Client(timeout=30.0) as client:
+            resp = client.get(url, headers=headers, params=params)
+            resp.raise_for_status()
+            data = resp.json()
+            return data.get("models", [])
+    except httpx.HTTPStatusError as e:
+        hud_console.error(f"Failed to fetch models: {e.response.status_code}")
+        if e.response.status_code == 401:
+            hud_console.hint("Check that your HUD_API_KEY is valid")
+        raise typer.Exit(1) from e
+    except httpx.RequestError as e:
+        hud_console.error(f"Connection error while fetching models: {e}")
+        raise typer.Exit(1) from e
+def _select_model(models: list[dict[str, Any]]) -> dict[str, Any]:
+    """Display models and let user select one for training."""
+    # Filter to only trainable models that are ready
+    trainable_models = [
+        m
+        for m in models
+        if m.get("is_trainable", False)
+        and m.get("status") == "ready"
+        and not m.get("public", False)
+        and m.get("model_name") is not None
+    ]
+    if not trainable_models:
+        hud_console.error("No trainable models found in your team.")
+        hud_console.hint("Fork a trainable model at https://api.hud.so/models to start training.")
+        raise typer.Exit(1)
+    # Display models in a table
+    hud_console.section_title("Available Trainable Models")
+    table = Table(show_header=True, header_style="bold")
+    table.add_column("#", style="dim", width=4)
+    table.add_column("Name", style="bold")
+    table.add_column("Status")
+    table.add_column("Provider")
+    for i, model in enumerate(trainable_models, 1):
+        provider_name = (
+            model.get("provider", {}).get("name", "unknown") if model.get("provider") else "unknown"
+        )
+        table.add_row(
+            str(i),
+            model.get("name", "unnamed"),
+            model.get("status", "unknown"),
+            provider_name,
+        )
+    hud_console.console.print(table)
+    hud_console.print("")
+    # Build choices for selection
+    choices = [
+        {"name": f"{m.get('name', 'unnamed')} ({m.get('base_model', 'unknown')})", "value": m}
+        for m in trainable_models
+    ]
+    selected: dict[str, Any] = hud_console.select("Select a model to train:", choices)  # type: ignore[assignment]
+    return selected
+def rft_command(
+    tasks_file: str,
+    reasoning_effort: str = "medium",
+    verbose: bool = False,
+    yes: bool = False,
+    model_id: str | None = None,
+) -> None:
+    """
+    Run Reinforcement Fine-Tuning (RFT) via the HUD RL service.
+    """
+    hud_console.header("HUD RFT (Reinforcement Fine-Tuning)")
+    # Preflight check: API key
+    if not settings.api_key:
+        hud_console.error("HUD_API_KEY not found in environment.")
+        hud_console.info("Run 'hud set HUD_API_KEY=...' or export it.")
+        raise typer.Exit(1)
+    # Model selection
+    selected_model_id: str
+    if model_id:
+        # Use provided model_id directly
+        selected_model_id = model_id
+        hud_console.info(f"Using provided model ID: {selected_model_id}")
+    else:
+        # Fetch and let user select a model
+        hud_console.section_title("Fetching available models")
+        hud_console.info("Loading models from your team...")
+        models = _fetch_models()
+        if yes:
+            # Auto-select first trainable model in non-interactive mode
+            trainable_models = [
+                m
+                for m in models
+                if m.get("is_trainable", False)
+                and m.get("status") == "ready"
+                and not m.get("public", False)
+                and m.get("model_name") is not None
+            ]
+            if not trainable_models:
+                hud_console.error("No trainable models found in your team.")
+                hud_console.hint(
+                    "Fork a trainable model at https://api.hud.so/models to start training."
+                )
+                raise typer.Exit(1)
+            selected_model = trainable_models[0]
+            hud_console.info(
+                f"Auto-selected first trainable model (--yes mode): "
+                f"{selected_model.get('name', 'unnamed')}"
+            )
+        else:
+            selected_model = _select_model(models)
+        selected_model_id = selected_model["id"]
+        hud_console.success(
+            f"Selected model: {selected_model.get('name', 'unnamed')} (ID: {selected_model_id})"
+        )
+    # Preflight check: Convert tasks to remote if needed
+    hud_console.section_title("Preparing tasks for remote training")
+    try:
+        from hud.cli.flows.tasks import convert_tasks_to_remote
+        hud_console.info("Checking task configuration...")
+        tasks_file = convert_tasks_to_remote(tasks_file)
+        hud_console.success("Tasks are ready for remote training")
+    except typer.Exit:
+        raise
+    except Exception as e:
+        hud_console.error(f"Tasks file is not valid for remote training: {e!s}")
+        hud_console.hint("Either ensure the tasks file has remote urls")
+        hud_console.hint("Or run 'hud rft' within an environment directory")
+        raise typer.Exit(1) from e
+    # Load and validate tasks
+    try:
+        # Load tasks as raw dicts for patching and serialization
+        tasks: list[dict[str, Any]] = load_tasks(tasks_file, raw=True)  # type: ignore[assignment]
+        if not tasks:
+            hud_console.error(f"No tasks found in {tasks_file}")
+            raise typer.Exit(1)
+        # Preflight check: Minimum task count
+        task_count = len(tasks)
+        if task_count < 10:
+            hud_console.error(
+                f"Insufficient tasks for RFT training: found {task_count}, need at least 10"
+            )
+            hud_console.hint("RFT requires a minimum of 10 tasks for effective training")
+            raise typer.Exit(1)
+        hud_console.info(f"Loaded {task_count} tasks from {tasks_file}")
+        # Preflight check: Vision support
+        hud_console.section_title("Vision Support Check")
+        hud_console.warning(
+            "RFT does not currently support environments that require vision capabilities."
+        )
+        hud_console.info(
+            "Vision support includes: screenshots, image analysis, visual UI interaction, etc."
+        )
+        if not yes:
+            if hud_console.confirm("Does your environment require vision support?", default=False):
+                hud_console.error("RFT does not support vision-based environments at this time.")
+                hud_console.hint(
+                    "Please use environments that rely on text-based interactions only."
+                )
+                raise typer.Exit(1)
+        else:
+            hud_console.info("Skipping vision support check (--yes mode)")
+        # Patch all mcp.hud.so URLs to orcstaging.hud.so
+        hud_console.info("Patching MCP URLs for staging environment...")
+        tasks = _patch_mcp_urls_to_staging(tasks)
+        # Show task preview
+        if tasks:
+            if yes:
+                # Skip interactive preview in auto-accept mode
+                hud_console.info("Skipping task preview in auto-accept mode (--yes)")
+            else:
+                try:
+                    from hud.cli.utils.viewer import show_json_interactive
+                    hud_console.section_title("Task Preview")
+                    show_json_interactive(
+                        tasks[0], title="Example Task from Dataset", initial_expanded=False
+                    )
+                    hud_console.info("This is how your task will be sent to the RFT service.")
+                    # Ask for confirmation
+                    if not hud_console.confirm(
+                        "\nProceed with RFT training on this dataset?", default=True
+                    ):
+                        hud_console.error("RFT training cancelled")
+                        raise typer.Exit(0)
+                except typer.Exit:
+                    raise  # Re-raise typer.Exit to properly exit on cancellation
+                except Exception as e:
+                    hud_console.warning(f"Could not display task preview: {e}")
+    except typer.Exit:
+        raise  # Re-raise typer.Exit to properly exit
+    except Exception as e:
+        hud_console.error(f"Failed to load tasks file: {e}")
+        raise typer.Exit(1) from e
+    # Prepare payload
+    payload = {
+        "model_id": selected_model_id,
+        "dataset": {"tasks": tasks},
+        "config": {"parameters": {"reasoning_effort": reasoning_effort}},
+    }
+    # Send request to service
+    hud_console.section_title("Submitting RFT job")
+    base_url = settings.hud_rl_url
+    url = f"{base_url}/training/jobs"
+    headers = {"Authorization": f"Bearer {settings.api_key}", "Content-Type": "application/json"}
+    hud_console.info(
+        f"Submitting job to {url}... (this may take a few minutes to run all safety checks)"
+    )
+    try:
+        with httpx.Client(timeout=300.0) as client:
+            resp = client.post(url, json=payload, headers=headers)
+            if resp.status_code >= 400:
+                try:
+                    detail = resp.json()
+                except Exception as e:
+                    detail = f"{resp.text} - {e}"
+                hud_console.error(f"Request failed ({resp.status_code}): {detail}")
+                raise typer.Exit(1)
+            data = resp.json()
+            job_id = data.get("job_id")
+            model_id = data.get("model", {}).get("id")
+            hud_console.success(f"Job launched successfully! ID: {job_id}")
+            hud_console.info(f"Model ID: {model_id}")
+            # Provide helpful next steps
+            hud_console.info(f"To check job status, run: hud rft status {model_id}")
+    except httpx.RequestError as e:
+        hud_console.error(f"Connection error: {e}")
+        hud_console.info("Is the RL service running?")
+        raise typer.Exit(1) from e

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl