PyPI - hud-python - Versions diffs - 0.4.40__tar.gz → 0.4.42__tar.gz - Mend

hud-python 0.4.40.tar.gz → 0.4.42.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (246) hide show

{hud_python-0.4.40 → hud_python-0.4.42}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.4.40
+Version: 0.4.42
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -159,10 +159,10 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
 ## Highlights
+- 🎓 **[One-click RL](https://hud.so/models)** – Run `hud rl` to get a trained model on any environment.
 - 🚀 **[MCP environment skeleton](https://docs.hud.so/core-concepts/mcp-protocol)** – any agent can call any environment.
 - ⚡️ **[Live telemetry](https://hud.so)** – inspect every tool call, observation, and reward in real time.
 - 🗂️ **[Public benchmarks](https://hud.so/leaderboards)** – OSWorld-Verified, SheetBench-50, and more.
-- 🌱 **[Reinforcement learning built-in](rl/)** – Verifiers gym pipelines for GRPO on any environment.
 - 🌐 **[Cloud browsers](environments/remote_browser/)** – AnchorBrowser, Steel, BrowserBase integrations for browser automation.
 - 🛠️ **[Hot-reload dev loop](environments/README.md#phase-5-hot-reload-development-with-cursor-agent)** – `hud dev` for iterating on environments without rebuilds.
@@ -171,27 +171,46 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
 ## Installation
 ```bash
-# Core installation - MCP servers, telemetry, basic tools for environment design
+# SDK - MCP servers, telemetry, evaluation
 pip install hud-python
-# Agent installation - Adds AI providers, datasets
-pip install "hud-python[agent]"
-# CLI utilities
+# CLI - RL pipeline, environment design
 uv tool install hud-python
 # uv tool update-shell
-# From source (latest)
-git clone https://github.com/hud-evals/hud-python
-pip install -e "hud-python[dev]"
 ```
 > See [docs.hud.so](https://docs.hud.so), or add docs to any MCP client:
 > `claude mcp add --transport http docs-hud https://docs.hud.so/mcp`
-## Quickstart
+Before starting, get your HUD_API_KEY at [hud.so](https://hud.so).
+## Quickstart: Training
+Train a Qwen2.5-VL model with RL (GRPO) on any hud dataset:
+```bash
+hud get hud-evals/basic-2048 # from HF
+hud rl basic-2048.json
+```
+> See [agent training docs](https://docs.hud.so/train-agents/quickstart)
-For a tutorial that explains the agent and evaluation design, run ([see quickstart docs](https://docs.hud.so/quickstart)):
+Or make your own environment and dataset:
+```bash
+hud init my-env && cd my-env
+hud dev --interactive
+# When ready to run:
+hud rl
+```
+> See [environment design docs](https://docs.hud.so/build-environments)
+## Quickstart: Evals
+For a tutorial that explains the agent and evaluation design, run:
 ```bash
 uvx hud-python quickstart
@@ -262,20 +281,22 @@ hud rl hud-evals/basic-2048
 # Option B: Download first, modify, then train
 hud get hud-evals/basic-2048
-hud rl basic-2048.jsonl
+hud rl basic-2048.json
 # Optional: baseline evaluation
-hud eval basic-2048.jsonl
+hud eval basic-2048.json
 ```
 Supports multi‑turn RL for both:
 - Language‑only models (e.g., `Qwen/Qwen2.5-7B-Instruct`)
 - Vision‑Language models (e.g., `Qwen/Qwen2.5-VL-3B-Instruct`)
-By default, `hud rl` provisions a persistant server and trainer in the cloud, streams telemetry to `hud.so`, and lets you monitor/manage models at `hud.so/models`. Use `--local` to run entirely on your machines (typically 2+ GPUs: one for vLLM, the rest for training).
+By default, `hud rl` provisions a persistent server and trainer in the cloud, streams telemetry to `hud.so`, and lets you monitor/manage models at `hud.so/models`. Use `--local` to run entirely on your machines (typically 2+ GPUs: one for vLLM, the rest for training).
 Any HUD MCP environment and evaluation works with our RL pipeline (including remote configurations). See the guided docs: `https://docs.hud.so/train-agents/quickstart`.
+Pricing: Hosted vLLM and training GPU rates are listed in the [Training Quickstart → Pricing](https://docs.hud.so/train-agents/quickstart#pricing). Manage billing at the [HUD billing dashboard](https://hud.so/project/billing).
 ## Benchmarking Agents
 This is Claude Computer Use running on our proprietary financial analyst benchmark [SheetBench-50](https://huggingface.co/datasets/hud-evals/SheetBench-50):
@@ -323,7 +344,7 @@ from hud.tools import HudComputerTool
 mcp = MCPServer("My Environment")
 # Add hud tools (see all tools: https://docs.hud.so/reference/tools)
-mcp.add_tool(HudComputerTool())
+mcp.tool(HudComputerTool())
 # Or custom tools (see https://docs.hud.so/build-environments/adapting-software)
 @mcp.tool("launch_app")
@@ -494,11 +515,10 @@ graph LR
 ## Roadmap
-- Merging our forks in to the main `mcp`, `mcp_use`, `verifiers` repositories
+- Merging our forks in to the main `mcp`, `mcp_use` repositories
 - Helpers for building new environments (see [current guide](environments/README.md))
 - Integrations with every major agent framework
 - Evaluation environment registry
-- Native RL training to hud environments (see [current RL support](rl/))
 - MCP opentelemetry standard
 ## Contributing
@@ -509,7 +529,7 @@ Key areas:
 - [Environment examples](environments/) - Add new MCP environments
 - [Agent implementations](hud/agents/) - Add support for new LLM providers
 - [Tool library](hud/tools/) - Extend the built-in tool collection
-- [RL training](rl/) - Improve reinforcement learning pipelines
+- [RL training](hud/rl/) - Improve reinforcement learning pipelines
 Thanks to all our contributors!

{hud_python-0.4.40 → hud_python-0.4.42}/README.md RENAMED Viewed

@@ -22,10 +22,10 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
 ## Highlights
+- 🎓 **[One-click RL](https://hud.so/models)** – Run `hud rl` to get a trained model on any environment.
 - 🚀 **[MCP environment skeleton](https://docs.hud.so/core-concepts/mcp-protocol)** – any agent can call any environment.
 - ⚡️ **[Live telemetry](https://hud.so)** – inspect every tool call, observation, and reward in real time.
 - 🗂️ **[Public benchmarks](https://hud.so/leaderboards)** – OSWorld-Verified, SheetBench-50, and more.
-- 🌱 **[Reinforcement learning built-in](rl/)** – Verifiers gym pipelines for GRPO on any environment.
 - 🌐 **[Cloud browsers](environments/remote_browser/)** – AnchorBrowser, Steel, BrowserBase integrations for browser automation.
 - 🛠️ **[Hot-reload dev loop](environments/README.md#phase-5-hot-reload-development-with-cursor-agent)** – `hud dev` for iterating on environments without rebuilds.
@@ -34,27 +34,46 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
 ## Installation
 ```bash
-# Core installation - MCP servers, telemetry, basic tools for environment design
+# SDK - MCP servers, telemetry, evaluation
 pip install hud-python
-# Agent installation - Adds AI providers, datasets
-pip install "hud-python[agent]"
-# CLI utilities
+# CLI - RL pipeline, environment design
 uv tool install hud-python
 # uv tool update-shell
-# From source (latest)
-git clone https://github.com/hud-evals/hud-python
-pip install -e "hud-python[dev]"
 ```
 > See [docs.hud.so](https://docs.hud.so), or add docs to any MCP client:
 > `claude mcp add --transport http docs-hud https://docs.hud.so/mcp`
-## Quickstart
+Before starting, get your HUD_API_KEY at [hud.so](https://hud.so).
+## Quickstart: Training
+Train a Qwen2.5-VL model with RL (GRPO) on any hud dataset:
+```bash
+hud get hud-evals/basic-2048 # from HF
+hud rl basic-2048.json
+```
+> See [agent training docs](https://docs.hud.so/train-agents/quickstart)
-For a tutorial that explains the agent and evaluation design, run ([see quickstart docs](https://docs.hud.so/quickstart)):
+Or make your own environment and dataset:
+```bash
+hud init my-env && cd my-env
+hud dev --interactive
+# When ready to run:
+hud rl
+```
+> See [environment design docs](https://docs.hud.so/build-environments)
+## Quickstart: Evals
+For a tutorial that explains the agent and evaluation design, run:
 ```bash
 uvx hud-python quickstart
@@ -125,20 +144,22 @@ hud rl hud-evals/basic-2048
 # Option B: Download first, modify, then train
 hud get hud-evals/basic-2048
-hud rl basic-2048.jsonl
+hud rl basic-2048.json
 # Optional: baseline evaluation
-hud eval basic-2048.jsonl
+hud eval basic-2048.json
 ```
 Supports multi‑turn RL for both:
 - Language‑only models (e.g., `Qwen/Qwen2.5-7B-Instruct`)
 - Vision‑Language models (e.g., `Qwen/Qwen2.5-VL-3B-Instruct`)
-By default, `hud rl` provisions a persistant server and trainer in the cloud, streams telemetry to `hud.so`, and lets you monitor/manage models at `hud.so/models`. Use `--local` to run entirely on your machines (typically 2+ GPUs: one for vLLM, the rest for training).
+By default, `hud rl` provisions a persistent server and trainer in the cloud, streams telemetry to `hud.so`, and lets you monitor/manage models at `hud.so/models`. Use `--local` to run entirely on your machines (typically 2+ GPUs: one for vLLM, the rest for training).
 Any HUD MCP environment and evaluation works with our RL pipeline (including remote configurations). See the guided docs: `https://docs.hud.so/train-agents/quickstart`.
+Pricing: Hosted vLLM and training GPU rates are listed in the [Training Quickstart → Pricing](https://docs.hud.so/train-agents/quickstart#pricing). Manage billing at the [HUD billing dashboard](https://hud.so/project/billing).
 ## Benchmarking Agents
 This is Claude Computer Use running on our proprietary financial analyst benchmark [SheetBench-50](https://huggingface.co/datasets/hud-evals/SheetBench-50):
@@ -186,7 +207,7 @@ from hud.tools import HudComputerTool
 mcp = MCPServer("My Environment")
 # Add hud tools (see all tools: https://docs.hud.so/reference/tools)
-mcp.add_tool(HudComputerTool())
+mcp.tool(HudComputerTool())
 # Or custom tools (see https://docs.hud.so/build-environments/adapting-software)
 @mcp.tool("launch_app")
@@ -357,11 +378,10 @@ graph LR
 ## Roadmap
-- Merging our forks in to the main `mcp`, `mcp_use`, `verifiers` repositories
+- Merging our forks in to the main `mcp`, `mcp_use` repositories
 - Helpers for building new environments (see [current guide](environments/README.md))
 - Integrations with every major agent framework
 - Evaluation environment registry
-- Native RL training to hud environments (see [current RL support](rl/))
 - MCP opentelemetry standard
 ## Contributing
@@ -372,7 +392,7 @@ Key areas:
 - [Environment examples](environments/) - Add new MCP environments
 - [Agent implementations](hud/agents/) - Add support for new LLM providers
 - [Tool library](hud/tools/) - Extend the built-in tool collection
-- [RL training](rl/) - Improve reinforcement learning pipelines
+- [RL training](hud/rl/) - Improve reinforcement learning pipelines
 Thanks to all our contributors!

{hud_python-0.4.40 → hud_python-0.4.42}/environments/blank/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ name = "test_test"
 version = "0.1.0"
 description = "A minimal HUD environment"
 requires-python = ">=3.11"
-dependencies = [ "hud-python==0.4.40", "fastapi", "uvicorn[standard]", "httpx>=0.28.1",]
+dependencies = [ "hud-python==0.4.41", "fastapi", "uvicorn[standard]", "httpx>=0.28.1",]
 [build-system]
 requires = [ "hatchling",]

{hud_python-0.4.40 → hud_python-0.4.42}/environments/deepresearch/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ name = "deepresearch"
 version = "0.1.0"
 description = "DeepResearch HUD environment with HTTP backend (EXA on server)"
 requires-python = ">=3.11"
-dependencies = [ "hud-python==0.4.40", "fastapi>=0.104.1", "uvicorn[standard]>=0.24.0", "httpx>=0.24.0",]
+dependencies = [ "hud-python==0.4.41", "fastapi>=0.104.1", "uvicorn[standard]>=0.24.0", "httpx>=0.24.0",]
 [build-system]
 requires = [ "hatchling",]

{hud_python-0.4.40 → hud_python-0.4.42}/hud/agents/__init__.py RENAMED Viewed

@@ -2,14 +2,12 @@ from __future__ import annotations
 from .base import MCPAgent
 from .claude import ClaudeAgent
-from .lite_llm import LiteAgent
 from .openai import OperatorAgent
 from .openai_chat_generic import GenericOpenAIChatAgent
 __all__ = [
     "ClaudeAgent",
     "GenericOpenAIChatAgent",
-    "LiteAgent",
     "MCPAgent",
     "OperatorAgent",
 ]

{hud_python-0.4.40 → hud_python-0.4.42}/hud/agents/openai_chat_generic.py RENAMED Viewed

@@ -204,7 +204,9 @@ class GenericOpenAIChatAgent(MCPAgent):
         try:
             response = await self._invoke_chat_completion(
-                messages=messages, tools=tools, extra=extra # type: ignore
+                messages=messages,
+                tools=tools, # type: ignore
+                extra=extra,
             )
         except Exception as e:
             error_content = f"Error getting response {e}"

{hud_python-0.4.40 → hud_python-0.4.42}/hud/cli/flows/tasks.py RENAMED Viewed

@@ -212,14 +212,14 @@ def convert_tasks_to_remote(tasks_file: str) -> str:
     # Check if tasks already have remote URLs
     already_remote = _validate_tasks(tasks)
+    # If tasks already reference a remote MCP URL, do not require a local environment
+    # or attempt any image updates. Use the dataset as-is.
+    if already_remote:
+        return str(tasks_path)
     # Extract existing images from tasks
     existing_images = _extract_existing_images(tasks)
-    # Load tasks (supports .json and .jsonl)
-    if already_remote and not existing_images:
-        # Tasks are remote but have no image references - just return as-is
-        return str(tasks_path)
     # Locate environment
     env_dir = find_environment_dir(tasks_path)
     if not env_dir:

{hud_python-0.4.40 → hud_python-0.4.42}/hud/cli/rl/gpu_utils.py RENAMED Viewed

@@ -253,6 +253,8 @@ def adjust_config_for_ddp(config: Config, num_gpus: int) -> Config:
     # Update max_parallel_episodes to match
     config.actor.max_parallel_episodes = config.training.batch_size
+    config.training.num_gpus = num_gpus
     # Log the adjustment
     from rich.console import Console

{hud_python-0.4.40 → hud_python-0.4.42}/hud/cli/rl/remote_runner.py RENAMED Viewed

@@ -13,6 +13,7 @@ from pathlib import Path
 from rich.console import Console
 from hud.cli.rl.celebrate import show_confetti_async
+from hud.cli.rl.gpu_utils import adjust_config_for_ddp
 from hud.cli.rl.viewer import show_json_interactive
 from hud.cli.rl.wait_utils import wait_for_enter_cancel_or_change
 from hud.utils.hud_console import hud_console
@@ -309,7 +310,7 @@ def run_remote_training(
         # console.print(gpu_table)
         if yes:
-            gpu_choice = "A100"  # Default GPU in yes mode
+            gpu_choice = "A100"
             hud_console.info(f"Auto-selecting GPU: {gpu_choice} 80GB (--yes mode)")
         else:
             gpu_choice = hud_console.select(
@@ -322,7 +323,7 @@ def run_remote_training(
             )
         if yes:
-            num_gpus = 1  # Default to 1 GPU in yes mode
+            num_gpus = 2 # Default to 2 GPUs in yes mode
             hud_console.info(f"Auto-selecting {num_gpus} GPU(s) (--yes mode)")
         else:
             num_gpus = hud_console.select(
@@ -347,6 +348,10 @@ def run_remote_training(
             yes=yes,
         )
+        config = adjust_config_for_ddp(config, int(num_gpus))
+        config.training.gpu_type = gpu_choice
         # Use a short label for tasks (avoid full absolute paths)
         try:
             if tasks_file and Path(tasks_file).exists():
@@ -357,7 +362,7 @@ def run_remote_training(
         except Exception:
             tasks_label = str(tasks_file)
-        config.job_name = f"RL {model_name} on {tasks_label}"
+        config.job_name = f"RL {tasks_label} | {model_name}"
         # Save config so user can review/edit externally
         temp_config_path = Path(f".rl_config_temp_{model_name}.json")
@@ -421,8 +426,8 @@ def run_remote_training(
         hud_console.info(f"Loading configuration from: {config_file}")
         config = load_config(config_file)
         config_dict = config.to_dict()
-        gpu_choice = "A100"  # Default
-        num_gpus = 1  # Default for non-interactive mode
+        gpu_choice = config.training.gpu_type
+        num_gpus = config.training.num_gpus
     # Launch training
     try:

{hud_python-0.4.40 → hud_python-0.4.42}/hud/rl/actor.py RENAMED Viewed

@@ -109,7 +109,7 @@ class Actor:
         # Run the task
         try:
-            with hud.trace(f"Training | {task.id}", job_id=job_id):
+            with hud.trace(f"Training | {task.prompt}", job_id=job_id):
                 result = await agent.run(task, max_steps=self.actor_config.max_steps_per_episode)
         except Exception:

{hud_python-0.4.40 → hud_python-0.4.42}/hud/rl/config.py RENAMED Viewed

@@ -61,6 +61,9 @@ class ModelConfig:
 @dataclass
 class TrainingConfig:
     """Training hyperparameters."""
+    # GPU parameters
+    gpu_type: str = "A100"
+    num_gpus: int = 2
     # Training parameters
     training_steps: int = 100

{hud_python-0.4.40 → hud_python-0.4.42}/hud/rl/train.py RENAMED Viewed

@@ -103,7 +103,10 @@ async def train(config: Config, tasks: list[Task]) -> None:
     if is_main_process():
         hud_console.info(f"Creating job with config.job_id: {config.job_id}")
         job_obj = hud.create_job(
-            job_id=config.job_id, name=config.job_name, metadata={"config": config.to_dict()}
+            job_id=config.job_id, name=config.job_name, metadata={
+                "config": config.to_dict(),
+                "agent_class": config.model.base_model
+            }
         )
         hud_console.info(f"Created job with job_obj.id: {job_obj.id}")
         job_obj.update_status_sync("running")

{hud_python-0.4.40 → hud_python-0.4.42}/hud/utils/tests/test_version.py RENAMED Viewed

@@ -5,4 +5,4 @@ def test_import():
     """Test that the package can be imported."""
     import hud
-    assert hud.__version__ == "0.4.40"
+    assert hud.__version__ == "0.4.42"

{hud_python-0.4.40 → hud_python-0.4.42}/hud/version.py RENAMED Viewed

@@ -4,4 +4,4 @@ Version information for the HUD SDK.
 from __future__ import annotations
-__version__ = "0.4.40"
+__version__ = "0.4.42"

{hud_python-0.4.40 → hud_python-0.4.42}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "hud-python"
-version = "0.4.40"
+version = "0.4.42"
 description = "SDK for the HUD platform."
 readme = "README.md"
 requires-python = ">=3.11, <3.13"