PyPI - hud-python - Versions diffs - 0.4.59__tar.gz → 0.4.61__tar.gz - Mend

hud-python 0.4.59tar.gz → 0.4.61tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (318) hide show

{hud_python-0.4.59 → hud_python-0.4.61}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.4.59
+Version: 0.4.61
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -59,6 +59,7 @@ Requires-Dist: pydantic<3,>=2.6
 Requires-Dist: questionary==2.1.0
 Requires-Dist: rich>=13.0.0
 Requires-Dist: toml>=0.10.2
+Requires-Dist: tornado>=6.5.2
 Requires-Dist: typer>=0.9.0
 Requires-Dist: watchfiles>=0.21.0
 Requires-Dist: wrapt>=1.14.0

{hud_python-0.4.59 → hud_python-0.4.61}/environments/README.md RENAMED Viewed

@@ -496,7 +496,7 @@ from hud.clients import MCPClient
 async def main():
     # `trace` captures *everything* that happens and sends it to hud.ai
-    with hud.trace("local_test"):
+    async with hud.async_trace("local_test"):
         task = Task(
             prompt="Complete the task",
             mcp_config={

{hud_python-0.4.59 → hud_python-0.4.61}/environments/browser/server/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ version = "0.1.0"
 description = "HUD Browser MCP Server"
 requires-python = ">=3.11,<3.14"
 dependencies = [
-    "hud-python>=0.4.59",
+    "hud-python>=0.4.61",
     "httpx",
     "playwright",
     "pyautogui",

hud_python-0.4.61/environments/jupyter/README.md ADDED Viewed

@@ -0,0 +1,68 @@
+# Jupyter Env (for SpreadSheetBench)
+## QuickStart
+### MCP Server from Dockerhub (Don't Have to Build Docker Image)
+Run task by
+```
+hud eval Genteki/SpreadSheetBench
+```
+### Local MCP Server
+First we build the docker image with
+```
+docker build -t <image/name> .
+```
+Then modify the docker image name in `test_task.json`. Finally, load every required `api_key` into environment variables and run
+```
+hud eval
+```
+## File Structure
+`environments/jupyter` file structure:
+```
+├── Dockerfile
+├── server
+│   ├── config.py
+│   ├── evaluate
+│   │   ├── compare.py
+│   │   ├── dumb.py
+│   │   ├── eval_all.py
+│   │   ├── eval_single.py
+│   │   ├── generalize.py
+│   │   └── __init__.py
+│   ├── __init__.py
+│   ├── main.py
+│   ├── pyproject.toml
+│   ├── setup
+│   │   ├── __init__.py
+│   │   └── load_spreadsheet.py
+│   └── tools
+│       ├── __init__.py
+│       └── jupyter_with_record.py
+└── test_task.json
+```
+Here we introduce the main parts of the environments
+* `main.py`: entry point of the MCP server
+* `tools/jupyter_with_record.py`: offers the `execute_code` method, which lets the agent interact with the Jupyter kernel and records the solution
+* `setup/`: setup methods for the eval task
+* `evaluate/`: evaluation methods for the eval task
+## Related Links
+### Hugging Face
+* [Genteki/SpreadSheetBench-Tiny](https://huggingface.co/datasets/Genteki/SpreadSheetBench-Tiny) (Size: 10)
+* [Genteki/SpreadSheetBench-200](https://huggingface.co/datasets/Genteki/SpreadSheetBench-200) (Size: 200)
+* [Genteki/SpreadSheetBench](https://huggingface.co/datasets/Genteki/SpreadSheetBench) (Size: 912)
+### Example Traces (May require permission)
+* [Single Test Task](https://www.hud.so/trace/d31de170-e70a-4abb-8f95-70512515dade)
+* [Genteki/SpreadSheetBench-Tiny Test](https://www.hud.so/jobs/2c426368-e352-4c79-af4a-aefb136e3f58)
+### Github
+* Feature Branch: [New-Env-Jupyter](https://github.com/Genteki/hud-python/tree/New-Env-Jupyter)

hud_python-0.4.61/environments/jupyter/server/pyproject.toml ADDED Viewed

@@ -0,0 +1,34 @@
+[project]
+name = "sheet-mcp-server"
+version = "0.1.0"
+description = "MCP server for XLSX spreadsheet manipulation"
+authors = [{name = "HUD Team"}]
+requires-python = ">=3.11"
+dependencies = [
+    "hud-python==0.4.61",
+    "pandas>=2.0.0",
+    "openpyxl>=3.1.0",
+    "xlsxwriter>=3.1.0",
+    "jupyter-client>=8.0.0",
+    "jupyter-kernel-gateway>=3.0.0",
+    "ipython>=8.0.0",
+    "nbformat>=5.7.0",
+    "fastapi>=0.100.0",
+    "uvicorn>=0.23.0",
+    "tornado>=6.0.0",
+    "aiohttp>=3.8.0",
+]
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.0.0",
+    "pytest-asyncio>=0.21.0",
+]
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["server*"]

hud_python-0.4.61/environments/online_mind2web/README.md ADDED Viewed

@@ -0,0 +1,36 @@
+# HUD Online Mind2Web Taskset
+Based on hud remote-browser, this MCP server provides an environment for Online-Mind2Web task execution and evaluation.
+## Running with Docker
+The Docker image supports both production and development modes using the same Dockerfile.
+### Building the Image
+```bash
+# Production build (default)
+docker build -t hud-om2w:latest .
+```
+### Running the Test Task
+```bash
+hud eval ./test_task.json
+```
+### Running Whole Online-Mind2Web Dataset From HuggingFace
+```bash
+hud eval Genteki/Online-Mind2Web --full --max-concurrent=5
+```
+### Different Evaluation Method
+To choose a different evaluation method, change the `task["evaluate_tool"]["evaluate"]["name"]` value in the task JSON file. The supported evaluation methods are:
+| Evaluation Method | Final Screenshot | Screenshot History | Action History |
+|:---|:---:|:---:| :---: |
+| `autonomous` | ✔ | ✗ | ✔ |
+| `webjudge` | ✔ | ✔ | ✔ |
+| `overall_judge`[^1] | - | - | - |
+[^1]: `overall_judge` runs all of the evaluation methods above and returns the average of their rewards.

hud_python-0.4.61/environments/online_mind2web/pyproject.toml ADDED Viewed

@@ -0,0 +1,22 @@
+[project]
+name = "hud-om2w"
+version = "0.1.0"
+description = "HUD Remote Browser Controller with MCP tools for cloud browser providers"
+requires-python = ">=3.11,<3.13"
+dependencies = [ "hud-python==0.4.61", "pyautogui", "playwright", "httpx", "typer", "google-api-python-client", "google-auth",]
+[build-system]
+requires = [ "hatchling",]
+build-backend = "hatchling.build"
+[project.scripts]
+hud-om2w = "hud_controller.__main__:main"
+[tool.hud]
+image = "hud-om2w:dev"
+[tool.hatch.metadata]
+allow-direct-references = true
+[tool.hatch.build.targets.wheel]
+packages = [ "src/hud_controller",]

hud_python-0.4.61/environments/remote_browser/src/hud_controller/providers/README.md ADDED Viewed

@@ -0,0 +1,110 @@
+# Remote Browser Providers
+This directory contains implementations for various cloud browser providers that can be used with the HUD Remote Browser environment.
+## Supported Providers
+### 1. **AnchorBrowser** ✅ (Implemented)
+- **API Endpoint**: `https://api.anchorbrowser.io/v1/sessions`
+- **Features**:
+  - Residential proxy support
+  - CAPTCHA solving
+  - Ad blocking
+  - Popup blocking
+- **API Key**: `ANCHOR_API_KEY` environment variable
+- **Documentation**: Internal
+### 2. **BrowserBase** 🚧 (To be implemented)
+- **API Endpoint**: `https://api.browserbase.com/v1/sessions`
+- **Features**:
+  - Multiple regions support
+  - Context persistence
+  - Live view URLs
+  - Session recordings
+  - Proxy support
+- **API Key**: `X-BB-API-Key` header
+- **Documentation**: https://docs.browserbase.com/reference/api/create-a-session
+### 3. **HyperBrowser** 🚧 (To be implemented)
+- **API Endpoint**: `https://api.hyperbrowser.ai/api/session`
+- **Features**:
+  - Stealth mode
+  - Advanced proxy configuration (country/state/city)
+  - Profile management
+  - Web recording
+  - CAPTCHA solving
+  - Ad blocking
+  - Browser fingerprinting
+- **API Key**: `x-api-key` header
+- **Documentation**: https://docs.hyperbrowser.ai/reference/api-reference/sessions
+### 4. **Steel** 🚧 (To be implemented)
+- **API Endpoint**: `https://api.steel.dev/v1/sessions`
+- **Features**:
+  - Session management
+  - Browser automation
+  - Proxy support
+- **API Key**: `steel_api_key` header or `STEEL_API_KEY` env variable
+- **Documentation**: https://docs.steel.dev/api-reference
+### 5. **Kernel** ❌ (Not yet available)
+- **Status**: API not yet available for browser sessions
+- **Documentation**: N/A
+## Provider Lifecycle
+Each provider follows a similar lifecycle pattern:
+1. **Initialization**
+   - Set up API credentials
+   - Configure base URLs and default options
+2. **Session Creation** (`launch()`)
+   - Make API request to create a new browser session
+   - Handle provider-specific options (proxy, stealth, etc.)
+   - Return CDP WebSocket URL for Playwright connection
+3. **Session Management**
+   - Track session IDs and metadata
+   - Provide status checks
+   - Handle session-specific features (live view, recordings, etc.)
+4. **Session Termination** (`close()`)
+   - Clean up resources
+   - End the browser session via API
+   - Handle any provider-specific cleanup
+## Implementation Guide
+To add a new provider:
+1. Create a new file in this directory (e.g., `browserbase.py`)
+2. Inherit from `BrowserProvider` base class
+3. Implement required methods:
+   - `__init__()` - Initialize with API credentials
+   - `launch()` - Create a new session and return CDP URL
+   - `close()` - Terminate the session
+   - `get_status()` - Return session status
+4. Add provider to the registry in `__init__.py`
+5. Update environment variables in the main README
+## Environment Variables
+Each provider uses specific environment variables:
+- **AnchorBrowser**: `ANCHOR_API_KEY`
+- **BrowserBase**: `BROWSERBASE_API_KEY`
+- **HyperBrowser**: `HYPERBROWSER_API_KEY`
+- **Steel**: `STEEL_API_KEY`
+## Common Features Across Providers
+| Feature | AnchorBrowser | BrowserBase | HyperBrowser | Steel |
+|---------|---------------|-------------|--------------|-------|
+| Proxy Support | ✅ | ✅ | ✅ | ✅ |
+| CAPTCHA Solving | ✅ | ❓ | ✅ | ❓ |
+| Ad Blocking | ✅ | ❓ | ✅ | ❓ |
+| Session Recording | ❌ | ✅ | ✅ | ❓ |
+| Live View | ✅ | ✅ | ✅ | ❓ |
+| Profile Persistence | ❌ | ✅ | ✅ | ❓ |
+| Multi-Region | ❌ | ✅ | ✅ | ❓ |

{hud_python-0.4.59 → hud_python-0.4.61}/hud/agents/gemini.py RENAMED Viewed

@@ -461,7 +461,8 @@ class GeminiAgent(MCPAgent):
     def _remove_old_screenshots(self, messages: list[genai_types.Content]) -> None:
         """
         Remove screenshots from old turns to manage context length.
-        Keeps only the last N turns with screenshots (configured via self.max_recent_turn_with_screenshots).
+        Keeps only the last N turns with screenshots (configured via
+        self.max_recent_turn_with_screenshots).
         """
         turn_with_screenshots_found = 0

{hud_python-0.4.59 → hud_python-0.4.61}/hud/cli/__init__.py RENAMED Viewed

@@ -602,6 +602,9 @@ def build(
     platform: str | None = typer.Option(
         None, "--platform", help="Set Docker target platform (e.g., linux/amd64)"
     ),
+    remote_cache: str | None = typer.Option(
+        None, "--remote-cache", help="Enable remote cache using Amazon ECR with specified repo name"
+    ),
 ) -> None:
     """🏗️ Build a HUD environment and generate lock file.
@@ -614,8 +617,9 @@ def build(
         hud build                    # Build current directory
         hud build environments/text_2048 -e API_KEY=secret
         hud build . --tag my-env:v1.0 -e VAR1=value1 -e VAR2=value2
-        hud build . --no-cache       # Force rebuild[/not dim]
-    """
+        hud build . --no-cache       # Force rebuild
+        hud build . --remote-cache my-cache-repo   # Use ECR remote cache (requires AWS_ACCOUNT_ID and AWS_DEFAULT_REGION)[/not dim]
+    """  # noqa: E501
     # Parse directory and extra arguments
     if params:
         directory = params[0]
@@ -652,7 +656,7 @@ def build(
         else:
             i += 1
-    build_command(directory, tag, no_cache, verbose, env_vars, platform)
+    build_command(directory, tag, no_cache, verbose, env_vars, platform, remote_cache)
 @app.command()

{hud_python-0.4.59 → hud_python-0.4.61}/hud/cli/build.py RENAMED Viewed

@@ -365,6 +365,7 @@ def build_docker_image(
     verbose: bool = False,
     build_args: dict[str, str] | None = None,
     platform: str | None = None,
+    remote_cache: str | None = None,
 ) -> bool:
     """Build a Docker image from a directory."""
     hud_console = HUDConsole()
@@ -376,17 +377,62 @@ def build_docker_image(
         hud_console.error(f"No Dockerfile found in {directory}")
         return False
-    # Default platform to match RL pipeline unless explicitly overridden
+    # Build command - use buildx when remote cache is enabled
     effective_platform = platform if platform is not None else "linux/amd64"
+    cmd = ["docker", "buildx", "build"] if remote_cache else ["docker", "build"]
-    # Build command
-    cmd = ["docker", "build"]
     if effective_platform:
         cmd.extend(["--platform", effective_platform])
     cmd.extend(["-t", tag])
     if no_cache:
         cmd.append("--no-cache")
+    # Add remote cache support for ECR
+    if remote_cache:
+        try:
+            import os
+            import re
+            # Validate ECR repo name
+            if not re.match(r"^[a-z0-9]([a-z0-9\-_]*[a-z0-9])?$", remote_cache):
+                hud_console.error(f"Invalid ECR repo name: {remote_cache}")
+                hud_console.info(
+                    "ECR repo names must contain only lowercase letters, numbers, hyphens, and underscores"  # noqa: E501
+                )
+                return False
+            # Get required environment variables
+            aws_account_id = os.getenv("AWS_ACCOUNT_ID")
+            aws_region = os.getenv("AWS_DEFAULT_REGION", "us-east-1")
+            if not aws_account_id:
+                hud_console.error("AWS_ACCOUNT_ID environment variable not set")
+                return False
+            # ECR cache image reference
+            cache_image = (
+                f"{aws_account_id}.dkr.ecr.{aws_region}.amazonaws.com/{remote_cache}:cache"
+            )
+            # Add cache arguments with proper ECR format
+            cmd.extend(
+                [
+                    "--cache-from",
+                    f"type=registry,ref={cache_image}",
+                    "--cache-to",
+                    f"mode=max,image-manifest=true,oci-mediatypes=true,type=registry,ref={cache_image}",
+                    "--load",  # Load image to local Docker after build
+                ]
+            )
+            hud_console.success(f"Remote cache configured: {cache_image}")
+        except typer.Exit:
+            raise
+        except Exception as e:
+            hud_console.error(f"Remote cache setup error: {e}")
+            return False
     # Add build args
     for key, value in build_args.items():
         cmd.extend(["--build-arg", f"{key}={value}"])
@@ -412,6 +458,7 @@ def build_environment(
     verbose: bool = False,
     env_vars: dict[str, str] | None = None,
     platform: str | None = None,
+    remote_cache: str | None = None,
 ) -> None:
     """Build a HUD environment and generate lock file."""
     hud_console = HUDConsole()
@@ -482,6 +529,7 @@ def build_environment(
         verbose,
         build_args=None,
         platform=platform,
+        remote_cache=remote_cache,
     ):
         hud_console.error("Docker build failed")
         raise typer.Exit(1)
@@ -655,11 +703,50 @@ def build_environment(
     version_tag = f"{base_name}:{new_version}"
     latest_tag = f"{base_name}:latest"
-    label_cmd = ["docker", "build"]
+    # Build command - use buildx when remote cache is enabled
+    label_cmd = ["docker", "buildx", "build"] if remote_cache else ["docker", "build"]
     # Use same defaulting for the second build step
     label_platform = platform if platform is not None else "linux/amd64"
     if label_platform:
         label_cmd.extend(["--platform", label_platform])
+    # Add remote cache support for final build
+    if remote_cache:
+        try:
+            import os
+            import re
+            if not re.match(r"^[a-z0-9]([a-z0-9\-_]*[a-z0-9])?$", remote_cache):
+                hud_console.error(f"Invalid ECR repo name: {remote_cache}")
+                raise typer.Exit(1)
+            aws_account_id = os.getenv("AWS_ACCOUNT_ID")
+            aws_region = os.getenv("AWS_DEFAULT_REGION", "us-east-1")
+            if not aws_account_id:
+                hud_console.error("AWS_ACCOUNT_ID environment variable not set")
+                raise typer.Exit(1)
+            cache_image = (
+                f"{aws_account_id}.dkr.ecr.{aws_region}.amazonaws.com/{remote_cache}:cache"
+            )
+            label_cmd.extend(
+                [
+                    "--cache-from",
+                    f"type=registry,ref={cache_image}",
+                    "--cache-to",
+                    f"mode=max,image-manifest=true,oci-mediatypes=true,type=registry,ref={cache_image}",
+                    "--load",  # Load image to local Docker after build
+                ]
+            )
+        except typer.Exit:
+            raise
+        except Exception as e:
+            hud_console.error(f"Remote cache setup error: {e}")
+            raise typer.Exit(1) from e
     label_cmd.extend(
         [
             "--label",
@@ -780,6 +867,7 @@ def build_command(
     verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed output"),
     env_vars: dict[str, str] | None = None,
     platform: str | None = None,
+    remote_cache: str | None = None,
 ) -> None:
     """Build a HUD environment and generate lock file."""
-    build_environment(directory, tag, no_cache, verbose, env_vars, platform)
+    build_environment(directory, tag, no_cache, verbose, env_vars, platform, remote_cache)

{hud_python-0.4.59 → hud_python-0.4.61}/hud/cli/eval.py RENAMED Viewed

@@ -260,9 +260,8 @@ async def run_single_task(
 ) -> None:
     """Load one task and execute it, or detect if JSON contains a list and run as dataset."""
-    # Provide early feedback to user
     hud_console.info("🔧 Initializing evaluation...")
-    # Import Task and run_dataset lazily
     try:
         from hud.utils.tasks import load_tasks
     except ImportError as e:
@@ -399,23 +398,31 @@ async def run_single_task(
     if group_size > 1:
         hud_console.info(f"🔄 Running task with group_size={group_size}")
-        # Run with grouping
-        stats = await run_tasks_grouped(
-            tasks=[task],
-            agent_class=agent_class,
-            agent_config=agent_config,
-            group_size=group_size,
-            max_parallel_episodes=48,  # Same as RL default
-            max_steps=max_steps,
-            verbose=verbose,
-        )
+        async with hud.async_job(
+            name=f"Group Eval: {task_prompt[:50]}... (x{group_size})",
+            metadata={
+                "task_id": getattr(task, "id", None),
+                "group_size": group_size,
+                "total_episodes": group_size,
+            },
+        ) as job:
+            stats = await run_tasks_grouped(
+                tasks=[task],
+                agent_class=agent_class,
+                agent_config=agent_config,
+                group_size=group_size,
+                max_parallel_episodes=48,
+                max_steps=max_steps,
+                verbose=verbose,
+                job_id=job.id,
+            )
         display_group_statistics(stats, show_details=True)
     else:
         # Enable agent step logging for single task mode
         logging.getLogger("hud.agents").setLevel(logging.INFO)
         logging.getLogger("hud.agents.base").setLevel(logging.INFO)
-        with hud.trace(name=task_prompt):
+        async with hud.async_trace(name=task_prompt):
             agent = build_agent(
                 agent_type,
                 model=model,
@@ -442,10 +449,8 @@ async def run_full_dataset(
 ) -> list[Any]:
     """Run evaluation across the entire dataset using asyncio-based concurrency."""
-    # Provide early feedback to user
     hud_console.info("🔧 Initializing evaluation...")
-    # Import run_dataset lazily
     try:
         from hud.datasets import run_dataset
         from hud.utils.tasks import load_tasks
@@ -627,7 +632,7 @@ async def run_full_dataset(
         hud_console.info(f"🔄 Running dataset with group_size={group_size}")
         # Run with job tracking
-        with hud.job(
+        async with hud.async_job(
             name=f"Evaluation {dataset_name} (group_size={group_size})",
             metadata={
                 "dataset": source,

{hud_python-0.4.59 → hud_python-0.4.61}/hud/datasets/parallel.py RENAMED Viewed

@@ -371,7 +371,7 @@ async def run_dataset_parallel_manual(
             logger.warning("Failed to extract dataset verification info")
     # Create job context
-    with hud.job(name, metadata=job_metadata, dataset_link=dataset_link) as job_obj:
+    async with hud.async_job(name, metadata=job_metadata, dataset_link=dataset_link) as job_obj:
         # Prepare agent class info for pickling
         agent_module = agent_class.__module__
         agent_name = agent_class.__name__

{hud_python-0.4.59 → hud_python-0.4.61}/hud/datasets/runner.py RENAMED Viewed

@@ -30,20 +30,14 @@ async def run_dataset(
 ) -> list[Any]:
     """Run all tasks in a dataset with automatic job and telemetry tracking.
-    This function handles concurrent task execution with proper telemetry collection.
-    All tasks are executed in parallel up to `max_concurrent`, with full telemetry
-    automatically uploaded to the HUD platform.
     Args:
         name: Name for the job
         dataset: HuggingFace dataset identifier (e.g. "hud-evals/SheetBench-50"),
                 Dataset object, OR list of Task objects
         agent_class: Agent class to instantiate (e.g., ClaudeAgent)
-        agent_config: Configuration/kwargs for agent (model, etc.)
-        max_concurrent: Maximum parallel task execution. Higher values improve throughput
-                       but may increase memory usage. Recommended: 30-200 depending on
-                       task complexity and available resources.
-        metadata: Optional metadata for the job
+        agent_config: Configuration kwargs for agent initialization
+        max_concurrent: Maximum concurrent tasks (recommended: 50-200)
+        metadata: Optional job metadata
         max_steps: Maximum steps per task
         split: Dataset split to use when loading from string (default: "train")
         auto_respond: Whether to use auto-response agent
@@ -101,7 +95,6 @@ async def run_dataset(
         except Exception:
             logger.warning("Failed to extract dataset verification info")
-    # Use async job context manager for high-concurrency telemetry
     async with hud.async_job(name, metadata=job_metadata, dataset_link=dataset_link) as job_obj:
         # Run tasks with semaphore for concurrency control
         sem = asyncio.Semaphore(max_concurrent)
@@ -112,12 +105,10 @@ async def run_dataset(
                 try:
                     # Create trace for this task
                     task_name = task_dict.get("prompt") or f"Task {index}"
-                    # Ensure task_id is a string for baggage propagation
                     raw_task_id = task_dict.get("id")
                     safe_task_id = str(raw_task_id) if raw_task_id is not None else None
                     async with hud.async_trace(task_name, job_id=job_obj.id, task_id=safe_task_id):
-                        # with hud.trace(task_name, job_id=job_obj.id, task_id=safe_task_id):
                         # Convert dict to Task here, at trace level
                         task = Task(**task_dict)
@@ -141,44 +132,4 @@ async def run_dataset(
             if isinstance(result, Exception):
                 logger.error("Worker %s failed with exception: %s", i, result, exc_info=result)
-    # Ensure all telemetry is uploaded before returning
-    await _flush_telemetry()
     return results
-async def _flush_telemetry() -> None:
-    """Flush all pending telemetry operations.
-    Ensures complete telemetry upload by:
-    1. Waiting for all async status updates to complete
-    2. Forcing OpenTelemetry span processor to export remaining spans
-    This prevents telemetry loss at high concurrency (200+ tasks) by ensuring
-    all operations complete before process exit.
-    """
-    from hud.otel.config import is_telemetry_configured
-    from hud.utils import hud_console
-    from hud.utils.task_tracking import wait_all_tasks
-    hud_console.info("Uploading telemetry...")
-    # Step 1: Wait for async status updates (job/trace status)
-    completed_tasks = await wait_all_tasks(timeout_seconds=20.0)
-    if completed_tasks > 0:
-        hud_console.info(f"Completed {completed_tasks} pending telemetry tasks")
-    # Step 2: Flush OpenTelemetry span exports
-    if is_telemetry_configured():
-        try:
-            from opentelemetry import trace
-            from opentelemetry.sdk.trace import TracerProvider
-            provider = trace.get_tracer_provider()
-            if isinstance(provider, TracerProvider):
-                provider.force_flush(timeout_millis=20000)
-                logger.debug("OpenTelemetry spans flushed successfully")
-        except Exception as e:
-            logger.warning("Failed to flush OpenTelemetry: %s", e)
-    hud_console.info("Telemetry uploaded successfully")

hud-python 0.4.59__tar.gz → 0.4.61__tar.gz

hud-python 0.4.59tar.gz → 0.4.61tar.gz